This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Bug uprobes/13539] occasional oops, kernel SEGV, RHEL5, :uprobes:uprobe_free_process+0xba/0x131


While I still cannot see a reason how uprobe_{free,put}_process can 
race uprobe_report_{exit,exec}, I certainly think somebit of cleanup 
can be done.  However I am dont think we need to do a utask or uproc lookup
from the table. Especially in case of callbacks.

Mostly similar to what Jim proposed.
I havent tested this patch myself and I couldnt reproduce the problem.

---

 runtime/uprobes/uprobes.c  |  106 ++++++++++++++++++++++++++++++++-----------
 runtime/uprobes2/uprobes.c |  105 ++++++++++++++++++++++++++++---------------
 2 files changed, 148 insertions(+), 63 deletions(-)

diff --git a/runtime/uprobes/uprobes.c b/runtime/uprobes/uprobes.c
index 47fb1c9..ed1e94b 100644
--- a/runtime/uprobes/uprobes.c
+++ b/runtime/uprobes/uprobes.c
@@ -114,33 +114,39 @@ struct delayed_signal {
 	siginfo_t info;
 };
 
-static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
+static struct uprobe_task *uprobe_find_utask_locked(struct task_struct *tsk)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 	struct uprobe_task *utask;
-	unsigned long flags;
 
 	head = &utask_table[hash_ptr(tsk, UPROBE_HASH_BITS)];
-	lock_utask_table(flags);
 	hlist_for_each_entry(utask, node, head, hlist) {
-		if (utask->tsk == tsk) {
-			unlock_utask_table(flags);
+		if (utask->tsk == tsk)
 			return utask;
-		}
 	}
-	unlock_utask_table(flags);
 	return NULL;
 }
 
+static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
+{
+        struct uprobe_task *utask;
+        unsigned long flags;
+
+        lock_utask_table(flags);
+        utask = uprobe_find_utask_locked(tsk);
+        unlock_utask_table(flags);
+        return utask;
+}
+
 static void uprobe_hash_utask(struct uprobe_task *utask)
 {
 	struct hlist_head *head;
 	unsigned long flags;
 
 	INIT_HLIST_NODE(&utask->hlist);
-	head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
 	lock_utask_table(flags);
+	head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
 	hlist_add_head(&utask->hlist, head);
 	unlock_utask_table(flags);
 }
@@ -154,9 +160,11 @@ static void uprobe_unhash_utask(struct uprobe_task *utask)
 	unlock_utask_table(flags);
 }
 
-static inline void uprobe_get_process(struct uprobe_process *uproc)
+static struct uprobe_process * uprobe_get_process(struct uprobe_process *uproc)
 {
-	atomic_inc(&uproc->refcount);
+	if (atomic_inc_not_zero(&uproc->refcount))
+		return uproc;
+	return NULL;
 }
 
 /*
@@ -186,8 +194,9 @@ static struct uprobe_process *uprobe_find_process(pid_t tgid)
 	head = &uproc_table[hash_long(tgid, UPROBE_HASH_BITS)];
 	hlist_for_each_entry(uproc, node, head, hlist) {
 		if (uproc->tgid == tgid && !uproc->finished) {
-			uprobe_get_process(uproc);
-			down_write(&uproc->rwsem);
+			uproc = uprobe_get_process(uproc);
+			if (uproc)
+				down_write(&uproc->rwsem);
 			return uproc;
 		}
 	}
@@ -476,6 +485,12 @@ static void uprobe_free_task(struct uprobe_task *utask)
 	struct deferred_registration *dr, *d;
 	struct delayed_signal *ds, *ds2;
 
+        printk(KERN_INFO "uprobe_free_task %p (tid %ld), caller %pS, ctid %ld\n", utask, utask->tsk->pid, _RET_IP_, current->pid);
+
+        /*
+         * Do this first, since a utask that's still in the utask_table
+         * is assumed (e.g., by uprobe_report_exit) to be valid.
+         */
 	uprobe_unhash_utask(utask);
 	list_del(&utask->list);
 	list_for_each_entry_safe(dr, d, &utask->deferred_registrations, list) {
@@ -487,7 +502,6 @@ static void uprobe_free_task(struct uprobe_task *utask)
 		list_del(&ds->list);
 		kfree(ds);
 	}
-
 	utask_free_uretprobe_instances(utask);
 
 	kfree(utask);
@@ -499,12 +513,13 @@ static void uprobe_free_process(struct uprobe_process *uproc)
 	struct uprobe_task *utask, *tmp;
 	struct uprobe_ssol_area *area = &uproc->ssol_area;
 
+        printk(KERN_INFO "uprobe_free_process %p (pid %ld), caller %pS, ctid %ld\n", uproc, uproc->tgid, _RET_IP_, current->pid);
+
 	if (!uproc->finished)
 		uprobe_release_ssol_vma(uproc);
 	if (area->slots)
 		kfree(area->slots);
-	if (!hlist_unhashed(&uproc->hlist))
-		hlist_del(&uproc->hlist);
+	hlist_del(&uproc->hlist);
 	list_for_each_entry_safe(utask, tmp, &uproc->thread_list, list) {
 		/*
 		 * utrace_detach() is OK here (required, it seems) even if
@@ -515,6 +530,7 @@ static void uprobe_free_process(struct uprobe_process *uproc)
 		uprobe_free_task(utask);
 	}
 	up_write(&uproc->rwsem);	// So kfree doesn't complain
+        printk(KERN_INFO "uprobe_free_process zap %p\n", uproc);
 	kfree(uproc);
 }
 
@@ -539,7 +555,9 @@ static int uprobe_put_process(struct uprobe_process *uproc)
 			/*
 			 * The works because uproc_mutex is held any
 			 * time the ref count can go from 0 to 1 -- e.g.,
-			 * register_uprobe() sneaks in with a new probe.
+			 * register_uprobe() snuck in with a new probe,
+                         * or a callback such as uprobe_report_exit()
+                         * just started.
 			 */
 			up_write(&uproc->rwsem);
 		} else {
@@ -852,7 +870,7 @@ static int uprobe_validate_vma(struct task_struct *t, unsigned long vaddr)
 	mmput(mm);
 	return ret;
 }
-	
+
 /* Probed address must be in an executable VM area, outside the SSOL area. */
 static int uprobe_validate_vaddr(struct task_struct *p, unsigned long vaddr,
 	struct uprobe_process *uproc)
@@ -1961,9 +1979,15 @@ static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine,
 	struct uprobe_task *utask;
 	struct uprobe_process *uproc;
 
+	rcu_read_lock();
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
 	BUG_ON(!utask);
-	uproc = utask->uproc;
+	uproc = uprobe_get_process(utask->uproc);
+	rcu_read_unlock();
+
+	if (!uproc)
+		return UTRACE_ACTION_DETACH|UTRACE_ACTION_RESUME;
+
 	if (current == utask->quiesce_master) {
 		/*
 		 * tsk was already quiescent when quiesce_all_threads()
@@ -1977,8 +2001,11 @@ static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine,
 	}
 
 	BUG_ON(utask->active_probe);
+
 	down_write(&uproc->rwsem);
 
+        printk(KERN_INFO "uprobe_report_quiesce2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
+
 	/*
 	 * When a thread hits a breakpoint or single-steps, utrace calls
 	 * this quiesce callback before our signal callback.  We must
@@ -1998,6 +2025,8 @@ static u32 uprobe_report_quiesce(struct utrace_attached_engine *engine,
 	check_uproc_quiesced(uproc, tsk);
 done:
 	up_write(&uproc->rwsem);
+	uprobe_put_process(uproc);
+        printk(KERN_INFO "uprobe_report_quiesce3 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 	return UTRACE_ACTION_RESUME;
 }
 
@@ -2074,8 +2103,14 @@ static u32 uprobe_report_exit(struct utrace_attached_engine *engine,
 	int utask_quiescing;
 
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
-	uproc = utask->uproc;
-	uprobe_get_process(uproc);
+	if (utask)
+		uproc = uprobe_get_process(utask->uproc);
+
+	if (!utask || !uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_ACTION_DETACH;
+
+        printk(KERN_INFO "uprobe_report_exit %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 
 	ppt = utask->active_probe;
 	if (ppt) {
@@ -2108,6 +2143,9 @@ static u32 uprobe_report_exit(struct utrace_attached_engine *engine,
 	}
 
 	down_write(&uproc->rwsem);
+
+        printk(KERN_INFO "uprobe_report_exit2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
+
 	utask_quiescing = utask->quiescing;
 	uprobe_free_task(utask);
 
@@ -2129,6 +2167,8 @@ static u32 uprobe_report_exit(struct utrace_attached_engine *engine,
 		uprobe_cleanup_process(uproc);
 	}
 	up_write(&uproc->rwsem);
+        printk(KERN_INFO "uprobe_report_exit3 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
+
 	uprobe_put_process(uproc);
 
 	return UTRACE_ACTION_DETACH;
@@ -2241,7 +2281,7 @@ static int uprobe_fork_uproc(struct uprobe_process *parent_uproc,
 	child_utask = uprobe_find_utask(child_tsk);
 	BUG_ON(!child_utask);
 	ret = uprobe_fork_uretprobe_instances(parent_utask, child_utask);
-	
+
 	hlist_add_head(&child_uproc->hlist,
 			&uproc_table[hash_long(child_uproc->tgid,
 			UPROBE_HASH_BITS)]);
@@ -2271,6 +2311,7 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine,
 	ptask = (struct uprobe_task *)rcu_dereference(engine->data);
 	uproc = ptask->uproc;
 
+        printk(KERN_INFO "uprobe_report_clone %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 	/*
 	 * Lock uproc so no new uprobes can be installed 'til all
 	 * report_clone activities are completed.  Lock uproc_table
@@ -2280,6 +2321,8 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine,
 	down_write(&uproc->rwsem);
 	get_task_struct(child);
 
+        printk(KERN_INFO "uprobe_report_clone2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
+
 	if (clone_flags & (CLONE_THREAD|CLONE_VM)) {
 		/* New thread in the same process (CLONE_THREAD) or
 		 * processes sharing the same memory space (CLONE_VM). */
@@ -2323,7 +2366,7 @@ static u32 uprobe_report_clone(struct utrace_attached_engine *engine,
 				}
 			}
 		}
-		
+
 		if (!hlist_empty(&ptask->uretprobe_instances))
 			(void) uprobe_fork_uproc(uproc, ptask, child);
 	}
@@ -2360,8 +2403,14 @@ static u32 uprobe_report_exec(struct utrace_attached_engine *engine,
 	u32 ret = UTRACE_ACTION_RESUME;
 
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
-	uproc = utask->uproc;
-	uprobe_get_process(uproc);
+	if (utask)
+		uproc = uprobe_get_process(utask->uproc);
+
+	if (!utask || !uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_ACTION_DETACH;
+
+        printk(KERN_INFO "uprobe_report_exec %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 
 	/*
 	 * Only cleanup if we're the last thread.  If we aren't,
@@ -2390,11 +2439,14 @@ static u32 uprobe_report_exec(struct utrace_attached_engine *engine,
                 ret = UTRACE_ACTION_DETACH;
 	}
 	up_write(&uproc->rwsem);
+        printk(KERN_INFO "uprobe_report_exec2 %p %ld=%ld\n", uproc, uproc->tgid, current->pid);
 
 	/* If any [un]register_uprobe is pending, it'll clean up. */
 	if (uprobe_put_process(uproc))
 		ret = UTRACE_ACTION_DETACH;
 
+        printk(KERN_INFO "uprobe_report_exec4 %p %ld=%ld ret=%lu\n", uproc, uproc->tgid, current->pid, (unsigned long)ret);
+
 	return ret;
 }
 
@@ -2631,7 +2683,7 @@ static inline unsigned long lookup_uretprobe(struct hlist_node *r,
 {
 	struct uretprobe_instance *ret_inst;
 	unsigned long trampoline_addr;
-	
+
 	if (IS_ERR(uproc->uretprobe_trampoline_addr))
 	  return pc;
 	trampoline_addr = (unsigned long)uproc->uretprobe_trampoline_addr;
@@ -2672,7 +2724,7 @@ unsigned long uprobe_get_pc(struct uretprobe_instance *ri, unsigned long pc,
 		if (!uk)
 			return 0;
 		uproc = uk->ppt->uproc;
-		r = &ri->hlist;		
+		r = &ri->hlist;
 	}
 	return lookup_uretprobe(r, uproc, pc, sp);
 }
@@ -2685,7 +2737,7 @@ unsigned long uprobe_get_pc_task(struct task_struct *task, unsigned long pc,
 	struct uprobe_task *utask;
 	struct uprobe_process *uproc;
 	unsigned long result;
-		
+
 	utask = uprobe_find_utask(task);
 	if (!utask) {
 		return pc;
diff --git a/runtime/uprobes2/uprobes.c b/runtime/uprobes2/uprobes.c
index b3c6e57..953ddf3 100644
--- a/runtime/uprobes2/uprobes.c
+++ b/runtime/uprobes2/uprobes.c
@@ -114,33 +114,39 @@ struct delayed_signal {
 	siginfo_t info;
 };
 
-static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
+static struct uprobe_task *uprobe_find_utask_locked(struct task_struct *tsk)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 	struct uprobe_task *utask;
-	unsigned long flags;
 
 	head = &utask_table[hash_ptr(tsk, UPROBE_HASH_BITS)];
-	lock_utask_table(flags);
 	hlist_for_each_entry(utask, node, head, hlist) {
-		if (utask->tsk == tsk) {
-			unlock_utask_table(flags);
+		if (utask->tsk == tsk)
 			return utask;
-		}
 	}
-	unlock_utask_table(flags);
 	return NULL;
 }
 
+static struct uprobe_task *uprobe_find_utask(struct task_struct *tsk)
+{
+	struct uprobe_task *utask;
+	unsigned long flags;
+
+	lock_utask_table(flags);
+	utask = uprobe_find_utask_locked(tsk);
+	unlock_utask_table(flags);
+	return utask;
+}
+
 static void uprobe_hash_utask(struct uprobe_task *utask)
 {
 	struct hlist_head *head;
 	unsigned long flags;
 
 	INIT_HLIST_NODE(&utask->hlist);
-	head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
 	lock_utask_table(flags);
+	head = &utask_table[hash_ptr(utask->tsk, UPROBE_HASH_BITS)];
 	hlist_add_head(&utask->hlist, head);
 	unlock_utask_table(flags);
 }
@@ -154,9 +160,11 @@ static void uprobe_unhash_utask(struct uprobe_task *utask)
 	unlock_utask_table(flags);
 }
 
-static inline void uprobe_get_process(struct uprobe_process *uproc)
+static inline uprobe_process * uprobe_get_process(struct uprobe_process *uproc)
 {
-	atomic_inc(&uproc->refcount);
+	if (atomic_inc_not_zero(&uproc->refcount))
+		return uproc;
+	return NULL;
 }
 
 /*
@@ -186,8 +194,9 @@ static struct uprobe_process *uprobe_find_process(struct pid *tg_leader)
 	head = &uproc_table[hash_ptr(tg_leader, UPROBE_HASH_BITS)];
 	hlist_for_each_entry(uproc, node, head, hlist) {
 		if (uproc->tg_leader == tg_leader && !uproc->finished) {
-			uprobe_get_process(uproc);
-			down_write(&uproc->rwsem);
+			uproc = uprobe_get_process(uproc);
+			if (uproc)
+				down_write(&uproc->rwsem);
 			return uproc;
 		}
 	}
@@ -547,24 +556,32 @@ static void uprobe_free_task(struct uprobe_task *utask, bool in_callback)
 	struct deferred_registration *dr, *d;
 	struct delayed_signal *ds, *ds2;
 
-	if (utask->engine && (utask->tsk != current || !in_callback)) {
-		int result;
+	/*
+	 * Do this first, since a utask that's still in the utask_table
+	 * is assumed (e.g., by uprobe_report_exit) to be valid.
+	 */
+	uprobe_unhash_utask(utask);
 
+	if (utask->engine && (utask->tsk != current || !in_callback)) {
 		/*
-		 * No other tasks in this process should be running
-		 * uprobe_report_* callbacks.  (If they are, utrace_barrier()
-		 * here could deadlock.)
+		 * If we're racing with (say) uprobe_report_exit() here,
+		 * utrace_control_pid() may fail with -EINPROGRESS.  That's
+		 * OK.  The callback will abort with UTRACE_DETACH after
+		 * we're done.  It is NOT OK to call utrace_barrier() here,
+		 * since the callback would probably deadlock awaiting
+		 * uproc->rwsem.
+		 *
 		 * utrace_control_pid calls task_pid() so we should hold the
 		 * rcu_read_lock.  */
 		rcu_read_lock();
-		result = utrace_control_pid(utask->pid, utask->engine,
-					    UTRACE_DETACH);
+		if (utrace_control_pid(utask->pid, utask->engine,
+						UTRACE_DETACH) != 0)
+			/* Ignore it. */
+			;
 		rcu_read_unlock();
-		BUG_ON(result == -EINPROGRESS);
 	}
 	put_pid(utask->pid);	/* null pid OK */
 
-	uprobe_unhash_utask(utask);
 	list_del(&utask->list);
 	list_for_each_entry_safe(dr, d, &utask->deferred_registrations, list) {
 		list_del(&dr->list);
@@ -594,8 +611,7 @@ static void uprobe_free_process(struct uprobe_process *uproc, int in_callback)
 
 	if (area->slots)
 		kfree(area->slots);
-	if (!hlist_unhashed(&uproc->hlist))
-		hlist_del(&uproc->hlist);
+	hlist_del(&uproc->hlist);
 	list_for_each_entry_safe(utask, tmp, &uproc->thread_list, list)
 		uprobe_free_task(utask, in_callback);
 	put_pid(uproc->tg_leader);
@@ -622,9 +638,9 @@ static int uprobe_put_process(struct uprobe_process *uproc, bool in_callback)
 		down_write(&uproc->rwsem);
 		if (unlikely(atomic_read(&uproc->refcount) != 0)) {
 			/*
-			 * The works because uproc_mutex is held any
-			 * time the ref count can go from 0 to 1 -- e.g.,
-			 * register_uprobe() sneaks in with a new probe.
+			 * register_uprobe() snuck in with a new probe,
+			 * or a callback such as uprobe_report_exit()
+			 * just started.
 			 */
 			up_write(&uproc->rwsem);
 		} else {
@@ -1771,9 +1787,10 @@ static int utask_fake_quiesce(struct uprobe_task *utask)
 	} else {
 		utask->state = UPTASK_SLEEPING;
 		uproc->n_quiescent_threads++;
-		up_write(&uproc->rwsem);
+
 		/* We ref-count sleepers. */
 		uprobe_get_process(uproc);
+		up_write(&uproc->rwsem);
 
 		wait_event(uproc->waitq, !utask->quiescing);
 
@@ -1921,9 +1938,15 @@ static u32 uprobe_report_signal(u32 action,
 	rcu_read_lock();
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
 	BUG_ON(!utask);
-	uproc = utask->uproc;
+	/* Keep uproc intact until just before we return. */
+	uproc = uprobe_get_process(utask->uproc);
+
 	rcu_read_unlock();
 
+	if (!uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_SIGNAL_IGN | UTRACE_DETACH;
+
 	/*
 	 * We may need to re-assert UTRACE_SINGLESTEP if this signal
 	 * is not associated with the breakpoint.
@@ -1933,9 +1956,6 @@ static u32 uprobe_report_signal(u32 action,
 	else
 		resume_action = UTRACE_RESUME;
 
-	/* Keep uproc intact until just before we return. */
-	uprobe_get_process(uproc);
-
 	if (unlikely(signal_action == UTRACE_SIGNAL_REPORT)) {
 		/* This thread was quiesced using UTRACE_INTERRUPT. */
 		bool done_quiescing;
@@ -2188,7 +2208,7 @@ static u32 uprobe_report_quiesce(
 		return UTRACE_SINGLESTEP;
 
 	BUG_ON(utask->active_probe);
-	uproc = utask->uproc;
+	uproc = uprobe_get_process(utask->uproc);
 	down_write(&uproc->rwsem);
 #if 0
 	// TODO: Is this a concern any more?
@@ -2211,6 +2231,7 @@ static u32 uprobe_report_quiesce(
 	done_quiescing = utask_quiesce(utask);
 // done:
 	up_write(&uproc->rwsem);
+	uprobe_put_process(utask->uproc);
 	return (done_quiescing ? UTRACE_RESUME : UTRACE_STOP);
 }
 
@@ -2295,9 +2316,15 @@ static u32 uprobe_report_exit(enum utrace_resume_action action,
 
 	rcu_read_lock();
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
-	uproc = utask->uproc;
+	BUG_ON(!utask);
+	/* Keep uproc intact until just before we return. */
+	uproc = uprobe_get_process(utask->uproc);
+
 	rcu_read_unlock();
-	uprobe_get_process(uproc);
+
+	if (!uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_DETACH;
 
 	ppt = utask->active_probe;
 	if (ppt) {
@@ -2626,9 +2653,15 @@ static u32 uprobe_report_exec(
 
 	rcu_read_lock();
 	utask = (struct uprobe_task *)rcu_dereference(engine->data);
-	uproc = utask->uproc;
+	BUG_ON(!utask);
+	/* Keep uproc intact until just before we return. */
+	uproc = uprobe_get_process(utask->uproc);
+
 	rcu_read_unlock();
-	uprobe_get_process(uproc);
+
+	if (!uproc)
+		/* uprobe_free_process() has probably clobbered utask->proc. */
+		return UTRACE_DETACH;
 
 	/*
 	 * Only cleanup if we're the last thread.  If we aren't,


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]