Re: [RFC 3/3] Kprobes: userspace probes providing callback functions execution


This patch adds support for calling the registered callback
functions when a userspace probe is hit.

Each userspace probe is uniquely identified by the combination of
inode and offset, so at registration time that inode and offset
combination is added to the kprobes hash table. When the breakpoint
instruction is hit, the kprobes hash table is searched for a matching
inode and offset through the get_uprobe() routine, and the probe
corresponding to that inode and offset is identified. If multiple
probes are registered at the same location, their pre_handler()
callbacks are called in sequence. Single-stepping is then performed
in-line: the page containing the breakpoint instruction is mapped and
the breakpoint instruction is replaced by the original instruction.
After the processor single-steps in-line, the registered post_handler()
callbacks are executed in sequence. Once the post handlers have run,
the breakpoint is inserted back at that location and the page is
unmapped. Single-stepping in-line has a drawback: probes can be missed
during the single-step window in an SMP environment. The next patch
overcomes this limitation.
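
For reference, here is a minimal, hypothetical sketch of how a module
might register such a probe. It assumes the struct uprobe fields and
the register_uprobe()/unregister_uprobe() entry points introduced
earlier in this series; the pathname and offset values below are
purely illustrative:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>

/* Runs when the breakpoint at the probed inode:offset is hit. */
static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_INFO "uprobe pre-handler: eip=%lx\n", regs->eip);
	return 0;
}

/* Runs after the original instruction is single-stepped in-line. */
static void example_post(struct kprobe *p, struct pt_regs *regs,
			 unsigned long flags)
{
	printk(KERN_INFO "uprobe post-handler\n");
}

/* Field names here are assumptions based on this patch series. */
static struct uprobe example_up = {
	.pathname	 = "/bin/ls",	/* illustrative target file */
	.offset		 = 0x8000,	/* illustrative instruction offset */
	.kp.pre_handler	 = example_pre,
	.kp.post_handler = example_post,
};

static int __init example_init(void)
{
	return register_uprobe(&example_up);
}

static void __exit example_exit(void)
{
	unregister_uprobe(&example_up);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

As with kernel kprobes, a pre-handler returning 0 lets the probed
instruction be single-stepped and the post-handler run afterwards.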

Signed-off-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>


---

 linux-2.6.13-prasanna/arch/i386/kernel/kprobes.c |   59 ++++++++--
 linux-2.6.13-prasanna/include/linux/kprobes.h    |    1 
 linux-2.6.13-prasanna/kernel/kprobes.c           |  126 ++++++++++++++++++++++-
 3 files changed, 172 insertions(+), 14 deletions(-)

diff -puN kernel/kprobes.c~kprobes_userspace_probes-handlers kernel/kprobes.c
--- linux-2.6.13/kernel/kprobes.c~kprobes_userspace_probes-handlers	2005-09-14 11:49:28.780123648 +0530
+++ linux-2.6.13-prasanna/kernel/kprobes.c	2005-09-14 11:49:28.835115288 +0530
@@ -52,7 +52,8 @@ static struct list_head uprobe_module_li
 unsigned int kprobe_cpu = NR_CPUS;
 static DEFINE_SPINLOCK(kprobe_lock);
 static struct kprobe *curr_kprobe;
-
+extern struct uprobe *current_uprobe;
+static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
 /*
  * kprobe->ainsn.insn points to the copy of the instruction to be
  * single-stepped. x86_64, POWER4 and above have no-exec support and
@@ -166,12 +167,124 @@ void unlock_kprobes(void)
 	spin_unlock(&kprobe_lock);
 }
 
+/*
+ * Walk through the kprobe hlist and get the matching userspace probe
+ * structure for the given inode and offset.
+ */
+static struct kprobe *get_uprobe_at(struct inode *inode, unsigned long offset)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct kprobe *p;
+
+	head = &kprobe_table[hash_long((unsigned long)inode * offset,
+				       KPROBE_HASH_BITS)];
+	hlist_for_each_entry(p, node, head, hlist) {
+		if (p->pre_handler == aggr_pre_handler)
+			return p;
+		else {
+			struct uprobe *user = container_of(p,
+							struct uprobe, kp);
+			if (user->inode == inode && user->offset == offset)
+				return &user->kp;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * This returns with the page and its kmap held; they are released
+ * in put_user_page(). It is unclear whether holding page_table_lock
+ * is also needed; there is a very small, probably impossible, race
+ * where the vma could be gone by the time we complete the single-step.
+ */
+void get_user_page(struct uprobe *u)
+{
+	kprobe_opcode_t *addr;
+	u->page = find_get_page(u->inode->i_mapping,
+						u->offset >> PAGE_CACHE_SHIFT);
+	lock_page(u->page);
+	addr = (kprobe_opcode_t *) kmap_atomic(u->page, KM_USER0);
+	u->kp.addr = (kprobe_opcode_t *) ((unsigned long)addr +
+				 (unsigned long)(u->offset & ~PAGE_MASK));
+}
+
+void put_user_page(struct uprobe *u)
+{
+	kunmap_atomic(u->kp.addr, KM_USER0);
+	unlock_page(u->page);
+	page_cache_release(u->page);
+}
+
+static struct uprobe *get_aggr_uprobe(struct inode *inode,
+				      unsigned long offset, struct kprobe *p)
+{
+	struct kprobe *kp;
+	list_for_each_entry(kp, &p->list, list) {
+		if (p != kp) {
+			struct uprobe *user = container_of(kp,
+							struct uprobe, kp);
+			if (user->inode == inode && user->offset == offset)
+				return user;
+		}
+	}
+	return NULL;
+}
+/*
+ * We need to look up the inode and offset from the vma. We can't depend
+ * on page->(mapping, index), as that would be incorrect if we ever swap
+ * this page out (possible for pages dirtied by GDB breakpoints etc.).
+ *
+ * We acquire page_table_lock here to ensure that:
+ *	- the current page doesn't go away from under us (kswapd)
+ *	- mm->mmap consistency (vmas are always added under this lock)
+ *
+ * We will never deadlock on page_table_lock: we always come here due to
+ * a probe in user space, so no kernel code could have executed to take
+ * the page_table_lock.
+ */
+static struct kprobe *get_uprobe(void *addr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	struct inode *inode;
+	unsigned long offset;
+	struct uprobe *up = NULL;
+	struct kprobe *p;
+
+	spin_lock(&mm->page_table_lock);
+	vma = find_vma(mm, (unsigned long)addr);
+	if (!vma || !vma->vm_file) {
+		spin_unlock(&mm->page_table_lock);
+		return NULL;
+	}
+	offset = (unsigned long)addr - vma->vm_start +
+		 (vma->vm_pgoff << PAGE_SHIFT);
+	inode = vma->vm_file->f_dentry->d_inode;
+	spin_unlock(&mm->page_table_lock);
+
+	p = get_uprobe_at(inode, offset);
+	if (p && p->pre_handler == aggr_pre_handler)
+		up = get_aggr_uprobe(inode, offset, p);
+	else if (p)
+		up = container_of(p, struct uprobe, kp);
+	if (up) {
+		current_uprobe = up;
+		get_user_page(up);
+		up->vma = vma;
+	}
+	return p;
+}
+
 /* You have to be holding the kprobe_lock */
 struct kprobe *get_kprobe(void *addr)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
 
+	if (!kernel_text_address((unsigned long)addr))
+		return get_uprobe(addr);
+
 	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
 	hlist_for_each(node, head) {
 		struct kprobe *p = hlist_entry(node, struct kprobe, hlist);
@@ -377,7 +490,12 @@ static int add_new_kprobe(struct kprobe 
 static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	copy_kprobe(p, ap);
-	ap->addr = p->addr;
+	if (!kernel_text_address((unsigned long)p->addr)) {
+		struct uprobe *up = container_of(p, struct uprobe, kp);
+		ap->addr = (kprobe_opcode_t *)((unsigned long)(up->inode) *
+						(unsigned long)(up->offset));
+	} else
+		ap->addr = p->addr;
 	ap->pre_handler = aggr_pre_handler;
 	ap->post_handler = aggr_post_handler;
 	ap->fault_handler = aggr_fault_handler;
@@ -755,8 +873,8 @@ int up_readpage(struct file *file, struc
 }
 
 /*
- * Walk the path and get the inode. Check for matching inode with the module
- * list.
+ * Walk the path and get the inode. Check the module list for a
+ * matching inode.
  */
 static struct uprobe_module *get_module_by_name(struct uprobe *p)
 {
diff -puN arch/i386/kernel/kprobes.c~kprobes_userspace_probes-handlers arch/i386/kernel/kprobes.c
--- linux-2.6.13/arch/i386/kernel/kprobes.c~kprobes_userspace_probes-handlers	2005-09-14 11:49:28.783123192 +0530
+++ linux-2.6.13-prasanna/arch/i386/kernel/kprobes.c	2005-09-14 11:50:14.328199288 +0530
@@ -43,6 +43,8 @@ static struct kprobe *kprobe_prev;
 static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev;
 static struct pt_regs jprobe_saved_regs;
 static long *jprobe_saved_esp;
+static kprobe_opcode_t *uprobe_addr;
+struct uprobe *current_uprobe;
 /* copy of the kernel stack at the probe fire time */
 static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
 void jprobe_return_end(void);
@@ -91,14 +93,14 @@ void arch_arm_uprobe(struct uprobe *up)
 {
 	*up->kp.addr = BREAKPOINT_INSTRUCTION;
 	flush_icache_user_range(up->vma, up->page,
-			(unsigned long) up->kp.addr, sizeof(kprobe_opcode_t));
+			(unsigned long) uprobe_addr, sizeof(kprobe_opcode_t));
 }
 
 void arch_disarm_uprobe(struct uprobe *up)
 {
 	*up->kp.addr = up->kp.opcode;
 	flush_icache_user_range(up->vma, up->page,
-			(unsigned long) up->kp.addr, sizeof(kprobe_opcode_t));
+			(unsigned long) uprobe_addr, sizeof(kprobe_opcode_t));
 }
 
 void arch_remove_kprobe(struct kprobe *p)
@@ -137,8 +139,14 @@ static inline void prepare_singlestep(st
 	/*single step inline if the instruction is an int3*/
 	if (p->opcode == BREAKPOINT_INSTRUCTION)
 		regs->eip = (unsigned long)p->addr;
-	else
-		regs->eip = (unsigned long)&p->ainsn.insn;
+	else {
+		if (!kernel_text_address((unsigned long)p->addr)) {
+			arch_disarm_uprobe(current_uprobe);
+			regs->eip = (unsigned long)uprobe_addr;
+		} else
+			regs->eip = (unsigned long)&p->ainsn.insn;
+
+	}
 }
 
 void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
@@ -169,21 +177,32 @@ static int kprobe_handler(struct pt_regs
 	struct kprobe *p;
 	int ret = 0;
 	kprobe_opcode_t *addr = NULL;
+	unsigned seg = regs->xcs & 0xffff;
 	unsigned long *lp;
 
-	/* We're in an interrupt, but this is clear and BUG()-safe. */
-	preempt_disable();
 	/* Check if the application is using LDT entry for its code segment and
 	 * calculate the address by reading the base address from the LDT entry.
 	 */
-	if ((regs->xcs & 4) && (current->mm)) {
+	/* This code is already present in 2.6.13-mm2 */
+	if (regs->eflags & VM_MASK) {
+		addr = (kprobe_opcode_t *)(((seg << 4) + regs->eip -
+			sizeof(kprobe_opcode_t)) & 0xffff);
+	} else if ((regs->xcs & 4) && (current->mm)) {
+		local_irq_enable();
+		down(&current->mm->context.sem);
 		lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8)
 					+ (char *) current->mm->context.ldt);
 		addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip -
 						sizeof(kprobe_opcode_t));
-	} else {
+		up(&current->mm->context.sem);
+		local_irq_disable();
+	} else
 		addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t));
-	}
+
+	/* We're in an interrupt, but this is clear and BUG()-safe. */
+	preempt_disable();
+	uprobe_addr = addr;
+
 	/* Check we're not actually recursing */
 	if (kprobe_running()) {
 		/* We *are* holding lock here, so this is safe.
@@ -254,6 +273,8 @@ ss_probe:
 	return 1;
 
 no_kprobe:
+	if (p && !kernel_text_address((unsigned long) p->addr))
+		put_user_page(current_uprobe);
 	preempt_enable_no_resched();
 	return ret;
 }
@@ -401,6 +422,17 @@ static void resume_execution(struct kpro
 	}
 }
 
+static void resume_execution_user(struct uprobe *p, struct pt_regs *regs)
+{
+	unsigned long delta;
+	/* TODO: need to fix up special instructions, as done for kernel probes */
+	delta = (unsigned long) regs->eip - (unsigned long)uprobe_addr;
+	regs->eip = (unsigned long) (uprobe_addr + delta);
+	arch_arm_uprobe(p);
+	p->kp.addr = uprobe_addr;
+	regs->eflags &= ~TF_MASK;
+}
+
 /*
  * Interrupts are disabled on entry as trap1 is an interrupt gate and they
  * remain disabled thoroughout this function.  And we hold kprobe lock.
@@ -415,7 +447,10 @@ static inline int post_kprobe_handler(st
 		current_kprobe->post_handler(current_kprobe, regs, 0);
 	}
 
-	resume_execution(current_kprobe, regs);
+	if (!kernel_text_address((unsigned long)current_kprobe->addr))
+		resume_execution_user(current_uprobe, regs);
+	else
+		resume_execution(current_kprobe, regs);
 	regs->eflags |= kprobe_saved_eflags;
 
 	/*Restore back the original saved kprobes variables and continue. */
@@ -425,6 +460,8 @@ static inline int post_kprobe_handler(st
 	}
 	unlock_kprobes();
 out:
+	if (!kernel_text_address((unsigned long)current_kprobe->addr))
+		put_user_page(current_uprobe);
 	preempt_enable_no_resched();
 
 	/*
@@ -449,6 +486,8 @@ static inline int kprobe_fault_handler(s
 		resume_execution(current_kprobe, regs);
 		regs->eflags |= kprobe_old_eflags;
 
+		if (!kernel_text_address((unsigned long)current_kprobe->addr))
+			put_user_page(current_uprobe);
 		unlock_kprobes();
 		preempt_enable_no_resched();
 	}
diff -puN include/linux/kprobes.h~kprobes_userspace_probes-handlers include/linux/kprobes.h
--- linux-2.6.13/include/linux/kprobes.h~kprobes_userspace_probes-handlers	2005-09-14 11:49:28.828116352 +0530
+++ linux-2.6.13-prasanna/include/linux/kprobes.h	2005-09-14 11:49:28.838114832 +0530
@@ -204,6 +204,7 @@ struct kretprobe_instance *get_free_rp_i
 void add_rp_inst(struct kretprobe_instance *ri);
 void kprobe_flush_task(struct task_struct *tk);
 void recycle_rp_inst(struct kretprobe_instance *ri);
+void put_user_page(struct uprobe *u);
 #else /* CONFIG_KPROBES */
 static inline int kprobe_running(void)
 {

_
-- 

Prasanna S Panchamukhi
Linux Technology Center
India Software Labs, IBM Bangalore
Ph: 91-80-25044636
<prasanna@in.ibm.com>

