This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Re: [Patch 2/5][Djprobe]Djprobe Coexist with Kprobes
- From: Keshavamurthy Anil S <anil dot s dot keshavamurthy at intel dot com>
- To: Masami Hiramatsu <hiramatu at sdl dot hitachi dot co dot jp>
- Cc: systemtap at sources dot redhat dot com, Satoshi Oshima <soshima at redhat dot com>, Hideo Aoki <haoki at redhat dot com>, sugita at sdl dot hitachi dot co dot jp
- Date: Mon, 3 Oct 2005 16:29:17 -0700
- Subject: Re: [Patch 2/5][Djprobe]Djprobe Coexist with Kprobes
- References: <433BE533.9080501@sdl.hitachi.co.jp>
- Reply-to: Keshavamurthy Anil S <anil dot s dot keshavamurthy at intel dot com>
On Thu, Sep 29, 2005 at 05:59:31AM -0700, Masami Hiramatsu wrote:
I see your patch has lots of line wrap and hard to review.
With great difficulty, I have reviewed your code,
please see my comments below.
Main highlights:
1) You code does not work if a new CPU comes online or existing cpu goes offline
2) suggestion: Create djprobe.c and djprobe.h files instead of pushing them into existing files
3) Use good hashing algorithm to search djprobe instances.
4)Handle the case where ARCH does not support djprobes transparently.
5) optimize djprobe data struct
6) Not convinced how you are atomically updating 5 Bytes (jmp inst ) guaranteeing that
none of the other processor are executing near those address.
-Anil Keshavamurthy
Open Source Technology Center
Intel Corp.
E-mail: anil.s.keshavamurthy@intel.com
>
> Hi,
>
> This patch is an architecture common code of
> djprobe.
>
> --
> Masami HIRAMATSU
> 2nd Research Dept.
> Hitachi, Ltd., Systems Development Laboratory
> E-mail: hiramatu@sdl.hitachi.co.jp
>
> include/linux/kprobes.h | 56 ++++++++++
> kernel/kprobes.c | 247
> ++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 303 insertions(+)
>
> diff -Narup linux-2.6.13-mm1.djp.1/include/linux/kprobes.h
> linux-2.6.13-mm1.djp.2/include/linux/kprobes.h
> --- linux-2.6.13-mm1.djp.1/include/linux/kprobes.h 2005-09-05
> 19:11:21.000000000 +0900
> +++ linux-2.6.13-mm1.djp.2/include/linux/kprobes.h 2005-09-12
> 14:05:36.000000000 +0900
> @@ -28,6 +28,8 @@
> * 2005-May Hien Nguyen <hien@us.ibm.com> and Jim Keniston
> * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
> * <prasanna@in.ibm.com> added function-return probes.
> + * 2005-Aug Masami HIRAMATSU <hiramatu@sdl.hitachi.co.jp> added
> direct
> + * jump probe (djprobe) interface to reduce overhead.
> */
I think for clarity sake, it would be better to create new files called
include/linux/djpropbes.h and kernel/djprobes.c and move all the arch
independent djprobe functionality into those files.
> #include <linux/config.h>
> #include <linux/list.h>
> @@ -141,6 +143,41 @@ struct kretprobe_instance {
> struct task_struct *task;
> };
>
> +/* djprobe's instance (internal use)*/
> +struct djprobe_instance {
> + struct djprobe *djp;
> +#ifdef ARCH_SUPPORTS_DJPROBES
> + struct arch_djprobe_stub stub;
> +#endif /* ARCH_SUPPORTS_DJPROBES */
Move arch_djprobe_stub to the end of this data structure.
> + struct kprobe kp;
> + struct list_head list; /* list of djprobe_instances */
> + cpumask_t checked_cpus;
> +};
> +#define DJPI_EMPTY(djpi) (djpi->djp==NULL)
> +#define DJPI_CHECKED(djpi) (cpus_equal(djpi->checked_cpus,
> cpu_online_map))
Humm...Your code will not work for CPU hotplug case as the
cpu_online_map will change based on the online cpus. Also I think,
suspend/resume on i386 will depend on cpu hotplug as they will
be offlining all the cpus except BSP before suspending.
> +
> +struct djprobe;
> +typedef void (*djprobe_handler_t)(struct djprobe *, struct pt_regs
> *);
> +/*
> + * Direct Jump probe interface structure
> + */
> +struct djprobe {
> +#ifndef ARCH_SUPPORTS_DJPROBES
> + struct kprobe kp;
> +#endif /* ARCH_SUPPORTS_DJPROBES */
Try not to have ARCH_SUPPORTS_DJPROBES in the exported data structure,
as this will break the binary compatibility. Handle the case
where ARCH does not support DJPROBES transparently.
> + /* location of the probe point */
> + void * addr;
> +
> + /* sum of length of the replacing codes */
> + int size;
> +
> + /* probing handler (pre-executed) */
> + djprobe_handler_t handler;
> +
> + /* pointer for instance */
> + struct djprobe_instance * inst;
> +};
> +
In the above djprobe struct, I see lots of repetition of fields, please
optimize djprobe data structure as some of the fields defined for this structure
are already defined in kprobes struct.
> #ifdef CONFIG_KPROBES
> /* Locks kprobe: irq must be disabled */
> void lock_kprobes(void);
> @@ -182,6 +219,18 @@ struct kretprobe_instance *get_free_rp_i
> void add_rp_inst(struct kretprobe_instance *ri);
> void kprobe_flush_task(struct task_struct *tk);
> void recycle_rp_inst(struct kretprobe_instance *ri);
> +
> +#ifdef ARCH_SUPPORTS_DJPROBES
> +extern int arch_prepare_djprobe_instance(struct djprobe_instance
> *djpi,
> + unsigned long size);
> +extern int djprobe_bypass_handler(struct kprobe * kp, struct pt_regs
> * regs);
> +extern void arch_install_djprobe_instance(struct djprobe_instance
> *djpi);
> +extern void arch_uninstall_djprobe_instance(struct djprobe_instance
> *djpi);
> +#endif /* ARCH_SUPPORTS_DJPROBES */
> +
> +int register_djprobe(struct djprobe *p);
> +void unregister_djprobe(struct djprobe *p);
> +
> #else /* CONFIG_KPROBES */
> static inline int kprobe_running(void)
> {
> @@ -214,5 +263,12 @@ static inline void unregister_kretprobe(
> static inline void kprobe_flush_task(struct task_struct *tk)
> {
> }
> +static inline int register_djprobe(struct djprobe *p)
> +{
> + return -ENOSYS;
> +}
> +static inline void unregister_djprobe(struct djprobe *p)
> +{
> +}
> #endif /* CONFIG_KPROBES */
> #endif /* _LINUX_KPROBES_H */
> diff -Narup linux-2.6.13-mm1.djp.1/kernel/kprobes.c
> linux-2.6.13-mm1.djp.2/kernel/kprobes.c
> --- linux-2.6.13-mm1.djp.1/kernel/kprobes.c 2005-09-05
> 19:11:21.000000000 +0900
> +++ linux-2.6.13-mm1.djp.2/kernel/kprobes.c 2005-09-28
> 20:10:26.000000000 +0900
> @@ -30,6 +30,8 @@
> * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
> * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
> * <prasanna@in.ibm.com> added function-return probes.
> + * 2005-Aug Masami HIRAMATSU <hiramatu@sdl.hitachi.co.jp> added
> direct
> + * jump probe (djprobe) interface to reduce overhead.
> */
> #include <linux/kprobes.h>
> #include <linux/spinlock.h>
> @@ -75,6 +77,9 @@ struct kprobe_insn_page_list {
> static struct kprobe_insn_page_list kprobe_insn_pages = {
> HLIST_HEAD_INIT, MAX_INSN_SIZE};
>
> +static struct djprobe_instance *
> + __kprobes get_djprobe_instance(void *addr, int size);
> +
> /**
> * __get_insn_slot() - Find a slot on an executable page for an
> instruction.
> * We allocate an executable page if there's no room on existing
> ones.
> @@ -474,6 +479,11 @@ int __kprobes register_kprobe(struct kpr
>
> if ((ret = in_kprobes_functions((unsigned long) p->addr)) !=
> 0)
> return ret;
> +#ifdef ARCH_SUPPORTS_DJPROBES
> + if (p->pre_handler != djprobe_bypass_handler &&
> + get_djprobe_instance(p->addr, 1) != NULL )
> + return -EEXIST;
> +#endif
> if ((ret = arch_prepare_kprobe(p)) != 0)
> goto rm_kprobe;
>
> @@ -597,6 +607,238 @@ void __kprobes unregister_kretprobe(stru
> spin_unlock_irqrestore(&kprobe_lock, flags);
> }
>
> +#ifdef ARCH_SUPPORTS_DJPROBES
> +/*
> + * The djprobe do not refer instances list when probe function
> called.
> + * This list is operated on registering and unregistering djprobe.
> + */
> +static LIST_HEAD(djprobe_list);
> +static DEFINE_SPINLOCK(djprobe_lock);
> +/* Instruction pages for djprobe's stub code */
> +static struct kprobe_insn_page_list djprobe_insn_pages = {
> + HLIST_HEAD_INIT, 0};
> +
> +static inline void __free_djprobe_instance(struct djprobe_instance
> *djpi)
> +{
> + list_del(&djpi->list);
> + if (djpi->kp.addr) unregister_kprobe(&(djpi->kp));
> + __free_insn_slot(&djprobe_insn_pages, djpi->stub.insn);
> + kfree(djpi);
> +}
> +
> +static inline struct djprobe_instance *
> + __get_djprobe_instance(void *addr, int size)
> +{
> + struct djprobe_instance *djpi;
> + list_for_each_entry(djpi, &djprobe_list, list) {
Use good hashing algorithms here instead of plain linked lists.
> + if (((long)addr < (long)djpi->kp.addr +
> DJPI_ARCH_SIZE(djpi))
> + && ((long)djpi->kp.addr < (long)addr + size)) {
> + return djpi;
> + }
> + }
> + return NULL;
> +}
> +
> +static struct djprobe_instance *
> + __kprobes get_djprobe_instance(void *addr, int size)
> +{
> + struct djprobe_instance *djpi;
> + spin_lock(&djprobe_lock);
> + djpi = __get_djprobe_instance(addr, size);
> + spin_unlock(&djprobe_lock);
> + return djpi;
> +}
> +
> +#ifdef CONFIG_SMP
> +static void __kprobes work_check_djprobe_instances(void *data);
> +static DEFINE_PER_CPU(struct work_struct, djprobe_check_works);
> +
> +static inline void init_djprobe(void)
> +{
> + int cpu;
> + struct work_struct *wk;
> + djprobe_insn_pages.insn_size = ARCH_STUB_SIZE;
> + for_each_cpu(cpu) { /* we should initialize all percpu data */
> + wk = &per_cpu(djprobe_check_works, cpu);
> + INIT_WORK(wk, work_check_djprobe_instances, NULL);
> + }
> +}
> +
> +static void __kprobes work_check_djprobe_instances(void *data)
> +{
I guess this function gets called on each cpu :-)
> + struct list_head *pos;
> + struct djprobe_instance *djpi;
> +
> + spin_lock(&djprobe_lock);
With the above lock held, you are serialzing this function, which indeed gets scheduled
on multiple cpus.
> + list_for_each(pos, &djprobe_list) {
Use good hashing algorithm here.
> + djpi = container_of(pos, struct djprobe_instance,
> list);
> + if (DJPI_CHECKED(djpi))
DJPI_CHECKED() will fail if new cpu has come online or some existing cpu has gone offline.
Djprobe must work in connjuction with CONFIG_CPU_HOTPUG
> + continue; /* already checked */
> + cpu_set(smp_processor_id(), djpi->checked_cpus);
> + if (DJPI_CHECKED(djpi)) {
> + if (DJPI_EMPTY(djpi)) {
> + pos = pos->prev; /* pos is going to be
> freed */
> + __free_djprobe_instance(djpi);
> + } else {
> + arch_install_djprobe_instance(djpi);
Humm..You are simply making a DJPI_CHECKED(djpi), which checks and tells that
all the cpus have seen djpi and has cpu_set(smp_processor_id(), djpi_checked_cpus).
But this does not guarantee that the other cpus are not executing the
instruction where you are modifying 5 bytes by calling arch_install_djprobe_instances(djpi).
You are trying to insert "jmp" opcode jmp address, i,e 5 bytes
in arch_install_djprobe_instance() at kp.addr with out stopping all the other
cpus from executing the instructions near that address. I am not convinced, please
clarify how are you making sure you are updating 5 bytes automically.
> + }
> + }
> + }
> + spin_unlock(&djprobe_lock);
> +}
> +
> +static inline void schedule_check_djprobe_instances(void)
> +{
> + int cpu;
> + struct work_struct *wk;
> + cpus_clear(djpi->checked_cpus);
> + for_each_online_cpu(cpu) {
How do you account for new online CPUs?
> + wk = &per_cpu(djprobe_check_works, cpu);
> + if (list_empty(&wk->entry)) /* not scheduled yet */
> + schedule_delayed_work_on(cpu, wk, 0);
> + }
> +}
> +
> +#define __install_djprobe_instance(djpi) \
> + schedule_check_djprobe_instance(djpi)
> +
> +#define __uninstall_djprobe_instance(djpi) \
> + schedule_check_djprobe_instance(djpi)
> +
> +#else
> +#define init_djprobe() \
> + djprobe_insn_pages.insn_size = ARCH_STUB_SIZE;
> +
> +#define __install_djprobe_instance(djpi) \
> + arch_install_djprobe_instance(djpi)
> +
> +#define __uninstall_djprobe_instance(djpi) \
> + __free_djprobe_instance(djpi)
> +
> +#endif /*CONFIG_SMP*/
> +
> +/* Use kprobe to check safety and install */
> +static int __kprobes install_djprobe_instance(struct djprobe_instance
> *djpi)
> +{
> + int ret;
> + ret = register_kprobe(&(djpi->kp));
> + if (ret == 0) {
> + __install_djprobe_instance(djpi);
> + }
> + return ret;
> +}
> +
> +/* Use kprobe to check safety and release */
> +static void __kprobes uninstall_djprobe_instance(struct
> djprobe_instance *djpi)
> +{
> + arch_uninstall_djprobe_instance(djpi);
> + __uninstall_djprobe_instance(djpi);
> +}
> +
> +int __kprobes register_djprobe(struct djprobe * djp)
> +{
> + struct djprobe_instance *djpi;
> + struct kprobe *kp;
> + int ret = 0, i;
> +
> + if (djp == NULL || djp->addr == NULL ||
> + djp->size > ARCH_STUB_INSN_MAX ||
> + djp->size < ARCH_STUB_INSN_MIN ||
> + djp->inst != NULL)
> + return -EINVAL;
> +
> + if ((ret = in_kprobes_functions((unsigned long) djp->addr)) !=
> 0)
> + return ret;
> +
> + spin_lock(&djprobe_lock);
> + /* check confliction with other djprobes */
> + djpi = __get_djprobe_instance(djp->addr, djp->size);
> + if (djpi) {
> + if (djpi->kp.addr == djp->addr && DJPI_EMPTY(djpi)) {
> + djp->inst = djpi;
> + djpi->djp = djp; /*TODO: use list*/
> + goto out;
> + } else {
> + ret = -EEXIST; /* a djprobe were inserted */
> + goto out;
> + }
> + }
> + /* check confliction with kprobes */
> + for ( i=0; i < djp->size; i++) {
> + kp = get_kprobe((void*)((long)djp->addr+i));
> + if (kp != NULL) {
> + ret = -EEXIST; /* a kprobes were inserted */
> + goto out;
> + }
> + }
This function is way too long.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> + /* make a new instance */
> + djpi = kmalloc(sizeof(struct djprobe_instance),GFP_KERNEL);
> + if (djpi == NULL) {
> + ret = -ENOMEM; /* memory allocation error */
> + goto out;
> + }
> + memset(djpi, 0, sizeof(struct djprobe_instance)); /* for
> kprobe */
> + /* attach */
> + djp->inst = djpi;
> + djpi->djp = djp; /*TODO: use list*/
> + djpi->kp.addr = djp->addr;
> + INIT_LIST_HEAD(&djpi->list);
> + list_add(&djpi->list, &djprobe_list);
+++++++ Move the above code into djprobe_make_new_instance() function
> +
> + /* prepare stub */
> + djpi->stub.insn = __get_insn_slot(&djprobe_insn_pages);
> + if (djpi->stub.insn == NULL) {
> + kfree(djpi);
You are freeing djpi without removing from the list which you have inserted above.
> + ret = -ENOMEM; /* memory allocation error */
> + goto out;
> + }
> + djpi->kp.pre_handler = djprobe_bypass_handler;
> + arch_prepare_djprobe_instance(djpi, djp->size); /*TODO :
> remove size*/
> +
> + ret = install_djprobe_instance(djpi);
> + if (ret < 0) { /* failed to install */
> + djp->inst = NULL;
> + djpi->kp.addr = NULL;
> + __free_djprobe_instance(djpi);
> + }
> +out:
> + spin_unlock(&djprobe_lock);
> + return ret;
> +}
> +
> +void __kprobes unregister_djprobe(struct djprobe * djp)
> +{
> + struct djprobe_instance *djpi;
> + if (djp == NULL || djp->inst == NULL)
> + return ;
> +
> + djpi = djp->inst;
> + spin_lock(&djprobe_lock);
> + djp->inst = NULL;
> + djpi->djp = NULL; /*TODO: use list*/
> + if (DJPI_EMPTY(djpi)) {
djpi->djp is set to NULL and why are you checking DJPI_EMPTY() again?
> + uninstall_djprobe_instance(djpi);
> + }
> + spin_unlock(&djprobe_lock);
> +}
> +
> +#else /* ARCH_SUPPORTS_DJPROBES */
> +int __kprobes register_djprobe(struct djprobe *p)
> +{
> + if (p!=NULL) {
> + p->kp.addr = p->addr;
> + p->kp.pre_handler = (kprobe_pre_handler_t)p->handler;
> + return register_kprobe(&p->kp);
> + }
> + return -EINVAL;
> +}
> +
> +void __kprobes unregister_djprobe(struct djprobe *p)
> +{
> + unregister_kprobe(&p->kp);
> +}
> +#endif /* ARCH_SUPPORTS_DJPROBES */
> +
> static int __init init_kprobes(void)
> {
> int i, err = 0;
> @@ -608,6 +850,9 @@ static int __init init_kprobes(void)
> INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
> }
>
> +#if defined(ARCH_SUPPORTS_DJPROBES)
> + init_djprobe();
> +#endif
> err = arch_init_kprobes();
> if (!err)
> err = register_die_notifier(&kprobe_exceptions_nb);
> @@ -624,4 +869,6 @@ EXPORT_SYMBOL_GPL(unregister_jprobe);
> EXPORT_SYMBOL_GPL(jprobe_return);
> EXPORT_SYMBOL_GPL(register_kretprobe);
> EXPORT_SYMBOL_GPL(unregister_kretprobe);
> +EXPORT_SYMBOL_GPL(register_djprobe);
> +EXPORT_SYMBOL_GPL(unregister_djprobe);