This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch 2/5][Djprobe]Djprobe Coexist with Kprobes


On Thu, Sep 29, 2005 at 05:59:31AM -0700, Masami Hiramatsu wrote:

	I see your patch has lots of line wrap and hard to review.
With great difficulty, I have reviewed your code, 
please see my comments below.

Main highlights:
1) You code does not work if a new CPU comes online or existing cpu goes offline
2) suggestion: Create djprobe.c and djprobe.h files instead of pushing them into existing files
3) Use good hashing algorithm to search djprobe instances.
4)Handle the case where ARCH does not support djprobes transparently.
5) optimize djprobe data struct
6) Not convinced how you are atomically updating 5 Bytes (jmp inst ) guaranteeing that
none of the other processor are executing near those address.


-Anil Keshavamurthy
Open Source Technology Center
Intel Corp.
E-mail: anil.s.keshavamurthy@intel.com


> 
>    Hi,
> 
>    This patch is an architecture common code of
>    djprobe.
> 
>    --
>    Masami HIRAMATSU
>    2nd Research Dept.
>    Hitachi, Ltd., Systems Development Laboratory
>    E-mail: hiramatu@sdl.hitachi.co.jp
> 
>     include/linux/kprobes.h |   56 ++++++++++
>     kernel/kprobes.c        |  247
>    ++++++++++++++++++++++++++++++++++++++++++++++++
>     2 files changed, 303 insertions(+)
> 
>    diff       -Narup       linux-2.6.13-mm1.djp.1/include/linux/kprobes.h
>    linux-2.6.13-mm1.djp.2/include/linux/kprobes.h
>    ---   linux-2.6.13-mm1.djp.1/include/linux/kprobes.h        2005-09-05
>    19:11:21.000000000 +0900
>    +++   linux-2.6.13-mm1.djp.2/include/linux/kprobes.h        2005-09-12
>    14:05:36.000000000 +0900
>    @@ -28,6 +28,8 @@
>      * 2005-May    Hien Nguyen <hien@us.ibm.com> and Jim Keniston
>      *             <jkenisto@us.ibm.com>  and Prasanna S Panchamukhi
>      *             <prasanna@in.ibm.com> added function-return probes.
>    +  *  2005-Aug     Masami HIRAMATSU <hiramatu@sdl.hitachi.co.jp> added
>    direct
>    + *             jump probe (djprobe) interface to reduce overhead.
>      */

I think for clarity sake, it would be better to create new files called 
include/linux/djpropbes.h and kernel/djprobes.c and move all the arch 
independent djprobe functionality into those files.


>     #include <linux/config.h>
>     #include <linux/list.h>
>    @@ -141,6 +143,41 @@ struct kretprobe_instance {
>            struct task_struct *task;
>     };
> 
>    +/* djprobe's instance (internal use)*/
>    +struct djprobe_instance {
>    +       struct djprobe *djp;
>    +#ifdef ARCH_SUPPORTS_DJPROBES
>    +       struct arch_djprobe_stub stub;
>    +#endif /* ARCH_SUPPORTS_DJPROBES */
Move arch_djprobe_stub to the end of this data structure.


>    +       struct kprobe kp;
>    +       struct list_head list; /* list of djprobe_instances */
>    +       cpumask_t checked_cpus;
>    +};
>    +#define DJPI_EMPTY(djpi)  (djpi->djp==NULL)
>    +#define       DJPI_CHECKED(djpi)      (cpus_equal(djpi->checked_cpus,
>    cpu_online_map))

	Humm...Your code will not work for CPU hotplug case as the
	cpu_online_map will change based on the online cpus. Also I think,
	suspend/resume on i386 will depend on cpu hotplug as they will
	be offlining all the cpus except BSP before suspending.
	
>    +
>    +struct djprobe;
>    +typedef  void  (*djprobe_handler_t)(struct  djprobe *, struct pt_regs
>    *);
>    +/*
>    + * Direct Jump probe interface structure
>    + */
>    +struct djprobe {
>    +#ifndef ARCH_SUPPORTS_DJPROBES
>    +       struct kprobe kp;
>    +#endif /* ARCH_SUPPORTS_DJPROBES */
Try not to have ARCH_SUPPORTS_DJPROBES in the exported data structure,
as this will break the binary compatibility. Handle the case
where ARCH does not support DJPROBES transparently.


>    +       /* location of the probe point */
>    +       void * addr;
>    +
>    +       /* sum of length of the replacing codes */
>    +       int size;
>    +
>    +       /* probing handler (pre-executed) */
>    +       djprobe_handler_t handler;
>    +
>    +       /* pointer for instance */
>    +       struct djprobe_instance * inst;
>    +};
>    +
In the above djprobe struct, I see lots of repetition of fields, please
optimize djprobe data structure as some of the fields defined for this structure
are already defined in kprobes struct.

>     #ifdef CONFIG_KPROBES
>     /* Locks kprobe: irq must be disabled */
>     void lock_kprobes(void);
>    @@ -182,6 +219,18 @@ struct kretprobe_instance *get_free_rp_i
>     void add_rp_inst(struct kretprobe_instance *ri);
>     void kprobe_flush_task(struct task_struct *tk);
>     void recycle_rp_inst(struct kretprobe_instance *ri);
>    +
>    +#ifdef ARCH_SUPPORTS_DJPROBES
>    +extern   int   arch_prepare_djprobe_instance(struct  djprobe_instance
>    *djpi,
>    +                                        unsigned long size);
>    +extern  int djprobe_bypass_handler(struct kprobe * kp, struct pt_regs
>    * regs);
>    +extern   void  arch_install_djprobe_instance(struct  djprobe_instance
>    *djpi);
>    +extern  void  arch_uninstall_djprobe_instance(struct djprobe_instance
>    *djpi);
>    +#endif /* ARCH_SUPPORTS_DJPROBES */
>    +
>    +int register_djprobe(struct djprobe *p);
>    +void unregister_djprobe(struct djprobe *p);
>    +
>     #else /* CONFIG_KPROBES */
>     static inline int kprobe_running(void)
>     {
>    @@ -214,5 +263,12 @@ static inline void unregister_kretprobe(
>     static inline void kprobe_flush_task(struct task_struct *tk)
>     {
>     }
>    +static inline int register_djprobe(struct djprobe *p)
>    +{
>    +       return -ENOSYS;
>    +}
>    +static inline void unregister_djprobe(struct djprobe *p)
>    +{
>    +}
>     #endif                         /* CONFIG_KPROBES */
>     #endif                         /* _LINUX_KPROBES_H */
>    diff           -Narup          linux-2.6.13-mm1.djp.1/kernel/kprobes.c
>    linux-2.6.13-mm1.djp.2/kernel/kprobes.c
>    ---       linux-2.6.13-mm1.djp.1/kernel/kprobes.c           2005-09-05
>    19:11:21.000000000 +0900
>    +++       linux-2.6.13-mm1.djp.2/kernel/kprobes.c           2005-09-28
>    20:10:26.000000000 +0900
>    @@ -30,6 +30,8 @@
>      * 2005-May    Hien Nguyen <hien@us.ibm.com>, Jim Keniston
>      *             <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
>      *             <prasanna@in.ibm.com> added function-return probes.
>    +  *  2005-Aug     Masami HIRAMATSU <hiramatu@sdl.hitachi.co.jp> added
>    direct
>    + *             jump probe (djprobe) interface to reduce overhead.
>      */
>     #include <linux/kprobes.h>
>     #include <linux/spinlock.h>
>    @@ -75,6 +77,9 @@ struct kprobe_insn_page_list {
>     static struct kprobe_insn_page_list kprobe_insn_pages = {
>            HLIST_HEAD_INIT, MAX_INSN_SIZE};
> 
>    +static struct djprobe_instance *
>    +       __kprobes get_djprobe_instance(void *addr, int size);
>    +
>     /**
>       *  __get_insn_slot()  -  Find  a  slot on an executable page for an
>    instruction.
>       *  We  allocate  an  executable page if there's no room on existing
>    ones.
>    @@ -474,6 +479,11 @@ int __kprobes register_kprobe(struct kpr
> 
>             if  ((ret = in_kprobes_functions((unsigned long) p->addr)) !=
>    0)
>                    return ret;
>    +#ifdef ARCH_SUPPORTS_DJPROBES
>    +       if (p->pre_handler != djprobe_bypass_handler &&
>    +           get_djprobe_instance(p->addr, 1) != NULL )
>    +               return -EEXIST;
>    +#endif
>            if ((ret = arch_prepare_kprobe(p)) != 0)
>                    goto rm_kprobe;
> 
>    @@ -597,6 +607,238 @@ void __kprobes unregister_kretprobe(stru
>            spin_unlock_irqrestore(&kprobe_lock, flags);
>     }
> 
>    +#ifdef ARCH_SUPPORTS_DJPROBES
>    +/*
>    +  *  The  djprobe  do  not  refer  instances list when probe function
>    called.
>    + * This list is operated on registering and unregistering djprobe.
>    + */
>    +static LIST_HEAD(djprobe_list);
>    +static DEFINE_SPINLOCK(djprobe_lock);
>    +/* Instruction pages for djprobe's stub code */
>    +static struct kprobe_insn_page_list djprobe_insn_pages = {
>    +       HLIST_HEAD_INIT, 0};
>    +
>    +static  inline  void  __free_djprobe_instance(struct djprobe_instance
>    *djpi)
>    +{
>    +       list_del(&djpi->list);
>    +       if (djpi->kp.addr) unregister_kprobe(&(djpi->kp));
>    +       __free_insn_slot(&djprobe_insn_pages, djpi->stub.insn);
>    +       kfree(djpi);
>    +}
>    +
>    +static inline struct djprobe_instance *
>    +       __get_djprobe_instance(void *addr, int size)
>    +{
>    +       struct djprobe_instance *djpi;
>    +       list_for_each_entry(djpi, &djprobe_list, list) {

Use good hashing algorithms here instead of plain linked lists.

>    +                     if    (((long)addr   <   (long)djpi->kp.addr   +
>    DJPI_ARCH_SIZE(djpi))
>    +                   && ((long)djpi->kp.addr < (long)addr + size)) {
>    +                       return djpi;
>    +               }
>    +       }
>    +       return NULL;
>    +}
>    +
>    +static struct djprobe_instance *
>    +       __kprobes get_djprobe_instance(void *addr, int size)
>    +{
>    +       struct djprobe_instance *djpi;
>    +       spin_lock(&djprobe_lock);
>    +       djpi = __get_djprobe_instance(addr, size);
>    +       spin_unlock(&djprobe_lock);
>    +       return djpi;
>    +}
>    +
>    +#ifdef CONFIG_SMP
>    +static void __kprobes work_check_djprobe_instances(void *data);
>    +static DEFINE_PER_CPU(struct work_struct, djprobe_check_works);
>    +
>    +static inline void init_djprobe(void)
>    +{
>    +       int cpu;
>    +       struct work_struct *wk;
>    +       djprobe_insn_pages.insn_size = ARCH_STUB_SIZE;
>    +       for_each_cpu(cpu) { /* we should initialize all percpu data */
>    +               wk = &per_cpu(djprobe_check_works, cpu);
>    +               INIT_WORK(wk, work_check_djprobe_instances, NULL);
>    +       }
>    +}
>    +

>    +static void __kprobes work_check_djprobe_instances(void *data)
>    +{

I guess this function gets called on each cpu :-)

>    +       struct list_head *pos;
>    +       struct djprobe_instance *djpi;
>    +
>    +       spin_lock(&djprobe_lock);
With the above lock held, you are serialzing this function, which indeed gets scheduled
on multiple cpus.

>    +       list_for_each(pos, &djprobe_list) {
Use good hashing algorithm here.

>    +                 djpi  =  container_of(pos,  struct djprobe_instance,
>    list);
>    +               if (DJPI_CHECKED(djpi))

DJPI_CHECKED() will fail if new cpu has come online or some existing cpu has gone offline.
Djprobe must work in connjuction with CONFIG_CPU_HOTPUG

>    +                       continue; /* already checked */
>    +               cpu_set(smp_processor_id(), djpi->checked_cpus);
>    +               if (DJPI_CHECKED(djpi)) {
>    +                       if (DJPI_EMPTY(djpi)) {
>    +                               pos = pos->prev; /* pos is going to be
>    freed */
>    +                               __free_djprobe_instance(djpi);
>    +                       } else {
>    +                               arch_install_djprobe_instance(djpi);
Humm..You are simply making a DJPI_CHECKED(djpi), which checks and tells that
all the cpus have seen djpi and has cpu_set(smp_processor_id(), djpi_checked_cpus).
But this does not guarantee that the other cpus are not executing the
instruction where you are modifying 5 bytes by calling arch_install_djprobe_instances(djpi).

You are trying to insert "jmp" opcode jmp address, i,e 5 bytes
in arch_install_djprobe_instance() at kp.addr with out stopping all the other
cpus from executing the instructions near that address. I am not convinced, please
clarify how are you making sure you are updating 5 bytes automically.

 
>    +                       }
>    +               }
>    +       }
>    +       spin_unlock(&djprobe_lock);
>    +}
>    +
>    +static inline void schedule_check_djprobe_instances(void)
>    +{
>    +       int cpu;
>    +       struct work_struct *wk;
>    +       cpus_clear(djpi->checked_cpus);
>    +       for_each_online_cpu(cpu) {
How do you account for new online CPUs?

>    +               wk = &per_cpu(djprobe_check_works, cpu);
>    +               if (list_empty(&wk->entry)) /* not scheduled yet */
>    +                       schedule_delayed_work_on(cpu, wk, 0);
>    +       }
>    +}
>    +
>    +#define __install_djprobe_instance(djpi) \
>    +       schedule_check_djprobe_instance(djpi)
>    +
>    +#define __uninstall_djprobe_instance(djpi) \
>    +       schedule_check_djprobe_instance(djpi)
>    +
>    +#else
>    +#define init_djprobe() \
>    +       djprobe_insn_pages.insn_size = ARCH_STUB_SIZE;
>    +
>    +#define __install_djprobe_instance(djpi) \
>    +       arch_install_djprobe_instance(djpi)
>    +
>    +#define __uninstall_djprobe_instance(djpi) \
>    +       __free_djprobe_instance(djpi)
>    +
>    +#endif /*CONFIG_SMP*/
>    +
>    +/* Use kprobe to check safety and install */
>    +static int __kprobes install_djprobe_instance(struct djprobe_instance
>    *djpi)
>    +{
>    +       int ret;
>    +       ret = register_kprobe(&(djpi->kp));
>    +       if (ret == 0) {
>    +               __install_djprobe_instance(djpi);
>    +       }
>    +       return ret;
>    +}
>    +
>    +/* Use kprobe to check safety and release */
>    +static      void      __kprobes     uninstall_djprobe_instance(struct
>    djprobe_instance *djpi)
>    +{
>    +       arch_uninstall_djprobe_instance(djpi);
>    +       __uninstall_djprobe_instance(djpi);
>    +}
>    +
>    +int __kprobes register_djprobe(struct djprobe * djp)
>    +{
>    +       struct djprobe_instance *djpi;
>    +       struct kprobe *kp;
>    +       int ret = 0, i;
>    +
>    +       if (djp == NULL || djp->addr == NULL ||
>    +           djp->size > ARCH_STUB_INSN_MAX ||
>    +           djp->size < ARCH_STUB_INSN_MIN ||
>    +           djp->inst != NULL)
>    +               return -EINVAL;
>    +
>    +       if ((ret = in_kprobes_functions((unsigned long) djp->addr)) !=
>    0)
>    +               return ret;
>    +
>    +       spin_lock(&djprobe_lock);
>    +       /* check confliction with other djprobes */
>    +       djpi = __get_djprobe_instance(djp->addr, djp->size);
>    +       if (djpi) {
>    +               if (djpi->kp.addr == djp->addr && DJPI_EMPTY(djpi)) {
>    +                       djp->inst = djpi;
>    +                       djpi->djp = djp; /*TODO: use list*/
>    +                       goto out;
>    +               } else {
>    +                       ret = -EEXIST; /* a djprobe were inserted */
>    +                       goto out;
>    +               }
>    +       }
>    +       /* check confliction with kprobes */
>    +       for ( i=0; i < djp->size; i++) {
>    +               kp = get_kprobe((void*)((long)djp->addr+i));
>    +               if (kp != NULL) {
>    +                       ret = -EEXIST; /* a kprobes were inserted */
>    +                       goto out;
>    +               }
>    +       }
This function is way too long.

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>    +       /* make a new instance */
>    +       djpi = kmalloc(sizeof(struct djprobe_instance),GFP_KERNEL);
>    +       if (djpi == NULL) {
>    +               ret = -ENOMEM; /* memory allocation error */
>    +               goto out;
>    +       }
>    +        memset(djpi,  0,  sizeof(struct  djprobe_instance));  /*  for
>    kprobe */
>    +       /* attach */
>    +       djp->inst = djpi;
>    +       djpi->djp = djp; /*TODO: use list*/
>    +       djpi->kp.addr = djp->addr;
>    +       INIT_LIST_HEAD(&djpi->list);
>    +       list_add(&djpi->list, &djprobe_list);

+++++++ Move the above code into djprobe_make_new_instance() function

>    +
>    +       /* prepare stub */
>    +       djpi->stub.insn = __get_insn_slot(&djprobe_insn_pages);
>    +       if (djpi->stub.insn == NULL) {
>    +               kfree(djpi);
You are freeing djpi without removing from the list which you have inserted above.

>    +               ret = -ENOMEM; /* memory allocation error */
>    +               goto out;
>    +       }
>    +       djpi->kp.pre_handler = djprobe_bypass_handler;
>    +         arch_prepare_djprobe_instance(djpi,   djp->size);  /*TODO  :
>    remove size*/
>    +
>    +       ret = install_djprobe_instance(djpi);
>    +       if (ret < 0) { /* failed to install */
>    +               djp->inst = NULL;
>    +               djpi->kp.addr = NULL;
>    +               __free_djprobe_instance(djpi);
>    +       }
>    +out:
>    +       spin_unlock(&djprobe_lock);
>    +       return ret;
>    +}
>    +
>    +void __kprobes unregister_djprobe(struct djprobe * djp)
>    +{
>    +       struct djprobe_instance *djpi;
>    +       if (djp == NULL || djp->inst == NULL)
>    +               return ;
>    +
>    +       djpi = djp->inst;
>    +       spin_lock(&djprobe_lock);
>    +       djp->inst = NULL;
>    +       djpi->djp = NULL; /*TODO: use list*/
>    +       if (DJPI_EMPTY(djpi)) {
djpi->djp is set to NULL and why are you checking DJPI_EMPTY() again?

>    +               uninstall_djprobe_instance(djpi);
>    +       }
>    +       spin_unlock(&djprobe_lock);
>    +}
>    +
>    +#else /* ARCH_SUPPORTS_DJPROBES */
>    +int __kprobes register_djprobe(struct djprobe *p)
>    +{
>    +       if (p!=NULL) {
>    +               p->kp.addr = p->addr;
>    +               p->kp.pre_handler = (kprobe_pre_handler_t)p->handler;
>    +               return register_kprobe(&p->kp);
>    +       }
>    +       return -EINVAL;
>    +}
>    +
>    +void __kprobes unregister_djprobe(struct djprobe *p)
>    +{
>    +       unregister_kprobe(&p->kp);
>    +}
>    +#endif /* ARCH_SUPPORTS_DJPROBES */
>    +
>     static int __init init_kprobes(void)
>     {
>            int i, err = 0;
>    @@ -608,6 +850,9 @@ static int __init init_kprobes(void)
>                    INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
>            }
> 
>    +#if defined(ARCH_SUPPORTS_DJPROBES)
>    +       init_djprobe();
>    +#endif
>            err = arch_init_kprobes();
>            if (!err)
>                    err = register_die_notifier(&kprobe_exceptions_nb);
>    @@ -624,4 +869,6 @@ EXPORT_SYMBOL_GPL(unregister_jprobe);
>     EXPORT_SYMBOL_GPL(jprobe_return);
>     EXPORT_SYMBOL_GPL(register_kretprobe);
>     EXPORT_SYMBOL_GPL(unregister_kretprobe);
>    +EXPORT_SYMBOL_GPL(register_djprobe);
>    +EXPORT_SYMBOL_GPL(unregister_djprobe);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]