This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PR6580; patch for review] first step of unwind_cache enhancement


The first incremental step to the runtime tweaks required to sanely implement PR6580. Namely, the kernel-side backtrace can be unwound one step at a time using _stp_stack_kernel_get, with the already unwound portion of the backtrace cached within the probe context.

This allows us to implement stack(n) (which returns the PC n levels deep within the stack) without having to resort to tokenizing a backtrace string each time.

The patch itself seems to be sound, but testing it drew attention to memory corruption errors in the existing DWARF unwinder (see PR14546).
diff --git a/runtime/common_probe_context.h b/runtime/common_probe_context.h
index df07b90..61c1b26 100644
--- a/runtime/common_probe_context.h
+++ b/runtime/common_probe_context.h
@@ -118,5 +118,12 @@ cycles_t cycles_sum;
 
 /* Current state of the unwinder (as used in the unwind.c dwarf unwinder). */
 #if defined(STP_NEED_UNWIND_DATA)
-struct unwind_context uwcontext;
+// Invariants (uwcontext currently refers to uwcontext_kernel):
+//   uwcache.depth == 0 --> uwcontext needs to be initialized
+//   uwcache.depth > 0  --> uwcontext is state after uwcache.depth unwind steps
+//   uwcache.depth >= n --> uwcache.data[n] contains PC at depth n
+//     EXCEPT WHEN n == 0, uwcache.data[n] == 0 (current PC not yet retrieved)
+struct unwind_cache uwcache;
+struct unwind_context uwcontext_user;
+struct unwind_context uwcontext_kernel;
 #endif
diff --git a/runtime/stack-dwarf.c b/runtime/stack-dwarf.c
index 9c55997..b00ac1d 100644
--- a/runtime/stack-dwarf.c
+++ b/runtime/stack-dwarf.c
@@ -35,6 +35,55 @@ static int _stp_valid_pc_addr(unsigned long addr, struct task_struct *tsk)
 #endif
 }
 
+/* NB: Returns 0 for depth == 0 -- DWARF unwinder does not fetch
+ * current PC.  Moreover, uwcontext is assumed to have been
+ * initialized by the caller. */
+unsigned long __stp_dwarf_stack_kernel_get(struct pt_regs *regs, int depth,
+                                           struct unwind_context *uwcontext,
+                                           struct unwind_cache *uwcache)
+{
+        struct unwind_frame_info *info = &uwcontext->info;
+        int ret;
+  
+        /* check if answer is already defined in cache; this is probably
+         * redundant, being already handled for us by the caller */
+        if (unlikely(uwcache->depth >= depth))
+                return uwcache->data[depth];
+
+        /* check if unwind cannot proceed further */
+        if (uwcache->done)
+                return 0;
+
+        if (uwcache->depth == 0) /* need to initialize uwcontext->info */
+                arch_unw_init_frame_info(info, regs, 0);
+
+        dbug_unwind(1, "RESUMING from %d for %d more levels",
+                    uwcache->depth, depth - uwcache->depth);
+
+        while (uwcache->depth < depth) {
+                ret = unwind(uwcontext, 0);
+                dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(info), UNW_SP(info));
+
+                /* check if unwind hit an error */
+                if (ret || ! _stp_valid_pc_addr(UNW_PC(info), NULL)) {
+                        uwcache->done = 1; break;
+                }
+
+                uwcache->depth++;
+                uwcache->data[uwcache->depth] = UNW_PC(info);
+
+                if (UNW_PC(info) == _stp_kretprobe_trampoline) {
+                        uwcache->done = 1; break;
+                        /* XXX: if there a way to unwind across kretprobe trampolines? PR9999 */
+                }
+        }
+
+        /* unwind may not have reached the desired depth */
+        return uwcache->depth >= depth ? uwcache->data[depth] : 0;    
+}
+
+
+// XXX: rewrite in terms of _get
 static void __stp_dwarf_stack_kernel_print(struct pt_regs *regs, int verbose,
 			      int levels, struct unwind_context *uwcontext)
 {
diff --git a/runtime/stack.c b/runtime/stack.c
index 413ef1e..1834298 100644
--- a/runtime/stack.c
+++ b/runtime/stack.c
@@ -165,7 +165,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
       else if (c->uregs != NULL && c->kregs != NULL
 	       && ! (c->probe_flags & _STP_PROBE_STATE_USER_MODE))
 	{
-	  struct unwind_frame_info *info = &c->uwcontext.info;
+	  struct unwind_frame_info *info = &c->uwcontext_user.info;
 	  int ret = 0;
 	  int levels;
 
@@ -189,7 +189,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
 	  while (levels > 0 && ret == 0 && UNW_PC(info) != REG_IP(c->uregs))
 	    {
 	      levels--;
-	      ret = unwind(&c->uwcontext, 0);
+	      ret = unwind(&c->uwcontext_user, 0);
 	      dbug_unwind(1, "unwind levels: %d, ret: %d, pc=0x%lx\n",
 			  levels, ret, UNW_PC(info));
 	    }
@@ -213,6 +213,66 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
   return c->uregs;
 }
 
+/** Returns the PC at given depth in the stack.
+ * @param depth Depth to which to descend in the stack. Depth 0 gives the current PC.
+ *
+ * Unwinds the backtrace at most once and uses a cache for subsequent
+ * queries.  Returns 0 on error, or beyond the end of the visible
+ * stack.
+ */
+static unsigned long _stp_stack_kernel_get(struct context *c, unsigned depth)
+{
+        /* TODOXXX add appropriate dbug_unwind messages */
+        struct pt_regs *regs = NULL;
+
+        if (depth >= MAXBACKTRACE)
+                return 0; /* special case which cannot query the cache */
+
+        /* We always start by fetching the current PC if it's not yet known. */
+        if (c->uwcache.depth == 0 && c->uwcache.data[0] == 0) {
+                if (! c->kregs) {
+                        /* Even the current PC is unknown; so we have absolutely no data
+                         * at any depth. Note that unlike _stp_stack_kernel_print(), we
+                         * can't fall back to calling dump_trace() to obtain the
+                         * backtrace. */
+                        c->uwcache.done = 1; return 0;
+                } else if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE
+                           && c->ips.krp.pi) {
+                        c->uwcache.data[0] = (unsigned long)_stp_ret_addr_r(c->ips.krp.pi);
+                } else {
+                        c->uwcache.data[0] = REG_IP(c->kregs);
+                }
+        }
+
+        /* check if answer is already defined in cache */
+        if (c->uwcache.depth >= depth)
+                return c->uwcache.data[depth];
+
+        /* check if unwind cannot proceed further */
+        if (c->uwcache.done)
+                return 0;
+
+#ifdef STP_USE_DWARF_UNWINDER
+        if (unlikely (! c->kregs)) { /* XXX a bit of extra paranoia */
+                c->uwcache.done = 1; return 0;
+        }
+        regs = c->kregs;
+
+        if (c->uwcache.depth == 0) { /* need to initialize uwcontext */
+                if (c->uwcache.depth == 0 && c->uregs == &c->uwcontext_kernel.info.regs) {
+                        /* Unwinder needs the reg state, clear uregs ref. */
+                        c->uregs = NULL;
+                        c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
+                }
+        }
+        return __stp_dwarf_stack_kernel_get(regs, depth, &c->uwcontext_kernel, &c->uwcache);
+#else
+        /* don't use arch specific fallbacks */
+        c->uwcache.done = 1;
+        return 0;
+#endif
+}
+
 /** Prints the stack backtrace
  * @param regs A pointer to the struct pt_regs.
  * @param verbose _STP_SYM_FULL or _STP_SYM_BRIEF
@@ -220,10 +280,14 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
 
 static void _stp_stack_kernel_print(struct context *c, int sym_flags)
 {
-	struct pt_regs *regs = NULL;
+        unsigned n, remaining;
+        unsigned long l;
+
+        if (! c->kregs) {
+                /* This is a fatal block for _stp_stack_kernel_get,
+                   but when printing a backtrace we can use this
+                   inexact fallback.
 
-	if (! c->kregs) {
-		/* For the kernel we can use an inexact fallback.
 		   When compiled with frame pointers we can do
 		   a pretty good guess at the stack value,
 		   otherwise let dump_stack guess it
@@ -248,36 +312,33 @@ static void _stp_stack_kernel_print(struct context *c, int sym_flags)
 			_stp_print("\n");
 #endif
 		return;
-	} else {
-		regs = c->kregs;
-	}
+        }
+
+        /* print the current address */
+        if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi
+            && (sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
+                _stp_print("Returning from: ");
+                _stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
+                                sym_flags, NULL);
+                _stp_print("Returning to  : ");
+        }
+        _stp_print_addr(_stp_stack_kernel_get(c, 0), sym_flags, NULL);
 
-	/* print the current address */
-	if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi) {
-		if ((sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
-			_stp_print("Returning from: ");
-			_stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
-					sym_flags, NULL);
-			_stp_print("Returning to  : ");
-		}
-		_stp_print_addr((unsigned long)_stp_ret_addr_r(c->ips.krp.pi),
-				sym_flags, NULL);
-	} else {
-		_stp_print_addr(REG_IP(regs), sym_flags, NULL);
-	}
-
-	/* print rest of stack... */
 #ifdef STP_USE_DWARF_UNWINDER
-	if (c->uregs == &c->uwcontext.info.regs) {
-		/* Unwinder needs the reg state, clear uregs ref. */
-		c->uregs = NULL;
-		c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
-	}
-	__stp_dwarf_stack_kernel_print(regs, sym_flags, MAXBACKTRACE,
-				       &c->uwcontext);
+        for (n = 1; n < MAXBACKTRACE; n++) {
+                l = _stp_stack_kernel_get(c, n);
+                if (l == 0) {
+                        remaining = MAXBACKTRACE - n;
+                        _stp_stack_print_fallback(UNW_SP(&c->uwcontext_kernel.info),
+                                                  sym_flags, remaining, 0);
+                        break;
+                } else {
+                        _stp_print_addr(l, sym_flags, NULL);
+                }
+        }
 #else
 	/* Arch specific fallback for kernel backtraces. */
-	__stp_stack_print(regs, sym_flags, MAXBACKTRACE);
+	__stp_stack_print(regs, sym_flags, MAXBACKTRACE);       
 #endif
 }
 
@@ -326,13 +387,13 @@ static void _stp_stack_user_print(struct context *c, int sym_flags)
 
 	/* print rest of stack... */
 #ifdef STP_USE_DWARF_UNWINDER
-	if (c->uregs == &c->uwcontext.info.regs) {
+	if (c->uregs == &c->uwcontext_user.info.regs) {
 		/* Unwinder needs the reg state, clear uregs ref. */
 		c->uregs = NULL;
 		c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
 	}
 	__stp_dwarf_stack_user_print(regs, sym_flags, MAXBACKTRACE,
-				     &c->uwcontext, ri, uregs_valid);
+				     &c->uwcontext_user, ri, uregs_valid);
 #else
 	/* User stack traces only supported for arches with dwarf unwinder. */
 	if (sym_flags & _STP_SYM_SYMBOL)
diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h
index a11dc48..2d367cb 100644
--- a/runtime/unwind/unwind.h
+++ b/runtime/unwind/unwind.h
@@ -336,4 +336,15 @@ struct unwind_context {
 
 static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
 
+#ifndef MAXBACKTRACE
+#define MAXBACKTRACE 20
+#endif
+
+struct unwind_cache {
+  // XXX use more compact representation?
+  unsigned depth;
+  unsigned done; // if nonzero, unwind cannot proceed
+  unsigned long data[MAXBACKTRACE];
+};
+
 #endif /*_STP_UNWIND_H_*/
diff --git a/tapset/linux/context-symbols.stp b/tapset/linux/context-symbols.stp
index 78a0ae3..d488c5f 100644
--- a/tapset/linux/context-symbols.stp
+++ b/tapset/linux/context-symbols.stp
@@ -12,13 +12,19 @@
 //processor.
 // </tapsetdescription>
 
+function __stack_raw (n:long) %{ /* pragma:unwind */
+         STAP_RETVALUE = _stp_stack_kernel_get (CONTEXT, (unsigned)STAP_ARG_n);
+%}
 
 /**
  *  sfunction stack - return PC at stack unwind level n
  *  @n: number of levels to descend in the stack.
  */
 function stack:long (n:long) {
-         /* XXX this is the naive method */
+         r = __stack_raw(n);
+         if (r != 0) return r
+
+         /* fallback: parse backtrace() to go deeper in the stack */
          b = backtrace (); orig_n = n;
          sym = tokenize (b, " ");
          if (sym == "") @__context_unwind_error(orig_n);
diff --git a/tapsets.cxx b/tapsets.cxx
index 5070485..196c87d 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -183,6 +183,12 @@ common_probe_entryfn_prologue (translator_output* o, string statestr,
   o->newline() << "c->cycles_base = 0;";
   o->newline() << "#endif";
   */
+
+  o->newline() << "#if defined(STP_NEED_UNWIND_DATA)";
+  o->newline() << "c->uwcache.depth = 0;";
+  o->newline() << "c->uwcache.done = 0;";
+  o->newline() << "c->uwcache.data[0] = 0;";
+  o->newline() << "#endif";
 }
 
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]