This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
[PATCH] PR13475: Fix ARM SDT_V3 operand parsing
- From: Wade Farnsworth <wade_farnsworth at mentor dot com>
- To: <systemtap at sourceware dot org>
- Cc: Mark Wielaard <mjw at redhat dot com>
- Date: Thu, 8 Mar 2012 07:53:20 -0700
- Subject: [PATCH] PR13475: Fix ARM SDT_V3 operand parsing
* Include regular expressions to parse ARM operands
* Add ARM register data
* Allow for whitespace in ARM operands containing []'s
Signed-off-by: Wade Farnsworth <wade_farnsworth@mentor.com>
---
tapsets.cxx | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 files changed, 116 insertions(+), 19 deletions(-)
diff --git a/tapsets.cxx b/tapsets.cxx
index 716e23c..4215166 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -5028,10 +5028,51 @@ dwarf_derived_probe_group::emit_module_exit (systemtap_session& s)
s.op->newline(-1) << "}";
}
+/* ARM tokens may contain space chars, e.g., [REG, OFFSET]
+ *
+ * Split STR on ' ' characters and append the resulting operands to TOKENS
+ * (TOKENS is not cleared first).  Unlike a plain split, pieces that belong
+ * to one bracketed operand are glued back together:
+ *   - a piece containing '[' and a later ']' is appended unchanged;
+ *   - a piece containing '[' with no later ']' opens a bracketed operand
+ *     and is held in a temporary instead of being appended;
+ *   - while an operand is open, pieces are joined onto it with one ' ';
+ *   - a piece containing ']' (and no '[') closes the operand, which is
+ *     then appended as a single token.
+ * Runs of spaces inside the brackets therefore collapse to a single space.
+ *
+ * NOTE(review): if a '[' is never closed, the held text is never appended,
+ * so those operands are silently dropped.  A ']' seen with no open '['
+ * appends " " + piece, i.e. a token with a leading space.  A second '['
+ * while an operand is already open overwrites the held text.
+ */
+void arm_tokenize(const string& str, vector<string>& tokens)
+{
+ // Start of the first piece: skip any leading ' ' delimiters.
+ string::size_type lastPos = str.find_first_not_of(" ", 0);
+ // End of the first piece: the next ' ' delimiter (npos if there is none).
+ string::size_type pos = str.find_first_of(" ", lastPos);
+ string::size_type cpos; // index of '[' within the current piece
+ string temp, sub, tok; // temp: open bracketed operand; sub: current piece; NOTE(review): tok is never used
+ bool push = true; // false while inside an unclosed '['
+
+ while (pos != string::npos || lastPos != string::npos) // the last piece has pos == npos but a valid lastPos
+ {
+ sub = str.substr(lastPos, pos - lastPos);
+ if ((cpos = sub.find('[')) != string::npos)
+ {
+ if (sub.find(']', cpos) != string::npos) // '[' and ']' in the same piece: already complete
+ tokens.push_back(sub);
+ else
+ {
+ push = false; // operand continues in later pieces
+ temp = sub;
+ }
+ }
+ else if (sub.find(']') != string::npos) // closing piece of a bracketed operand
+ {
+ push = true;
+ tokens.push_back(temp + " " + sub);
+ }
+ else if (push) // ordinary piece outside brackets
+ tokens.push_back(sub);
+ else // middle piece of a bracketed operand
+ temp = temp + " " + sub;
+
+ // Start of the next piece: skip the run of ' ' delimiters (note the "not_of").
+ lastPos = str.find_first_not_of(" ", pos);
+ // End of the next piece: the following ' ' delimiter, or npos at end of string.
+ pos = str.find_first_of(" ", lastPos);
+ }
+}
struct sdt_kprobe_var_expanding_visitor: public var_expanding_visitor
{
- sdt_kprobe_var_expanding_visitor(const string & process_name,
+ sdt_kprobe_var_expanding_visitor(int elf_machine,
+ const string & process_name,
const string & provider_name,
const string & probe_name,
const string & arg_string,
@@ -5039,7 +5080,10 @@ struct sdt_kprobe_var_expanding_visitor: public var_expanding_visitor
process_name (process_name), provider_name (provider_name), probe_name (probe_name),
arg_count (arg_count)
{
- tokenize(arg_string, arg_tokens, " ");
+ if (elf_machine == EM_ARM)
+ arm_tokenize(arg_string, arg_tokens);
+ else
+ tokenize(arg_string, arg_tokens, " ");
assert(arg_count <= 10);
}
const string & process_name;
@@ -5200,13 +5244,34 @@ struct sdt_uprobe_var_expanding_visitor: public var_expanding_visitor
DRI ("%r13", 13, DI);
DRI ("%r14", 14, DI);
DRI ("%r15", 15, DI);
+ } else if (elf_machine == EM_ARM) {
+ DRI ("r0", 0, SI);
+ DRI ("r1", 1, SI);
+ DRI ("r2", 2, SI);
+ DRI ("r3", 3, SI);
+ DRI ("r4", 4, SI);
+ DRI ("r5", 5, SI);
+ DRI ("r6", 6, SI);
+ DRI ("r7", 7, SI);
+ DRI ("r8", 8, SI);
+ DRI ("r9", 9, SI);
+ DRI ("sl", 10, SI);
+ DRI ("fp", 11, SI);
+ DRI ("ip", 12, SI);
+ DRI ("sp", 13, SI);
+ DRI ("lr", 14, SI);
+ DRI ("pc", 15, SI);
} else if (arg_count) {
/* permit this case; just fall back to dwarf */
}
#undef DRI
need_debug_info = false;
- tokenize(arg_string, arg_tokens, " ");
+ if (elf_machine == EM_ARM)
+ arm_tokenize(arg_string, arg_tokens);
+ else
+ tokenize(arg_string, arg_tokens, " ");
+
if (probe_type == uprobe3_type)
assert(arg_count <= 12);
else
@@ -5371,7 +5436,7 @@ sdt_uprobe_var_expanding_visitor::visit_target_symbol_arg (target_symbol *e)
// anyway. With -mregnames, we could, if gcc somehow
// communicated to us the presence of that option, but alas it
// doesn't. http://gcc.gnu.org/PR44995.
- rc = regexp_match (asmarg, "^[i\\$][-]?[0-9][0-9]*$", matches);
+ rc = regexp_match (asmarg, "^[i\\$#][-]?[0-9][0-9]*$", matches);
if (! rc)
{
string sn = matches[0].substr(1);
@@ -5420,11 +5485,12 @@ sdt_uprobe_var_expanding_visitor::visit_target_symbol_arg (target_symbol *e)
}
// clip off leading |
regnames = regnames.substr(1);
- percent_regnames = percent_regnames.substr(1);
+ if (percent_regnames != "")
+ percent_regnames = percent_regnames.substr(1);
// test for REGISTER
// NB: Because PR11821, we must use percent_regnames here.
- if (elf_machine == EM_PPC || elf_machine == EM_PPC64)
+ if (elf_machine == EM_PPC || elf_machine == EM_PPC64 || elf_machine == EM_ARM)
rc = regexp_match (asmarg, string("^(")+regnames+string(")$"), matches);
else
rc = regexp_match (asmarg, string("^(")+percent_regnames+string(")$"), matches);
@@ -5473,22 +5539,34 @@ sdt_uprobe_var_expanding_visitor::visit_target_symbol_arg (target_symbol *e)
// invalid register name, fall through
}
+ int reg, offset1;
// test for OFFSET(REGISTER) where OFFSET is +-N+-N+-N
// NB: Despite PR11821, we can use regnames here, since the parentheses
// make things unambiguous. (Note: gdb/stap-probe.c also parses this)
- rc = regexp_match (asmarg, string("^([+-]?[0-9]*)([+-][0-9]*)?([+-][0-9]*)?[(](")+regnames+string(")[)]$"), matches);
+ // On ARM test for [REGISTER, OFFSET]
+ if (elf_machine == EM_ARM)
+ {
+ rc = regexp_match (asmarg, string("^\\[(")+regnames+string("), #([+-]?[0-9]+)([+-][0-9]*)?([+-][0-9]*)?\\]$"), matches);
+ reg = 1;
+ offset1 = 2;
+ }
+ else
+ {
+ rc = regexp_match (asmarg, string("^([+-]?[0-9]*)([+-][0-9]*)?([+-][0-9]*)?[(](")+regnames+string(")[)]$"), matches);
+ reg = 4;
+ offset1 = 1;
+ }
if (! rc)
{
string regname;
int64_t disp = 0;
-
- if (matches[4].length())
- regname = matches[4];
+ if (matches[reg].length())
+ regname = matches[reg];
if (dwarf_regs.find (regname) == dwarf_regs.end())
goto not_matched;
- for (int i=1; i <= 3; i++)
+ for (int i=offset1; i <= (offset1 + 2); i++)
if (matches[i].length())
try
{
@@ -5995,11 +6073,19 @@ sdt_query::handle_probe_entry()
Dwarf_Addr bias;
Elf* elf = dwfl_module_getelf (dw.mod_info->mod, &bias);
+ /* Figure out the architecture of this particular ELF file.
+ The dwarfless register-name mappings depend on it. */
+ GElf_Ehdr ehdr_mem;
+ GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
+ if (em == 0) { dwfl_assert ("dwfl_getehdr", dwfl_errno()); }
+ int elf_machine = em->e_machine;
+
if (have_kprobe())
{
convert_probe(new_base);
// Expand the local variables in the probe body
- sdt_kprobe_var_expanding_visitor svv (module_val,
+ sdt_kprobe_var_expanding_visitor svv (elf_machine,
+ module_val,
provider_name,
probe_name,
arg_string,
@@ -6008,12 +6094,6 @@ sdt_query::handle_probe_entry()
}
else
{
- /* Figure out the architecture of this particular ELF file.
- The dwarfless register-name mappings depend on it. */
- GElf_Ehdr ehdr_mem;
- GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
- if (em == 0) { dwfl_assert ("dwfl_getehdr", dwfl_errno()); }
- int elf_machine = em->e_machine;
sdt_uprobe_var_expanding_visitor svv (sess, elf_machine,
module_val,
provider_name,
@@ -6191,10 +6271,27 @@ sdt_query::setup_note_probe_entry (int type, const char *data, size_t len)
return;
arg_string = args;
+
+ /* ARM args may contain space chars, e.g. [REG, OFFSET] */
+ GElf_Ehdr ehdr_mem;
+ GElf_Ehdr* em = gelf_getehdr (elf, &ehdr_mem);
+ if (em == 0) { dwfl_assert ("dwfl_getehdr", dwfl_errno()); }
+ int elf_machine = em->e_machine;
+
+ bool incr_count = true;
arg_count = 0;
for (unsigned i = 0; i < arg_string.length(); i++)
- if (arg_string[i] == ' ')
+ {
+ if (elf_machine == EM_ARM)
+ {
+ if (arg_string[i] == '[')
+ incr_count = false;
+ else if (arg_string[i] == ']')
+ incr_count = true;
+ }
+ if (incr_count && arg_string[i] == ' ')
arg_count += 1;
+ }
if (arg_string.length() != 0)
arg_count += 1;
--
1.7.0.4