This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] speed up grouping subexpressions by ~30%


This patch includes several speedups that total to a 30% speedup of my new favorite test s|\(.*\)/\(.*\)|\1 \2|. On a 48 MB test file, my results are (user times):

without patch    158.77 seconds
with patch       109.14 seconds (-31.26%)

There is also a measurable speedup in sed's `make check'. These figures however include the time spent in sh, cmp etc.:

without patch    39.12 seconds
with patch       36.42 seconds (-7.23%)

The speedups are obtained by six smaller changes. Items 1-5 are worth 1-2% each (6.4% overall), while item 6 alone is responsible for the remaining 25% performance improvement. All changes were timed separately (on top of each other).

1) Doing expensive checks in check_node_accept after trivial rejections. (saves over 50% of the time spent in check_node_accept).

2) Simplifying some hairy code in proceed_next_node (saves 25% of the time spent in proceed_next_node).

3) Marking several functions as pure.

4) Using unsigned iteration variables in re_node_set_contains to make division by 2 less expensive (this is because of C's rounding rules; yes, it is worth 15% of the time spent in re_node_set_contains).

5) Caching in each DFA state the non-epsilon nodes, and avoiding skips of epsilon nodes in build_sifted_states and check_arrival, again worth over 30% of the time spent in build_sifted_states (the other occurrence of this optimization is not high enough in the profile to be measurable).

6) Caching the overall inveclosure of several nodes: the top function in the profile is re_node_set_add_intersect, which is called from add_epsilon_src_nodes's loop. If however we do the merges first and a single intersection later, we can kick it several places back: for this to be profitable we store these merges into a hash table, which I did using the same functions and table that is used to store DFA states. The initialization is done lazily.

Tested with the glibc and sed testsuites, respectively on powerpc-linux and powerpc-darwin.

The patch is on top of patch 1 from my previous submissions (i.e. speeding up OP_PERIOD in single-byte character sets) and on bug #501's attached patch. It should however apply on CVS glibc cleanly except for offsets.

Paolo
2004-12-07  Paolo Bonzini  <bonzini@gnu.org>

	* regexec.c (proceed_next_node): Simplify treatment of epsilon
	nodes.  Pass the pushed node to push_fail_stack.
	(push_fail_stack): Accept a single node rather than an array
	of two epsilon destinations.
	(build_sifted_states): Only walk non-epsilon nodes.
	(check_arrival): Don't pass epsilon nodes to
	check_arrival_add_next_nodes.
	(check_arrival_add_next_nodes) [DEBUG]: Abort if an epsilon node is
	found.
	(check_node_accept): Do expensive checks later.
	(add_epsilon_src_nodes): Cache result of merging the inveclosures.
	* regex-internal.h (re_dfastate_t): Add non_eps_nodes and inveclosure.
	(re_string_elem_size_at, re_string_char_size_at, re_string_wchar_at,
	re_string_context_at, re_string_peek_byte_case,
	re_string_fetch_byte_case, re_node_set_compare, re_node_set_contains):
	Declare as pure.
	* regex-internal.c (create_newstate_common): Remove.
	(register_state): Move part of it here.  Initialize non_eps_nodes.
	(free_state): Free inveclosure and non_eps_nodes.
	(create_cd_newstate, create_ci_newstate): Allocate the new
	re_dfastate_t here.

--- orig/lib/regex_internal.c
+++ mod/lib/regex_internal.c
@@ -26,9 +26,6 @@ static void re_string_construct_common (
 static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
 				 wint_t *last_wc) internal_function;
 #endif /* RE_ENABLE_I18N */
-static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
-					      const re_node_set *nodes,
-					      unsigned int hash) internal_function;
 static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
 				     unsigned int hash) internal_function;
 static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
@@ -1298,7 +1295,7 @@ re_node_set_contains (set, elem)
      const re_node_set *set;
      int elem;
 {
-  int idx, right, mid;
+  unsigned int idx, right, mid;
   if (set->nelem <= 0)
     return 0;
 
@@ -1489,43 +1486,32 @@ re_acquire_state_context (err, dfa, node
     }
 }
 
-/* Allocate memory for DFA state and initialize common properties.
-   Return the new state if succeeded, otherwise return NULL.  */
+/* Finish initialization of the new state NEWSTATE, and using its hash value
+   HASH put in the appropriate bucket of DFA's state table.  Return value
+   indicates the error code if failed.  */
 
-static re_dfastate_t *
-create_newstate_common (dfa, nodes, hash)
+static reg_errcode_t
+register_state (dfa, newstate, hash)
      re_dfa_t *dfa;
-     const re_node_set *nodes;
+     re_dfastate_t *newstate;
      unsigned int hash;
 {
-  re_dfastate_t *newstate;
+  struct re_state_table_entry *spot;
   reg_errcode_t err;
-  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
-  if (BE (newstate == NULL, 0))
-    return NULL;
-  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  int i;
+
+  newstate->hash = hash;
+  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
   if (BE (err != REG_NOERROR, 0))
+    return REG_ESPACE;
+  for (i = 0; i < newstate->nodes.nelem; i++)
     {
-      re_free (newstate);
-      return NULL;
+      int elem = newstate->nodes.elems[i];
+      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
+        re_node_set_insert_last (&newstate->non_eps_nodes, elem);
     }
-  newstate->trtable = NULL;
-  newstate->hash = hash;
-  return newstate;
-}
 
-/* Store the new state NEWSTATE whose hash value is HASH in appropriate
-   position.  Return value indicate the error code if failed.  */
-
-static reg_errcode_t
-register_state (dfa, newstate, hash)
-     re_dfa_t *dfa;
-     re_dfastate_t *newstate;
-     unsigned int hash;
-{
-  struct re_state_table_entry *spot;
   spot = dfa->state_table + (hash & dfa->state_hash_mask);
-
   if (BE (spot->alloc <= spot->num, 0))
     {
       int new_alloc = 2 * spot->num + 2;
@@ -1552,11 +1538,18 @@ create_ci_newstate (dfa, nodes, hash)
   int i;
   reg_errcode_t err;
   re_dfastate_t *newstate;
-  newstate = create_newstate_common (dfa, nodes, hash);
+
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
   if (BE (newstate == NULL, 0))
     return NULL;
-  newstate->entrance_nodes = &newstate->nodes;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
 
+  newstate->entrance_nodes = &newstate->nodes;
   for (i = 0 ; i < nodes->nelem ; i++)
     {
       re_token_t *node = dfa->nodes + nodes->elems[i];
@@ -1597,9 +1590,16 @@ create_cd_newstate (dfa, nodes, context,
   reg_errcode_t err;
   re_dfastate_t *newstate;
 
-  newstate = create_newstate_common (dfa, nodes, hash);
+  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
   if (BE (newstate == NULL, 0))
     return NULL;
+  err = re_node_set_init_copy (&newstate->nodes, nodes);
+  if (BE (err != REG_NOERROR, 0))
+    {
+      re_free (newstate);
+      return NULL;
+    }
+
   newstate->context = context;
   newstate->entrance_nodes = &newstate->nodes;
 
@@ -1660,6 +1660,8 @@ static void
 free_state (state)
      re_dfastate_t *state;
 {
+  re_node_set_free (&state->non_eps_nodes);
+  re_node_set_free (&state->inveclosure);
   if (state->entrance_nodes != &state->nodes)
     {
       re_node_set_free (state->entrance_nodes);


--- orig/lib/regex_internal.h
+++ mod/lib/regex_internal.h
@@ -387,18 +387,20 @@ static void re_string_translate_buffer (
 static void re_string_destruct (re_string_t *pstr) internal_function;
 # ifdef RE_ENABLE_I18N
 static int re_string_elem_size_at (const re_string_t *pstr, int idx)
-     internal_function;
+     internal_function __attribute ((pure));
 static inline int re_string_char_size_at (const re_string_t *pstr, int idx)
-     internal_function;
+     internal_function __attribute ((pure));
 static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx)
-     internal_function;
+     internal_function __attribute ((pure));
 # endif /* RE_ENABLE_I18N */
 static unsigned int re_string_context_at (const re_string_t *input, int idx,
-					  int eflags) internal_function;
+					  int eflags)
+     internal_function __attribute ((pure));
 static unsigned char re_string_peek_byte_case (const re_string_t *pstr,
-					       int idx) internal_function;
+					       int idx)
+     internal_function __attribute ((pure));
 static unsigned char re_string_fetch_byte_case (re_string_t *pstr)
-     internal_function;
+     internal_function __attribute ((pure));
 #endif
 #define re_string_peek_byte(pstr, offset) \
   ((pstr)->mbs[(pstr)->cur_idx + offset])
@@ -480,6 +482,8 @@ struct re_dfastate_t
 {
   unsigned int hash;
   re_node_set nodes;
+  re_node_set non_eps_nodes;
+  re_node_set inveclosure;
   re_node_set *entrance_nodes;
   struct re_dfastate_t **trtable, **word_trtable;
   unsigned int context : 4;
@@ -663,8 +667,10 @@ static int re_node_set_insert (re_node_s
 static int re_node_set_insert_last (re_node_set *set,
 				    int elem) internal_function;
 static int re_node_set_compare (const re_node_set *set1,
-				const re_node_set *set2) internal_function;
-static int re_node_set_contains (const re_node_set *set, int elem) internal_function;
+				const re_node_set *set2)
+     internal_function __attribute ((pure));
+static int re_node_set_contains (const re_node_set *set, int elem)
+     internal_function __attribute ((pure));
 static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
 #define re_node_set_remove(set,id) \
   (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))


--- orig/lib/regexec.c
+++ mod/lib/regexec.c
@@ -73,7 +73,7 @@ static int proceed_next_node (const re_m
 			      int *pidx, int node, re_node_set *eps_via_nodes,
 			      struct re_fail_stack_t *fs) internal_function;
 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
-				      int str_idx, int *dests, int nregs,
+				      int str_idx, int dest_node, int nregs,
 				      regmatch_t *regs,
 				      re_node_set *eps_via_nodes) internal_function;
 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
@@ -1223,30 +1223,38 @@ proceed_next_node (mctx, nregs, regs, pi
   if (IS_EPSILON_NODE (dfa->nodes[node].type))
     {
       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
-      int ndest, dest_nodes[2];
+      re_node_set *edests = &dfa->edests[node];
+      int dest_node;
       err = re_node_set_insert (eps_via_nodes, node);
       if (BE (err < 0, 0))
 	return -2;
-      /* Pick up valid destinations.  */
-      for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
+      /* Pick up a valid destination, or return -1 if none is found.  */
+      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
 	{
-	  int candidate = dfa->edests[node].elems[i];
+	  int candidate = edests->elems[i];
 	  if (!re_node_set_contains (cur_nodes, candidate))
 	    continue;
-	  dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
-	  dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
-	  ++ndest;
-	}
-      if (ndest <= 1)
-	return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
-      /* In order to avoid infinite loop like "(a*)*".  */
-      if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
-	return dest_nodes[1];
-      if (fs != NULL
-	  && push_fail_stack (fs, *pidx, dest_nodes, nregs, regs,
-			      eps_via_nodes))
-	return -2;
-      return dest_nodes[0];
+          if (dest_node == -1)
+	    dest_node = candidate;
+
+          else
+	    {
+	      /* In order to avoid infinite loop like "(a*)*", return the second
+	         epsilon-transition if the first was already considered.  */
+	      if (re_node_set_contains (eps_via_nodes, dest_node))
+	        return candidate;
+
+	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
+	      else if (fs != NULL
+		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
+				           eps_via_nodes))
+		return -2;
+
+	      /* We know we are going to exit.  */
+	      break;
+	    }
+	}
+      return dest_node;
     }
   else
     {
@@ -1304,9 +1312,9 @@ proceed_next_node (mctx, nregs, regs, pi
 }
 
 static reg_errcode_t
-push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes)
+push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes)
      struct re_fail_stack_t *fs;
-     int str_idx, *dests, nregs;
+     int str_idx, dest_node, nregs;
      regmatch_t *regs;
      re_node_set *eps_via_nodes;
 {
@@ -1323,7 +1331,7 @@ push_fail_stack (fs, str_idx, dests, nre
       fs->stack = new_array;
     }
   fs->stack[num].idx = str_idx;
-  fs->stack[num].node = dests[1];
+  fs->stack[num].node = dest_node;
   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
   if (fs->stack[num].regs == NULL)
     return REG_ESPACE;
@@ -1600,7 +1608,7 @@ build_sifted_states (mctx, sctx, str_idx
      re_node_set *cur_dest;
 {
   re_dfa_t *const dfa = mctx->dfa;
-  re_node_set *cur_src = &mctx->state_log[str_idx]->nodes;
+  re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
   int i;
 
   /* Then build the next sifted state.
@@ -1608,16 +1616,18 @@ build_sifted_states (mctx, sctx, str_idx
      `sifted_states[str_idx]' with `cur_dest'.
      Note:
      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
-     `cur_src' points the node_set of the old `state_log[str_idx]'.  */
+     `cur_src' points the node_set of the old `state_log[str_idx]'
+     (with the epsilon nodes pre-filtered out).  */
   for (i = 0; i < cur_src->nelem; i++)
     {
       int prev_node = cur_src->elems[i];
       int naccepted = 0;
-      re_token_type_t type = dfa->nodes[prev_node].type;
       int ret;
 
-      if (IS_EPSILON_NODE (type))
-	continue;
+#ifdef DEBUG
+      re_token_type_t type = dfa->nodes[prev_node].type;
+      assert (!IS_EPSILON_NODE (type));
+#endif
 #ifdef RE_ENABLE_I18N
       /* If the node may accept `multi byte'.  */
       if (dfa->nodes[prev_node].accept_mb)
@@ -1764,26 +1774,24 @@ add_epsilon_src_nodes (dfa, dest_nodes, 
      re_node_set *dest_nodes;
      const re_node_set *candidates;
 {
-  reg_errcode_t err;
-  int src_idx;
-  re_node_set src_copy;
+  reg_errcode_t err = REG_NOERROR;
+  int i;
 
-  err = re_node_set_init_copy (&src_copy, dest_nodes);
+  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
   if (BE (err != REG_NOERROR, 0))
     return err;
-  for (src_idx = 0; src_idx < src_copy.nelem; ++src_idx)
+
+  if (!state->inveclosure.alloc)
     {
-      err = re_node_set_add_intersect (dest_nodes, candidates,
-				       dfa->inveclosures
-				       + src_copy.elems[src_idx]);
+      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
       if (BE (err != REG_NOERROR, 0))
-	{
-	  re_node_set_free (&src_copy);
-	  return err;
-	}
+        return REG_ESPACE;
+      for (i = 0; i < dest_nodes->nelem; i++)
+        re_node_set_merge (&state->inveclosure,
+			   dfa->inveclosures + dest_nodes->elems[i]);
     }
-  re_node_set_free (&src_copy);
-  return REG_NOERROR;
+  return re_node_set_add_intersect (dest_nodes, candidates,
+				    &state->inveclosure);
 }
 
 static reg_errcode_t
@@ -2936,7 +2944,7 @@ check_arrival (mctx, path, top_node, top
       if (cur_state)
 	{
 	  err = check_arrival_add_next_nodes (mctx, str_idx,
-					      &cur_state->nodes, &next_nodes);
+					      &cur_state->non_eps_nodes, &next_nodes);
 	  if (BE (err != REG_NOERROR, 0))
 	    {
 	      re_node_set_free (&next_nodes);
@@ -3010,9 +3018,10 @@ check_arrival_add_next_nodes (mctx, str_
     {
       int naccepted = 0;
       int cur_node = cur_nodes->elems[cur_idx];
+#ifdef DEBUG
       re_token_type_t type = dfa->nodes[cur_node].type;
-      if (IS_EPSILON_NODE (type))
-	continue;
+      assert (!IS_EPSILON_NODE (type));
+#endif
 #ifdef RE_ENABLE_I18N
       /* If the node may accept `multi byte'.  */
       if (dfa->nodes[cur_node].accept_mb)
@@ -3989,24 +3998,20 @@ check_node_accept (mctx, node, idx)
     const re_token_t *node;
     int idx;
 {
-  re_dfa_t *const dfa = mctx->dfa;
   unsigned char ch;
-  if (node->constraint)
-    {
-      /* The node has constraints.  Check whether the current context
-	 satisfies the constraints.  */
-      unsigned int context = re_string_context_at (&mctx->input, idx,
-						   mctx->eflags);
-      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
-	return 0;
-    }
   ch = re_string_byte_at (&mctx->input, idx);
   switch (node->type)
     {
     case CHARACTER:
-      return node->opr.c == ch;
+      if (node->opr.c != ch)
+        return 0;
+      break;
+
     case SIMPLE_BRACKET:
-      return bitset_contain (node->opr.sbcset, ch);
+      if (!bitset_contain (node->opr.sbcset, ch))
+        return 0;
+      break;
+
 #ifdef RE_ENABLE_I18N
     case OP_UTF8_PERIOD:
       if (ch >= 0x80)
@@ -4014,11 +4019,26 @@ check_node_accept (mctx, node, idx)
       /* FALLTHROUGH */
 #endif
     case OP_PERIOD:
-      return !((ch == '\n' && !(dfa->syntax & RE_DOT_NEWLINE))
-	       || (ch == '\0' && (dfa->syntax & RE_DOT_NOT_NULL)));
+      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
+	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
+	return 0;
+      break;
+
     default:
       return 0;
     }
+
+  if (node->constraint)
+    {
+      /* The node has constraints.  Check whether the current context
+	 satisfies the constraints.  */
+      unsigned int context = re_string_context_at (&mctx->input, idx,
+						   mctx->eflags);
+      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
+	return 0;
+    }
+
+  return 1;
 }
 
 /* Extend the buffers, if the buffers have run out.  */




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]