This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] PR regex/8


This one is about quadratic behavior that happens for regexes like a*b when the matching fails (or succeeds only after many tries).

Paolo

2004-02-02  Paolo Bonzini  <bonzini@gnu.org>

	* posix/regexec.c (check_matching): Add P_MATCH_FIRST
	parameter.
	(re_search_internal): Pass new parameter to check_matching.
	(check_matching): Unless a parenthesized group is found at the
	beginning of the regexp, advance P_MATCH_FIRST until we entered
	a state different from the initial state.

Index: regexec.c
===================================================================
RCS file: /cvs/glibc/libc/posix/regexec.c,v
retrieving revision 1.56
diff -u -p -r1.56 regexec.c
--- regexec.c	19 Jan 2004 20:16:45 -0000	1.56
+++ regexec.c	2 Feb 2004 14:57:38 -0000
@@ -60,7 +60,8 @@ static inline re_dfastate_t *acquire_ini
      __attribute ((always_inline)) internal_function;
 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
      internal_function;
-static int check_matching (re_match_context_t *mctx, int fl_longest_match)
+static int check_matching (re_match_context_t *mctx, int fl_longest_match,
+			   int *p_match_first)
      internal_function;
 static int check_halt_node_context (const re_dfa_t *dfa, int node,
 				    unsigned int context) internal_function;
@@ -745,7 +746,8 @@ re_search_internal (preg, string, length
 	  /* It seems to be appropriate one, then use the matcher.  */
 	  /* We assume that the matching starts from 0.  */
 	  mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
-	  match_last = check_matching (&mctx, fl_longest_match);
+	  match_last = check_matching (&mctx, fl_longest_match,
+				       range >= 0 ? &match_first : NULL);
 	  if (match_last != -1)
 	    {
 	      if (BE (match_last == -2, 0))
@@ -966,13 +968,16 @@ acquire_init_state_context (err, mctx, i
    and return the index where the matching end, return -1 if not match,
    or return -2 in case of an error.
    FL_LONGEST_MATCH means we want the POSIX longest matching.
+   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
+   next place where we may want to try matching.
    Note that the matcher assume that the maching starts from the current
    index of the buffer.  */
 
 static int
-check_matching (mctx, fl_longest_match)
+check_matching (mctx, fl_longest_match, p_match_first)
     re_match_context_t *mctx;
     int fl_longest_match;
+    int *p_match_first;
 {
   re_dfa_t *const dfa = mctx->dfa;
   reg_errcode_t err;
@@ -980,6 +985,7 @@ check_matching (mctx, fl_longest_match)
   int match_last = -1;
   int cur_str_idx = re_string_cur_idx (&mctx->input);
   re_dfastate_t *cur_state;
+  int at_init_state = p_match_first != NULL, skipped = 0;
 
   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
   /* An initial state must not be NULL(invalid state).  */
@@ -992,6 +998,7 @@ check_matching (mctx, fl_longest_match)
      later.  E.g. Processing back references.  */
   if (BE (dfa->nbackref, 0))
     {
+      at_init_state = 0;
       err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
       if (BE (err != REG_NOERROR, 0))
 	return err;
@@ -1022,7 +1029,16 @@ check_matching (mctx, fl_longest_match)
 
   while (!re_string_eoi (&mctx->input))
     {
+      re_dfastate_t *old_state = cur_state;
       cur_state = transit_state (&err, mctx, cur_state);
+      if (at_init_state)
+	{
+	  if (old_state == cur_state)
+	    skipped++;
+	  else
+	    at_init_state = 0;
+	}
+
       if (cur_state == NULL) /* Reached at the invalid state or an error.  */
 	{
 	  cur_str_idx = re_string_cur_idx (&mctx->input);
@@ -1062,6 +1078,10 @@ check_matching (mctx, fl_longest_match)
 	    }
 	}
    }
+
+  if (match_last == -1 && skipped)
+    *p_match_first += skipped;
+
   return match_last;
 }
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]