This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Small regex tweaks


Hi!

Small regex tweaks:
1) In some cases regexec would happily return success together with
   registers pointing outside of the string limits.
2) Avoids a segfault in bug-regex11.c when the failing tests are uncommented.
   From what I can tell, state_log[i] == NULL is a completely legal situation,
   so prune_impossible_nodes needs to cope with it.
3) We don't need to set has_plural_match if creating just COMPLEX_BRACKET
   and not SIMPLE_BRACKET OP_ALT COMPLEX_BRACKET.
4) build_charclass_op creates SIMPLE_BRACKET OP_ALT COMPLEX_BRACKET,
   but did not set has_plural_match.  Looks like a bug to me.

2003-11-25  Jakub Jelinek  <jakub@redhat.com>

	* posix/regexec.c (re_search_internal): If prune_impossible_nodes
	returned REG_NOMATCH, set match_last to -1.  Don't initialize
	pmatch[0] needlessly.  Fix comment.
	(prune_impossible_nodes): Don't segfault on NULL state_log entry.
	(set_regs): Fix comment.
	* posix/regcomp.c (parse_bracket_exp): Only set has_plural_match
	if adding both SIMPLE_BRACKET and COMPLEX_BRACKET.
	(build_charclass_op): Set has_plural_match if adding both
	SIMPLE_BRACKET and COMPLEX_BRACKET.
	* posix/bug-regex11.c (tests): Fix register values for one commented
	out test.  Add new tests.

--- libc/posix/regexec.c.jj	2003-11-25 13:06:02.000000000 +0100
+++ libc/posix/regexec.c	2003-11-25 20:02:56.000000000 +0100
@@ -766,6 +766,7 @@ re_search_internal (preg, string, length
 			break;
 		      if (BE (err != REG_NOMATCH, 0))
 			goto free_return;
+		      match_last = -1;
 		    }
 		  else
 		    break; /* We found a match.  */
@@ -785,7 +786,7 @@ re_search_internal (preg, string, length
       int reg_idx;
 
       /* Initialize registers.  */
-      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
 
       /* Set the points where matching start/end.  */
@@ -801,7 +802,8 @@ re_search_internal (preg, string, length
 	}
 
       /* At last, add the offset to the each registers, since we slided
-	 the buffers so that We can assume that the matching starts from 0.  */
+	 the buffers so that we could assume that the matching starts
+	 from 0.  */
       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
 	if (pmatch[reg_idx].rm_so != -1)
 	  {
@@ -869,7 +871,8 @@ prune_impossible_nodes (preg, mctx)
 		  ret = REG_NOMATCH;
 		  goto free_return;
 		}
-	    } while (!mctx->state_log[match_last]->halt);
+	    } while (mctx->state_log[match_last] == NULL
+		     || !mctx->state_log[match_last]->halt);
 	  halt_node = check_halt_state_context (preg,
 						mctx->state_log[match_last],
 						mctx, match_last);
@@ -1236,7 +1239,7 @@ pop_fail_stack (fs, pidx, nregs, regs, e
 /* Set the positions where the subexpressions are starts/ends to registers
    PMATCH.
    Note: We assume that pmatch[0] is already set, and
-   pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1).  */
+   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
 
 static reg_errcode_t
 set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
--- libc/posix/regcomp.c.jj	2003-11-24 23:49:53.000000000 +0100
+++ libc/posix/regcomp.c	2003-11-25 19:59:13.000000000 +0100
@@ -3213,7 +3213,6 @@ parse_bracket_exp (regexp, dfa, token, s
       int sbc_idx;
       /* Build a tree for complex bracket.  */
       dfa->has_mb_node = 1;
-      dfa->has_plural_match = 1;
       for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
 	if (sbcset[sbc_idx])
 	  break;
@@ -3233,6 +3232,7 @@ parse_bracket_exp (regexp, dfa, token, s
 	goto parse_bracket_exp_espace;
       /* Then join them by ALT node.  */
       alt_token.type = OP_ALT;
+      dfa->has_plural_match = 1;
       work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token);
       if (BE (mbc_tree != NULL, 1))
 	return work_tree;
@@ -3627,6 +3627,7 @@ build_charclass_op (dfa, trans, class_na
 	goto build_word_op_espace;
       /* Then join them by ALT node.  */
       alt_token.type = OP_ALT;
+      dfa->has_plural_match = 1;
       tree = re_dfa_add_tree_node (dfa, tree, mbc_tree, &alt_token);
       if (BE (mbc_tree != NULL, 1))
 	return tree;
--- libc/posix/bug-regex11.c.jj	2003-11-21 23:49:48.000000000 +0100
+++ libc/posix/bug-regex11.c	2003-11-25 20:08:39.000000000 +0100
@@ -54,13 +54,22 @@ struct
   { "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
   { "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } },
   /* More tests on backreferences.  */
+  { "()\\1", "x", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
+  { "()x\\1", "x", REG_EXTENDED, 2, { { 0, 1 }, { 0, 0 } } },
   { "()\\1*\\1*", "", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } },
   { "([0-9]).*\\1(a*)", "7;7a6", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
   { "([0-9]).*\\1(a*)", "7;7a", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } },
+  { "(b)()c\\1", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 1 }, { 1, 1 } } },
+  { "()(b)c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
+  { "a(b)()c\\1", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 2 }, { 2, 2 } } },
+  { "a()(b)c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
 #if 0
   /* XXX Not used since they fail so far.  */
-  { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 1, 2 } } },
+  { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
   { "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } },
+  { "a()(b)\\1c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
+  { "a()d(b)\\1c\\2", "adbcb", REG_EXTENDED, 3, { { 0, 5 }, { 1, 1 }, { 2, 3 } } },
+  { "a(b())\\2\\1", "abbbb", REG_EXTENDED, 3, { { 0, 3 }, { 1, 2 }, { 2, 2 } } },
   { "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } },
   { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
     "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]