This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.
Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |
Other format: | [Raw text] |
Hi! Small regex tweaks: 1) regexec would happily return success with registers outside of the string limits in some cases. 2) Avoid a segfault on bug-regex11.c when the failing tests are uncommented. From what I can tell, state_log[i] == NULL is a completely legal situation, so prune_impossible_nodes needs to cope with it. 3) We don't need to set has_plural_match when creating just a COMPLEX_BRACKET rather than SIMPLE_BRACKET OP_ALT COMPLEX_BRACKET. 4) build_charclass_op creates SIMPLE_BRACKET OP_ALT COMPLEX_BRACKET, but does not set has_plural_match. Looks like a bug to me. 2003-11-25 Jakub Jelinek <jakub@redhat.com> * posix/regexec.c (re_search_internal): If prune_impossible_nodes returned REG_NOMATCH, set match_last to -1. Don't initialize pmatch[0] needlessly. Fix comment. (prune_impossible_nodes): Don't segfault on NULL state_log entry. (set_regs): Fix comment. * posix/regcomp.c (parse_bracket_exp): Only set has_plural_match if adding both SIMPLE_BRACKET and COMPLEX_BRACKET. (build_charclass_op): Set has_plural_match if adding both SIMPLE_BRACKET and COMPLEX_BRACKET. * posix/bug-regex11.c (tests): Fix register values for one commented out test. Add new tests. --- libc/posix/regexec.c.jj 2003-11-25 13:06:02.000000000 +0100 +++ libc/posix/regexec.c 2003-11-25 20:02:56.000000000 +0100 @@ -766,6 +766,7 @@ re_search_internal (preg, string, length break; if (BE (err != REG_NOMATCH, 0)) goto free_return; + match_last = -1; } else break; /* We found a match. */ @@ -785,7 +786,7 @@ re_search_internal (preg, string, length int reg_idx; /* Initialize registers. */ - for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; /* Set the points where matching start/end. */ @@ -801,7 +802,8 @@ re_search_internal (preg, string, length } /* At last, add the offset to the each registers, since we slided - the buffers so that We can assume that the matching starts from 0. 
*/ + the buffers so that we could assume that the matching starts + from 0. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) if (pmatch[reg_idx].rm_so != -1) { @@ -869,7 +871,8 @@ prune_impossible_nodes (preg, mctx) ret = REG_NOMATCH; goto free_return; } - } while (!mctx->state_log[match_last]->halt); + } while (mctx->state_log[match_last] == NULL + || !mctx->state_log[match_last]->halt); halt_node = check_halt_state_context (preg, mctx->state_log[match_last], mctx, match_last); @@ -1236,7 +1239,7 @@ pop_fail_stack (fs, pidx, nregs, regs, e /* Set the positions where the subexpressions are starts/ends to registers PMATCH. Note: We assume that pmatch[0] is already set, and - pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */ + pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ static reg_errcode_t set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) --- libc/posix/regcomp.c.jj 2003-11-24 23:49:53.000000000 +0100 +++ libc/posix/regcomp.c 2003-11-25 19:59:13.000000000 +0100 @@ -3213,7 +3213,6 @@ parse_bracket_exp (regexp, dfa, token, s int sbc_idx; /* Build a tree for complex bracket. */ dfa->has_mb_node = 1; - dfa->has_plural_match = 1; for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx) if (sbcset[sbc_idx]) break; @@ -3233,6 +3232,7 @@ parse_bracket_exp (regexp, dfa, token, s goto parse_bracket_exp_espace; /* Then join them by ALT node. */ alt_token.type = OP_ALT; + dfa->has_plural_match = 1; work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token); if (BE (mbc_tree != NULL, 1)) return work_tree; @@ -3627,6 +3627,7 @@ build_charclass_op (dfa, trans, class_na goto build_word_op_espace; /* Then join them by ALT node. 
*/ alt_token.type = OP_ALT; + dfa->has_plural_match = 1; tree = re_dfa_add_tree_node (dfa, tree, mbc_tree, &alt_token); if (BE (mbc_tree != NULL, 1)) return tree; --- libc/posix/bug-regex11.c.jj 2003-11-21 23:49:48.000000000 +0100 +++ libc/posix/bug-regex11.c 2003-11-25 20:08:39.000000000 +0100 @@ -54,13 +54,22 @@ struct { "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } }, { "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } }, /* More tests on backreferences. */ + { "()\\1", "x", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } }, + { "()x\\1", "x", REG_EXTENDED, 2, { { 0, 1 }, { 0, 0 } } }, { "()\\1*\\1*", "", REG_EXTENDED, 2, { { 0, 0 }, { 0, 0 } } }, { "([0-9]).*\\1(a*)", "7;7a6", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } }, { "([0-9]).*\\1(a*)", "7;7a", REG_EXTENDED, 3, { { 0, 4 }, { 0, 1 }, { 3, 4 } } }, + { "(b)()c\\1", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 1 }, { 1, 1 } } }, + { "()(b)c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } }, + { "a(b)()c\\1", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 2 }, { 2, 2 } } }, + { "a()(b)c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } }, #if 0 /* XXX Not used since they fail so far. */ - { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 1, 2 } } }, + { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } }, { "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } }, + { "a()(b)\\1c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } }, + { "a()d(b)\\1c\\2", "adbcb", REG_EXTENDED, 3, { { 0, 5 }, { 1, 1 }, { 2, 3 } } }, + { "a(b())\\2\\1", "abbbb", REG_EXTENDED, 3, { { 0, 3 }, { 1, 2 }, { 2, 2 } } }, { "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } }, { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$", "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } }, Jakub
Index Nav: | [Date Index] [Subject Index] [Author Index] [Thread Index] | |
---|---|---|
Message Nav: | [Date Prev] [Date Next] | [Thread Prev] [Thread Next] |