This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] regex tests from BOOST


Hi!

7 tests are FAILing and are commented out for now (with XXX and FIXME,
so that it is not forgotten).

2003-12-03  Jakub Jelinek  <jakub@redhat.com>

	* posix/Makefile (distribute): Add BOOST.tests.
	(tests): Add tst-boost, depend on tst-boost-mem.
	(generated): Add tst-boost-mem and tst-boost.mtrace.
	(tst-boost-ARGS, tst-boost-ENV): Set.
	($(objpfx)tst-boost-mem): New.
	* posix/tst-boost.c: New test.
	* posix/BOOST.tests: New file.

--- libc/posix/Makefile.jj	2003-12-03 10:19:32.000000000 +0100
+++ libc/posix/Makefile	2003-12-03 10:48:40.000000000 +0100
@@ -35,7 +35,7 @@ distribute := confstr.h TESTS TESTS2C.se
 	      globtest.c globtest.sh wordexp-tst.sh annexc.c fnmatch_loop.c   \
 	      spawn_int.h tst-getconf.sh regcomp.c regexec.c regex_internal.c \
 	      regex_internal.h fork.h rxspencer/tests rxspencer/COPYRIGHT     \
-	      PCRE.tests
+	      PCRE.tests BOOST.tests
 
 routines :=								      \
 	uname								      \
@@ -80,7 +80,7 @@ tests		:= tstgetopt testfnm runtests run
 		   bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
 		   bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
 		   bug-regex21 tst-nice tst-nanosleep transbug tst-rxspencer \
-		   tst-pcre
+		   tst-pcre tst-boost
 ifeq (yes,$(build-shared))
 test-srcs	:= globtest
 tests           += wordexp-test tst-exec tst-spawn
@@ -97,7 +97,7 @@ generated := $(addprefix wordexp-test-re
 	     bug-regex2.mtrace bug-regex14-mem bug-regex14.mtrace \
 	     bug-regex21-mem bug-regex21.mtrace \
 	     tst-rxspencer-mem tst-rxspencer.mtrace tst-getconf.out \
-	     tst-pcre-mem tst-pcre.mtrace
+	     tst-pcre-mem tst-pcre.mtrace tst-boost-mem tst-boost.mtrace
 
 include ../Rules
 
@@ -165,6 +165,7 @@ bug-regex20-ENV = LOCPATH=$(common-objpf
 tst-rxspencer-ARGS = --utf8 rxspencer/tests
 tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
 tst-pcre-ARGS = PCRE.tests
+tst-boost-ARGS = BOOST.tests
 
 testcases.h: TESTS TESTS2C.sed
 	sed -f TESTS2C.sed < $< > $@T
@@ -186,7 +187,7 @@ tests: $(objpfx)annexc.out
 ifeq (no,$(cross-compiling))
 tests: $(objpfx)bug-regex2-mem $(objpfx)bug-regex14-mem \
   $(objpfx)bug-regex21-mem $(objpfx)tst-rxspencer-mem \
-  $(objpfx)tst-pcre-mem $(objpfx)tst-getconf.out
+  $(objpfx)tst-pcre-mem $(objpfx)tst-boost-mem $(objpfx)tst-getconf.out
 endif
 
 $(objpfx)annexc.out: $(objpfx)annexc
@@ -225,6 +226,10 @@ tst-pcre-ENV = MALLOC_TRACE=$(objpfx)tst
 $(objpfx)tst-pcre-mem: $(objpfx)tst-pcre.out
 	$(common-objpfx)malloc/mtrace $(objpfx)tst-pcre.mtrace > $@
 
+tst-boost-ENV = MALLOC_TRACE=$(objpfx)tst-boost.mtrace
+$(objpfx)tst-boost-mem: $(objpfx)tst-boost.out
+	$(common-objpfx)malloc/mtrace $(objpfx)tst-boost.mtrace > $@
+
 $(objpfx)tst-getconf.out: tst-getconf.sh $(objpfx)getconf
 	$(SHELL) -e $< $(common-objpfx) $(elf-objpfx) $(rtld-installed-name)
 
--- libc/posix/tst-boost.c.jj	2003-12-02 00:13:18.000000000 +0100
+++ libc/posix/tst-boost.c	2003-12-03 10:43:31.000000000 +0100
@@ -0,0 +1,227 @@
+/* Regular expression tests.
+   Copyright (C) 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sys/types.h>
+#include <mcheck.h>
+#include <regex.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void
+frob_escapes (char *src, int pattern)
+{
+  char *dst;
+
+  for (dst = src; *src != '\0'; dst++, src++)
+    {
+      if (*src == '\\')
+	{
+	  switch (src[1])
+	    {
+	    case 't':
+	      src++;
+	      *dst = '\t';
+	      continue;
+	    case 'n':
+	      src++;
+	      *dst = '\n';
+	      continue;
+	    case 'r':
+	      src++;
+	      *dst = '\r';
+	      continue;
+	    case '\\':
+	    case '^':
+	    case '{':
+	    case '|':
+	    case '}':
+	      if (!pattern)
+		{
+		  src++;
+		  *dst = *src;
+		  continue;
+		}
+	      break;
+	    }
+	}
+      if (src != dst)
+	*dst = *src;
+    }
+  *dst = '\0';
+}
+
+int
+main (int argc, char **argv)
+{
+  int ret = 0, n;
+  char *line = NULL;
+  size_t line_len = 0;
+  ssize_t len;
+  FILE *f;
+  char *pattern, *string;
+  int flags = REG_EXTENDED;
+  int eflags = 0;
+  regex_t re;
+  regmatch_t rm[20];
+
+  mtrace ();
+
+  if (argc < 2)
+    {
+      fprintf (stderr, "Missing test filename\n");
+      return 1;
+    }
+
+  f = fopen (argv[1], "r");
+  if (f == NULL)
+    {
+      fprintf (stderr, "Couldn't open %s\n", argv[1]);
+      return 1;
+    }
+
+  while ((len = getline (&line, &line_len, f)) > 0)
+    {
+      char *p, *q;
+      int i;
+
+      if (line[len - 1] == '\n')
+	line[--len] = '\0';
+
+      puts (line);
+
+      if (line[0] == ';')
+	continue;
+
+      if (line[0] == '\0')
+	continue;
+
+      if (line[0] == '-')
+	{
+	  if (strstr (line, "REG_BASIC"))
+	    flags = 0;
+	  else
+	    flags = REG_EXTENDED;
+	  if (strstr (line, "REG_ICASE"))
+	    flags |= REG_ICASE;
+	  if (strstr (line, "REG_NEWLINE"))
+	    flags |= REG_NEWLINE;
+	  eflags = 0;
+	  if (strstr (line, "REG_NOTBOL"))
+	    eflags |= REG_NOTBOL;
+	  if (strstr (line, "REG_NOTEOL"))
+	    eflags |= REG_NOTEOL;
+	  continue;
+	}
+
+      pattern = line + strspn (line, " \t");
+      if (*pattern == '\0')
+	continue;
+      p = pattern + strcspn (pattern, " \t");
+      if (*p == '\0')
+	continue;
+      *p++ = '\0';
+
+      string = p + strspn (p, " \t");
+      if (*string == '\0')
+	continue;
+      if (*string == '"')
+	{
+	  string++;
+	  p = strchr (string, '"');
+	  if (p == NULL)
+	    continue;
+	  *p++ = '\0';
+	}
+      else
+	{
+	  p = string + strcspn (string, " \t");
+	  if (*string == '!')
+	    string = NULL;
+	  else if (*p == '\0')
+	    continue;
+	  else
+	    *p++ = '\0';
+	}
+
+      frob_escapes (pattern, 1);
+      if (string != NULL)
+	frob_escapes (string, 0);
+
+      n = regcomp (&re, pattern, flags);
+      if (n != 0)
+	{
+	  if (string != NULL)
+	    {
+	      char buf[500];
+	      regerror (n, &re, buf, sizeof (buf));
+	      printf ("FAIL regcomp unexpectedly failed: %s\n",
+		      buf);
+	      ret = 1;
+	    }
+	  continue;
+	}
+      else if (string == NULL)
+	{
+	  regfree (&re);
+	  puts ("FAIL regcomp unpexpectedly succeeded");
+	  ret = 1;
+	  continue;
+	}
+
+      if (regexec (&re, string, 20, rm, eflags))
+	{
+	  for (i = 0; i < 20; ++i)
+	    {
+	      rm[i].rm_so = -1;
+	      rm[i].rm_eo = -1;
+	    }
+	}
+
+      regfree (&re);
+
+      for (i = 0; i < 20 && *p != '\0'; ++i)
+	{
+	  int rm_so, rm_eo;
+
+	  rm_so = strtol (p, &q, 10);
+	  if (p == q)
+	    break;
+	  p = q;
+
+	  rm_eo = strtol (p, &q, 10);
+	  if (p == q)
+	    break;
+	  p = q;
+
+	  if (rm[i].rm_so != rm_so || rm[i].rm_eo != rm_eo)
+	    {
+	      printf ("FAIL rm[%d] %d..%d != expected %d..%d\n",
+		      i, rm[i].rm_so, rm[i].rm_eo, rm_so, rm_eo);
+	      ret = 1;
+	      break;
+	    }
+	}
+    }
+
+  free (line);
+  fclose (f);
+  return ret;
+}
--- libc/posix/BOOST.tests.jj	2003-12-02 00:12:18.000000000 +0100
+++ libc/posix/BOOST.tests	2003-12-03 11:27:29.000000000 +0100
@@ -0,0 +1,834 @@
+; 
+; 
+; this file contains a script of tests to run through regress.exe
+;
+; comments start with a semicolon and proceed to the end of the line
+;
+; changes to regular expression compile flags start with a "-" as the first
+; non-whitespace character and consist of a list of the printable names
+; of the flags, for example "match_default"
+;
+; Other lines contain a test to perform using the current flag status
+; the first token contains the expression to compile, the second the string
+; to match it against. If the second string is "!" then the expression should
+; not compile, that is the first string is an invalid regular expression.
+; This is then followed by a list of integers that specify what should match,
+; each pair represents the starting and ending positions of a subexpression
+; starting with the zeroth subexpression (the whole match).
+; A value of -1 indicates that the subexpression should not take part in the
+; match at all, if the first value is -1 then no part of the expression should
+; match the string.
+;
+; Tests taken from BOOST testsuite and adapted to glibc regex.
+;
+; Boost Software License - Version 1.0 - August 17th, 2003
+;
+; Permission is hereby granted, free of charge, to any person or organization
+; obtaining a copy of the software and accompanying documentation covered by
+; this license (the "Software") to use, reproduce, display, distribute,
+; execute, and transmit the Software, and to prepare derivative works of the
+; Software, and to permit third-parties to whom the Software is furnished to
+; do so, all subject to the following:
+;
+; The copyright notices in the Software and this entire statement, including
+; the above license grant, this restriction and the following disclaimer,
+; must be included in all copies of the Software, in whole or in part, and
+; all derivative works of the Software, unless such copies or derivative
+; works are solely in the form of machine-executable object code generated by
+; a source language processor.
+;
+; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+; DEALINGS IN THE SOFTWARE.
+;
+
+- match_default normal REG_EXTENDED
+
+;
+; try some really simple literals:
+a a 0 1
+Z Z 0 1
+Z aaa -1 -1
+Z xxxxZZxxx 4 5
+
+; and some simple brackets:
+(a) zzzaazz 3 4 3 4
+() zzz 0 0 0 0
+() "" 0 0 0 0
+( !
+) ) 0 1
+(aa !
+aa) baa)b 1 4
+a b -1 -1
+\(\) () 0 2
+\(a\) (a) 0 3
+\() () 0 2
+(\) !
+p(a)rameter ABCparameterXYZ 3 12 4 5
+[pq](a)rameter ABCparameterXYZ 3 12 4 5
+
+; now try escaped brackets:
+- match_default bk_parens REG_BASIC
+\(a\) zzzaazz 3 4 3 4
+\(\) zzz 0 0 0 0
+\(\) "" 0 0 0 0
+\( !
+\) !
+\(aa !
+aa\) !
+() () 0 2
+(a) (a) 0 3
+(\) !
+\() !
+
+; now move on to "." wildcards
+- match_default normal REG_EXTENDED REG_STARTEND
+. a 0 1
+. \n 0 1
+. \r 0 1
+. \0 0 1
+
+;
+; now move on to the repetion ops,
+; starting with operator *
+- match_default normal REG_EXTENDED
+a* b 0 0
+ab* a 0 1
+ab* ab 0 2
+ab* sssabbbbbbsss 3 10
+ab*c* a 0 1
+ab*c* abbb 0 4
+ab*c* accc 0 4
+ab*c* abbcc 0 5
+*a !
+\<* !
+\>* !
+\n* \n\n 0 2
+\** ** 0 2
+\* * 0 1
+
+; now try operator +
+ab+ a -1 -1
+ab+ ab 0 2
+ab+ sssabbbbbbsss 3 10
+ab+c+ a -1 -1
+ab+c+ abbb -1 -1
+ab+c+ accc -1 -1
+ab+c+ abbcc 0 5
++a !
+\<+ !
+\>+ !
+\n+ \n\n 0 2
+\+ + 0 1
+\+ ++ 0 1
+\++ ++ 0 2
+
+; now try operator ?
+- match_default normal REG_EXTENDED
+a? b 0 0
+ab? a 0 1
+ab? ab 0 2
+ab? sssabbbbbbsss 3 5
+ab?c? a 0 1
+ab?c? abbb 0 2
+ab?c? accc 0 2
+ab?c? abcc 0 3
+?a !
+\<? !
+\>? !
+\n? \n\n 0 1
+\? ? 0 1
+\? ?? 0 1
+\?? ?? 0 1
+
+; now try operator {}
+- match_default normal REG_EXTENDED
+a{2} a -1 -1
+a{2} aa 0 2
+a{2} aaa 0 2
+a{2,} a -1 -1
+a{2,} aa 0 2
+a{2,} aaaaa 0 5
+a{2,4} a -1 -1
+a{2,4} aa 0 2
+a{2,4} aaa 0 3
+a{2,4} aaaa 0 4
+a{2,4} aaaaa 0 4
+a{} !
+a{2 !
+a} a} 0 2
+\{\} {} 0 2
+
+- match_default normal REG_BASIC
+a\{2\} a -1 -1
+a\{2\} aa 0 2
+a\{2\} aaa 0 2
+a\{2,\} a -1 -1
+a\{2,\} aa 0 2
+a\{2,\} aaaaa 0 5
+a\{2,4\} a -1 -1
+a\{2,4\} aa 0 2
+a\{2,4\} aaa 0 3
+a\{2,4\} aaaa 0 4
+a\{2,4\} aaaaa 0 4
+{} {} 0 2
+
+; now test the alternation operator |
+- match_default normal REG_EXTENDED
+a|b a 0 1
+a|b b 0 1
+a(b|c) ab 0 2 1 2
+a(b|c) ac 0 2 1 2
+a(b|c) ad -1 -1 -1 -1
+a\| a| 0 2
+
+; now test the set operator []
+- match_default normal REG_EXTENDED
+; try some literals first
+[abc] a 0 1
+[abc] b 0 1
+[abc] c 0 1
+[abc] d -1 -1
+[^bcd] a 0 1
+[^bcd] b -1 -1
+[^bcd] d -1 -1
+[^bcd] e 0 1
+a[b]c abc 0 3
+a[ab]c abc 0 3
+a[^ab]c adc 0 3
+a[]b]c a]c 0 3
+a[[b]c a[c 0 3
+a[-b]c a-c 0 3
+a[^]b]c adc 0 3
+a[^-b]c adc 0 3
+a[b-]c a-c 0 3
+a[b !
+a[] !
+
+; then some ranges
+[b-e] a -1 -1
+[b-e] b 0 1
+[b-e] e 0 1
+[b-e] f -1 -1
+[^b-e] a 0 1
+[^b-e] b -1 -1
+[^b-e] e -1 -1
+[^b-e] f 0 1
+a[1-3]c a2c 0 3
+a[3-1]c !
+a[1-3-5]c !
+a[1- !
+
+; and some classes
+a[[:alpha:]]c abc 0 3
+a[[:unknown:]]c !
+a[[: !
+a[[:alpha !
+a[[:alpha:] !
+a[[:alpha,:] !
+a[[:]:]]b !
+a[[:-:]]b !
+a[[:alph:]] !
+a[[:alphabet:]] !
+[[:alnum:]]+ -%@a0X_- 3 6
+[[:alpha:]]+ -%@aX_0- 3 5
+[[:blank:]]+ "a  \tb" 1 4
+[[:cntrl:]]+ a\n\tb 1 3
+[[:digit:]]+ a019b 1 4
+[[:graph:]]+ " a%b " 1 4
+[[:lower:]]+ AabC 1 3
+; This test fails with STLPort, disable for now as this is a corner case anyway...
+;[[:print:]]+ "\na b\n" 1 4
+[[:punct:]]+ " %-&\t" 1 4
+[[:space:]]+ "a \n\t\rb" 1 5
+[[:upper:]]+ aBCd 1 3
+[[:xdigit:]]+ p0f3Cx 1 5
+
+; now test flag settings:
+- escape_in_lists REG_NO_POSIX_TEST
+[\n] \n 0 1
+- REG_NO_POSIX_TEST
+
+; line anchors
+- match_default normal REG_EXTENDED
+^ab ab 0 2
+^ab xxabxx -1 -1
+ab$ ab 0 2
+ab$ abxx -1 -1
+- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
+^ab ab -1 -1
+^ab xxabxx -1 -1
+ab$ ab -1 -1
+ab$ abxx -1 -1
+
+; back references
+- match_default normal REG_PERL
+a(b)\2c	!
+a(b\1)c	!
+a(b*)c\1d abbcbbd 0 7 1 3
+a(b*)c\1d abbcbd -1 -1
+a(b*)c\1d abbcbbbd -1 -1
+^(.)\1 abc -1 -1
+a([bc])\1d abcdabbd	4 8 5 6
+; strictly speaking this is at best ambiguous, at worst wrong, this is what most
+; re implimentations will match though.
+a(([bc])\2)*d abbccd 0 6 3 5 3 4
+
+a(([bc])\2)*d abbcbd -1 -1
+a((b)*\2)*d abbbd 0 5 1 4 2 3
+; perl only:
+(ab*)[ab]*\1 ababaaa 0 7 0 1
+(a)\1bcd aabcd 0 5 0 1
+(a)\1bc*d aabcd 0 5 0 1
+(a)\1bc*d aabd 0 4 0 1
+(a)\1bc*d aabcccd 0 7 0 1
+(a)\1bc*[ce]d aabcccd 0 7 0 1
+^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
+
+; posix only: 
+- match_default extended REG_EXTENDED
+(ab*)[ab]*\1 ababaaa 0 7 0 1
+
+;
+; word operators:
+\w a 0 1
+\w z 0 1
+\w A 0 1
+\w Z 0 1
+\w _ 0 1
+\w } -1 -1
+\w ` -1 -1
+\w [ -1 -1
+\w @ -1 -1
+; non-word:
+\W a -1 -1
+\W z -1 -1
+\W A -1 -1
+\W Z -1 -1
+\W _ -1 -1
+\W } 0 1
+\W ` 0 1
+\W [ 0 1
+\W @ 0 1
+; word start:
+\<abcd "  abcd" 2 6
+\<ab cab -1 -1
+\<ab "\nab" 1 3
+\<tag ::tag 2 5
+;word end:
+abc\> abc 0 3
+abc\> abcd -1 -1
+abc\> abc\n 0 3
+abc\> abc:: 0 3
+; word boundary:
+\babcd "  abcd" 2 6
+\bab cab -1 -1
+\bab "\nab" 1 3
+\btag ::tag 2 5
+abc\b abc 0 3
+abc\b abcd -1 -1
+abc\b abc\n 0 3
+abc\b abc:: 0 3
+; within word:
+\B ab 1 1
+a\Bb ab 0 2
+a\B ab 0 1
+a\B a -1 -1
+a\B "a " -1 -1
+
+;
+; buffer operators:
+\`abc abc 0 3
+\`abc \nabc -1 -1
+\`abc " abc" -1 -1
+abc\' abc 0 3
+abc\' abc\n -1 -1
+abc\' "abc " -1 -1
+
+;
+; now follows various complex expressions designed to try and bust the matcher:
+a(((b)))c abc 0 3 1 2 1 2 1 2
+a(b|(c))d abd 0 3 1 2 -1 -1
+a(b|(c))d acd 0 3 1 2 1 2
+a(b*|c)d abbd 0 4 1 3
+; just gotta have one DFA-buster, of course
+a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
+; and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
+; and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
+; one really big one
+1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
+; fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
+[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
+[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
+[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
+; and as parenthesis go past 9:
+(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
+(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
+(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
+(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
+(a)d|(b)c abc 1 3 -1 -1 1 2
+_+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:"; 12 20 13 19 -1 -1 -1 -1 13 19
+
+; subtleties of matching
+;a(b)?c\1d acd 0 3 -1 -1
+; POSIX is about the following test:
+a(b)?c\1d acd -1 -1 -1 -1
+a(b?c)+d accd 0 4 2 3
+(wee|week)(knights|night) weeknights 0 10 0 3 3 10
+.* abc 0 3
+a(b|(c))d abd 0 3 1 2 -1 -1
+a(b|(c))d acd 0 3 1 2 1 2
+a(b*|c|e)d abbd 0 4 1 3
+a(b*|c|e)d acd 0 3 1 2
+a(b*|c|e)d ad 0 2 1 1
+a(b?)c abc 0 3 1 2
+a(b?)c ac 0 2 1 1
+a(b+)c abc 0 3 1 2
+a(b+)c abbbc 0 5 1 4 
+a(b*)c ac 0 2 1 1 
+(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
+a([bc]?)c abc 0 3 1 2
+a([bc]?)c ac 0 2 1 1 
+a([bc]+)c abc 0 3 1 2
+a([bc]+)c abcc 0 4 1 3
+a([bc]+)bc abcbc 0 5 1 3
+a(bb+|b)b abb 0 3 1 2
+a(bbb+|bb+|b)b abb 0 3 1 2
+a(bbb+|bb+|b)b abbb 0 4 1 3
+a(bbb+|bb+|b)bb abbb 0 4 1 2
+(.*).* abcdef 0 6 0 6
+(a*)* bc 0 0 0 0
+xyx*xz xyxxxxyxxxz 5 11
+
+; do we get the right subexpression when it is used more than once?
+a(b|c)*d ad 0 2 -1 -1
+a(b|c)*d abcd 0 4 2 3
+a(b|c)+d abd 0 3 1 2
+a(b|c)+d abcd 0 4 2 3
+a(b|c?)+d ad 0 2 1 1
+a(b|c){0,0}d ad 0 2 -1 -1
+a(b|c){0,1}d ad 0 2 -1 -1
+a(b|c){0,1}d abd 0 3 1 2
+a(b|c){0,2}d ad 0 2 -1 -1
+a(b|c){0,2}d abcd 0 4 2 3
+a(b|c){0,}d ad 0 2 -1 -1
+a(b|c){0,}d abcd 0 4 2 3
+a(b|c){1,1}d abd 0 3 1 2
+a(b|c){1,2}d abd 0 3 1 2
+a(b|c){1,2}d abcd 0 4 2 3
+a(b|c){1,}d abd 0 3 1 2
+a(b|c){1,}d abcd 0 4 2 3
+a(b|c){2,2}d acbd 0 4 2 3
+a(b|c){2,2}d abcd 0 4 2 3
+a(b|c){2,4}d abcd 0 4 2 3
+a(b|c){2,4}d abcbd 0 5 3 4
+a(b|c){2,4}d abcbcd 0 6 4 5
+a(b|c){2,}d abcd 0 4 2 3
+a(b|c){2,}d abcbd 0 5 3 4
+; perl only:
+a(b|c?)+d abcd 0 4 3 3
+a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
+a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
+
+; posix only:
+- match_default extended REG_EXTENDED REG_STARTEND
+
+; XXX FIXME the following 4 tests fail ATM
+;a(b|c?)+d abcd 0 4 2 3
+;a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
+;a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
+;a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
+
+a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1
+
+; XXX FIXME the following 3 tests fail ATM
+;a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3
+;a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1
+;a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3
+
+a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1
+
+- match_default normal REG_PERL
+; try to match C++ syntax elements:
+; line comment:
+//[^\n]* "++i //here is a line comment\n" 4 28
+; block comment:
+/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
+/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
+; preprossor directives:
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
+; perl only:
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\  \r\n  foo();\\\r\n   printf(#x);" 0 53 30 42
+; literals:
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF         						0 4		0 4		0 4 	-1 -1 	-1 -1 	-1 -1 	-1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 									0 2 	0 2		-1 -1 	0 2 	-1 -1 	-1 -1 	-1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 								0 5		0 4		0 4 	-1 -1 	-1 -1 	-1 -1 	-1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 								0 5		0 4		0 4 	-1 -1 	4 5 	-1 -1 	-1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 			0 24	0 18	0 18 	-1 -1 	19 24 	19 24 	22 24
+; strings:
+'([^\\']|\\.)*' '\\x3A' 0 6 4 5
+'([^\\']|\\.)*' '\\'' 0 4 1 3
+'([^\\']|\\.)*' '\\n' 0 4 1 3
+
+; finally try some case insensitive matches:
+- match_default normal REG_EXTENDED REG_ICASE
+; upper and lower have no meaning here so they fail, however these
+; may compile with other libraries...
+;[[:lower:]] !
+;[[:upper:]] !
+0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
+
+; known and suspected bugs:
+- match_default normal REG_EXTENDED
+\( ( 0 1
+\) ) 0 1
+\$ $ 0 1
+\^ ^ 0 1
+\. . 0 1
+\* * 0 1
+\+ + 0 1
+\? ? 0 1
+\[ [ 0 1
+\] ] 0 1
+\| | 0 1
+\\ \\ 0 1
+# # 0 1
+\# # 0 1
+a- a- 0 2
+\- - 0 1
+\{ { 0 1
+\} } 0 1
+0 0 0 1
+1 1 0 1
+9 9 0 1
+b b 0 1
+B B 0 1
+< < 0 1
+> > 0 1
+w w 0 1
+W W 0 1
+` ` 0 1
+' ' 0 1
+\n \n 0 1
+, , 0 1
+a a 0 1
+f f 0 1
+n n 0 1
+r r 0 1
+t t 0 1
+v v 0 1
+c c 0 1
+x x 0 1
+: : 0 1
+(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
+
+- match_default normal REG_EXTENDED REG_ICASE
+a A 0 1
+A a 0 1
+[abc]+ abcABC 0 6
+[ABC]+ abcABC 0 6
+[a-z]+ abcABC 0 6
+[A-Z]+ abzANZ 0 6
+[a-Z]+ abzABZ 0 6
+[A-z]+ abzABZ 0 6
+[[:lower:]]+ abyzABYZ 0 8
+[[:upper:]]+ abzABZ 0 6
+[[:alpha:]]+ abyzABYZ 0 8
+[[:alnum:]]+ 09abyzABYZ 0 10
+
+; word start:
+\<abcd "  abcd" 2 6
+\<ab cab -1 -1
+\<ab "\nab" 1 3
+\<tag ::tag 2 5
+;word end:
+abc\> abc 0 3
+abc\> abcd -1 -1
+abc\> abc\n 0 3
+abc\> abc:: 0 3
+
+; collating elements and rewritten set code:
+- match_default normal REG_EXTENDED REG_STARTEND
+;[[.zero.]] 0 0 1
+;[[.one.]] 1 0 1
+;[[.two.]] 2 0 1
+;[[.three.]] 3 0 1
+[[.a.]] baa 1 2
+;[[.right-curly-bracket.]] } 0 1
+;[[.NUL.]] \0 0 1
+[[:<:]z] !
+[a[:>:]] !
+[[=a=]] a 0 1
+;[[=right-curly-bracket=]] } 0 1
+- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
+[[.A.]] A 0 1
+[[.A.]] a 0 1
+[[.A.]-b]+ AaBb 0 4
+[A-[.b.]]+ AaBb 0 4
+[[.a.]-B]+ AaBb 0 4
+[a-[.B.]]+ AaBb 0 4
+- match_default normal REG_EXTENDED REG_STARTEND
+[[.a.]-c]+ abcd 0 3
+[a-[.c.]]+ abcd 0 3
+[[:alpha:]-a] !
+[a-[:alpha:]] !
+
+; try mutli-character ligatures:
+;[[.ae.]] ae 0 2
+;[[.ae.]] aE -1 -1
+;[[.AE.]] AE 0 2
+;[[.Ae.]] Ae 0 2
+;[[.ae.]-b] a -1 -1
+;[[.ae.]-b] b 0 1
+;[[.ae.]-b] ae 0 2
+;[a-[.ae.]] a 0 1
+;[a-[.ae.]] b -1 -1
+;[a-[.ae.]] ae 0 2
+- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
+;[[.ae.]] AE 0 2
+;[[.ae.]] Ae 0 2
+;[[.AE.]] Ae 0 2
+;[[.Ae.]] aE 0 2
+;[[.AE.]-B] a -1 -1
+;[[.Ae.]-b] b 0 1
+;[[.Ae.]-b] B 0 1
+;[[.ae.]-b] AE 0 2
+
+- match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST
+\s+ "ab   ab" 2 5
+\S+ "  abc  " 2 5
+
+- match_default normal REG_EXTENDED REG_STARTEND
+\`abc abc 0 3
+\`abc aabc -1 -1
+abc\' abc 0 3
+abc\' abcd -1 -1
+abc\' abc\n\n -1 -1
+abc\' abc 0 3
+
+; extended repeat checking to exercise new algorithms:
+ab.*xy abxy_ 0 4
+ab.*xy ab_xy_ 0 5
+ab.*xy abxy 0 4
+ab.*xy ab_xy 0 5
+ab.* ab 0 2
+ab.* ab__ 0 4
+
+ab.{2,5}xy ab__xy_ 0 6
+ab.{2,5}xy ab____xy_ 0 8
+ab.{2,5}xy ab_____xy_ 0 9
+ab.{2,5}xy ab__xy 0 6
+ab.{2,5}xy ab_____xy 0 9
+ab.{2,5} ab__ 0 4
+ab.{2,5} ab_______ 0 7
+ab.{2,5}xy ab______xy -1 -1
+ab.{2,5}xy ab_xy -1 -1
+
+ab.*?xy abxy_ 0 4
+ab.*?xy ab_xy_ 0 5
+ab.*?xy abxy 0 4
+ab.*?xy ab_xy 0 5
+ab.*? ab 0 2
+ab.*? ab__ 0 4
+
+ab.{2,5}?xy ab__xy_ 0 6
+ab.{2,5}?xy ab____xy_ 0 8
+ab.{2,5}?xy ab_____xy_ 0 9
+ab.{2,5}?xy ab__xy 0 6
+ab.{2,5}?xy ab_____xy 0 9
+ab.{2,5}? ab__ 0 4
+ab.{2,5}? ab_______ 0 7
+ab.{2,5}?xy ab______xy -1 -1
+ab.{2,5}xy ab_xy -1 -1
+
+; again but with slower algorithm variant:
+- match_default REG_EXTENDED
+; now again for single character repeats:
+
+ab_*xy abxy_ 0 4
+ab_*xy ab_xy_ 0 5
+ab_*xy abxy 0 4
+ab_*xy ab_xy 0 5
+ab_* ab 0 2
+ab_* ab__ 0 4
+
+ab_{2,5}xy ab__xy_ 0 6
+ab_{2,5}xy ab____xy_ 0 8
+ab_{2,5}xy ab_____xy_ 0 9
+ab_{2,5}xy ab__xy 0 6
+ab_{2,5}xy ab_____xy 0 9
+ab_{2,5} ab__ 0 4
+ab_{2,5} ab_______ 0 7
+ab_{2,5}xy ab______xy -1 -1
+ab_{2,5}xy ab_xy -1 -1
+
+ab_*?xy abxy_ 0 4
+ab_*?xy ab_xy_ 0 5
+ab_*?xy abxy 0 4
+ab_*?xy ab_xy 0 5
+ab_*? ab 0 2
+ab_*? ab__ 0 4
+
+ab_{2,5}?xy ab__xy_ 0 6
+ab_{2,5}?xy ab____xy_ 0 8
+ab_{2,5}?xy ab_____xy_ 0 9
+ab_{2,5}?xy ab__xy 0 6
+ab_{2,5}?xy ab_____xy 0 9
+ab_{2,5}? ab__ 0 4
+ab_{2,5}? ab_______ 0 7
+ab_{2,5}?xy ab______xy -1 -1
+ab_{2,5}xy ab_xy -1 -1
+
+; and again for sets:
+ab[_,;]*xy abxy_ 0 4
+ab[_,;]*xy ab_xy_ 0 5
+ab[_,;]*xy abxy 0 4
+ab[_,;]*xy ab_xy 0 5
+ab[_,;]* ab 0 2
+ab[_,;]* ab__ 0 4
+
+ab[_,;]{2,5}xy ab__xy_ 0 6
+ab[_,;]{2,5}xy ab____xy_ 0 8
+ab[_,;]{2,5}xy ab_____xy_ 0 9
+ab[_,;]{2,5}xy ab__xy 0 6
+ab[_,;]{2,5}xy ab_____xy 0 9
+ab[_,;]{2,5} ab__ 0 4
+ab[_,;]{2,5} ab_______ 0 7
+ab[_,;]{2,5}xy ab______xy -1 -1
+ab[_,;]{2,5}xy ab_xy -1 -1
+
+ab[_,;]*?xy abxy_ 0 4
+ab[_,;]*?xy ab_xy_ 0 5
+ab[_,;]*?xy abxy 0 4
+ab[_,;]*?xy ab_xy 0 5
+ab[_,;]*? ab 0 2
+ab[_,;]*? ab__ 0 4
+
+ab[_,;]{2,5}?xy ab__xy_ 0 6
+ab[_,;]{2,5}?xy ab____xy_ 0 8
+ab[_,;]{2,5}?xy ab_____xy_ 0 9
+ab[_,;]{2,5}?xy ab__xy 0 6
+ab[_,;]{2,5}?xy ab_____xy 0 9
+ab[_,;]{2,5}? ab__ 0 4
+ab[_,;]{2,5}? ab_______ 0 7
+ab[_,;]{2,5}?xy ab______xy -1 -1
+ab[_,;]{2,5}xy ab_xy -1 -1
+
+; and again for tricky sets with digraphs:
+;ab[_[.ae.]]*xy abxy_ 0 4
+;ab[_[.ae.]]*xy ab_xy_ 0 5
+;ab[_[.ae.]]*xy abxy 0 4
+;ab[_[.ae.]]*xy ab_xy 0 5
+;ab[_[.ae.]]* ab 0 2
+;ab[_[.ae.]]* ab__ 0 4
+
+;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6
+;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8
+;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9
+;ab[_[.ae.]]{2,5}xy ab__xy 0 6
+;ab[_[.ae.]]{2,5}xy ab_____xy 0 9
+;ab[_[.ae.]]{2,5} ab__ 0 4
+;ab[_[.ae.]]{2,5} ab_______ 0 7
+;ab[_[.ae.]]{2,5}xy ab______xy -1 -1
+;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
+
+;ab[_[.ae.]]*?xy abxy_ 0 4
+;ab[_[.ae.]]*?xy ab_xy_ 0 5
+;ab[_[.ae.]]*?xy abxy 0 4
+;ab[_[.ae.]]*?xy ab_xy 0 5
+;ab[_[.ae.]]*? ab 0 2
+;ab[_[.ae.]]*? ab__ 0 2
+
+;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6
+;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8
+;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9
+;ab[_[.ae.]]{2,5}?xy ab__xy 0 6
+;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9
+;ab[_[.ae.]]{2,5}? ab__ 0 4
+;ab[_[.ae.]]{2,5}? ab_______ 0 4
+;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1
+;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
+
+; new bugs detected in spring 2003:
+- normal match_continuous REG_NO_POSIX_TEST
+b abc 1 2
+
+() abc 0 0 0 0
+^() abc 0 0 0 0
+^()+ abc 0 0 0 0
+^(){1} abc 0 0 0 0
+^(){2} abc 0 0 0 0
+^((){2}) abc 0 0 0 0 0 0
+() "" 0 0 0 0
+()\1 "" 0 0 0 0
+()\1 a 0 0 0 0
+a()\1b ab 0 2 1 1
+a()b\1 ab 0 2 1 1
+
+; subtleties of matching with no sub-expressions marked
+- normal match_nosubs REG_NO_POSIX_TEST
+a(b?c)+d accd 0 4 
+(wee|week)(knights|night) weeknights 0 10 
+.* abc 0 3
+a(b|(c))d abd 0 3 
+a(b|(c))d acd 0 3
+a(b*|c|e)d abbd 0 4
+a(b*|c|e)d acd 0 3 
+a(b*|c|e)d ad 0 2
+a(b?)c abc 0 3
+a(b?)c ac 0 2
+a(b+)c abc 0 3
+a(b+)c abbbc 0 5
+a(b*)c ac 0 2
+(a|ab)(bc([de]+)f|cde) abcdef 0 6
+a([bc]?)c abc 0 3
+a([bc]?)c ac 0 2
+a([bc]+)c abc 0 3
+a([bc]+)c abcc 0 4
+a([bc]+)bc abcbc 0 5
+a(bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abbb 0 4
+a(bbb+|bb+|b)bb abbb 0 4
+(.*).* abcdef 0 6
+(a*)* bc 0 0
+
+- normal nosubs REG_NO_POSIX_TEST
+a(b?c)+d accd 0 4 
+(wee|week)(knights|night) weeknights 0 10 
+.* abc 0 3
+a(b|(c))d abd 0 3 
+a(b|(c))d acd 0 3
+a(b*|c|e)d abbd 0 4
+a(b*|c|e)d acd 0 3 
+a(b*|c|e)d ad 0 2
+a(b?)c abc 0 3
+a(b?)c ac 0 2
+a(b+)c abc 0 3
+a(b+)c abbbc 0 5
+a(b*)c ac 0 2
+(a|ab)(bc([de]+)f|cde) abcdef 0 6
+a([bc]?)c abc 0 3
+a([bc]?)c ac 0 2
+a([bc]+)c abc 0 3
+a([bc]+)c abcc 0 4
+a([bc]+)bc abcbc 0 5
+a(bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abbb 0 4
+a(bbb+|bb+|b)bb abbb 0 4
+(.*).* abcdef 0 6
+(a*)* bc 0 0
+

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]