This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.10-186-ga8f895e


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  a8f895ebe15c8988618ab1948d3f7035341a2eb7 (commit)
       via  2b7a8664fa86a182c053b6743f36a5ea8bf6bf6f (commit)
       via  63fbc91bec5623ab002c6e3ae1f3cd08a5f010fd (commit)
       via  8a4494506d9175a2c205ff8d39dc58abd83682eb (commit)
      from  c3db953c165baa444d01ee6c04ef0c51eba42522 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a8f895ebe15c8988618ab1948d3f7035341a2eb7

commit a8f895ebe15c8988618ab1948d3f7035341a2eb7
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Mon Jul 20 21:18:28 2009 -0700

    Better check for optimization in new x86-64 strstr/strcasestr.

diff --git a/ChangeLog b/ChangeLog
index abbbedb..b107428 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2009-07-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/x86_64/multiarch/strstr.c [USE_AS_STRCASESTR] (STRSTR_SSE42):
+	Use NONASCII_CASE information provided by the locale to determine
+	whether optimized string load function can be used.  Minor cleanups.
+
 2009-07-20  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* string/strcasestr.c (STRCASESTR): New macro.
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index bb42753..f1455a1 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -26,8 +26,7 @@
 
 #ifdef USE_AS_STRCASESTR
 # include <ctype.h>
-# include <locale.h>
-# include <string.h>
+# include <locale/localeinfo.h>
 
 # define LOADBYTE(C)		tolower (C)
 # define CMPBYTE(C1, C2) \
@@ -257,12 +256,20 @@ char *
 __attribute__ ((section (".text.sse4.2")))
 STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
 {
-  int len, len1;
+  int len;
+  int len1;
   const unsigned char *p1 = s1;
   const unsigned char *p2 = s2;
-  __m128i frag1, frag2, zero;
-  int cmp, cmp_c, cmp_z, cmp_s;
-  int kmp_fwd, bmsk, bmsk1;
+  __m128i frag1;
+  __m128i frag2;
+  __m128i zero;
+  int cmp;
+  int cmp_c;
+  int cmp_z;
+  int cmp_s;
+  int kmp_fwd;
+  int bmsk;
+  int bmsk1;
   const unsigned char *pt;
 
   if (!p2[0])
@@ -277,11 +284,8 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
 
 #ifdef USE_AS_STRCASESTR
   __m128i (*strloadu) (const unsigned char *);
-  const char *used_locale = setlocale (LC_CTYPE, NULL);
 
-  if (!used_locale
-      || (used_locale[0] == 'C' && used_locale[1] == '\0')
-      || strcmp (used_locale, "POSIX") == 0)
+  if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE) == 0)
     strloadu = __m128i_strloadu_tolower_posix;
   else
     strloadu = __m128i_strloadu_tolower;
@@ -430,7 +434,7 @@ re_trace:
      action   done   done   continue    continue if s2 < s1
 	      false  match  retrace s1     else false
    */
- 
+
   if(cmp_s & !cmp)
     return (char *) pt;
   else if (cmp_z)

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2b7a8664fa86a182c053b6743f36a5ea8bf6bf6f

commit 2b7a8664fa86a182c053b6743f36a5ea8bf6bf6f
Author: H.J. Lu <hongjiu.lu@intel.com>
Date:   Mon Jul 20 21:06:50 2009 -0700

    SSE4.2 strstr/strcasestr for x86-64.
    
    This patch implements SSE4.2 strstr/strcasestr, using Knuth-Morris-Pratt
    string searching algorithm.

diff --git a/ChangeLog b/ChangeLog
index 9d6b6d3..abbbedb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2009-07-20  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* string/strcasestr.c (STRCASESTR): New macro.
+	(__strcasestr): Renamed to ..
+	(STRCASESTR): ...this.
+	* string/strstr.c (STRSTR): New macro.
+	(strstr): Renamed to ..
+	(STRSTR): ...this.
+	* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
+	strstr-c strcasestr-c
+	(CFLAGS-strstr.c): New.
+	(CFLAGS-strcasestr.c): Likewise.
+	* sysdeps/x86_64/multiarch/strcasestr-c.c: New file.
+	* sysdeps/x86_64/multiarch/strcasestr.c: New file.
+	* sysdeps/x86_64/multiarch/strstr-c.c: New file.
+	* sysdeps/x86_64/multiarch/strstr.c: New file.
+
 2009-07-20  Ulrich Drepper  <drepper@redhat.com>
 
 	* locale/localeinfo.h (LIMAGIC): Update value for LC_CTYPE.
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 92f2eac..088b5d9 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -1,5 +1,6 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994, 1996-2000, 2004, 2008 Free Software Foundation, Inc.
+   Copyright (C) 1994, 1996-2000, 2004, 2008, 2009
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -52,11 +53,16 @@
 #undef strcasestr
 #undef __strcasestr
 
+#ifndef STRCASESTR
+#define STRCASESTR __strcasestr
+#endif
+
+
 /* Find the first occurrence of NEEDLE in HAYSTACK, using
    case-insensitive comparison.  This function gives unspecified
    results in multibyte locales.  */
 char *
-__strcasestr (const char *haystack_start, const char *needle_start)
+STRCASESTR (const char *haystack_start, const char *needle_start)
 {
   const char *haystack = haystack_start;
   const char *needle = needle_start;
diff --git a/string/strstr.c b/string/strstr.c
index a9dc312..ef45f82 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -1,5 +1,6 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994,1996,1997,2000,2001,2003,2008 Free Software Foundation, Inc.
+   Copyright (C) 1994,1996,1997,2000,2001,2003,2008,2009
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -40,11 +41,15 @@
 
 #undef strstr
 
+#ifndef STRSTR
+#define STRSTR strstr
+#endif
+
 /* Return the first occurrence of NEEDLE in HAYSTACK.  Return HAYSTACK
    if NEEDLE is empty, otherwise NULL if NEEDLE is not found in
    HAYSTACK.  */
 char *
-strstr (const char *haystack_start, const char *needle_start)
+STRSTR (const char *haystack_start, const char *needle_start)
 {
   const char *haystack = haystack_start;
   const char *needle = needle_start;
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 71e85f0..5ce14aa 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -6,9 +6,11 @@ endif
 ifeq ($(subdir),string)
 sysdep_routines += stpncpy-c strncpy-c strncmp-c
 ifeq (yes,$(config-cflags-sse4))
-sysdep_routines += strcspn-c strpbrk-c strspn-c
+sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-strcspn-c.c += -msse4
 CFLAGS-strpbrk-c.c += -msse4
 CFLAGS-strspn-c.c += -msse4
+CFLAGS-strstr.c += -msse4
+CFLAGS-strcasestr.c += -msse4
 endif
 endif
diff --git a/sysdeps/x86_64/multiarch/strcasestr-c.c b/sysdeps/x86_64/multiarch/strcasestr-c.c
new file mode 100644
index 0000000..e687953
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasestr-c.c
@@ -0,0 +1,18 @@
+#include "init-arch.h"
+
+#define STRCASESTR __strcasestr_sse2
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name) \
+  __hidden_ver1 (__strcasestr_sse2, __GI_strcasestr, __strcasestr_sse2);
+
+#include "string/strcasestr.c"
+
+extern char *__strcasestr_sse42 (const char *, const char *);
+
+#if 1
+libc_ifunc (__strcasestr,
+	    HAS_SSE4_2 ? __strcasestr_sse42 : __strcasestr_sse2);
+#else
+libc_ifunc (__strcasestr,
+	    0 ? __strcasestr_sse42 : __strcasestr_sse2);
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcasestr.c b/sysdeps/x86_64/multiarch/strcasestr.c
new file mode 100644
index 0000000..064e3ef
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasestr.c
@@ -0,0 +1,3 @@
+#define USE_AS_STRCASESTR
+#define STRSTR_SSE42 __strcasestr_sse42
+#include "strstr.c"
diff --git a/sysdeps/x86_64/multiarch/strstr-c.c b/sysdeps/x86_64/multiarch/strstr-c.c
new file mode 100644
index 0000000..cff99b7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strstr-c.c
@@ -0,0 +1,12 @@
+#include "init-arch.h"
+
+#define STRSTR __strstr_sse2
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name) \
+  __hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
+
+#include "string/strstr.c"
+
+extern char *__strstr_sse42 (const char *, const char *);
+
+libc_ifunc (strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_sse2);
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
new file mode 100644
index 0000000..bb42753
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -0,0 +1,483 @@
+/* strstr with SSE4.2 intrinsics
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <nmmintrin.h>
+
+#ifndef STRSTR_SSE42
+# define STRSTR_SSE42 __strstr_sse42
+#endif
+
+#ifdef USE_AS_STRCASESTR
+# include <ctype.h>
+# include <locale.h>
+# include <string.h>
+
+# define LOADBYTE(C)		tolower (C)
+# define CMPBYTE(C1, C2) \
+  ((C1) == (C2) || tolower (C1) == tolower (C2))
+#else
+# define LOADBYTE(C)		(C)
+# define CMPBYTE(C1, C2)	((C1) == (C2))
+#endif
+
+/* We use 0xc ordered-compare:
+	_SIDD_UBYTE_OPS
+	| _SIDD_CMP_EQUAL_ORDERED
+	| _SIDD_LEAST_SIGNIFICANT
+   on pcmpistri to do the scanning and string comparison requirements of
+   sub-string match.  In the scanning phase, we process Cflag and ECX
+   index to locate the first fragment match; once the first fragment
+   match position has been identified, we do comparison of subsequent
+   string fragments until we can conclude false or true match; when
+   concluding a false match, we may need to repeat scanning process
+   from next relevant offset in the target string.
+
+   In the scanning phase we have 4 cases:
+   case		ECX	CFlag	ZFlag	SFlag
+    1		16	  0	  0	  0
+    2a		16	  0	  0	  1
+    2b		16	  0	  1	  0
+    2c		16	  0	  1	  1
+
+   1. No ordered-comparison match, both 16B fragments are valid, so
+      continue to next fragment.
+   2. No ordered-comparison match, there is EOS in either fragment,
+   2a. Zflg = 0, Sflg = 1, we continue
+   2b. Zflg = 1, Sflg = 0, we conclude no match and return.
+   2c. Zflg = 1, Sflg = 1, length determines match or no match
+
+   In the string comparison phase, the 1st fragment match is fixed up
+   to produce ECX = 0.  Subsequent fragment compare of nonzero index
+   and no match conclude a false match.
+
+   case		ECX	CFlag	ZFlag	SFlag
+    3		 X	  1	  0	  0/1
+    4a		 0  	  1	  0	  0
+    4b		 0  	  1	  0	  1
+    4c		0 < X  	  1	  0	  0/1
+    5		16 	  0	  1	  0
+
+   3. An initial ordered-comparison fragment match, we fix up to do
+      subsequent string comparison
+   4a. Continuation of fragment comparison of a string compare.
+   4b. EOS reached in the reference string, we conclude true match and
+       return
+   4c. String compare failed if index is nonzero, we need to go back to
+       scanning
+   5.  failed string compare, go back to scanning
+ */
+
+/* Fix-up of removal of unneeded data due to 16B aligned load
+   parameters:
+     value: 16B data loaded from 16B aligned address.
+     offset: Offset of target data address relative to 16B aligned load
+	     address.
+ */
+
+static __inline__ __m128i
+__m128i_shift_right (__m128i value, int offset)
+{
+  switch (offset)
+    {
+    case 1:
+      value = _mm_srli_si128 (value, 1);
+      break;
+    case 2:
+      value = _mm_srli_si128 (value, 2);
+      break;
+    case 3:
+      value = _mm_srli_si128 (value, 3);
+      break;
+    case 4:
+      value = _mm_srli_si128 (value, 4);
+      break;
+    case 5:
+      value = _mm_srli_si128 (value, 5);
+      break;
+    case 6:
+      value = _mm_srli_si128 (value, 6);
+      break;
+    case 7:
+      value = _mm_srli_si128 (value, 7);
+      break;
+    case 8:
+      value = _mm_srli_si128 (value, 8);
+      break;
+    case 9:
+      value = _mm_srli_si128 (value, 9);
+      break;
+    case 10:
+      value = _mm_srli_si128 (value, 10);
+      break;
+    case 11:
+      value = _mm_srli_si128 (value, 11);
+      break;
+    case 12:
+      value = _mm_srli_si128 (value, 12);
+      break;
+    case 13:
+      value = _mm_srli_si128 (value, 13);
+      break;
+    case 14:
+      value = _mm_srli_si128 (value, 14);
+      break;
+    case 15:
+      value = _mm_srli_si128 (value, 15);
+      break;
+    }
+  return value;
+}
+
+/* Simple replacement of movdqu to address 4KB boundary cross issue.
+   If EOS occurs within less than 16B before 4KB boundary, we don't
+   cross to next page.  */
+
+static __m128i
+__attribute__ ((section (".text.sse4.2")))
+__m128i_strloadu (const unsigned char * p)
+{
+  int offset = ((size_t) p & (16 - 1));
+
+  if (offset && (int) ((size_t) p & 0xfff) > 0xff0)
+    {
+      __m128i a = _mm_load_si128 ((__m128i *) (p - offset));
+      __m128i zero = _mm_setzero_si128 ();
+      int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, zero));
+      if ((bmsk >> offset) != 0)
+	return __m128i_shift_right (a, offset);
+    }
+  return _mm_loadu_si128 ((__m128i *) p);
+}
+
+#ifdef USE_AS_STRCASESTR
+
+/* Similar to __m128i_strloadu.  Convert to lower case for POSIX/C
+   locale.  */
+
+static __m128i
+__attribute__ ((section (".text.sse4.2")))
+__m128i_strloadu_tolower_posix (const unsigned char * p)
+{
+  __m128i frag = __m128i_strloadu (p);
+
+  /* Convert frag to lower case for POSIX/C locale.  */
+  __m128i rangeuc = _mm_set_epi64x (0x0, 0x5a41);
+  __m128i u2ldelta = _mm_set1_epi64x (0xe0e0e0e0e0e0e0e0);
+  __m128i mask1 = _mm_cmpistrm (rangeuc, frag, 0x44);
+  __m128i mask2 = _mm_blendv_epi8 (u2ldelta, frag, mask1);
+  mask2 = _mm_sub_epi8 (mask2, u2ldelta);
+  return  _mm_blendv_epi8 (frag, mask2, mask1);
+}
+
+/* Similar to __m128i_strloadu.  Convert to lower case for non-POSIX/C
+   locale.  */
+
+static __m128i
+__attribute__ ((section (".text.sse4.2")))
+__m128i_strloadu_tolower (const unsigned char * p)
+{
+  union
+    {
+      char b[16];
+      __m128i x;
+    } u;
+
+  for (int i = 0; i < 16; i++)
+    if (p[i] == 0)
+      {
+	u.b[i] = 0;
+	break;
+      }
+    else
+      u.b[i] = tolower (p[i]);
+
+  return u.x;
+}
+#endif
+
+/* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP
+   algorithm) overlap for a fully populated 16B vector.
+   Input parameter: 1st 16Byte loaded from the reference string of a
+		    strstr function.
+   We don't use KMP algorithm if reference string is less than 16B.
+ */
+
+static int
+__inline__ __attribute__ ((__always_inline__,))
+KMP16Bovrlap (__m128i s2)
+{
+  __m128i a, b;
+  int bmsk, k1;
+
+  b = _mm_unpacklo_epi8 (s2, s2);
+  a = _mm_unpacklo_epi8 (b, b);
+  a = _mm_shuffle_epi32 (a, 0);
+  b = _mm_srli_si128 (s2, sizeof (char));
+  bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (b, a));
+
+  /* _BitScanForward(&k1, bmsk); */
+  __asm ("bsfl %[bmsk], %[k1]" : [k1] "=r" (k1) : [bmsk] "r" (bmsk));
+  if (!bmsk)
+    return 16;
+  else if (bmsk == 0x7fff)
+    return 1;
+  else if (!k1)
+    {
+      /* There are at least two distinct chars in s2.  If bytes 0 and 1
+	 are identical and the distinct value lies farther down, we can
+	 deduce that the next byte offset to restart the full compare is
+	 no earlier than byte 3.  */
+      return 3;
+    }
+  else
+    {
+      /* Byte 1 is not degenerated to byte 0.  */
+      return k1 + 1;
+    }
+}
+
+char *
+__attribute__ ((section (".text.sse4.2")))
+STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
+{
+  int len, len1;
+  const unsigned char *p1 = s1;
+  const unsigned char *p2 = s2;
+  __m128i frag1, frag2, zero;
+  int cmp, cmp_c, cmp_z, cmp_s;
+  int kmp_fwd, bmsk, bmsk1;
+  const unsigned char *pt;
+
+  if (!p2[0])
+    return (char *) p1;
+
+  if (!p1[0])
+    return NULL;
+
+  /* Check if p1 length is 1 byte long.  */
+  if (!p1[1])
+    return !p2[1] && CMPBYTE (p1[0], p2[0]) ? (char *) p1 : NULL;
+
+#ifdef USE_AS_STRCASESTR
+  __m128i (*strloadu) (const unsigned char *);
+  const char *used_locale = setlocale (LC_CTYPE, NULL);
+
+  if (!used_locale
+      || (used_locale[0] == 'C' && used_locale[1] == '\0')
+      || strcmp (used_locale, "POSIX") == 0)
+    strloadu = __m128i_strloadu_tolower_posix;
+  else
+    strloadu = __m128i_strloadu_tolower;
+#else
+# define strloadu __m128i_strloadu
+#endif
+
+  /* p1 > 1 byte long.  Load up to 16 bytes of fragment.  */
+  frag1 = strloadu (p1);
+
+  if (p2[1])
+    {
+      /* p2 is > 1 byte long.  */
+      frag2 = strloadu (p2);
+    }
+  else
+    {
+      zero = _mm_setzero_si128 ();
+      frag2 = _mm_insert_epi8 (zero, LOADBYTE(p2[0]), 0);
+    }
+
+  /* Unsigned bytes, equal order, does frag2 have a null?  */
+  cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
+  cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
+  cmp = _mm_cmpistri (frag2, frag1, 0x0c);
+  cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
+  if (cmp_s & cmp_c)
+    {
+      zero = _mm_setzero_si128 ();
+      bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2, zero));
+      __asm ("bsfl %[bmsk], %[len]"
+	     : [len] "=r" (len) : [bmsk] "r" (bmsk));
+      p1 += cmp;
+      if ((len + cmp) <= 16)
+	return (char *) p1;
+      else
+	{
+	  /* Load up to 16 bytes of fragment.  */
+	  frag1 = strloadu (p1);
+	  cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
+	  cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
+	  cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
+	  cmp = _mm_cmpistri (frag2, frag1, 0x0c);
+	  if ((len + cmp) <= 16)
+	    return (char *) p1 + cmp;
+	}
+    }
+
+  if (cmp_s)
+    {
+      /* Adjust addr for 16B alignment in ensuing loop.  */
+      while (!cmp_z)
+	{
+	  p1 += cmp;
+	  /* Load up to 16 bytes of fragment.  */
+	  frag1 = strloadu (p1);
+	  cmp = _mm_cmpistri (frag2, frag1, 0x0c);
+	  cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
+	  cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
+	  /* Because s2 < 16 bytes and we adjusted p1 by non-zero cmp
+	     once already, this time cmp will be zero and we can exit.  */
+	  if ((!cmp) & cmp_c)
+	    break;
+	}
+
+      if (!cmp_c)
+	return NULL;
+      else
+	{
+	  /* Since s2 is less than 16 bytes, cmp_c is a definitive
+	     determination of full match.  */
+	  return (char *) p1 + cmp;
+	}
+    }
+
+  /* General case, s2 is at least 16 bytes or more.
+     First, the common case of false-match at first byte of p2.  */
+  pt = NULL;
+  kmp_fwd = 0;
+re_trace:
+  while (!cmp_c)
+    {
+      /* frag1 has null. */
+      if (cmp_z)
+	return NULL;
+
+      /* frag 1 has no null, advance 16 bytes.  */
+      p1 += 16;
+      /* Load up to 16 bytes of fragment.  */
+      frag1 = strloadu (p1);
+      /* Unsigned bytes, equal order, is there a partial match?  */
+      cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
+      cmp = _mm_cmpistri(frag2, frag1, 0x0c);
+      cmp_z = _mm_cmpistrz(frag2, frag1, 0x0c);
+    }
+
+  /* Next, handle initial positive match as first byte of p2.  We have
+     a partial fragment match, make full determination until we reach
+     end of s2.  */
+  if (!cmp)
+    {
+      if (cmp_z)
+	return (char *) p1;
+
+      pt = p1;
+      p1 += 16;
+      p2 += 16;
+      /* Load up to 16 bytes of fragment.  */
+      frag2 = strloadu(p2);
+    }
+  else
+    {
+      /* Adjust 16B alignment.  */
+      p1 += cmp;
+      pt = p1;
+    }
+
+  /* Load up to 16 bytes of fragment.  */
+  frag1 = strloadu (p1);
+
+  /* Unsigned bytes, equal order, does frag2 have a null?  */
+  cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
+  cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
+  cmp = _mm_cmpistri (frag2, frag1, 0x0c);
+  cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
+  while (!(cmp | cmp_z | cmp_s))
+    {
+      p1 += 16;
+      p2 += 16;
+      /* Load up to 16 bytes of fragment.  */
+      frag2 = strloadu (p2);
+      /* Load up to 16 bytes of fragment.  */
+      frag1 = strloadu (p1);
+      /* Unsigned bytes, equal order, does frag2 have a null?  */
+      cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
+      cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
+      cmp = _mm_cmpistri (frag2, frag1, 0x0c);
+      cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
+    }
+
+  /* Full determination yielded false result, retrace s1 to next
+     starting position.
+     Zflg	1      0      1			0/1
+     Sflg	0      1      1			0/1
+     cmp	na     0      0			>0
+     action   done   done   continue    continue if s2 < s1
+	      false  match  retrace s1     else false
+   */
+ 
+  if(cmp_s & !cmp)
+    return (char *) pt;
+  else if (cmp_z)
+    {
+      if (!cmp_s)
+	return NULL;
+
+      /* Handle both zero and sign flag set and s1 is shorter in
+	 length.  */
+      zero = _mm_setzero_si128 ();
+      bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag2));
+      bmsk1 = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag1));
+      __asm ("bsfl %[bmsk], %[len]"
+	     : [len] "=r" (len) : [bmsk] "r" (bmsk));
+      __asm ("bsfl %[bmsk1], %[len1]"
+	     : [len1] "=r" (len1) : [bmsk1] "r" (bmsk1));
+      if (len >= len1)
+	return NULL;
+    }
+  else if (!cmp)
+    return (char *) pt;
+
+  /* Otherwise, we have to retrace and continue.  Default of multiple
+     paths that need to retrace from next byte in s1.  */
+  p2 = s2;
+  frag2 = strloadu (p2);
+
+  if (!kmp_fwd)
+    kmp_fwd = KMP16Bovrlap (frag2);
+
+  /* KMP algorithm predicted overlap needs to be corrected for
+     partial fragment compare.  */
+  p1 = pt + (kmp_fwd > cmp ? cmp : kmp_fwd);
+
+  /* Since s2 is at least 16 bytes long, we're certain there is no
+     match.  */
+  if (!p1[0])
+    return NULL;
+  else
+    {
+      /* Load up to 16 bytes of fragment.  */
+      frag1 = strloadu (p1);
+    }
+
+  /* Unsigned bytes, equal order, is there a partial match?  */
+  cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
+  cmp = _mm_cmpistri (frag2, frag1, 0x0c);
+  cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
+  goto re_trace;
+}

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=63fbc91bec5623ab002c6e3ae1f3cd08a5f010fd

commit 63fbc91bec5623ab002c6e3ae1f3cd08a5f010fd
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Mon Jul 20 20:50:38 2009 -0700

    Use correct index for _NL_CTYPE_NONASCII_CASE.

diff --git a/locale/C-ctype.c b/locale/C-ctype.c
index 2044fb7..420b08a 100644
--- a/locale/C-ctype.c
+++ b/locale/C-ctype.c
@@ -528,7 +528,7 @@ _nl_C_LC_CTYPE_width attribute_hidden =
 };
 
 /* Number of fields with fixed meanings, starting at 0.  */
-#define NR_FIXED 71
+#define NR_FIXED 72
 /* Number of class fields, starting at CLASS_OFFSET.  */
 #define NR_CLASSES 12
 /* Number of map fields, starting at MAP_OFFSET.  */
@@ -667,6 +667,8 @@ const struct locale_data _nl_C_LC_CTYPE attribute_hidden =
     { .wstr = NULL },
     /* _NL_CTYPE_MAP_TO_NONASCII */
     { .word = 0 },
+    /* _NL_CTYPE_NONASCII_CASE */
+    { .word = 0 },
     /* NR_CLASSES wctype_tables */
     { .string = (const char *) _nl_C_LC_CTYPE_class_upper.header },
     { .string = (const char *) _nl_C_LC_CTYPE_class_lower.header },
@@ -682,8 +684,6 @@ const struct locale_data _nl_C_LC_CTYPE attribute_hidden =
     { .string = (const char *) _nl_C_LC_CTYPE_class_alnum.header },
     /* NR_MAPS wctrans_tables */
     { .string = (const char *) _nl_C_LC_CTYPE_map_toupper.header },
-    { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header },
-    /* _NL_CTYPE_NONASCII_CASE */
-    { .word = 0 }
+    { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header }
   }
 };
diff --git a/locale/langinfo.h b/locale/langinfo.h
index d7ef6f6..c940c74 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -334,6 +334,7 @@ enum
   _NL_CTYPE_TRANSLIT_IGNORE_LEN,
   _NL_CTYPE_TRANSLIT_IGNORE,
   _NL_CTYPE_MAP_TO_NONASCII,
+  _NL_CTYPE_NONASCII_CASE,
   _NL_CTYPE_EXTRA_MAP_1,
   _NL_CTYPE_EXTRA_MAP_2,
   _NL_CTYPE_EXTRA_MAP_3,
@@ -348,7 +349,6 @@ enum
   _NL_CTYPE_EXTRA_MAP_12,
   _NL_CTYPE_EXTRA_MAP_13,
   _NL_CTYPE_EXTRA_MAP_14,
-  _NL_CTYPE_NONASCII_CASE,
   _NL_NUM_LC_CTYPE,
 
   /* LC_MONETARY category: formatting of monetary quantities.

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=8a4494506d9175a2c205ff8d39dc58abd83682eb

commit 8a4494506d9175a2c205ff8d39dc58abd83682eb
Author: Ulrich Drepper <drepper@redhat.com>
Date:   Mon Jul 20 20:04:42 2009 -0700

    Check generated locale for non-ASCII 8-bit characters with case conversion.
    
    If a locale does not have 8-bit characters with case conversion which
    are different from the ASCII conversion (±0x20) then we can perform
    some optimizations.  These will follow later.

diff --git a/ChangeLog b/ChangeLog
index da91742..9d6b6d3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2009-07-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* locale/localeinfo.h (LIMAGIC): Update value for LC_CTYPE.
+	* locale/langinfo.h: Define _NL_CTYPE_NONASCII_CASE.
+	* locale/C-ctype.c (_nl_C_LC_CTYPE): Add initializer for
+	_NL_CTYPE_NONASCII_CASE.
+	* locale/programs/ld-ctype.c (locale_ctype_t): Add nonascii_case
+	field.
+	(ctype_finish): Check whether there are any 8-bit characters outside
+	the range ASCII has or whether the mapping isn't the same as for
+	ASCII (±0x20).  Set nonascii_case appropriately.
+	(ctype_output): Add output handler for nonascii_case.
+
 2009-07-17  Ulrich Drepper  <drepper@redhat.com>
 
 	* sysdeps/generic/sysdep.h: Define cfi_personality, cfi_lsda,
diff --git a/locale/C-ctype.c b/locale/C-ctype.c
index 85f3d2a..2044fb7 100644
--- a/locale/C-ctype.c
+++ b/locale/C-ctype.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 1995-2002, 2003, 2009 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
 
@@ -682,6 +682,8 @@ const struct locale_data _nl_C_LC_CTYPE attribute_hidden =
     { .string = (const char *) _nl_C_LC_CTYPE_class_alnum.header },
     /* NR_MAPS wctrans_tables */
     { .string = (const char *) _nl_C_LC_CTYPE_map_toupper.header },
-    { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header }
+    { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header },
+    /* _NL_CTYPE_NONASCII_CASE */
+    { .word = 0 }
   }
 };
diff --git a/locale/langinfo.h b/locale/langinfo.h
index 59017b3..d7ef6f6 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -348,6 +348,7 @@ enum
   _NL_CTYPE_EXTRA_MAP_12,
   _NL_CTYPE_EXTRA_MAP_13,
   _NL_CTYPE_EXTRA_MAP_14,
+  _NL_CTYPE_NONASCII_CASE,
   _NL_NUM_LC_CTYPE,
 
   /* LC_MONETARY category: formatting of monetary quantities.
diff --git a/locale/localeinfo.h b/locale/localeinfo.h
index 3661080..19ea41a 100644
--- a/locale/localeinfo.h
+++ b/locale/localeinfo.h
@@ -1,5 +1,5 @@
 /* Declarations for internal libc locale interfaces
-   Copyright (C) 1995-2003, 2005, 2006, 2007, 2008
+   Copyright (C) 1995-2003, 2005, 2006, 2007, 2008, 2009
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -35,6 +35,8 @@
 #define	LIMAGIC(category) \
   (category == LC_COLLATE						\
    ? ((unsigned int) (0x20051014 ^ (category)))				\
+   : category == LC_CTYPE						\
+   ? ((unsigned int) (0x20090720 ^ (category)))				\
    : ((unsigned int) (0x20031115 ^ (category))))
 
 /* Two special weight constants for the collation data.  */
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index d4474bf..376a02c 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 1995-2006, 2007, 2009 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
 
@@ -181,6 +181,7 @@ struct locale_ctype_t
   size_t default_missing_lineno;
 
   uint32_t to_nonascii;
+  uint32_t nonascii_case;
 
   /* The arrays for the binary representation.  */
   char_class_t *ctype_b;
@@ -625,6 +626,27 @@ character <SP> not defined in character map")));
   else
     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
 
+  /* Check whether all single-byte characters map to their upper/lowercase
+     equivalent according to the ASCII rules.  */
+  for (cnt = 'A'; cnt <= 'Z'; ++cnt)
+    {
+      uint32_t uppval = ctype->map256_collection[0][cnt];
+      uint32_t lowval = ctype->map256_collection[1][cnt];
+      uint32_t lowuppval = ctype->map256_collection[0][lowval];
+      uint32_t lowlowval = ctype->map256_collection[1][lowval];
+
+      if (uppval != cnt
+	  || lowval != cnt + 0x20
+	  || lowuppval != cnt
+	  || lowlowval != cnt + 0x20)
+	ctype->nonascii_case = 1;
+    }
+  for (cnt = 0; cnt < 256; ++cnt)
+    if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
+      if (ctype->map256_collection[0][cnt] != cnt
+	  || ctype->map256_collection[1][cnt] != cnt)
+	ctype->nonascii_case = 1;
+
   /* Now that the tests are done make sure the name array contains all
      characters which are handled in the WIDTH section of the
      character set definition file.  */
@@ -1045,6 +1067,9 @@ ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
 	  CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
 		      &ctype->to_nonascii, sizeof (uint32_t));
 
+	  CTYPE_DATA (_NL_CTYPE_NONASCII_CASE,
+		      &ctype->nonascii_case, sizeof (uint32_t));
+
 	  case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
 	    iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
 	    iov[2 + elem + offset].iov_len = sizeof (uint32_t);

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                               |   36 +++
 locale/C-ctype.c                        |    6 +-
 locale/langinfo.h                       |    1 +
 locale/localeinfo.h                     |    4 +-
 locale/programs/ld-ctype.c              |   27 ++-
 string/strcasestr.c                     |   10 +-
 string/strstr.c                         |    9 +-
 sysdeps/x86_64/multiarch/Makefile       |    4 +-
 sysdeps/x86_64/multiarch/strcasestr-c.c |   18 ++
 sysdeps/x86_64/multiarch/strcasestr.c   |    3 +
 sysdeps/x86_64/multiarch/strstr-c.c     |   12 +
 sysdeps/x86_64/multiarch/strstr.c       |  487 +++++++++++++++++++++++++++++++
 12 files changed, 608 insertions(+), 9 deletions(-)
 create mode 100644 sysdeps/x86_64/multiarch/strcasestr-c.c
 create mode 100644 sysdeps/x86_64/multiarch/strcasestr.c
 create mode 100644 sysdeps/x86_64/multiarch/strstr-c.c
 create mode 100644 sysdeps/x86_64/multiarch/strstr.c


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]