This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Optimize strstr, strcasestr and memmem


This patch optimizes strstr, strcasestr and memmem functions.  This patch speeds up strstr, strcase and memmem functions with short needle inputs by more than 2 times on i686, x86_64, MIPS and other architectures.

GLIBC 2.9 introduced new, algorithmically-superior implementation of strstr, strcasestr and memmem functions. Unfortunately, this new implementation uses memchr to detect end-of-string condition which comes at significant overhead compared to piggy-backing matching procedure that GLIBC 2.8 and earlier versions used.  The new implementation heavily regressed the case for short needles, making strstr more than 2 times slower.  This patch cures the regression and even makes the GLIBC 2.9+ implementation faster than original GLIBC 2.8- version.

This patch adds several optimizations:

- Piggy-back the matching procedure to detect end-of-string for strstr and strcasestr when needle is short.  This removes the need to use memchr in two_way_short_needle.  [The long-needle case hops through parts of the string, and the same optimization cannot be used there as not every haystack character is read in by the matching loop.]

- Optimize search for the first character in strstr, strcasestr and memmem functions.  This is the hot-spot of the functions.

- Use pointer references instead of array[index] references.  This optimization makes it easier for compiler to generate good code, especially on architectures that don't have base+index addressing.

- Use straight tolower() for strcasestr (complexity of isupper() is about the same as tolower()).

Tested on i686-linux-gnu and x86_64-linux-gnu with no regressions.

OK to apply?

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics


	2012-05-18  Maxim Kuvyrkov  <maxim@codesourcery.com>
	* string/str-two-way.h (AVAILABLE1, AVAILABLE2, RET0_IF_0,)
	(AVAILABLE_USES_J): New macros, define their defaults.
	(two_way_short_needle): Use pointers for traversing arrays, detect
	end-of-string on-the-fly.  Use optimized first-character-loop.
	(two_way_long_needle): Use pointers for traversing arrays.
	* string/strcasestr.c, string/strstr.c (AVAILABLE): Update.
	(AVAILABLE1, AVAILABLE2, RET0_IF_0, AVAILABLE_USES_J): Define.
	* string/strcasestr.c (TOLOWER): Make auto-increment safe.
	* string/bug-strcasestr1.c: New test.
	* string/Makefile: Run it.
---
 string/Makefile          |    3 +-
 string/bug-strcasestr1.c |   21 ++++++++
 string/str-two-way.h     |  129 ++++++++++++++++++++++++++++++++++++++--------
 string/strcasestr.c      |    8 ++-
 string/strstr.c          |    6 ++-
 5 files changed, 142 insertions(+), 25 deletions(-)
 create mode 100644 string/bug-strcasestr1.c

diff --git a/string/Makefile b/string/Makefile
index 80923a2..fb97ce4 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -56,7 +56,7 @@ tests		:= tester inl-tester noinl-tester testcopy test-ffs	\
 		   tst-strtok tst-strxfrm bug-strcoll1 tst-strfry	\
 		   bug-strtok1 $(addprefix test-,$(strop-tests))	\
 		   bug-envz1 tst-strxfrm2 tst-endian tst-svc2		\
-		   bug-strstr1 bug-strchr1
+		   bug-strstr1 bug-strcasestr1 bug-strchr1
 
 
 include ../Rules
@@ -74,6 +74,7 @@ CFLAGS-stratcliff.c = -fno-builtin
 CFLAGS-test-ffs.c = -fno-builtin
 CFLAGS-tst-inlcall.c = -fno-builtin
 CFLAGS-bug-strstr1.c = -fno-builtin
+CFLAGS-bug-strcasestr1.c = -fno-builtin
 
 ifeq ($(cross-compiling),no)
 tests: $(objpfx)tst-svc.out
diff --git a/string/bug-strcasestr1.c b/string/bug-strcasestr1.c
new file mode 100644
index 0000000..16c77c3
--- /dev/null
+++ b/string/bug-strcasestr1.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <string.h>
+
+#define TEST_FUNCTION do_test ()
+static int
+do_test (void)
+{
+  const char haystack[] = "AOKB";
+  const char needle[] = "OK";
+  const char *sub = strcasestr (haystack, needle);
+
+  if (sub == NULL)
+    {
+      fprintf (stderr, "BUG: didn't find \"%s\" in \"%s\"\n", needle, haystack);
+      return 1;
+    }
+
+  return 0;
+}
+
+#include "../test-skeleton.c"
diff --git a/string/str-two-way.h b/string/str-two-way.h
index 1b2a8bd..a826741 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -1,5 +1,5 @@
 /* Byte-wise substring search, using the Two-Way algorithm.
-   Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2008-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Written by Eric Blake <ebb9@byu.net>, 2008.
 
@@ -78,6 +78,19 @@
 # define CMP_FUNC memcmp
 #endif
 
+#ifndef AVAILABLE1
+# define AVAILABLE1(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
+#endif
+#ifndef AVAILABLE2
+# define AVAILABLE2(h, h_l, j, n_l) (1)
+#endif
+#ifndef RET0_IF_0
+# define RET0_IF_0(a) /* nothing */
+#endif
+#ifndef AVAILABLE_USES_J
+# define AVAILABLE_USES_J (1)
+#endif
+
 /* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
    Return the index of the first byte in the right half, and set
    *PERIOD to the global period of the right half.
@@ -233,17 +246,24 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
       j = 0;
       while (AVAILABLE (haystack, haystack_len, j, needle_len))
 	{
+	  const unsigned char *pneedle;
+	  const unsigned char *phaystack;
+
 	  /* Scan for matches in right half.  */
 	  i = MAX (suffix, memory);
-	  while (i < needle_len && (CANON_ELEMENT (needle[i])
-				    == CANON_ELEMENT (haystack[i + j])))
+	  pneedle = &needle[i];
+	  phaystack = &haystack[i + j];
+	  while (i < needle_len && (CANON_ELEMENT (*pneedle++)
+				    == CANON_ELEMENT (*phaystack++)))
 	    ++i;
 	  if (needle_len <= i)
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
-	      while (memory < i + 1 && (CANON_ELEMENT (needle[i])
-					== CANON_ELEMENT (haystack[i + j])))
+	      pneedle = &needle[i];
+	      phaystack = &haystack[i + j];
+	      while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
+					== CANON_ELEMENT (*phaystack--)))
 		--i;
 	      if (i + 1 < memory + 1)
 		return (RETURN_TYPE) (haystack + j);
@@ -261,32 +281,81 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
     }
   else
     {
+      const unsigned char *phaystack = &haystack[suffix];
+      /* The comparison always starts from needle[suffix], so cache it
+	 and use an optimized first-character loop.  */
+      unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
+
       /* The two halves of needle are distinct; no extra memory is
 	 required, and any mismatch results in a maximal shift.  */
       period = MAX (suffix, needle_len - suffix) + 1;
       j = 0;
-      while (AVAILABLE (haystack, haystack_len, j, needle_len))
+      while (AVAILABLE1 (haystack, haystack_len, j, needle_len))
 	{
+	  const unsigned char *pneedle;
+	  unsigned char a;
+
+	  /* TODO: The first-character loop can be sped up by adapting
+	     longword-at-a-time implementation of memchr/strchr.  */
+	  if (needle_suffix
+	      != (a = CANON_ELEMENT (*phaystack++)))
+	    {
+#if AVAILABLE_USES_J
+	      ++j;
+#endif
+	      RET0_IF_0 (a);
+	      continue;
+	    }
+
+#if !AVAILABLE_USES_J
+	  /* Calculate J if it wasn't kept up-to-date in the first-character
+	     loop.  */
+	  j = phaystack - &haystack[suffix] - 1;
+#endif
+
 	  /* Scan for matches in right half.  */
-	  i = suffix;
-	  while (i < needle_len && (CANON_ELEMENT (needle[i])
-				    == CANON_ELEMENT (haystack[i + j])))
-	    ++i;
+	  i = suffix + 1;
+	  pneedle = &needle[i];
+	  while (i < needle_len)
+	    {
+	      if (CANON_ELEMENT (*pneedle++)
+		  != (a = CANON_ELEMENT (*phaystack++)))
+		{
+		  RET0_IF_0 (a);
+		  break;
+		}
+	      ++i;
+	    }
 	  if (needle_len <= i)
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
-	      while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
-				       == CANON_ELEMENT (haystack[i + j])))
-		--i;
+	      pneedle = &needle[i];
+	      phaystack = &haystack[i + j];
+	      while (i != SIZE_MAX)
+		{
+		  if (CANON_ELEMENT (*pneedle--)
+		      != (a = CANON_ELEMENT (*phaystack--)))
+		    {
+		      RET0_IF_0 (a);
+		      break;
+		    }
+		  --i;
+		}
 	      if (i == SIZE_MAX)
 		return (RETURN_TYPE) (haystack + j);
 	      j += period;
 	    }
 	  else
 	    j += i - suffix + 1;
+
+	  if (!AVAILABLE2 (haystack, haystack_len, j, needle_len))
+	    break;
+
+	  phaystack = &haystack[suffix + j];
 	}
     }
+ ret0: __attribute__ ((unused))
   return NULL;
 }
 
@@ -338,6 +407,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
       j = 0;
       while (AVAILABLE (haystack, haystack_len, j, needle_len))
 	{
+	  const unsigned char *pneedle;
+	  const unsigned char *phaystack;
+
 	  /* Check the last byte first; if it does not match, then
 	     shift to the next possible match location.  */
 	  shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
@@ -357,15 +429,19 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
 	  /* Scan for matches in right half.  The last byte has
 	     already been matched, by virtue of the shift table.  */
 	  i = MAX (suffix, memory);
-	  while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
-					== CANON_ELEMENT (haystack[i + j])))
+	  pneedle = &needle[i];
+	  phaystack = &haystack[i + j];
+	  while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
+					== CANON_ELEMENT (*phaystack++)))
 	    ++i;
 	  if (needle_len - 1 <= i)
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
-	      while (memory < i + 1 && (CANON_ELEMENT (needle[i])
-					== CANON_ELEMENT (haystack[i + j])))
+	      pneedle = &needle[i];
+	      phaystack = &haystack[i + j];
+	      while (memory < i + 1 && (CANON_ELEMENT (*pneedle--)
+					== CANON_ELEMENT (*phaystack--)))
 		--i;
 	      if (i + 1 < memory + 1)
 		return (RETURN_TYPE) (haystack + j);
@@ -390,6 +466,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
       j = 0;
       while (AVAILABLE (haystack, haystack_len, j, needle_len))
 	{
+	  const unsigned char *pneedle;
+	  const unsigned char *phaystack;
+
 	  /* Check the last byte first; if it does not match, then
 	     shift to the next possible match location.  */
 	  shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
@@ -401,15 +480,19 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
 	  /* Scan for matches in right half.  The last byte has
 	     already been matched, by virtue of the shift table.  */
 	  i = suffix;
-	  while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
-					== CANON_ELEMENT (haystack[i + j])))
+	  pneedle = &needle[i];
+	  phaystack = &haystack[i + j];
+	  while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++)
+					== CANON_ELEMENT (*phaystack++)))
 	    ++i;
 	  if (needle_len - 1 <= i)
 	    {
 	      /* Scan for matches in left half.  */
 	      i = suffix - 1;
-	      while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
-				       == CANON_ELEMENT (haystack[i + j])))
+	      pneedle = &needle[i];
+	      phaystack = &haystack[i + j];
+	      while (i != SIZE_MAX && (CANON_ELEMENT (*pneedle--)
+				       == CANON_ELEMENT (*phaystack--)))
 		--i;
 	      if (i == SIZE_MAX)
 		return (RETURN_TYPE) (haystack + j);
@@ -423,6 +506,10 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
 }
 
 #undef AVAILABLE
+#undef AVAILABLE1
+#undef AVAILABLE2
+#undef AVAILABLE_USES_J
 #undef CANON_ELEMENT
 #undef CMP_FUNC
+#undef RET0_IF_0
 #undef RETURN_TYPE
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 9e1bde9..9848d10 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -1,5 +1,5 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994, 1996-2000, 2004, 2008, 2009, 2010
+   Copyright (C) 1994-2012
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -37,13 +37,17 @@
 #include <stdbool.h>
 #include <strings.h>
 
-#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+#define TOLOWER(Ch) tolower (Ch)
 
 /* Two-Way algorithm.  */
 #define RETURN_TYPE char *
 #define AVAILABLE(h, h_l, j, n_l)			\
   (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
    && ((h_l) = (j) + (n_l)))
+#define AVAILABLE1(h, h_l, j, n_l) (true)
+#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
+#define RET0_IF_0(a) if (!a) goto ret0
+#define AVAILABLE_USES_J (0)
 #define CANON_ELEMENT(c) TOLOWER (c)
 #define CMP_FUNC(p1, p2, l)				\
   __strncasecmp ((const char *) (p1), (const char *) (p2), l)
diff --git a/string/strstr.c b/string/strstr.c
index 10e6fdc..94d071d 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -1,5 +1,5 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994,1996,1997,2000,2001,2003,2008,2009
+   Copyright (C) 1994-2012
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -36,6 +36,10 @@
 #define AVAILABLE(h, h_l, j, n_l)			\
   (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
    && ((h_l) = (j) + (n_l)))
+#define AVAILABLE1(h, h_l, j, n_l) (true)
+#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
+#define RET0_IF_0(a) if (!a) goto ret0
+#define AVAILABLE_USES_J (0)
 #include "str-two-way.h"
 
 #undef strstr
-- 
1.7.4.1




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]