This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: rawmemchr


Dave Korn <dave.korn <at> artimi.com> writes:

> 
>   Maybe you could show a few timing measurements from realistic testcases at
> this point in the discussion?  I haven't felt comfortable trying to directly
> infer from instruction counts to real-time clock cycles any time in the past
> two decades myself, there are so many confounding factors these days that
> I'm not even confident of being able to validly reason about it any more.

Here's my test program:

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))

static char *
mystrchr (const char *str, int c)
{
  if (c)
    {
      /* Align str.  */
      while ((int)str & 0x3)
        {
          if (!*str)
            return c ? NULL : (char *) str;
          if (*str == c)
            return (char *) str;
          str++;
        }
      /* Operate a word at a time.  */
      int mask = c * 0x01010101;
      int *ptr = (int *) str;
      while (!DETECTNULL (*ptr) && !DETECTCHAR (*ptr, mask))
        ptr++;
      /* Found the end of string or word containing c.  */
      str = (const char *) str;
      while (*str && *str != c)
        str++;
      if (*str == c)
        return (char *) str;
      return NULL;
    }
  /* Special case for finding NUL.  */
  /* Align str.  */
  while ((int)str & 0x3)
    {
      if (!*str)
        return (char *) str;
      str++;
    }
  /* Operate a word at a time.  */
  int *ptr = (int *) str;
  while (!DETECTNULL (*ptr))
    ptr++;
  /* Found the end of string.  */
  str = (const char *) str;
  while (*str)
    str++;
  return (char *) str;
}

int main (int argc, char **argv)
{
  if (argc < 6)
    {
      printf ("usage: %s size fill offset repeat goal [func]\n", argv[0]);
      return 1;
    }
  int size = strtol (argv[1], NULL, 0);
  char *buf = malloc (size);
  int offset = strtol (argv[3], NULL, 0);
  int repeat = strtol (argv[4], NULL, 0);
  int goal = strtol (argv[5], NULL, 0);
  char *(*func)(const char *,int);
  func = argc > 6 ? mystrchr : strchr;

  memset (buf, strtol (argv[2], NULL, 0), size - 2);
  buf[size - 2] = goal;
  buf[size - 1] = 0;

  char *expected = goal == *buf ? buf + offset : &buf[size - 2];
  buf += offset;
  while (repeat--)
    assert (func (buf, goal) == expected);
  buf -= offset;
  free (buf);
  return 0;
}


And some sample timings when compiled at -O2 under cygwin, with my 
interpretation:

$ time ./foo 1000000 1 0 1000 2

real	0m0.594s
user	0m0.593s
sys	0m0.015s

$ time ./foo 1000000 1 0 1000 0

real	0m0.578s
user	0m0.593s
sys	0m0.015s

# The assembly version doesn't special case c==0

$ time ./foo 1000000 1 1 1000 2

real	0m0.969s
user	0m0.983s
sys	0m0.015s

$ time ./foo 1000000 1 1 1000 0

real	0m0.969s
user	0m0.983s
sys	0m0.015s

# The assembly version is nearly 40% slower if ptr is not aligned

$ time ./foo 1000000 1 0 1000 2 1

real	0m1.594s
user	0m1.608s
sys	0m0.015s

# C is 3x slower than the current assembly on aligned ptr

$ time ./foo 1000000 1 0 1000 0 1

real	0m0.922s
user	0m0.921s
sys	0m0.030s

# But my special casing of strchr(ptr,0) shows > 40% improvement!

$ time ./foo 1000000 1 1 1000 2 1

real	0m1.594s
user	0m1.608s
sys	0m0.015s

$ time ./foo 1000000 1 1 1000 0 1

real	0m0.921s
user	0m0.936s
sys	0m0.015s

# the C code does not slow down for unaligned str
# And my C code for strchr(unaligned,0) BEATS the current assembly!

-- 
Eric Blake




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]