This is the mail archive of the
newlib@sourceware.org
mailing list for the newlib project.
Re: rawmemchr
Dave Korn <dave.korn <at> artimi.com> writes:
>
> Maybe you could show a few timing measurements from realistic testcases at
> this point in the discussion? I haven't felt comfortable trying to directly
> infer from instruction counts to real-time clock cycles any time in the past
> two decades myself, there are so many confounding factors these days that
> I'm not even confident of being able to validly reason about it any more.
Here's my test program:
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
static char *
mystrchr (const char *str, int c)
{
if (c)
{
/* Align str. */
while ((int)str & 0x3)
{
if (!*str)
return c ? NULL : (char *) str;
if (*str == c)
return (char *) str;
str++;
}
/* Operate a word at a time. */
int mask = c * 0x01010101;
int *ptr = (int *) str;
while (!DETECTNULL (*ptr) && !DETECTCHAR (*ptr, mask))
ptr++;
/* Found the end of string or word containing c. */
str = (const char *) str;
while (*str && *str != c)
str++;
if (*str == c)
return (char *) str;
return NULL;
}
/* Special case for finding NUL. */
/* Align str. */
while ((int)str & 0x3)
{
if (!*str)
return (char *) str;
str++;
}
/* Operate a word at a time. */
int *ptr = (int *) str;
while (!DETECTNULL (*ptr))
ptr++;
/* Found the end of string. */
str = (const char *) str;
while (*str)
str++;
return (char *) str;
}
int main (int argc, char **argv)
{
if (argc < 6)
{
printf ("usage: %s size fill offset repeat goal [func]\n", argv[0]);
return 1;
}
int size = strtol (argv[1], NULL, 0);
char *buf = malloc (size);
int offset = strtol (argv[3], NULL, 0);
int repeat = strtol (argv[4], NULL, 0);
int goal = strtol (argv[5], NULL, 0);
char *(*func)(const char *,int);
func = argc > 6 ? mystrchr : strchr;
memset (buf, strtol (argv[2], NULL, 0), size - 2);
buf[size - 2] = goal;
buf[size - 1] = 0;
char *expected = goal == *buf ? buf + offset : &buf[size - 2];
buf += offset;
while (repeat--)
assert (func (buf, goal) == expected);
buf -= offset;
free (buf);
return 0;
}
And some sample timings when compiled at -O2 under cygwin, with my
interpretation:
$ time ./foo 1000000 1 0 1000 2
real 0m0.594s
user 0m0.593s
sys 0m0.015s
$ time ./foo 1000000 1 0 1000 0
real 0m0.578s
user 0m0.593s
sys 0m0.015s
# The assembly version doesn't special case c==0
$ time ./foo 1000000 1 1 1000 2
real 0m0.969s
user 0m0.983s
sys 0m0.015s
$ time ./foo 1000000 1 1 1000 0
real 0m0.969s
user 0m0.983s
sys 0m0.015s
# The assembly version is nearly 40% slower if ptr is not aligned
$ time ./foo 1000000 1 0 1000 2 1
real 0m1.594s
user 0m1.608s
sys 0m0.015s
# C is 3x slower than the current assembly on aligned ptr
$ time ./foo 1000000 1 0 1000 0 1
real 0m0.922s
user 0m0.921s
sys 0m0.030s
# But my special casing of strchr(ptr,0) shows > 40% improvement!
$ time ./foo 1000000 1 1 1000 2 1
real 0m1.594s
user 0m1.608s
sys 0m0.015s
$ time ./foo 1000000 1 1 1000 0 1
real 0m0.921s
user 0m0.936s
sys 0m0.015s
# the C code does not slow down for unaligned str
# And my C code for strchr(unaligned,0) BEATS the current assembly!
--
Eric Blake