This is the mail archive of the
libc-help@sourceware.org
mailing list for the glibc project.
[RFC] possible strcmp speedup
- From: OndÅej BÃlka <neleai at seznam dot cz>
- To: libc-help at sourceware dot org
- Date: Sun, 15 Jul 2012 17:44:44 +0200
- Subject: [RFC] possible strcmp speedup
Hello,
strcmp is most of time decided by first 16 characters.
This could be tested faster with following implementation.
I do not know how this is applicable for loop. A posible way could be
read a aligned and b unaligned and handle b page boundaries separately.
#define USE_SSE2
#include "vector.h"
int strcmp2(uchar *a,uchar *b){
/*Test if we cross page. Assuming addresses are uniformly random
probability of that a|b is false positive is 1/16*/
if (__builtin_expect((((size_t)a)|((size_t)b))%4096>=4096-sizeof(tp_vector),0))
return strcmp(a,b);
/* For pair of characters following cases can happen.
xi==yi!=0 xi==yi==0 xi!=yi
si=255 si=255 si=0
mi!=0 mi=0 mi=0
ri=0 ri=255 ri=255
*/
tp_vector x=LOAD_UNALIGNED(a);
tp_vector y=LOAD_UNALIGNED(b);
tp_vector s=TEST_EQ(x,y);
tp_vector m=AND(s,x);
tp_vector r=TEST_EQ(m,BROADCAST(0));
tp_mask mask =get_mask(r);
int first=__builtin_ctz(mask);
if(mask){
#ifdef AS_STRCASECMP
/*If characters differ they are probably caselessly diferent.*/
uchar ac=tolower(a[first]),bc=tolower(b[first]);
if(!ac) return ac-bc;
if(ac!=bc) return ac-bc;
return strcmp(a+first,b+first);
#else
return a[first]-b[first];
#endif
}
return strcmp(a+sizeof(tp_vector),b+sizeof(tp_vector));
}