This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] DT_GNU_HASH: ~ 50% dynamic linking improvement


On Wed, Jun 28, 2006 at 11:45:44AM -0700, Paul Eggert wrote:
> Jakub Jelinek <jakub@redhat.com> writes:
> 
> > (Dan Bernstein's string hash function posted eons ago on comp.lang.c.)
> 
> Bernstein now prefers XOR, i.e.,
> "h = h * 33 ^ c;" instead of
> "h = h * 33 + c;".  Did you try that as well?
> Several sources indicate it's a bit better, e.g.,
> <http://eternallyconfuzzled.com/tuts/hashing.html#djb2>.
> 
> > We have tested a bunch of different hash functions
> 
> Which hash functions did you try?  One-at-a-time?  FNV?  You'll
> probably get hassled by hash triviists (like me :-) no matter which
> function you choose, but you can forstall that to some extent by
> mentioning which functions you tested.

Ok, added a few further functions from the URL you referenced (previously
just had sysv, libiberty, dcache, djb2 and sdbm).
The number of collisions in the 537403 symbols is:
name		2sym collision #	3sym collision #	more than 3sym collision #
sysv		1749			5
libiberty	42
dcache		37
djb2		29
sdbm		39
djb3		31
rot		337			39			61
sax		34
fnv		40
oat		30
Speed and number of collisions on the set of all symbols with
full 32-bit hash values (i.e. not modulo nbuckets) are the most important
factors in this case, as handling such collisions is expensive.
Also, djb2 (i.e. Bernstein with +) gave up max common prefix len
3, while djb3 gives 4, fnv 4 as well, oat and sax too.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

unsigned int
elf_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int g, h = 0;
  int ch;

  while ((ch = *name++) != '\0')
    {
      h = (h << 4) + ch;
      if ((g = (h & 0xf0000000)) != 0)
        {
          h ^= g >> 24;
          h ^= g;
        }
    }
  return h;
}

unsigned int
libiberty_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 0;
  int ch;

  while ((ch = *name++) != '\0')
    h = h * 67 + ch - 113;

  return h;
}

unsigned int
dcache_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 0;
  int ch;

  while ((ch = *name++) != '\0')
    h = (h + (ch << 4) + (ch >> 4)) * 11;

  return h;
}

unsigned int
djb2_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 5381;
  int ch;

  while ((ch = *name++) != '\0')
    h = (h << 5) + h + ch;

  return h;
}

unsigned int
sdbm_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 0;
  int ch;

  while ((ch = *name++) != '\0')
    h = ch + (h << 6) + (h << 16) - h;

  return h;
}

unsigned int
djb3_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 5381;
  int ch;

  while ((ch = *name++) != '\0')
    h = ((h << 5) + h) ^ ch;

  return h;
}

unsigned int
rot_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 0;
  int ch;

  while ((ch = *name++) != '\0')
    h = ch ^ (h << 4) ^ (h >> 28);

  return h;
}

unsigned int
sax_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 0;
  int ch;

  while ((ch = *name++) != '\0')
    h ^= ch ^ (h << 5) + (h >> 2);

  return h;
}

unsigned int
fnv_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 2166136261;
  int ch;

  while ((ch = *name++) != '\0')
    h = (h * 16777619) ^ ch;

  return h;
}

unsigned int
oat_hash (const char *namearg)
{
  const unsigned char *name = (const unsigned char *) namearg;
  unsigned int h = 2166136261;
  int ch;

  while ((ch = *name++) != '\0')
    {
      h += ch;
      h += (h << 10);
      h ^= (h >> 6);
    }

  h += (h << 3);
  h ^= (h >> 11);
  h += (h << 15);

  return h;
}

int
main (int argc, char **argv)
{
  char *line = NULL;
  size_t len = 0;
  ssize_t n;
  unsigned int (*hash) (const char *);
  if (strcmp (argv[1], "sysv") == 0)
    hash = elf_hash;
  else if (strcmp (argv[1], "libiberty") == 0)
    hash = libiberty_hash;
  else if (strcmp (argv[1], "dcache") == 0)
    hash = dcache_hash;
  else if (strcmp (argv[1], "djb2") == 0)
    hash = djb2_hash;
  else if (strcmp (argv[1], "sdbm") == 0)
    hash = sdbm_hash;
  else if (strcmp (argv[1], "djb3") == 0)
    hash = djb3_hash;
  else if (strcmp (argv[1], "rot") == 0)
    hash = rot_hash;
  else if (strcmp (argv[1], "sax") == 0)
    hash = sax_hash;
  else if (strcmp (argv[1], "fnv") == 0)
    hash = fnv_hash;
  else if (strcmp (argv[1], "oat") == 0)
    hash = oat_hash;
  else
    return 1;
  while ((n = getline (&line, &len, stdin)) > 0)
    {
      if (line[n - 1] == '\n')
	line[n - 1] = '\0';
      printf ("%08x %s\n", hash (line), line);
    }
  return 0;
}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]