This is the mail archive of the binutils@sources.redhat.com mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] ld speedup 1/3 (suffix merge)


Hi,

On Wed, 10 Sep 2003, Alan Modra wrote:

> No need to wait for an assignment before posting a patch..

Ok then, here it is.  Background: During the last KDE conference in Nove
Hrady Lars and me sat together and profiled ld on our testcases which was
libqt itself and some support programs of it (assistant and designer).
With those results we changed the three biggest hotspots in ld to work
acceptable on debug and non-debug builds given the problem set that C++
objects usually create (many similar symbols due to name mangling, many
sections, big debug infos).

One of those changes was using a non-quadratic string suffix merge, patch
is below.  The result looks like so (binutils compiled without
optimizations, time for linking libqt.so, assistant and designer, ld.cvs
is the ld as it falls out of CVS HEAD, ld.strmerge is CVS with the below
patch):

ld.cvs, no debug infos in the binaries
libqt-mt.so  real    0m2.409s user    0m2.190s sys     0m0.220s
assistant    real    0m2.649s user    0m2.600s sys     0m0.050s
designer     real    0m3.425s user    0m3.350s sys     0m0.080s

ld.strmerge, no debug infos in the binaries
libqt-mt.so  real    0m1.600s user    0m1.350s sys     0m0.250s
assistant    real    0m2.666s user    0m2.660s sys     0m0.010s
designer     real    0m3.497s user    0m3.190s sys     0m0.160s

ld.cvs, binaries with debug info
libqt-mt.so  real    1m5.676s user    1m2.380s sys     0m1.280s
assistant    real    0m4.346s user    0m4.110s sys     0m0.090s
designer     real    0m24.263s user    0m19.700s sys     0m0.680s

ld.strmerge, binaries with debug info
libqt-mt.so  real    0m20.820s user    0m15.910s sys     0m1.450s
assistant    real    0m3.906s user    0m3.720s sys     0m0.110s
designer     real    0m12.919s user    0m9.090s sys     0m0.850s

With a debug build the objects don't fit completely in the disc cache,
hence the larger real time, that user time.  For entertainment here the
sizes of the three binaries in debug build:

     4713844 2003-09-09 17:11 assistant
    34832011 2003-09-09 17:11 designer
    64162740 2003-09-09 17:11 libqt-mt.so.3.2.1

This patch doesn't introduce any regressions (and we use the whole
patchset in our distribution since some time, additionally of having it
tested by some dozen KDE developers).  I'll cleanup and submit the other
two changes in some hours.  Copyright assignment of Lars and me should be
on the way.


Ciao,
Michael.

2003-09-09  Lars Knoll  <lars@trolltech.com>
            Michael Matz  <matz@suse.de>

        * merge.c (cmplengthentry, last4_eq, last_eq): Delete.
        (strrevcmp, is_suffix): New.
        (merge_strings): Use them to implement fast suffix merging.
        * elf-strtab.c (cmplengthentry, last4_eq): Delete.
        (strrevcmp, is_suffix): New.
        (_bfd_elf_strtab_finalize): Rework to implement fast suffix merging.

Index: merge.c
===================================================================
RCS file: /cvs/src/src/bfd/merge.c,v
retrieving revision 1.16
diff -u -p -r1.16 merge.c
--- merge.c	31 Aug 2003 10:07:46 -0000	1.16
+++ merge.c	9 Sep 2003 15:29:03 -0000
@@ -420,79 +420,6 @@ _bfd_merge_section (bfd *abfd, void **ps
   return FALSE;
 }

-/* Compare two sec_merge_hash_entry structures.  This is called via qsort.  */
-
-static int
-cmplengthentry (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = *(struct sec_merge_hash_entry **) a;
-  struct sec_merge_hash_entry * B = *(struct sec_merge_hash_entry **) b;
-
-  if (A->len < B->len)
-    return 1;
-  else if (A->len > B->len)
-    return -1;
-
-  return memcmp (A->root.string, B->root.string, A->len);
-}
-
-static int
-last4_eq (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a;
-  struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b;
-
-  if (memcmp (A->root.string + A->len - 5 * A->u.entsize,
-	      B->root.string + B->len - 5 * A->u.entsize,
-	      4 * A->u.entsize) != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
-  if (A->len <= B->len)
-    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
-       not to be equal by the hash table.  */
-    return 0;
-
-  if (A->alignment < B->alignment
-      || ((A->len - B->len) & (B->alignment - 1)))
-    /* The suffix is not sufficiently aligned.  */
-    return 0;
-
-  return memcmp (A->root.string + (A->len - B->len),
-		 B->root.string, B->len - 5 * A->u.entsize) == 0;
-}
-
-static int
-last_eq (const void *a, const void *b)
-{
-  struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a;
-  struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b;
-
-  if (B->len >= 5 * A->u.entsize)
-    /* Longer strings are just pushed into the hash table,
-       they'll be used when looking up for very short strings.  */
-    return 0;
-
-  if (memcmp (A->root.string + A->len - 2 * A->u.entsize,
-	      B->root.string + B->len - 2 * A->u.entsize,
-	      A->u.entsize) != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
-  if (A->len <= B->len)
-    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
-       not to be equal by the hash table.  */
-    return 0;
-
-  if (A->alignment < B->alignment
-      || ((A->len - B->len) & (B->alignment - 1)))
-    /* The suffix is not sufficiently aligned.  */
-    return 0;
-
-  return memcmp (A->root.string + (A->len - B->len),
-		 B->root.string, B->len - 2 * A->u.entsize) == 0;
-}
-
 /* Record one section into the hash table.  */
 static bfd_boolean
 record_section (struct sec_merge_info *sinfo,
@@ -576,6 +503,41 @@ error_return:
   return FALSE;
 }

+static int
+strrevcmp (const void *a, const void *b)
+{
+  struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
+  struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
+
+  const unsigned char *s = A->root.string + A->len - A->u.entsize;
+  const unsigned char *t = B->root.string + B->len - B->u.entsize;
+
+  int l = A->len < B->len ? A->len : B->len;
+  l -= (A->u.entsize - 1);
+  while (l)
+    {
+      if (*s != *t)
+	return (int)*s - (int)*t;
+      s--;
+      t--;
+      l--;
+    }
+  return A->len - B->len;
+}
+
+static int
+is_suffix (const struct sec_merge_hash_entry *A,
+	   const struct sec_merge_hash_entry *B)
+{
+  if (A->len <= B->len)
+    /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
+       not to be equal by the hash table.  */
+    return 0;
+
+  return memcmp (A->root.string + (A->len - B->len),
+		 B->root.string, B->len - 1 * A->u.entsize) == 0;
+}
+
 /* This is a helper function for _bfd_merge_sections.  It attempts to
    merge strings matching suffixes of longer strings.  */
 static void
@@ -583,10 +545,9 @@ merge_strings (struct sec_merge_info *si
 {
   struct sec_merge_hash_entry **array, **a, **end, *e;
   struct sec_merge_sec_info *secinfo;
-  htab_t lasttab = NULL, last4tab = NULL;
   bfd_size_type size, amt;

-  /* Now sort the strings by length, longest first.  */
+  /* Now sort the strings */
   array = NULL;
   amt = sinfo->htab->size * sizeof (struct sec_merge_hash_entry *);
   array = (struct sec_merge_hash_entry **) bfd_malloc (amt);
@@ -599,86 +560,46 @@ merge_strings (struct sec_merge_info *si

   sinfo->htab->size = a - array;

-  qsort (array, (size_t) sinfo->htab->size,
-	 sizeof (struct sec_merge_hash_entry *), cmplengthentry);
+  /* The qsort predicate needs the entsize member.  */
+  for (a = array, end = array + sinfo->htab->size; a < end; a++)
+    {
+      e = *a;
+      e->u.entsize = sinfo->htab->entsize;
+    }

-  last4tab = htab_create_alloc ((size_t) sinfo->htab->size * 4,
-				NULL, last4_eq, NULL, calloc, free);
-  lasttab = htab_create_alloc ((size_t) sinfo->htab->size * 4,
-			       NULL, last_eq, NULL, calloc, free);
-  if (lasttab == NULL || last4tab == NULL)
-    goto alloc_failure;
+  qsort (array, (size_t) sinfo->htab->size,
+	 sizeof (struct sec_merge_hash_entry *), strrevcmp);

-  /* Now insert the strings into hash tables (strings with last 4 characters
-     and strings with last character equal), look for longer strings which
-     we're suffix of.  */
+  /* now loop over the sorted array and merge suffixes */
   for (a = array, end = array + sinfo->htab->size; a < end; a++)
     {
-      register hashval_t hash;
-      unsigned int c;
-      unsigned int i;
-      const unsigned char *s;
-      void **p;
-
       e = *a;
-      e->u.entsize = sinfo->htab->entsize;
-      if (e->len <= e->u.entsize)
-	break;
-      if (e->len > 4 * e->u.entsize)
+      if (e->len > e->u.entsize)
 	{
-	  s = (const unsigned char *) (e->root.string + e->len - e->u.entsize);
-	  hash = 0;
-	  for (i = 0; i < 4 * e->u.entsize; i++)
-	    {
-	      c = *--s;
-	      hash += c + (c << 17);
-	      hash ^= hash >> 2;
-	    }
-	  p = htab_find_slot_with_hash (last4tab, e, hash, INSERT);
-	  if (p == NULL)
-	    goto alloc_failure;
-	  if (*p)
+	  struct sec_merge_hash_entry **b = a+1;
+	  bfd_boolean found = FALSE;
+	  while (b < end)
 	    {
-	      struct sec_merge_hash_entry *ent;
-
-	      ent = (struct sec_merge_hash_entry *) *p;
-	      e->u.suffix = ent;
-	      e->alignment = 0;
-	      continue;
+	      struct sec_merge_hash_entry *cmp = *b;
+	      if (!is_suffix(cmp, e))
+		break;
+	      if (e->alignment >= cmp->alignment
+		  && !((e->len - cmp->len) & (cmp->alignment - 1)))
+		/* The suffix is sufficiently aligned.  */
+		{
+		  e->u.suffix = cmp;
+		  found = TRUE;
+		}
+	      ++b;
 	    }
-	  else
-	    *p = e;
-	}
-      s = (const unsigned char *) (e->root.string + e->len - e->u.entsize);
-      hash = 0;
-      for (i = 0; i < e->u.entsize; i++)
-	{
-	  c = *--s;
-	  hash += c + (c << 17);
-	  hash ^= hash >> 2;
-	}
-      p = htab_find_slot_with_hash (lasttab, e, hash, INSERT);
-      if (p == NULL)
-	goto alloc_failure;
-      if (*p)
-	{
-	  struct sec_merge_hash_entry *ent;
-
-	  ent = (struct sec_merge_hash_entry *) *p;
-	  e->u.suffix = ent;
-	  e->alignment = 0;
+	  if (found)
+	    e->alignment = 0;
 	}
-      else
-	*p = e;
     }

 alloc_failure:
   if (array)
     free (array);
-  if (lasttab)
-    htab_delete (lasttab);
-  if (last4tab)
-    htab_delete (last4tab);

   /* Now assign positions to the strings we want to keep.  */
   size = 0;
Index: elf-strtab.c
===================================================================
RCS file: /cvs/src/src/bfd/elf-strtab.c,v
retrieving revision 1.7
diff -u -p -r1.7 elf-strtab.c
--- elf-strtab.c	7 Aug 2003 07:25:34 -0000	1.7
+++ elf-strtab.c	9 Sep 2003 15:29:03 -0000
@@ -256,37 +256,37 @@ _bfd_elf_strtab_emit (register bfd *abfd
 /* Compare two elf_strtab_hash_entry structures.  This is called via qsort.  */

 static int
-cmplengthentry (const void *a, const void *b)
+strrevcmp(const void *a, const void *b)
 {
   struct elf_strtab_hash_entry *A = *(struct elf_strtab_hash_entry **) a;
   struct elf_strtab_hash_entry *B = *(struct elf_strtab_hash_entry **) b;

-  if (A->len < B->len)
-    return 1;
-  else if (A->len > B->len)
-    return -1;
+  const unsigned char *s = A->root.string + A->len - 1;
+  const unsigned char *t = B->root.string + B->len - 1;

-  return memcmp (A->root.string, B->root.string, A->len);
+  int l = A->len < B->len ? A->len : B->len;
+  while (l)
+    {
+      if (*s != *t)
+	return (int)*s - (int)*t;
+      s--;
+      t--;
+      l--;
+    }
+  return A->len - B->len;
 }

 static int
-last4_eq (const void *a, const void *b)
+is_suffix(const struct elf_strtab_hash_entry *A,
+	  const struct elf_strtab_hash_entry *B)
 {
-  const struct elf_strtab_hash_entry *A = a;
-  const struct elf_strtab_hash_entry *B = b;
-
-  if (memcmp (A->root.string + A->len - 5, B->root.string + B->len - 5, 4)
-      != 0)
-    /* This was a hashtable collision.  */
-    return 0;
-
   if (A->len <= B->len)
     /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
        not to be equal by the hash table.  */
     return 0;

   return memcmp (A->root.string + (A->len - B->len),
-		 B->root.string, B->len - 5) == 0;
+		 B->root.string, B->len - 1) == 0;
 }

 /* This function assigns final string table offsets for used strings,
@@ -296,9 +296,7 @@ void
 _bfd_elf_strtab_finalize (struct elf_strtab_hash *tab)
 {
   struct elf_strtab_hash_entry **array, **a, **end, *e;
-  htab_t last4tab = NULL;
   bfd_size_type size, amt;
-  struct elf_strtab_hash_entry *last[256], **last_ptr[256];

   /* GCC 2.91.66 (egcs-1.1.2) on i386 miscompiles this function when i is
      a 64-bit bfd_size_type: a 64-bit target or --enable-64-bit-bfd.
@@ -306,16 +304,13 @@ _bfd_elf_strtab_finalize (struct elf_str
      cycles.  */
   size_t i;

-  /* Now sort the strings by length, longest first.  */
+  /* Now sort the strings by suffix and length.  */
   array = NULL;
   amt = tab->size * sizeof (struct elf_strtab_hash_entry *);
   array = bfd_malloc (amt);
   if (array == NULL)
     goto alloc_failure;

-  memset (last, 0, sizeof (last));
-  for (i = 0; i < 256; ++i)
-    last_ptr[i] = &last[i];
   for (i = 1, a = array; i < tab->size; ++i)
     if (tab->array[i]->refcount)
       *a++ = tab->array[i];
@@ -324,80 +319,33 @@ _bfd_elf_strtab_finalize (struct elf_str

   size = a - array;

-  qsort (array, size, sizeof (struct elf_strtab_hash_entry *), cmplengthentry);
+  qsort (array, size, sizeof (struct elf_strtab_hash_entry *), strrevcmp);

-  last4tab = htab_create_alloc (size * 4, NULL, last4_eq, NULL, calloc, free);
-  if (last4tab == NULL)
-    goto alloc_failure;
-
-  /* Now insert the strings into hash tables (strings with last 4 characters
-     and strings with last character equal), look for longer strings which
-     we're suffix of.  */
+  /* now loop over the sorted array and merge suffixes */
   for (a = array, end = array + size; a < end; a++)
     {
-      register hashval_t hash;
-      unsigned int c;
-      unsigned int j;
-      const unsigned char *s;
-      void **p;
-
       e = *a;
-      if (e->len > 4)
-	{
-	  s = e->root.string + e->len - 1;
-	  hash = 0;
-	  for (j = 0; j < 4; j++)
-	    {
-	      c = *--s;
-	      hash += c + (c << 17);
-	      hash ^= hash >> 2;
-	    }
-	  p = htab_find_slot_with_hash (last4tab, e, hash, INSERT);
-	  if (p == NULL)
-	    goto alloc_failure;
-	  if (*p)
-	    {
-	      struct elf_strtab_hash_entry *ent;
-
-	      ent = *p;
-	      e->u.suffix = ent;
-	      e->len = 0;
-	      continue;
-	    }
-	  else
-	    *p = e;
-	}
-      else
+      if (e->len > 1)
 	{
-	  struct elf_strtab_hash_entry *tem;
-
-	  c = e->root.string[e->len - 2] & 0xff;
-
-	  for (tem = last[c]; tem; tem = tem->u.next)
-	    if (tem->len > e->len
-		&& memcmp (tem->root.string + (tem->len - e->len),
-			   e->root.string, e->len - 1) == 0)
-	      break;
-	  if (tem)
+	  struct elf_strtab_hash_entry **b = a+1;
+	  int found = 0;
+	  while (b < end)
 	    {
-	      e->u.suffix = tem;
-	      e->len = 0;
-	      continue;
+	      struct elf_strtab_hash_entry *cmp = *b;
+	      if (!is_suffix(cmp, e))
+	        break;
+	      e->u.suffix = cmp;
+	      found = 1;
+	      ++b;
 	    }
+	  if (found)
+	    e->len = 0;
 	}
-
-      c = e->root.string[e->len - 2] & 0xff;
-      /* Put longest strings first.  */
-      *last_ptr[c] = e;
-      last_ptr[c] = &e->u.next;
-      e->u.next = NULL;
     }

 alloc_failure:
   if (array)
     free (array);
-  if (last4tab)
-    htab_delete (last4tab);

   /* Now assign positions to the strings we want to keep.  */
   size = 1;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]