This is the mail archive of the
binutils@sources.redhat.com
mailing list for the binutils project.
[patch] ld speedup 1/3 (suffix merge)
- From: Michael Matz <matz at suse dot de>
- To: Alan Modra <amodra at bigpond dot net dot au>, <binutils at sources dot redhat dot com>
- Cc: Lars Knoll <lars at trolltech dot com>
- Date: Tue, 9 Sep 2003 18:06:06 +0200 (CEST)
- Subject: [patch] ld speedup 1/3 (suffix merge)
Hi,
On Wed, 10 Sep 2003, Alan Modra wrote:
> No need to wait for an assignment before posting a patch..
Ok then, here it is. Background: During the last KDE conference in Nove
Hrady Lars and me sat together and profiled ld on our testcases which was
libqt itself and some support programs of it (assistant and designer).
With those results we changed the three biggest hotspots in ld to work
acceptable on debug and non-debug builds given the problem set that C++
objects usually create (many similar symbols due to name mangling, many
sections, big debug infos).
One of those changes was using a non-quadratic string suffix merge, patch
is below. The result looks like so (binutils compiled without
optimizations, time for linking libqt.so, assistant and designer, ld.cvs
is the ld as it falls out of CVS HEAD, ld.strmerge is CVS with the below
patch):
ld.cvs, no debug infos in the binaries
libqt-mt.so real 0m2.409s user 0m2.190s sys 0m0.220s
assistant real 0m2.649s user 0m2.600s sys 0m0.050s
designer real 0m3.425s user 0m3.350s sys 0m0.080s
ld.strmerge, no debug infos in the binaries
libqt-mt.so real 0m1.600s user 0m1.350s sys 0m0.250s
assistant real 0m2.666s user 0m2.660s sys 0m0.010s
designer real 0m3.497s user 0m3.190s sys 0m0.160s
ld.cvs, binaries with debug info
libqt-mt.so real 1m5.676s user 1m2.380s sys 0m1.280s
assistant real 0m4.346s user 0m4.110s sys 0m0.090s
designer real 0m24.263s user 0m19.700s sys 0m0.680s
ld.strmerge, binaries with debug info
libqt-mt.so real 0m20.820s user 0m15.910s sys 0m1.450s
assistant real 0m3.906s user 0m3.720s sys 0m0.110s
designer real 0m12.919s user 0m9.090s sys 0m0.850s
With a debug build the objects don't fit completely in the disc cache,
hence the larger real time, that user time. For entertainment here the
sizes of the three binaries in debug build:
4713844 2003-09-09 17:11 assistant
34832011 2003-09-09 17:11 designer
64162740 2003-09-09 17:11 libqt-mt.so.3.2.1
This patch doesn't introduce any regressions (and we use the whole
patchset in our distribution since some time, additionally of having it
tested by some dozen KDE developers). I'll cleanup and submit the other
two changes in some hours. Copyright assignment of Lars and me should be
on the way.
Ciao,
Michael.
2003-09-09 Lars Knoll <lars@trolltech.com>
Michael Matz <matz@suse.de>
* merge.c (cmplengthentry, last4_eq, last_eq): Delete.
(strrevcmp, is_suffix): New.
(merge_strings): Use them to implement fast suffix merging.
* elf-strtab.c (cmplengthentry, last4_eq): Delete.
(strrevcmp, is_suffix): New.
(_bfd_elf_strtab_finalize): Rework to implement fast suffix merging.
Index: merge.c
===================================================================
RCS file: /cvs/src/src/bfd/merge.c,v
retrieving revision 1.16
diff -u -p -r1.16 merge.c
--- merge.c 31 Aug 2003 10:07:46 -0000 1.16
+++ merge.c 9 Sep 2003 15:29:03 -0000
@@ -420,79 +420,6 @@ _bfd_merge_section (bfd *abfd, void **ps
return FALSE;
}
-/* Compare two sec_merge_hash_entry structures. This is called via qsort. */
-
-static int
-cmplengthentry (const void *a, const void *b)
-{
- struct sec_merge_hash_entry * A = *(struct sec_merge_hash_entry **) a;
- struct sec_merge_hash_entry * B = *(struct sec_merge_hash_entry **) b;
-
- if (A->len < B->len)
- return 1;
- else if (A->len > B->len)
- return -1;
-
- return memcmp (A->root.string, B->root.string, A->len);
-}
-
-static int
-last4_eq (const void *a, const void *b)
-{
- struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a;
- struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b;
-
- if (memcmp (A->root.string + A->len - 5 * A->u.entsize,
- B->root.string + B->len - 5 * A->u.entsize,
- 4 * A->u.entsize) != 0)
- /* This was a hashtable collision. */
- return 0;
-
- if (A->len <= B->len)
- /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
- not to be equal by the hash table. */
- return 0;
-
- if (A->alignment < B->alignment
- || ((A->len - B->len) & (B->alignment - 1)))
- /* The suffix is not sufficiently aligned. */
- return 0;
-
- return memcmp (A->root.string + (A->len - B->len),
- B->root.string, B->len - 5 * A->u.entsize) == 0;
-}
-
-static int
-last_eq (const void *a, const void *b)
-{
- struct sec_merge_hash_entry * A = (struct sec_merge_hash_entry *) a;
- struct sec_merge_hash_entry * B = (struct sec_merge_hash_entry *) b;
-
- if (B->len >= 5 * A->u.entsize)
- /* Longer strings are just pushed into the hash table,
- they'll be used when looking up for very short strings. */
- return 0;
-
- if (memcmp (A->root.string + A->len - 2 * A->u.entsize,
- B->root.string + B->len - 2 * A->u.entsize,
- A->u.entsize) != 0)
- /* This was a hashtable collision. */
- return 0;
-
- if (A->len <= B->len)
- /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
- not to be equal by the hash table. */
- return 0;
-
- if (A->alignment < B->alignment
- || ((A->len - B->len) & (B->alignment - 1)))
- /* The suffix is not sufficiently aligned. */
- return 0;
-
- return memcmp (A->root.string + (A->len - B->len),
- B->root.string, B->len - 2 * A->u.entsize) == 0;
-}
-
/* Record one section into the hash table. */
static bfd_boolean
record_section (struct sec_merge_info *sinfo,
@@ -576,6 +503,41 @@ error_return:
return FALSE;
}
+static int
+strrevcmp (const void *a, const void *b)
+{
+ struct sec_merge_hash_entry *A = *(struct sec_merge_hash_entry **) a;
+ struct sec_merge_hash_entry *B = *(struct sec_merge_hash_entry **) b;
+
+ const unsigned char *s = A->root.string + A->len - A->u.entsize;
+ const unsigned char *t = B->root.string + B->len - B->u.entsize;
+
+ int l = A->len < B->len ? A->len : B->len;
+ l -= (A->u.entsize - 1);
+ while (l)
+ {
+ if (*s != *t)
+ return (int)*s - (int)*t;
+ s--;
+ t--;
+ l--;
+ }
+ return A->len - B->len;
+}
+
+static int
+is_suffix (const struct sec_merge_hash_entry *A,
+ const struct sec_merge_hash_entry *B)
+{
+ if (A->len <= B->len)
+ /* B cannot be a suffix of A unless A is equal to B, which is guaranteed
+ not to be equal by the hash table. */
+ return 0;
+
+ return memcmp (A->root.string + (A->len - B->len),
+ B->root.string, B->len - 1 * A->u.entsize) == 0;
+}
+
/* This is a helper function for _bfd_merge_sections. It attempts to
merge strings matching suffixes of longer strings. */
static void
@@ -583,10 +545,9 @@ merge_strings (struct sec_merge_info *si
{
struct sec_merge_hash_entry **array, **a, **end, *e;
struct sec_merge_sec_info *secinfo;
- htab_t lasttab = NULL, last4tab = NULL;
bfd_size_type size, amt;
- /* Now sort the strings by length, longest first. */
+ /* Now sort the strings */
array = NULL;
amt = sinfo->htab->size * sizeof (struct sec_merge_hash_entry *);
array = (struct sec_merge_hash_entry **) bfd_malloc (amt);
@@ -599,86 +560,46 @@ merge_strings (struct sec_merge_info *si
sinfo->htab->size = a - array;
- qsort (array, (size_t) sinfo->htab->size,
- sizeof (struct sec_merge_hash_entry *), cmplengthentry);
+ /* The qsort predicate needs the entsize member. */
+ for (a = array, end = array + sinfo->htab->size; a < end; a++)
+ {
+ e = *a;
+ e->u.entsize = sinfo->htab->entsize;
+ }
- last4tab = htab_create_alloc ((size_t) sinfo->htab->size * 4,
- NULL, last4_eq, NULL, calloc, free);
- lasttab = htab_create_alloc ((size_t) sinfo->htab->size * 4,
- NULL, last_eq, NULL, calloc, free);
- if (lasttab == NULL || last4tab == NULL)
- goto alloc_failure;
+ qsort (array, (size_t) sinfo->htab->size,
+ sizeof (struct sec_merge_hash_entry *), strrevcmp);
- /* Now insert the strings into hash tables (strings with last 4 characters
- and strings with last character equal), look for longer strings which
- we're suffix of. */
+ /* now loop over the sorted array and merge suffixes */
for (a = array, end = array + sinfo->htab->size; a < end; a++)
{
- register hashval_t hash;
- unsigned int c;
- unsigned int i;
- const unsigned char *s;
- void **p;
-
e = *a;
- e->u.entsize = sinfo->htab->entsize;
- if (e->len <= e->u.entsize)
- break;
- if (e->len > 4 * e->u.entsize)
+ if (e->len > e->u.entsize)
{
- s = (const unsigned char *) (e->root.string + e->len - e->u.entsize);
- hash = 0;
- for (i = 0; i < 4 * e->u.entsize; i++)
- {
- c = *--s;
- hash += c + (c << 17);
- hash ^= hash >> 2;
- }
- p = htab_find_slot_with_hash (last4tab, e, hash, INSERT);
- if (p == NULL)
- goto alloc_failure;
- if (*p)
+ struct sec_merge_hash_entry **b = a+1;
+ bfd_boolean found = FALSE;
+ while (b < end)
{
- struct sec_merge_hash_entry *ent;
-
- ent = (struct sec_merge_hash_entry *) *p;
- e->u.suffix = ent;
- e->alignment = 0;
- continue;
+ struct sec_merge_hash_entry *cmp = *b;
+ if (!is_suffix(cmp, e))
+ break;
+ if (e->alignment >= cmp->alignment
+ && !((e->len - cmp->len) & (cmp->alignment - 1)))
+ /* The suffix is sufficiently aligned. */
+ {
+ e->u.suffix = cmp;
+ found = TRUE;
+ }
+ ++b;
}
- else
- *p = e;
- }
- s = (const unsigned char *) (e->root.string + e->len - e->u.entsize);
- hash = 0;
- for (i = 0; i < e->u.entsize; i++)
- {
- c = *--s;
- hash += c + (c << 17);
- hash ^= hash >> 2;
- }
- p = htab_find_slot_with_hash (lasttab, e, hash, INSERT);
- if (p == NULL)
- goto alloc_failure;
- if (*p)
- {
- struct sec_merge_hash_entry *ent;
-
- ent = (struct sec_merge_hash_entry *) *p;
- e->u.suffix = ent;
- e->alignment = 0;
+ if (found)
+ e->alignment = 0;
}
- else
- *p = e;
}
alloc_failure:
if (array)
free (array);
- if (lasttab)
- htab_delete (lasttab);
- if (last4tab)
- htab_delete (last4tab);
/* Now assign positions to the strings we want to keep. */
size = 0;
Index: elf-strtab.c
===================================================================
RCS file: /cvs/src/src/bfd/elf-strtab.c,v
retrieving revision 1.7
diff -u -p -r1.7 elf-strtab.c
--- elf-strtab.c 7 Aug 2003 07:25:34 -0000 1.7
+++ elf-strtab.c 9 Sep 2003 15:29:03 -0000
@@ -256,37 +256,37 @@ _bfd_elf_strtab_emit (register bfd *abfd
/* Compare two elf_strtab_hash_entry structures. This is called via qsort. */
static int
-cmplengthentry (const void *a, const void *b)
+strrevcmp(const void *a, const void *b)
{
struct elf_strtab_hash_entry *A = *(struct elf_strtab_hash_entry **) a;
struct elf_strtab_hash_entry *B = *(struct elf_strtab_hash_entry **) b;
- if (A->len < B->len)
- return 1;
- else if (A->len > B->len)
- return -1;
+ const unsigned char *s = A->root.string + A->len - 1;
+ const unsigned char *t = B->root.string + B->len - 1;
- return memcmp (A->root.string, B->root.string, A->len);
+ int l = A->len < B->len ? A->len : B->len;
+ while (l)
+ {
+ if (*s != *t)
+ return (int)*s - (int)*t;
+ s--;
+ t--;
+ l--;
+ }
+ return A->len - B->len;
}
static int
-last4_eq (const void *a, const void *b)
+is_suffix(const struct elf_strtab_hash_entry *A,
+ const struct elf_strtab_hash_entry *B)
{
- const struct elf_strtab_hash_entry *A = a;
- const struct elf_strtab_hash_entry *B = b;
-
- if (memcmp (A->root.string + A->len - 5, B->root.string + B->len - 5, 4)
- != 0)
- /* This was a hashtable collision. */
- return 0;
-
if (A->len <= B->len)
/* B cannot be a suffix of A unless A is equal to B, which is guaranteed
not to be equal by the hash table. */
return 0;
return memcmp (A->root.string + (A->len - B->len),
- B->root.string, B->len - 5) == 0;
+ B->root.string, B->len - 1) == 0;
}
/* This function assigns final string table offsets for used strings,
@@ -296,9 +296,7 @@ void
_bfd_elf_strtab_finalize (struct elf_strtab_hash *tab)
{
struct elf_strtab_hash_entry **array, **a, **end, *e;
- htab_t last4tab = NULL;
bfd_size_type size, amt;
- struct elf_strtab_hash_entry *last[256], **last_ptr[256];
/* GCC 2.91.66 (egcs-1.1.2) on i386 miscompiles this function when i is
a 64-bit bfd_size_type: a 64-bit target or --enable-64-bit-bfd.
@@ -306,16 +304,13 @@ _bfd_elf_strtab_finalize (struct elf_str
cycles. */
size_t i;
- /* Now sort the strings by length, longest first. */
+ /* Now sort the strings by suffix and length. */
array = NULL;
amt = tab->size * sizeof (struct elf_strtab_hash_entry *);
array = bfd_malloc (amt);
if (array == NULL)
goto alloc_failure;
- memset (last, 0, sizeof (last));
- for (i = 0; i < 256; ++i)
- last_ptr[i] = &last[i];
for (i = 1, a = array; i < tab->size; ++i)
if (tab->array[i]->refcount)
*a++ = tab->array[i];
@@ -324,80 +319,33 @@ _bfd_elf_strtab_finalize (struct elf_str
size = a - array;
- qsort (array, size, sizeof (struct elf_strtab_hash_entry *), cmplengthentry);
+ qsort (array, size, sizeof (struct elf_strtab_hash_entry *), strrevcmp);
- last4tab = htab_create_alloc (size * 4, NULL, last4_eq, NULL, calloc, free);
- if (last4tab == NULL)
- goto alloc_failure;
-
- /* Now insert the strings into hash tables (strings with last 4 characters
- and strings with last character equal), look for longer strings which
- we're suffix of. */
+ /* now loop over the sorted array and merge suffixes */
for (a = array, end = array + size; a < end; a++)
{
- register hashval_t hash;
- unsigned int c;
- unsigned int j;
- const unsigned char *s;
- void **p;
-
e = *a;
- if (e->len > 4)
- {
- s = e->root.string + e->len - 1;
- hash = 0;
- for (j = 0; j < 4; j++)
- {
- c = *--s;
- hash += c + (c << 17);
- hash ^= hash >> 2;
- }
- p = htab_find_slot_with_hash (last4tab, e, hash, INSERT);
- if (p == NULL)
- goto alloc_failure;
- if (*p)
- {
- struct elf_strtab_hash_entry *ent;
-
- ent = *p;
- e->u.suffix = ent;
- e->len = 0;
- continue;
- }
- else
- *p = e;
- }
- else
+ if (e->len > 1)
{
- struct elf_strtab_hash_entry *tem;
-
- c = e->root.string[e->len - 2] & 0xff;
-
- for (tem = last[c]; tem; tem = tem->u.next)
- if (tem->len > e->len
- && memcmp (tem->root.string + (tem->len - e->len),
- e->root.string, e->len - 1) == 0)
- break;
- if (tem)
+ struct elf_strtab_hash_entry **b = a+1;
+ int found = 0;
+ while (b < end)
{
- e->u.suffix = tem;
- e->len = 0;
- continue;
+ struct elf_strtab_hash_entry *cmp = *b;
+ if (!is_suffix(cmp, e))
+ break;
+ e->u.suffix = cmp;
+ found = 1;
+ ++b;
}
+ if (found)
+ e->len = 0;
}
-
- c = e->root.string[e->len - 2] & 0xff;
- /* Put longest strings first. */
- *last_ptr[c] = e;
- last_ptr[c] = &e->u.next;
- e->u.next = NULL;
}
alloc_failure:
if (array)
free (array);
- if (last4tab)
- htab_delete (last4tab);
/* Now assign positions to the strings we want to keep. */
size = 1;