This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
RFC: Profiling multiple DSOs
- To: GNU C Library <libc-alpha at sourceware dot cygnus dot com>
- Subject: RFC: Profiling multiple DSOs
- From: "H . J . Lu" <hjl at lucon dot org>
- Date: Tue, 25 Sep 2001 17:55:32 -0700
Here is a patch to support profiling multiple DSOs. It works with
# LD_PROFILE_ALL=1 ./a.out
# LD_PROFILE=libc.so.6:foo.so:bar.so ./a.out
Any comments?
H.J.
2001-09-25 H.J. Lu <hjl@gnu.org>
* elf/Versions (ld): Add _dl_stop_profile to GLIBC_2.2.5.
* elf/dl-close.c (_dl_close): Call _dl_stop_profile if
_dl_profile != NULL.
* elf/dl-open.c (dl_open_worker): Call _dl_start_profile if
_dl_profile != NULL.
* elf/dl-profile.c: Updated to support profiling multiple
DSOs.
* elf/dl-profstub.c (_dl_profile_map): Removed.
(_dl_mcount_wrapper): Check _dl_profile instead of
_dl_profile_map.
* elf/dl-support.c (_dl_profile_map): Removed.
* elf/rtld.c: Likewise.
* sysdeps/generic/ldsodefs.h: Likewise.
* elf/rtld.c (dl_main): Call _dl_init_profile if necessary.
Call _dl_start_profile after relocating an object
(process_envvars): Support LD_PROFILE_ALL.
* sysdeps/generic/ldsodefs.h (_dl_init_profile): New.
(_dl_stop_profile): Likewise.
* sysdeps/i386/dl-machine.h (elf_machine_runtime_setup): Don't
set _dl_profile_map.
* sysdeps/posix/dl-profil.h: New.
--- libc/elf/Versions.prof-dso Fri Jul 27 11:01:30 2001
+++ libc/elf/Versions Tue Sep 25 17:00:29 2001
@@ -66,4 +66,7 @@ ld {
GLIBC_2.2.3 {
_dl_debug_mask; _dl_debug_printf;
}
+ GLIBC_2.2.5 {
+ _dl_stop_profile;
+ }
}
--- libc/elf/dl-close.c.prof-dso Tue Sep 11 07:42:39 2001
+++ libc/elf/dl-close.c Tue Sep 25 17:00:29 2001
@@ -206,6 +206,9 @@ _dl_close (void *_map)
--_dl_main_searchlist->r_nlist;
}
+ if (__builtin_expect (_dl_profile != NULL, 0))
+ _dl_stop_profile (imap);
+
/* We can unmap all the maps at once. We determined the
start address and length when we loaded the object and
the `munmap' call does the rest. */
--- libc/elf/dl-open.c.prof-dso Tue Sep 25 16:53:56 2001
+++ libc/elf/dl-open.c Tue Sep 25 17:24:55 2001
@@ -277,17 +277,12 @@ dl_open_worker (void *a)
if (_dl_profile != NULL)
{
/* If this here is the shared object which we want to profile
- make sure the profile is started. We can find out whether
- this is necessary or not by observing the `_dl_profile_map'
- variable. If was NULL but is not NULL afterwars we must
- start the profiling. */
- struct link_map *old_profile_map = _dl_profile_map;
-
+ make sure the profile is started. */
_dl_relocate_object (l, l->l_scope, 1, 1);
- if (old_profile_map == NULL && _dl_profile_map != NULL)
+ if (_dl_profile != NULL)
/* We must prepare the profiling. */
- _dl_start_profile (_dl_profile_map, _dl_profile_output);
+ _dl_start_profile (l, _dl_profile_output);
}
else
#endif
--- libc/elf/dl-profile.c.prof-dso Sat Sep 22 23:28:25 2001
+++ libc/elf/dl-profile.c Tue Sep 25 17:39:49 2001
@@ -34,6 +34,11 @@
#include <sys/param.h>
#include <sys/stat.h>
#include <atomicity.h>
+#ifdef _LIBC_REENTRANT
+# include <pt-machine.h>
+# include <signal.h>
+# include <time.h>
+#endif
/* The LD_PROFILE feature has to be implemented different to the
normal profiling using the gmon/ functions. The problem is that an
@@ -132,40 +137,271 @@ struct here_cg_arc_record
uint32_t count;
} __attribute__ ((packed));
-static struct here_cg_arc_record *data;
-
-/* This is the number of entry which have been incorporated in the toset. */
-static uint32_t narcs;
-/* This is a pointer to the object representing the number of entries
- currently in the mmaped file. At no point of time this has to be the
- same as NARCS. If it is equal all entries from the file are in our
- lists. */
-static volatile uint32_t *narcsp;
-
-/* Description of the currently profiled object. */
-static long int state = GMON_PROF_OFF;
-
-static volatile uint16_t *kcount;
-static size_t kcountsize;
-
struct here_fromstruct
{
struct here_cg_arc_record volatile *here;
uint16_t link;
};
-static volatile uint16_t *tos;
+struct dl_profile_record
+{
+ struct link_map *map;
+
+ struct here_cg_arc_record *data;
+
+ /* This is the number of entries which have been incorporated in the
+ toset. */
+ uint32_t narcs;
+ /* This is a pointer to the object representing the number of entries
+ currently in the mmaped file. At no point of time this has to be
+ the same as NARCS. If it is equal all entries from the file are
+ in our lists. */
+ volatile uint32_t *narcsp;
+
+ /* Description of the currently profiled object. */
+ long int state;
+
+ volatile uint16_t *kcount;
+ size_t kcountsize;
+ int scale;
+
+ volatile uint16_t *tos;
+
+ struct here_fromstruct *froms;
+ uint32_t fromlimit;
+ volatile uint32_t fromidx;
+
+ uintptr_t lowpc;
+ uintptr_t highpc;
+ size_t textsize;
+ unsigned int hashfraction;
+ unsigned int log_hashfraction;
+
+ struct dl_profile_record *next;
+};
+
+struct dl_profile_record_table
+{
+ struct dl_profile_record_table *next;
+ /* # of entries in fdesc table */
+ unsigned int len;
+ /* index of first available entry */
+ volatile long int first_unused;
+ struct dl_profile_record record [0];
+};
+
+static
+struct local
+{
+ struct dl_profile_record_table *root;
+ struct dl_profile_record *list;
+ struct dl_profile_record *free_list;
+ int npages;
+#ifdef _LIBC_REENTRANT
+ volatile int lock;
+ sigset_t full_sigset;
+#endif
+} local;
+
+/* Locking is tricky: we may get a signal while holding the lock and
+ the signal handler may end up calling into the dynamic loader
+ again. Also, if a real-time process spins on the lock, a
+ non-realtime process may never get the chance to release its lock,
+ unless the realtime process relinquishes the CPU from time to time.
+ Hence we (a) block signals before acquiring the lock and (b) do a
+ nanosleep() when we detect prolonged contention. */
+#ifdef _LIBC_REENTRANT
+# define lock(l) \
+{ \
+ sigset_t _saved_set; \
+ int i = 10000; \
+ if (!__sigismember (&(l)->full_sigset, SIGINT)) \
+ __sigfillset (&(l)->full_sigset); \
+ \
+ while (testandset ((int *) &(l)->lock)) \
+ { \
+ struct timespec ts; \
+ if (i > 0) \
+ { \
+ --i; \
+ continue; \
+ } \
+ ts.tv_sec = 0; \
+ ts.tv_nsec = 1*1000*1000; \
+ __nanosleep (&ts, NULL); \
+ } \
+ __sigprocmask (SIG_BLOCK, &(l)->full_sigset, &_saved_set);
+# define unlock(l) \
+ __sigprocmask (SIG_SETMASK, &_saved_set, NULL); \
+ (l)->lock = 0; \
+}
+#else
+# define lock(l)
+# define unlock(l)
+#endif
+
+#ifdef MAP_ANON
+#define ANONFD (-1)
+#else
+extern int _dl_zerofd;
+#define ANONFD (_dl_zerofd)
+#endif
+
+static const char *profile_list;
+static int profile_all;
+
+static struct dl_profile_record_table *
+internal_function
+new_profile_record_table (struct local *l)
+{
+ size_t size = l->npages * _dl_pagesize;
+ struct dl_profile_record_table *new_table;
+
+ new_table = __mmap (0, size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, ANONFD, 0);
+ if (new_table == MAP_FAILED)
+ {
+ char buf[400];
+ int errnum = errno;
+ _dl_error_printf ("Cannot map pages for profiling record table: %s\n",
+ __strerror_r (errnum, buf, sizeof buf));
+ return NULL;
+ }
+
+ new_table->len = (size - sizeof (*new_table))
+ / sizeof (struct dl_profile_record);
+ new_table->first_unused = 0;
+ new_table->next = l->root;
+ l->root = new_table;
+ return new_table;
+}
+
+static struct dl_profile_record *
+internal_function
+new_profile_record (struct local *l)
+{
+ struct dl_profile_record *record;
+ struct dl_profile_record_table *new_table;
+
+ new_table = new_profile_record_table (l);
+ if (new_table == NULL)
+ return NULL;
+
+ l->npages += l->npages;
+ record = &new_table->record [new_table->first_unused++];
+ return record;
+}
+
+static struct dl_profile_record *
+internal_function
+make_profile_record (void)
+{
+ struct dl_profile_record *record = NULL;
+ struct dl_profile_record_table *t;
+ unsigned int old;
+ struct local *l = &local;
+
+ t = l->root;
+ while (t)
+ {
+ old = t->first_unused;
+ if (old >= t->len)
+ break;
+ else if (compare_and_swap (&t->first_unused, old, old + 1))
+ {
+ record = &t->record [old];
+ goto done;
+ }
+ }
+
+ if (l->free_list)
+ {
+ record = l->free_list; /* get it from free-list */
+ l->free_list = record->next;
+ }
+ else
+ record = new_profile_record (l); /* create new record */
+
+done:
+ return record;
+}
+
+static struct dl_profile_record *
+internal_function
+get_profile_record (struct link_map *map)
+{
+ struct dl_profile_record *record, **loc;
+ struct local *l = &local;
+
+ lock (l);
+
+ loc = &l->list;
+ record = *loc;
+ while (record && record->map->l_addr <= map->l_addr)
+ {
+ if (record->map == map)
+ {
+ /* We already have it. Do nothing. */
+ record = NULL;
+ goto done;
+ }
+ loc = &record->next;
+ record = *loc;
+ }
+
+ record = make_profile_record ();
+ if (record)
+ {
+ record->map = map;
+ record->next = *loc;
+ *loc = record;
+ }
+
+done:
+ unlock (l);
+
+ return record;
+}
+
+static inline struct dl_profile_record *
+internal_function
+find_profile_record (ElfW(Addr) selfpc)
+{
+ struct dl_profile_record *record, *found;
+ struct local *l = &local;
+
+ lock (l);
+
+ found = NULL;
+ for (record = l->list; record && record->lowpc <= selfpc;
+ record = record->next)
+ if (selfpc < record->highpc)
+ {
+ found = record;
+ break;
+ }
+
+ unlock (l);
-static struct here_fromstruct *froms;
-static uint32_t fromlimit;
-static volatile uint32_t fromidx;
-
-static uintptr_t lowpc;
-static size_t textsize;
-static unsigned int hashfraction;
-static unsigned int log_hashfraction;
+ return found;
+}
+#include <dl-profil.h>
+void
+internal_function
+_dl_init_profile (const char *list, int all)
+{
+ local.npages = 1;
+ if (new_profile_record_table (&local) == NULL)
+ /* Turn off the profiling. */
+ local.npages = 0;
+ profile_list = list;
+ profile_all = all;
+#ifdef DL_INIT_PROFILE
+ DL_INIT_PROFILE ();
+#endif
+}
/* Set up profiling data to profile object desribed by MAP. The output
file is found (or created) in OUTPUT_DIR. */
@@ -185,13 +421,49 @@ _dl_start_profile (struct link_map *map,
size_t idx;
size_t tossize;
size_t fromssize;
- uintptr_t highpc;
struct gmon_hdr *addr = NULL;
off_t expected_size;
/* See profil(2) where this is described. */
- int s_scale;
+ const char *soname;
+ const char *match, *end;
+ size_t i, l;
+ struct dl_profile_record *record;
#define SCALE_1_TO_1 0x10000L
+ if (local.npages == 0 || map->l_type == lt_executable)
+ return;
+
+ if (map->l_info[DT_SONAME] != NULL)
+ soname = ((const char *) D_PTR (map, l_info[DT_STRTAB])
+ + map->l_info[DT_SONAME]->d_un.d_val);
+ else
+ {
+ if (map->l_name [0])
+ soname = basename (map->l_name);
+ else
+ soname = _dl_argv [0];
+ }
+
+ if (profile_all == 0)
+ {
+ match = strstr (profile_list, soname);
+ if (match == NULL)
+ return;
+
+ l = strlen (soname);
+ end = strchr (match, ':');
+ if (end)
+ i = end - match;
+ else
+ i = strlen (match);
+ if (i != l)
+ return;
+ }
+
+ record = get_profile_record (map);
+ if (record == NULL)
+ return;
+
/* Compute the size of the sections which contain program code. */
for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
@@ -208,31 +480,34 @@ _dl_start_profile (struct link_map *map,
/* Now we can compute the size of the profiling data. This is done
with the same formulars as in `monstartup' (see gmon.c). */
- state = GMON_PROF_OFF;
- lowpc = ROUNDDOWN (mapstart + map->l_addr,
- HISTFRACTION * sizeof (HISTCOUNTER));
- highpc = ROUNDUP (mapend + map->l_addr,
- HISTFRACTION * sizeof (HISTCOUNTER));
- textsize = highpc - lowpc;
- kcountsize = textsize / HISTFRACTION;
- hashfraction = HASHFRACTION;
+ record->state = GMON_PROF_OFF;
+ record->lowpc = ROUNDDOWN (mapstart + map->l_addr,
+ HISTFRACTION * sizeof (HISTCOUNTER));
+ record->highpc = ROUNDUP (mapend + map->l_addr,
+ HISTFRACTION * sizeof (HISTCOUNTER));
+ record->textsize = record->highpc - record->lowpc;
+ record->kcountsize = record->textsize / HISTFRACTION;
+ record->hashfraction = HASHFRACTION;
if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
/* If HASHFRACTION is a power of two, mcount can use shifting
instead of integer division. Precompute shift amount. */
- log_hashfraction = __ffs (hashfraction * sizeof (*froms)) - 1;
+ record->log_hashfraction = __ffs (record->hashfraction
+ * sizeof (*record->froms)) - 1;
else
- log_hashfraction = -1;
- tossize = textsize / HASHFRACTION;
- fromlimit = textsize * ARCDENSITY / 100;
- if (fromlimit < MINARCS)
- fromlimit = MINARCS;
- if (fromlimit > MAXARCS)
- fromlimit = MAXARCS;
- fromssize = fromlimit * sizeof (struct here_fromstruct);
+ record->log_hashfraction = -1;
+ tossize = record->textsize / HASHFRACTION;
+ record->fromlimit = record->textsize * ARCDENSITY / 100;
+ if (record->fromlimit < MINARCS)
+ record->fromlimit = MINARCS;
+ if (record->fromlimit > MAXARCS)
+ record->fromlimit = MAXARCS;
+ fromssize = record->fromlimit
+ * sizeof (struct here_fromstruct);
expected_size = (sizeof (struct gmon_hdr)
- + 4 + sizeof (struct gmon_hist_hdr) + kcountsize
- + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record));
+ + 4 + sizeof (struct gmon_hist_hdr)
+ + record->kcountsize + 4 + 4
+ + fromssize * sizeof (struct here_cg_arc_record));
/* Create the gmon_hdr we expect or write. */
memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
@@ -242,7 +517,8 @@ _dl_start_profile (struct link_map *map,
/* Create the hist_hdr we expect or write. */
*(char **) hist_hdr.low_pc = (char *) mapstart;
*(char **) hist_hdr.high_pc = (char *) mapend;
- *(int32_t *) hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER);
+ *(int32_t *) hist_hdr.hist_size
+ = record->kcountsize / sizeof (HISTCOUNTER);
*(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
hist_hdr.dimen_abbrev = 's';
@@ -250,11 +526,11 @@ _dl_start_profile (struct link_map *map,
/* First determine the output name. We write in the directory
OUTPUT_DIR and the name is composed from the shared objects
soname (or the file name) and the ending ".profile". */
- filename = (char *) alloca (strlen (output_dir) + 1 + strlen (_dl_profile)
- + sizeof ".profile");
+ filename = (char *) alloca (strlen (output_dir) + 1
+ + strlen (soname) + sizeof ".profile");
cp = __stpcpy (filename, output_dir);
*cp++ = '/';
- __stpcpy (__stpcpy (cp, _dl_profile), ".profile");
+ __stpcpy (__stpcpy (cp, soname), ".profile");
#ifdef O_NOFOLLOW
# define EXTRA_FLAGS | O_NOFOLLOW
@@ -317,7 +593,7 @@ _dl_start_profile (struct link_map *map,
__munmap ((void *) addr, expected_size);
_dl_error_printf ("%s: file is no correct profile data file for `%s'\n",
- filename, _dl_profile);
+ filename, soname);
return;
}
@@ -338,12 +614,15 @@ _dl_start_profile (struct link_map *map,
/* Pointer to data after the header. */
hist = (char *) (addr + 1);
- kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
- + sizeof (struct gmon_hist_hdr));
+ record->kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
+ + sizeof (struct gmon_hist_hdr));
/* Compute pointer to array of the arc information. */
- narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t));
- data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t));
+ record->narcsp = (uint32_t *) ((char *) record->kcount
+ + record->kcountsize
+ + sizeof (uint32_t));
+ record->data = (struct here_cg_arc_record *) ((char *) record->narcsp
+ + sizeof (uint32_t));
if (st.st_size == 0)
{
@@ -354,7 +633,7 @@ _dl_start_profile (struct link_map *map,
memcpy (hist + sizeof (uint32_t), &hist_hdr,
sizeof (struct gmon_hist_hdr));
- narcsp[-1] = GMON_TAG_CG_ARC;
+ record->narcsp[-1] = GMON_TAG_CG_ARC;
}
else
{
@@ -363,21 +642,22 @@ _dl_start_profile (struct link_map *map,
|| *(uint32_t *) hist != GMON_TAG_TIME_HIST
|| memcmp (hist + sizeof (uint32_t), &hist_hdr,
sizeof (struct gmon_hist_hdr)) != 0
- || narcsp[-1] != GMON_TAG_CG_ARC)
+ || record->narcsp[-1] != GMON_TAG_CG_ARC)
goto wrong_format;
}
/* Allocate memory for the froms data and the pointer to the tos records. */
- tos = (uint16_t *) calloc (tossize + fromssize, 1);
- if (tos == NULL)
+ record->tos = (uint16_t *) calloc (tossize + fromssize, 1);
+ if (record->tos == NULL)
{
__munmap ((void *) addr, expected_size);
_dl_fatal_printf ("Out of memory while initializing profiler\n");
/* NOTREACHED */
}
- froms = (struct here_fromstruct *) ((char *) tos + tossize);
- fromidx = 0;
+ record->froms = (struct here_fromstruct *) ((char *) record->tos
+ + tossize);
+ record->fromidx = 0;
/* Now we have to process all the arc count entries. BTW: it is
not critical whether the *NARCSP value changes meanwhile. Before
@@ -386,47 +666,77 @@ _dl_start_profile (struct link_map *map,
Loading the entries in reverse order should help to get the most
frequently used entries at the front of the list. */
- for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; )
+ for (idx = record->narcs = MIN (*record->narcsp, record->fromlimit);
+ idx > 0; )
{
size_t to_index;
size_t newfromidx;
--idx;
- to_index = (data[idx].self_pc / (hashfraction * sizeof (*tos)));
- newfromidx = fromidx++;
- froms[newfromidx].here = &data[idx];
- froms[newfromidx].link = tos[to_index];
- tos[to_index] = newfromidx;
+ to_index = (record->data[idx].self_pc
+ / (record->hashfraction * sizeof (*record->tos)));
+ newfromidx = record->fromidx++;
+ record->froms[newfromidx].here = &record->data[idx];
+ record->froms[newfromidx].link = record->tos[to_index];
+ record->tos[to_index] = newfromidx;
}
/* Setup counting data. */
- if (kcountsize < highpc - lowpc)
+ if (record->kcountsize < record->highpc - record->lowpc)
{
#if 0
s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1;
#else
- size_t range = highpc - lowpc;
- size_t quot = range / kcountsize;
+ size_t range = record->highpc - record->lowpc;
+ size_t quot = range / record->kcountsize;
if (quot >= SCALE_1_TO_1)
- s_scale = 1;
+ record->scale = 1;
else if (quot >= SCALE_1_TO_1 / 256)
- s_scale = SCALE_1_TO_1 / quot;
+ record->scale = SCALE_1_TO_1 / quot;
else if (range > ULONG_MAX / 256)
- s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256));
+ record->scale = (SCALE_1_TO_1 * 256)
+ / (range / (record->kcountsize / 256));
else
- s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize);
+ record->scale = (SCALE_1_TO_1 * 256)
+ / ((range * 256) / record->kcountsize);
#endif
}
else
- s_scale = SCALE_1_TO_1;
-
- /* Start the profiler. */
- __profil ((void *) kcount, kcountsize, lowpc, s_scale);
+ record->scale = SCALE_1_TO_1;
/* Turn on profiling. */
- state = GMON_PROF_ON;
+ record->state = GMON_PROF_ON;
}
+void
+internal_function
+_dl_stop_profile (struct link_map *map)
+{
+ struct dl_profile_record *record, **loc;
+ struct local *l = &local;
+
+ lock (l);
+
+ loc = &l->list;
+ record = *loc;
+ while (record && record->map->l_addr <= map->l_addr)
+ {
+ if (record->map == map)
+ /* We found it. */
+ break;
+ loc = &record->next;
+ record = *loc;
+ }
+
+ if (record && record->map == map)
+ {
+ *loc = record->next;
+ record->next = l->free_list; /* put it on free-list */
+ l->free_list = record;
+ }
+
+ unlock (l);
+}
void
_dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
@@ -434,13 +744,19 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
volatile uint16_t *topcindex;
size_t i, fromindex;
struct here_fromstruct *fromp;
+ struct dl_profile_record *record;
+
+ /* Find the proper record. */
+ record = find_profile_record (selfpc);
+ if (record == NULL)
+ goto done;
#if 0
/* XXX I think this is now not necessary anymore. */
if (! compare_and_swap (&state, GMON_PROF_ON, GMON_PROF_BUSY))
return;
#else
- if (state != GMON_PROF_ON)
+ if (record->state != GMON_PROF_ON)
return;
#endif
@@ -449,12 +765,10 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
restrict it in any way, just set to a fixed value (0) in case it
is outside the allowed range. These calls show up as calls from
<external> in the gprof output. */
- frompc -= lowpc;
- if (frompc >= textsize)
+ frompc -= record->lowpc;
+ if (frompc >= record->textsize)
frompc = 0;
- selfpc -= lowpc;
- if (selfpc >= textsize)
- goto done;
+ selfpc -= record->lowpc;
/* Getting here we now have to find out whether the location was
already used. If yes we are lucky and only have to increment a
@@ -463,17 +777,17 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
/* Avoid integer divide if possible. */
if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
- i = selfpc >> log_hashfraction;
+ i = selfpc >> record->log_hashfraction;
else
- i = selfpc / (hashfraction * sizeof (*tos));
+ i = selfpc / (record->hashfraction * sizeof (*record->tos));
- topcindex = &tos[i];
+ topcindex = &record->tos[i];
fromindex = *topcindex;
if (fromindex == 0)
goto check_new_or_add;
- fromp = &froms[fromindex];
+ fromp = &record->froms[fromindex];
/* We have to look through the chain of arcs whether there is already
an entry for our arc. */
@@ -481,7 +795,7 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
{
if (fromp->link != 0)
do
- fromp = &froms[fromp->link];
+ fromp = &record->froms[fromp->link];
while (fromp->link != 0 && fromp->here->from_pc != frompc);
if (fromp->here->from_pc != frompc)
@@ -491,43 +805,45 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
check_new_or_add:
/* Our entry is not among the entries we read so far from the
data file. Now see whether we have to update the list. */
- while (narcs != *narcsp && narcs < fromlimit)
+ while (record->narcs != *record->narcsp
+ && record->narcs < record->fromlimit)
{
size_t to_index;
size_t newfromidx;
- to_index = (data[narcs].self_pc
- / (hashfraction * sizeof (*tos)));
- newfromidx = exchange_and_add (&fromidx, 1) + 1;
- froms[newfromidx].here = &data[narcs];
- froms[newfromidx].link = tos[to_index];
- tos[to_index] = newfromidx;
- atomic_add (&narcs, 1);
+ to_index = (record->data[record->narcs].self_pc
+ / (record->hashfraction
+ * sizeof (*record->tos)));
+ newfromidx = exchange_and_add (&record->fromidx, 1) + 1;
+ record->froms[newfromidx].here = &record->data[record->narcs];
+ record->froms[newfromidx].link = record->tos[to_index];
+ record->tos[to_index] = newfromidx;
+ atomic_add (&record->narcs, 1);
}
/* If we still have no entry stop searching and insert. */
if (*topcindex == 0)
{
- uint_fast32_t newarc = exchange_and_add (narcsp, 1);
+ uint_fast32_t newarc = exchange_and_add (record->narcsp, 1);
/* In rare cases it could happen that all entries in FROMS are
occupied. So we cannot count this anymore. */
- if (newarc >= fromlimit)
+ if (newarc >= record->fromlimit)
goto done;
- *topcindex = exchange_and_add (&fromidx, 1) + 1;
- fromp = &froms[*topcindex];
+ *topcindex = exchange_and_add (&record->fromidx, 1) + 1;
+ fromp = &record->froms[*topcindex];
- fromp->here = &data[newarc];
- data[newarc].from_pc = frompc;
- data[newarc].self_pc = selfpc;
- data[newarc].count = 0;
+ fromp->here = &record->data[newarc];
+ record->data[newarc].from_pc = frompc;
+ record->data[newarc].self_pc = selfpc;
+ record->data[newarc].count = 0;
fromp->link = 0;
- atomic_add (&narcs, 1);
+ atomic_add (&record->narcs, 1);
break;
}
- fromp = &froms[*topcindex];
+ fromp = &record->froms[*topcindex];
}
else
/* Found in. */
--- libc/elf/dl-profstub.c.prof-dso Sat Jul 7 16:44:45 2001
+++ libc/elf/dl-profstub.c Tue Sep 25 17:00:29 2001
@@ -22,10 +22,6 @@
#include <elf.h>
#include <ldsodefs.h>
-/* This is the map for the shared object we profile. It is defined here
- only because we test for this value being NULL or not. */
-extern struct link_map *_dl_profile_map;
-
void
_dl_mcount_wrapper (void *selfpc)
@@ -37,6 +33,6 @@ _dl_mcount_wrapper (void *selfpc)
void
_dl_mcount_wrapper_check (void *selfpc)
{
- if (_dl_profile_map != NULL)
+ if (_dl_profile != NULL)
_dl_mcount ((ElfW(Addr)) RETURN_ADDRESS (0), (ElfW(Addr)) selfpc);
}
--- libc/elf/dl-support.c.prof-dso Tue Sep 25 17:00:29 2001
+++ libc/elf/dl-support.c Tue Sep 25 17:36:23 2001
@@ -62,9 +62,6 @@ const char *_dl_profile;
ignored. */
const char *_dl_inhibit_rpath;
-/* The map for the object we will profile. */
-struct link_map *_dl_profile_map;
-
/* This is the address of the last stack address ever used. */
void *__libc_stack_end;
--- libc/elf/rtld.c.prof-dso Tue Sep 25 17:00:29 2001
+++ libc/elf/rtld.c Tue Sep 25 17:44:06 2001
@@ -64,9 +64,9 @@ size_t _dl_platformlen;
unsigned long _dl_hwcap;
fpu_control_t _dl_fpu_control = _FPU_DEFAULT;
struct r_search_path *_dl_search_paths;
+static int profile_all;
const char *_dl_profile;
const char *_dl_profile_output;
-struct link_map *_dl_profile_map;
int _dl_lazy = 1;
/* XXX I know about at least one case where we depend on the old weak
behavior (it has to do with librt). Until we get DSO groups implemented
@@ -1039,6 +1039,9 @@ of this helper program; chances are you
while (l->l_next)
l = l->l_next;
+ if (__builtin_expect (consider_profiling, 0))
+ _dl_init_profile (_dl_profile, profile_all);
+
HP_TIMING_NOW (start);
do
{
@@ -1054,7 +1057,12 @@ of this helper program; chances are you
}
if (l != &_dl_rtld_map)
- _dl_relocate_object (l, l->l_scope, _dl_lazy, consider_profiling);
+ {
+ _dl_relocate_object (l, l->l_scope, _dl_lazy,
+ consider_profiling);
+ if (__builtin_expect (consider_profiling, 0))
+ _dl_start_profile (l, _dl_profile_output);
+ }
l = l->l_prev;
}
@@ -1070,14 +1078,6 @@ of this helper program; chances are you
_dl_relocate_object might need to call `mprotect' for DT_TEXTREL. */
_dl_sysdep_start_cleanup ();
- /* Now enable profiling if needed. Like the previous call,
- this has to go here because the calls it makes should use the
- rtld versions of the functions (particularly calloc()), but it
- needs to have _dl_profile_map set up by the relocator. */
- if (__builtin_expect (_dl_profile_map != NULL, 0))
- /* We must prepare the profiling. */
- _dl_start_profile (_dl_profile_map, _dl_profile_output);
-
if (_dl_rtld_map.l_opencount > 1)
{
/* There was an explicit ref to the dynamic linker as a shared lib.
@@ -1344,7 +1344,7 @@ process_envvars (enum mode *modep)
break;
}
- /* Which shared object shall be profiled. */
+ /* Which shared objects shall be profiled. */
if (memcmp (envline, "PROFILE", 7) == 0)
_dl_profile = &envline[8];
break;
@@ -1378,6 +1378,13 @@ process_envvars (enum mode *modep)
if (!__libc_enable_secure
&& memcmp (envline, "ORIGIN_PATH", 11) == 0)
_dl_origin_path = &envline[12];
+
+ /* Profile all shared objects? */
+ if (memcmp (envline, "PROFILE_ALL", 11) == 0)
+ {
+ _dl_profile = &envline[12];
+ profile_all = 1;
+ }
break;
case 12:
--- libc/sysdeps/generic/ldsodefs.h.prof-dso Thu Sep 6 16:17:02 2001
+++ libc/sysdeps/generic/ldsodefs.h Tue Sep 25 17:38:52 2001
@@ -187,10 +187,8 @@ extern unsigned int _dl_osversion;
/* File descriptor referring to the zero-fill device. */
extern int _dl_zerofd;
-/* Name of the shared object to be profiled (if any). */
+/* Names of the shared objects to be profiled (if any). */
extern const char *_dl_profile;
-/* Map of shared object to be profiled. */
-extern struct link_map *_dl_profile_map;
/* Filename of the output file. */
extern const char *_dl_profile_output;
@@ -458,8 +456,12 @@ extern void _dl_init_paths (const char *
/* Gather the information needed to install the profiling tables and start
the timers. */
+extern void _dl_init_profile (const char *filelist, int profile_all)
+ internal_function;
+
extern void _dl_start_profile (struct link_map *map, const char *output_dir)
internal_function;
+extern void _dl_stop_profile (struct link_map *map) internal_function;
/* The actual functions used to keep book on the calls. */
extern void _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc);
--- libc/sysdeps/i386/dl-machine.h.prof-dso Fri Aug 31 09:36:57 2001
+++ libc/sysdeps/i386/dl-machine.h Tue Sep 25 17:00:29 2001
@@ -96,14 +96,7 @@ elf_machine_runtime_setup (struct link_m
don't store the address in the GOT so that all future calls also
end in this function. */
if (__builtin_expect (profile, 0))
- {
- got[2] = (Elf32_Addr) &_dl_runtime_profile;
-
- if (_dl_name_match_p (_dl_profile, l))
- /* This is the object we are looking for. Say that we really
- want profiling and the timers are started. */
- _dl_profile_map = l;
- }
+ got[2] = (Elf32_Addr) &_dl_runtime_profile;
else
/* This function will get called to fix up the GOT entry indicated by
the offset on the stack, and then jump to the resolved address. */
--- libc/sysdeps/posix/dl-profil.h.prof-dso Tue Sep 25 17:00:29 2001
+++ libc/sysdeps/posix/dl-profil.h Tue Sep 25 17:00:29 2001
@@ -0,0 +1,72 @@
+/* Low-level statistical profiling support function for dynamic
+ linker. Mostly POSIX.1 version.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <signal.h>
+#include <sys/time.h>
+
+#ifdef SIGPROF
+static inline void
+profil_count (void *pc)
+{
+ struct dl_profile_record *record;
+ size_t i;
+
+ /* Find the proper record. */
+ record = find_profile_record ((ElfW(Addr)) pc);
+ if (record == NULL)
+ return;
+
+ i = (pc - record->lowpc - (void *) 0) / 2;
+ if (sizeof (unsigned long long int) > sizeof (size_t))
+ i = (unsigned long long int) i * record->scale / 65536;
+ else
+ i = i / 65536 * record->scale + i % 65536 * record->scale / 65536;
+
+ ++(record->kcount [i]);
+}
+
+/* Get the machine-dependent definition of `profil_counter', the signal
+ handler for SIGPROF. It calls `profil_count' (above) with the PC of the
+ interrupted code. */
+#include "profil-counter.h"
+
+/* Enable statistical profiling. */
+static void
+internal_function
+dl_profil (void)
+{
+ struct sigaction act;
+ struct itimerval timer;
+
+ act.sa_handler = (sighandler_t) &profil_counter;
+ act.sa_flags = SA_RESTART;
+ sigfillset (&act.sa_mask);
+ if (__sigaction (SIGPROF, &act, NULL) < 0)
+ return;
+
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 1;
+ timer.it_interval = timer.it_value;
+ __setitimer (ITIMER_PROF, &timer, NULL);
+ return;
+}
+
+#define DL_INIT_PROFILE() dl_profil ()
+#endif