This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] SPU gprof support


Hi,

this patch adds initial support for the GNU profiler on the SPU. The flat 
profile is based on a histogram that is created using the sampling mechanism 
provided by spu_timer.h (http://sourceware.org/ml/newlib/2008/msg00316.html). 
For the call graph the compiler generates a particular prologue that causes 
every function to call _mcount. The call graph is stored in PPU memory using 
the named address space support 
(http://sourceware.org/ml/newlib/2008/msg00600.html).

Ken

libgloss/ChangeLog:

2008-11-26  Ken Werner  <ken.werner@de.ibm.com>

        * spu/crt0.S: Call __monstartup if profiling is enabled.
        * spu/Makefile.in: Add gcrt1.o gcrt2.o.

newlib/ChangeLog:

2008-11-26  Ken Werner  <ken.werner@de.ibm.com>

        * libc/machine/spu/Makefile.am: Add spu-mcount.S spu-gmon.c.
        * libc/machine/spu/spu-gmon.c: New file.
        * libc/machine/spu/spu-mcount.S: New file.
Index: src/libgloss/spu/crt0.S
===================================================================
--- src.orig/libgloss/spu/crt0.S
+++ src/libgloss/spu/crt0.S
@@ -158,6 +158,12 @@ _start:
 	ila	$3, _fini
 	brsl	$LR, atexit
 
+#ifdef _PROFILE
+	/* Call monstartup if profiling is enabled
+	 */
+	brsl	$LR, __monstartup
+#endif
+
 	ori     $3,$80,0
 	ori     $4,$81,0	
 #ifndef _STD_MAIN
Index: src/libgloss/spu/Makefile.in
===================================================================
--- src.orig/libgloss/spu/Makefile.in
+++ src/libgloss/spu/Makefile.in
@@ -71,7 +71,7 @@ EVALOBJS = ${OBJS}
 GCC_LDFLAGS = `if [ -d ${objroot}/../gcc ] ; \
 	then echo -L${objroot}/../gcc ; fi`
 
-CRTOBJS = crti.o crtn.o crt1.o crt2.o
+CRTOBJS = crti.o crtn.o crt1.o crt2.o gcrt1.o gcrt2.o
 OUTPUTS = libgloss.a $(CRTOBJS)
 
 NEWLIB_CFLAGS = `if [ -d ${objroot}/newlib ]; then echo -I${objroot}/newlib/targ-include -I${srcroot}/newlib/libc/include; fi` -ffunction-sections -fdata-sections
@@ -133,6 +133,12 @@ crt1.o: $(srcdir)/crt0.S
 crt2.o: $(srcdir)/crt0.S
 	$(CC) $(srcdir)/crt0.S -D_STD_MAIN -c -o crt2.o
 
+gcrt1.o: $(srcdir)/crt0.S
+	$(CC) $(srcdir)/crt0.S -D_PROFILE -c -o gcrt1.o
+
+gcrt2.o: $(srcdir)/crt0.S
+	$(CC) $(srcdir)/crt0.S -D_PROFILE -D_STD_MAIN -c -o gcrt2.o
+
 doc:	
 
 clean mostlyclean:
Index: src/newlib/libc/machine/spu/Makefile.am
===================================================================
--- src.orig/newlib/libc/machine/spu/Makefile.am
+++ src/newlib/libc/machine/spu/Makefile.am
@@ -31,7 +31,8 @@ lib_a_SOURCES += calloc_ea.c free_ea.c m
 	munmap_ea.c posix_memalign_ea.c realloc_ea.c strcat_ea.c strchr_ea.c \
 	strcmp_ea.c strcpy_ea.c strcspn_ea.c strlen_ea.c strncat_ea.c strncmp_ea.c \
 	strncpy_ea.c strpbrk_ea.c strrchr_ea.c strspn_ea.c strstr_ea.c read_ea.c \
-	pread_ea.c readv_ea.c write_ea.c pwrite_ea.c writev_ea.c
+	pread_ea.c readv_ea.c write_ea.c pwrite_ea.c writev_ea.c spu-mcount.S \
+	spu-gmon.c
 endif
 
 lib_a_CCASFLAGS = $(AM_CCASFLAGS)
Index: src/newlib/libc/machine/spu/spu-gmon.c
===================================================================
--- /dev/null
+++ src/newlib/libc/machine/spu/spu-gmon.c
@@ -0,0 +1,419 @@
+/*
+(C) Copyright IBM Corp. 2008
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of IBM nor the names of its contributors may be
+used to endorse or promote products derived from this software without
+specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+Author: Ken Werner <ken.werner@de.ibm.com>
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <fcntl.h>
+#include <ea.h>
+#include <spu_intrinsics.h>
+#include <spu_mfcio.h>
+#include <spu_timer.h>
+#include <limits.h>
+
+/* Magic cookie stored in the first four bytes of gmon.out.  */
+#define GMON_MAGIC_COOKIE "gmon"
+
+/* Version number stored in the gmon.out header.  */
+#define GMON_VERSION 1
+
+/* One histogram counter is kept for every HISTFRACTION bytes of text.  */
+#define HISTFRACTION 4
+
+/* Histogram counter type.  */
+#define HISTCOUNTER unsigned short
+
+/* Fraction of text space to allocate for "from" hash buckets. HASHFRACTION is
+   based on the minimum number of bytes of separation between two subroutine
+   call points in the object code.  */
+#define HASHFRACTION 4
+
+/* Percentage of the text size used as the number of tostructs (tolimit).  */
+#define ARCDENSITY 3
+
+/* Lower bound for tolimit, whatever the text size.  */
+#define MINARCS 50
+
+/* Round X down or up to a multiple of Y.  */
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+#define ROUNDUP(x,y)   ((((x)+(y)-1)/(y))*(y))
+
+/* Sampling frequency in Hertz (despite the name, not a time interval).  */
+#define SAMPLE_INTERVAL 100
+
+/* One-byte tags that precede each gmon.out record.  */
+#define GMON_TAG_TIME_HIST 0
+#define GMON_TAG_CG_ARC 1
+
+struct tostruct /* One call graph arc, chained per froms bucket.  */
+{
+  uintptr_t selfpc;     /* Address of the function that was entered.  */
+  long count;           /* Number of times this arc was traversed.  */
+  unsigned short link;  /* Index of the next tos entry; 0 ends the chain.  */
+};
+
+struct gmon_hdr /* Written by __mcleanup at offset 0 of gmon.out.  */
+{
+  char cookie[4];       /* GMON_MAGIC_COOKIE without the trailing NUL.  */
+  int32_t version;      /* GMON_VERSION.  */
+  char spare[3 * 4];    /* Unused; zero-filled by __mcleanup.  */
+};
+
+struct gmon_hist_hdr /* Written after the GMON_TAG_TIME_HIST tag byte.  */
+{
+  uintptr_t low_pc;     /* Set to s_lowpc.  */
+  uintptr_t high_pc;    /* Set to s_highpc.  */
+  int32_t hist_size;    /* Number of histogram bins.  */
+  int32_t prof_rate;    /* Sampling rate in Hertz.  */
+  char dimen[15];       /* Unit name; set to "seconds".  */
+  char dimen_abbrev;    /* Unit abbreviation; set to 's'.  */
+} __attribute__ ((packed));
+
+struct rawarc /* Written after each GMON_TAG_CG_ARC tag byte.  */
+{
+  uintptr_t raw_frompc; /* Caller address rebuilt from the froms index.  */
+  uintptr_t raw_selfpc; /* Address of the called function.  */
+  long raw_count;       /* Number of times the arc was traversed.  */
+} __attribute__ ((packed));
+
+/* Start and end of the text section; only their addresses are used.  */
+extern char _start;
+extern char _etext;
+
+/* froms holds, per call-site bucket, the index of the first tos entry.  */
+static __ea unsigned short *froms;
+static __ea struct tostruct *tos = 0; /* tos[0].link counts used entries.  */
+static long tolimit = 0;              /* Number of entries allocated in tos.  */
+static uintptr_t s_lowpc = 0;         /* &_start, rounded down.  */
+static uintptr_t s_highpc = 0;        /* &_etext, rounded up.  */
+static unsigned long s_textsize = 0;  /* s_highpc - s_lowpc.  */
+
+static int fd;        /* Descriptor of the open gmon.out file.  */
+static int hist_size; /* Size of the histogram data in bytes.  */
+static int timer_id;  /* Timer allocated for __sample.  */
+
+/* Timer callback registered by __monstartup.  Increments, directly in the
+   gmon.out file, the histogram bin that covers the program counter read
+   from SRR0.  Calls for a timer other than timer_id are ignored.  */
+void
+__sample (int id)
+{
+  unsigned int pc;
+  off_t offset;
+  HISTCOUNTER val;
+
+  if (id != timer_id)
+    return;
+
+  /* Fetch program counter; s_highpc itself lies past the last bin.  */
+  pc = spu_read_srr0 () & ~3;
+  if (pc < s_lowpc || pc >= s_highpc)
+    return;
+  /* Bins are relative to s_lowpc, the low_pc stored in the file.  */
+  pc -= s_lowpc;
+  offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr)
+             + 1 + sizeof (struct gmon_hist_hdr);
+
+  /* Read, increment (saturating) and write the counter.  */
+  if (pread (fd, &val, sizeof (val), offset) != sizeof (val))
+    {
+      perror ("can't read the histogram");
+      return;
+    }
+  if (val < USHRT_MAX)
+    ++val;
+  if (pwrite (fd, &val, sizeof (val), offset) != sizeof (val))
+    perror ("can't write the histogram");
+}
+
+/* Write the histogram tag and header to FD, then skip the stored bins.  */
+static void
+write_histogram (int fd)
+{
+  struct gmon_hist_hdr hist_hdr;
+  u_char tag = GMON_TAG_TIME_HIST;
+  hist_hdr.low_pc = s_lowpc;
+  hist_hdr.high_pc = s_highpc;
+  hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins.  */
+  hist_hdr.prof_rate = SAMPLE_INTERVAL; /* Hertz, as used by the timer.  */
+  strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
+  hist_hdr.dimen_abbrev = 's';
+  struct iovec iov[2] = { {&tag, sizeof (tag)},
+                          {&hist_hdr, sizeof (hist_hdr)} };
+  if (writev (fd, iov, 2) != sizeof (hist_hdr) + sizeof (tag))
+    perror ("can't write the histogram header");
+
+  /* The bins were written by __sample while the program ran; skip them.  */
+  if (lseek (fd, hist_size, SEEK_CUR) == -1)
+    perror ("can't skip the histogram data");
+}
+
+/* Walk every bucket of froms and write each arc on its chain to FD as a
+   GMON_TAG_CG_ARC tag byte followed by a packed struct rawarc.  */
+static void
+write_callgraph (int fd)
+{
+  const size_t bucket_bytes = HASHFRACTION * sizeof (*froms);
+  const int nbuckets = s_textsize / bucket_bytes;
+  u_char tag = GMON_TAG_CG_ARC;
+  struct rawarc arc;
+  struct iovec iov[2] = {
+    {&tag, sizeof (tag)},
+    {&arc, sizeof (arc)}
+  };
+  int bucket;
+  int idx;
+
+  for (bucket = 0; bucket < nbuckets; ++bucket)
+    {
+      /* froms[bucket] is the head of the chain; index 0 terminates it, so
+         an empty bucket is skipped without a separate test.  */
+      for (idx = froms[bucket]; idx != 0; idx = tos[idx].link)
+        {
+          arc.raw_frompc = s_lowpc + bucket * bucket_bytes;
+          arc.raw_selfpc = tos[idx].selfpc;
+          arc.raw_count = tos[idx].count;
+          if (writev (fd, iov, 2) != sizeof (tag) + sizeof (arc))
+            perror ("can't write the callgraph");
+        }
+    }
+}
+
+/* atexit handler: stop the sampling timer and complete gmon.out by writing
+   the file header, the histogram header and the call graph.  */
+void
+__mcleanup (void)
+{
+  struct gmon_hdr ghdr;
+
+  /* Disable sampling.  */
+  spu_timer_stop (timer_id);
+  spu_timer_free (timer_id);
+  spu_clock_stop ();
+
+  /* Jump to the beginning of the gmon.out file.  */
+  if (lseek (fd, 0, SEEK_SET) == -1)
+    {
+      perror ("Cannot seek to the beginning of the gmon.out file.");
+      close (fd);
+      return;
+    }
+
+  /* Write the gmon.out header; a short write is as fatal as an error.  */
+  memset (&ghdr, '\0', sizeof (struct gmon_hdr));
+  memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie));
+  ghdr.version = GMON_VERSION;
+  if (write (fd, &ghdr, sizeof (ghdr)) != sizeof (ghdr))
+    {
+      perror ("Cannot write the gmon header to the gmon.out file.");
+      close (fd);
+      return;
+    }
+
+  /* Write the histogram header (the bins are in the file) and the arcs.  */
+  write_histogram (fd);
+  write_callgraph (fd);
+
+  close (fd);
+}
+
+/* Set up the call graph tables, the gmon.out file and the sampling timer.
+   Nothing is recorded by __mcount_internal unless both tables exist.  */
+void
+__monstartup (void)
+{
+  const size_t grain = HISTFRACTION * sizeof (HISTCOUNTER);
+  uintptr_t lowpc = ROUNDDOWN ((uintptr_t) & _start, grain);
+  uintptr_t highpc = ROUNDUP ((uintptr_t) & _etext, grain);
+  unsigned long textsize = highpc - lowpc;
+
+  /* Allocate froms.  */
+  froms = malloc_ea (textsize / HASHFRACTION);
+  if (froms == NULL)
+    {
+      fprintf (stderr, "Cannot allocate ea memory for the froms array.\n");
+      return;
+    }
+  memset_ea (froms, 0, textsize / HASHFRACTION);
+
+  /* Determine tolimit, then allocate tos.  */
+  tolimit = textsize * ARCDENSITY / 100;
+  if (tolimit < MINARCS)
+    tolimit = MINARCS;
+  tos = malloc_ea (tolimit * sizeof (struct tostruct));
+  if (tos == NULL)
+    {
+      fprintf (stderr, "Cannot allocate ea memory for the tos array.\n");
+      return;
+    }
+  memset_ea (tos, 0, tolimit * sizeof (struct tostruct));
+
+  /* Publish the pc range last; until then __mcount_internal ignores calls.  */
+  s_lowpc = lowpc;
+  s_highpc = highpc;
+  s_textsize = textsize;
+  hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER);
+
+  /* Open the gmon.out file.  */
+  fd = open ("gmon.out", O_RDWR | O_CREAT | O_TRUNC, 0644);
+  if (fd == -1)
+    {
+      perror ("can't open gmon.out file");
+      return;
+    }
+  /* Truncate the file up to the size where the histogram fits in.  */
+  if (ftruncate (fd, sizeof (struct gmon_hdr) + 1
+                     + sizeof (struct gmon_hist_hdr) + hist_size) == -1)
+    perror ("can't truncate the gmon.out file");
+
+  /* Start the histogram sampler.  */
+  spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih);
+  timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, __sample);
+  spu_clock_start ();
+  spu_timer_start (timer_id);
+  atexit (__mcleanup);
+}
+
+/* Count one traversal of the call graph arc that leads from FROMPC, an
+   address in the caller, to SELFPC, the function just entered.  */
+void
+__mcount_internal (uintptr_t frompc, uintptr_t selfpc)
+{
+  unsigned int mach_stat;
+  __ea unsigned short *frompcindex;
+  unsigned short toindex;
+  __ea struct tostruct *top;
+  __ea struct tostruct *prevtop;
+
+  /* Save current state and disable interrupts.  */
+  mach_stat = spu_readch(SPU_RdMachStat);
+  spu_idisable ();
+
+  /* Sanity checks: both tables must exist and frompc must lie within
+     [s_lowpc, s_highpc); s_highpc itself would index past froms.  */
+  if (froms == NULL || tos == NULL
+      || frompc < s_lowpc || frompc >= s_highpc)
+    goto done;
+  frompc -= s_lowpc;
+
+  /* frompc indexes into the froms array; the value at that position indexes
+     into the tos array.  */
+  frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))];
+  toindex = *frompcindex;
+  if (toindex == 0)
+    {
+      /* First arc from this bucket; tos[0].link counts the used entries.  */
+      toindex = ++tos[0].link;
+      /* Sanity check.  */
+      if (toindex >= tolimit)
+        {
+          --tos[0].link;
+          goto done;
+        }
+      /* Save the index in froms for the next time we traverse this arc.  */
+      *frompcindex = toindex;
+      top = &tos[toindex];
+      /* Sets the address of the function just entered.  */
+      top->selfpc = selfpc;
+      top->count = 1;
+      top->link = 0;
+      goto done;
+    }
+
+  /* toindex points to a tostruct.  */
+  top = &tos[toindex];
+  if (top->selfpc == selfpc)
+    {
+      /* The arc is at front of the chain. This is the most common case.  */
+      top->count++;
+      goto done;
+    }
+
+  /* top->selfpc != selfpc
+     The head of the chain belongs to a different function, i.e. this
+     bucket has already recorded a call to another selfpc.  Walk the chain
+     to find the arc for selfpc.  */
+  for (;;)
+    {
+      if (top->link == 0)
+        {
+          /* We are at the end of the chain and selfpc was not found. Thus we
+             create a new tostruct and link it to the head of the chain.  */
+          toindex = ++tos[0].link;
+          /* Sanity check.  */
+          if (toindex >= tolimit)
+            {
+              --tos[0].link;
+              goto done;
+            }
+          top = &tos[toindex];
+          top->selfpc = selfpc;
+          top->count = 1;
+          /* Link back to the old tos entry.  */
+          top->link = *frompcindex;
+          /* Store a link to the new top in the froms array which makes the
+             current tos head of the chain.  */
+          *frompcindex = toindex;
+          goto done;
+        }
+      else
+        {
+          /* Otherwise check the next arc on the chain.  */
+          prevtop = top;
+          top = &tos[top->link];
+          if (top->selfpc == selfpc)
+            {
+              /* selfpc matches; increment its count.  */
+              top->count++;
+              /* Move it to the head of the chain.  */
+              /* Remember the index of the matching tos entry.  */
+              toindex = prevtop->link;
+              /* Unlink the matching entry from its predecessor.  */
+              prevtop->link = top->link;
+              /* Link back to the old tos entry.  */
+              top->link = *frompcindex;
+              /* Store a link to the new top in the froms array which makes
+                 the current tos head of the chain.  */
+              *frompcindex = toindex;
+              goto done;
+            }
+        }
+    }
+done:
+  /* Enable interrupts if necessary.  */
+  if (__builtin_expect (mach_stat & 1, 0))
+    spu_ienable ();
+}
Index: src/newlib/libc/machine/spu/spu-mcount.S
===================================================================
--- /dev/null
+++ src/newlib/libc/machine/spu/spu-mcount.S
@@ -0,0 +1,93 @@
+/*
+(C) Copyright IBM Corp. 2008
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of IBM nor the names of its contributors may be
+used to endorse or promote products derived from this software without
+specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+Author: Ken Werner <ken.werner@de.ibm.com>
+*/
+
+/* _mcount extracts the address of the function just entered and the address
+    of the caller of that function and then calls __mcount_internal. The
+    prologue calls mcount without saving any registers. The return address is
+    stored in $75. The _mcount function has to:
+     - create a new stack frame
+     - save registers $2 to $75 on the stack
+     - copy the two addresses ($0 and $75) into the argument registers $3 and $4
+     - call __mcount_internal
+     - restore registers
+     - return to $75  */
+
+/* The following two convenience macros assist in coding the saving and
+   restoring of registers.  Register $n uses the quadword at n*16($SP).
+
+   saveregs     first, last    Saves registers from first to the last.
+   restoreregs  first, last    Restores registers from last down to first.
+
+   Note:       first must be less than or equal to last.  */
+.macro  saveregs        first, last
+        stqd            $\first, \first*16($SP)
+.if     \last-\first
+        saveregs        "(\first+1)",\last
+.endif
+.endm
+
+.macro  restoreregs     first, last
+        lqd             $\last, \last*16($SP)
+.if     \last-\first
+        restoreregs     \first,"(\last-1)"
+.endif
+.endm
+
+/* _mcount needs to be resident since the overlay manager uses the scratch
+   registers too.  */
+.text
+  .align 3 /* 8 byte alignment.  */
+  .global _mcount
+  .type _mcount, @function
+
+_mcount:
+  stqd $lr, 16($sp)    /* Save link register in the callers stack frame.  */
+  stqd $sp, -1216($sp) /* Store back pointer (old $sp) in the new frame.  */
+  il   $lr, -1216      /* Push a new stack frame.  */
+  a    $sp, $sp, $lr   /* Frame size: 16 * (74 + 2) = 1216.  */
+
+  /* Save registers $2 to $75 on the stack.  */
+  saveregs 2, 75
+
+  /* Bring the __mcount_internal arguments in place.  */
+  lqd $3, 1232($sp) /* frompc: $lr as saved above, at 1216 + 16.  */
+  ori $4, $75, 0    /* selfpc (the gcc prologue puts "brsl $75, _mcount" in
+                       front of every function).  */
+  brsl  $lr, __mcount_internal
+
+  /* Restore register $2 to $75 from the stack.  */
+  restoreregs 2, 75
+
+  il   $lr, 1216
+  a    $sp, $sp, $lr   /* Pop the stack frame.  */
+  lqd  $lr, 16($sp)    /* Restore link register.  */
+  bi   $75             /* Branch to the called function.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]