This is the mail archive of the ecos-patches@sources.redhat.com mailing list for the eCos project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

major update to gprof support


This is a major update, close to a rewrite, of the gprof profiling
support. Among the main changes are:

1) support for callgraph info. This requires a per-architecture
   implementation of mcount(). I'll be providing this for x86 and
   synth in a following patch.

2) full documentation

3) there are now two ways of getting the data to the host. You can
   still use tftp, if the target has an ethernet device and your
   configuration includes TCP/IP. Alternatively you can use a gdb
   macro to dump the data to a file on the host. This macro can be
   found in host/gprof.gdb

Bart

Index: services/profile/gprof/current/ChangeLog
===================================================================
RCS file: /cvs/ecos/ecos/packages/services/profile/gprof/current/ChangeLog,v
retrieving revision 1.6
diff -u -r1.6 ChangeLog
--- services/profile/gprof/current/ChangeLog	27 Apr 2003 21:55:13 -0000	1.6
+++ services/profile/gprof/current/ChangeLog	21 Mar 2005 13:33:30 -0000
@@ -1,3 +1,25 @@
+2005-02-18  Bart Veer  <bartv@ecoscentric.com>
+
+	* doc/profile.sgml, misc/gprof.gdb, host/gprof.gdb,
+	  host/acinclude.m4, host/aclocal.m4, host/configure,
+	  host/configure.in, host/Makefile.am, host/Makefile.in:
+	  Move gprof.gdb to the host subdirectory, and autoconfiscate it.
+
+2003-09-29  Bart Veer  <bartv@ecoscentric.com>
+
+	* src/profile.c (profile_tftp_read_next): fix off-by-one error
+	which lost the first mcount record.
+
+	* doc/profile.sgml: fix typo
+
+	* cdl/profile_gprof.cdl: clarify description of the tftp port
+
+2003-09-26  Bart Veer  <bartv@ecoscentric.com>
+
+	* cdl/profile_gprof.cdl, include/gmon.out.h, include/profile.h,
+ 	src/profile.c, misc/gprof.gdb, doc/gprof.sgml
+	Major update, including adding call graph support.
+
 2003-04-27  Jonathan Larmour  <jifl@eCosCentric.com>
 
 	* src/profile.c (profile_on): Clear profile before use. Reported
Index: services/profile/gprof/current/cdl/profile_gprof.cdl
===================================================================
RCS file: /cvs/ecos/ecos/packages/services/profile/gprof/current/cdl/profile_gprof.cdl,v
retrieving revision 1.3
diff -u -r1.3 profile_gprof.cdl
--- services/profile/gprof/current/cdl/profile_gprof.cdl	24 Feb 2003 14:35:05 -0000	1.3
+++ services/profile/gprof/current/cdl/profile_gprof.cdl	21 Mar 2005 13:33:31 -0000
@@ -42,7 +43,7 @@
 #
 # Author(s):      Gary Thomas
 # Original data:  Gary Thomas
-# Contributors:
+# Contributors:	  Bart Veer
 # Date:           2002-11-14
 #
 #####DESCRIPTIONEND####
@@ -50,45 +51,144 @@
 # ====================================================================
 
 cdl_package CYGPKG_PROFILE_GPROF {
-    display       "Gather runtime profile statistics"
-    requires      CYGPKG_NET
-    requires      CYGPKG_NET_TFTP
-    requires      CYGPKG_MEMALLOC   
-    requires      { CYGINT_PROFILE_HAL_TIMER != 0 }
-    doc           ref/services-profile-gprof.html
+    display     "Runtime profile statistics"
+    requires    CYGPKG_MEMALLOC   
+    requires    { CYGINT_PROFILE_HAL_TIMER != 0 }
+
+    # The profiling package itself must be built without -pg.
+    # Otherwise you can end up with infinite recursion in
+    # mcount().
+    requires	{ is_substr(CYGPKG_PROFILE_GPROF_CFLAGS_REMOVE, " -pg ") }
+    doc         ref/services-profile-gprof.html
 
-    include_dir   cyg/profile
-    
-    compile profile.c
+    include_dir cyg/profile
+    compile	profile.c
 
     description "
         This package enables runtime profiling of an application.
     The actual profile collection must be turned on by the application,
-    once it has been initialized.  The data collected is exported via
-    a TFTP connection to the target."
+    once it has been initialized.  The data collected on the target can
+    be extracted using either a gdb macro or tftp."
 
-    cdl_interface     CYGINT_PROFILE_HAL_TIMER {
+    cdl_interface CYGINT_PROFILE_HAL_TIMER {
         display   "High resolution timer, implemented by platform"
         description "
 	  Profiling requires access to a high resolution timer which
           is platform dependent."
     }
 
-    cdl_option CYGNUM_PROFILE_TFTP_PORT {
-        display       "Port used by TFTP server for profile data"
-        flavor        data
-        default_value 0
-        description   "
-            This option sets the port number to use for the TFTP server
-            which exports the profiling data.  A value of 0 will set
-            the port to be the IETF standard port of 69/udp."
+    cdl_interface CYGINT_PROFILE_HAL_MCOUNT {
+	display   "HAL supports call-graph profiling"
+	description "
+          On some platforms the HAL may support an mcount() call. This
+          allows the profiling package to gather additional information,
+          used by gprof to determine a call graph."
     }
 
-    cdl_option CYGPKG_PROFILE_TESTS {
-        display "Profiling tests"
-        flavor  data   
-        no_define      
-        calculated { "tests/profile.c" }
+    cdl_component CYGPKG_PROFILE_CALLGRAPH {
+	display   	"Collect call-graph profiling data"
+	active_if 	CYGINT_PROFILE_HAL_MCOUNT
+	default_value	1
+	description "
+          There are several different types of profiling information
+          that can be gathered. One type is used by gprof to determine
+          the application callgraph. This is not required but can be used
+          to gain additional insight in the application's behaviour,
+          at the cost of increased overheads in gathering the data. It
+          requires that application code be compiled with the -pg flag,
+          and it also requires HAL support that may not be available
+          on all targets."
+
+	cdl_option CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT {
+	    display		"Control size of call-graph hash table"
+	    flavor		data
+	    default_value	8
+	    legal_values	1 to 16
+	    description "
+              The call graph code uses a hash table to map program counter
+              addresses into an array index. The hash function is a simple
+              shift operation, so the size of the hash table depends on
+              the amount of code being profiled and the shift size. With
+              a default value of 8 the hash table needs one 32-bit integer
+              for every 256 bytes of code. Increasing the shift size reduces
+              memory requirements at the cost of extra run-time processing
+              to handle hash collisions."
+	}
+
+	cdl_option CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE {
+	    display		"Control size of arc table"
+	    flavor		data
+	    default_value	5
+	    legal_values	1 to 100
+	    description "
+              Determining the call graph requires keeping track of where
+              every function is called from, in an arc table. The size of
+              this table is calculated as a percentage of the amount of
+              code being profiled. The default value of 5% is appropriate
+              for typical applications."
+	}
+    }
+    
+    cdl_component CYGPKG_PROFILE_TFTP {
+	display	    	"Allow the profile data to be collected by tftp"
+	default_value	1
+	active_if	CYGPKG_NET
+	requires    	CYGPKG_NET
+	requires    	CYGPKG_NET_TFTP
+	description "
+          There are two supported ways of transferring the profile data
+          from the target to the host. The first is to use a gdb macro
+          inside a gdb session, dumping the data directly from memory
+          to a file gmon.out. Usually this is the most convenient
+          approach. However it does require a gdb session and it can
+          be slow when debugging over e.g. a serial line. Alternatively
+          the profile package can provide a tftp daemon, allowing other
+          machines to retrieve the data over ethernet."
+	
+	cdl_option CYGNUM_PROFILE_TFTP_PORT {
+	    display       "Port used by TFTP server for profile data"
+	    flavor        data
+	    default_value 69
+	    description   "
+                By default the profiling package will create a tftp daemon
+                that listens on port 69, the standard port for this service.
+                If there are other packages that start a tftp daemon, or
+                if application code wants to provide this service, then
+                there is a conflict: each daemon requires a unique port.
+                This configuration option can be used to change the port
+                used by the profiling tftp daemon."
+        }
+    }
+    
+    cdl_component CYGPKG_PROFILE_GPROF_OPTIONS {
+        display "Profiling build options"
+        flavor  none
+        description   "
+	    Package specific build options including control over
+	    compiler flags used only in building this package,
+	    and details of which tests are built."
+
+
+        cdl_option CYGPKG_PROFILE_GPROF_CFLAGS_ADD {
+            display "Additional compiler flags"
+            flavor  data
+            no_define
+            default_value { "" }
+            description   "
+                This option modifies the set of compiler flags for
+                building this package. These flags are used in addition
+                to the set of global flags."
+        }
+
+        cdl_option CYGPKG_PROFILE_GPROF_CFLAGS_REMOVE {
+            display "Suppressed compiler flags"
+            flavor  data
+            no_define
+            default_value { "-pg" }
+            description   "
+                This option modifies the set of compiler flags for
+                building this package. These flags are removed from
+                the set of global flags if present."
+        }
     }
 }
-
Index: services/profile/gprof/current/doc/cray.xbm
===================================================================
RCS file: services/profile/gprof/current/doc/cray.xbm
diff -N services/profile/gprof/current/doc/cray.xbm
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ services/profile/gprof/current/doc/cray.xbm	21 Mar 2005 13:33:31 -0000
@@ -0,0 +1,38 @@
+#define noname_width 64
+#define noname_height 64
+static char noname_bits[] = {
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xe7,0xe3,0x1c,
+ 0x00,0x00,0x00,0x00,0xbc,0xff,0xff,0x1f,0x00,0x00,0x00,0x00,0xfc,0xff,0xff,
+ 0xff,0x03,0x00,0x00,0x00,0xbc,0x61,0x30,0xf8,0x03,0x00,0x00,0x00,0xb4,0x61,
+ 0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,
+ 0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,
+ 0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,
+ 0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,
+ 0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,
+ 0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,
+ 0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,
+ 0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,
+ 0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,
+ 0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,
+ 0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,
+ 0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,
+ 0x00,0x00,0xb4,0x61,0x30,0x18,0x02,0x00,0x00,0x00,0xb4,0x61,0x30,0x18,0x02,
+ 0x00,0x00,0xe0,0xb7,0x61,0x30,0x18,0x02,0x00,0x00,0xfc,0xb7,0x61,0x30,0x18,
+ 0x0e,0x00,0x80,0xff,0xb7,0x61,0x30,0x18,0x7e,0x00,0xe0,0xff,0xb7,0x61,0x30,
+ 0x18,0xfe,0x03,0xf0,0xff,0xbf,0x61,0x30,0xf8,0xff,0x0f,0xf8,0xff,0xbf,0x61,
+ 0x30,0xf8,0xff,0x3f,0xfc,0xff,0xff,0x63,0x30,0x18,0x00,0x20,0xfc,0xff,0xff,
+ 0xff,0x30,0x1e,0x00,0x20,0xfc,0xff,0xff,0xff,0xff,0x3f,0x00,0x20,0xf4,0xff,
+ 0xff,0xff,0xff,0x7f,0x00,0x20,0xe4,0xff,0xff,0xff,0xff,0xff,0x00,0x20,0x44,
+ 0xff,0xff,0xff,0xff,0xff,0x01,0x20,0x44,0xfc,0xff,0xff,0xff,0xff,0x03,0x20,
+ 0x44,0xf8,0xff,0xff,0xff,0xff,0x07,0x20,0x44,0x10,0xff,0xff,0xff,0xff,0x0f,
+ 0x20,0x44,0x10,0xe0,0xff,0xff,0x07,0x08,0x20,0x44,0x10,0x20,0x00,0x02,0x00,
+ 0x08,0x20,0x44,0x10,0x20,0x00,0x02,0x00,0xf8,0x3f,0x44,0x10,0x20,0x00,0x02,
+ 0x00,0xf8,0x3f,0x4c,0x10,0x20,0x00,0x02,0x00,0x08,0x00,0x58,0x10,0x20,0x00,
+ 0x02,0x00,0x08,0x00,0x70,0x10,0x20,0x00,0x02,0x00,0x08,0x00,0xe0,0x10,0x20,
+ 0x00,0x02,0x00,0x08,0x00,0xc0,0x13,0x20,0x00,0x02,0x00,0x08,0x00,0x80,0x1f,
+ 0x20,0x00,0x02,0x00,0x08,0x00,0x00,0xfe,0x21,0x00,0x02,0x00,0x08,0x00,0x00,
+ 0xf0,0xff,0x00,0x02,0xe0,0x0f,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0x0f,0x00,
+ 0x00,0x00,0x80,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+ 0x00,0x00};
Index: services/profile/gprof/current/doc/gprofrun.fig
===================================================================
RCS file: services/profile/gprof/current/doc/gprofrun.fig
diff -N services/profile/gprof/current/doc/gprofrun.fig
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ services/profile/gprof/current/doc/gprofrun.fig	21 Mar 2005 13:33:32 -0000
@@ -0,0 +1,66 @@
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter  
+100.00
+Single
+-2
+1200 2
+0 32 #c6b797
+0 33 #eff8ff
+0 34 #dccba6
+0 35 #404040
+0 36 #808080
+0 37 #c0c0c0
+0 38 #e0e0e0
+0 39 #8e8f8e
+0 40 #aaaaaa
+0 41 #555555
+0 42 #bebebe
+0 43 #515151
+0 44 #e7e3e7
+0 45 #000049
+0 46 #797979
+0 47 #303430
+0 48 #414141
+0 49 #c7b696
+0 50 #8e8e8e
+0 51 #414541
+0 52 #8e8e8e
+6 300 1500 1500 2475
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
+	 300 2303 300 1500 1500 1500 1500 2188
+3 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 4
+	 1500 2188 1200 2131 700 2418 300 2303
+	 0.000 -1.000 -1.000 0.000
+-6
+6 4800 1500 6000 2475
+2 1 0 1 -1 -1 0 0 -1 0.000 0 0 -1 0 0 4
+	 4800 2303 4800 1500 6000 1500 6000 2188
+3 2 0 1 -1 -1 0 0 -1 0.000 0 0 0 4
+	 6000 2188 5700 2131 5200 2418 4800 2303
+	 0.000 -1.000 -1.000 0.000
+-6
+6 6600 1500 7800 2400
+2 3 0 1 0 0 0 0 -1 0.000 0 0 -1 0 0 5
+	 6600 1500 7800 1500 7800 2400 6600 2400 6600 1500
+-6
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 1500 1950 2100 1950
+2 5 0 2 0 -1 50 0 -1 0.000 0 0 -1 0 0 5
+	0 cray.xbm
+	 2100 1200 3956 1200 3956 2850 2100 2850 2100 1200
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 4200 1950 4800 1950
+2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
+	1 1 1.00 60.00 120.00
+	 6000 1950 6600 1950
+4 0 0 50 0 0 16 0.0000 4 165 495 600 1950 hello\001
+4 0 0 50 0 0 14 0.0000 4 180 795 4950 1950 gmon.out\001
+4 0 0 50 0 0 16 0.0000 4 225 540 6825 1950 gprof\001
+4 0 0 50 0 0 16 0.0000 4 165 915 2625 3225 hardware\001
+4 0 0 50 0 0 16 0.0000 4 165 1035 375 3225 executable\001
+4 0 0 50 0 0 16 0.0000 4 225 1350 4725 3225 profiling data\001
Index: services/profile/gprof/current/doc/gprofrun.png
===================================================================
RCS file: services/profile/gprof/current/doc/gprofrun.png
diff -N services/profile/gprof/current/doc/gprofrun.png
Binary files /dev/null and gprofrun.png differ
Index: services/profile/gprof/current/doc/profile.sgml
===================================================================
RCS file: /cvs/ecos/ecos/packages/services/profile/gprof/current/doc/profile.sgml,v
retrieving revision 1.2
diff -u -r1.2 profile.sgml
--- services/profile/gprof/current/doc/profile.sgml	15 Nov 2002 18:36:27 -0000	1.2
+++ services/profile/gprof/current/doc/profile.sgml	21 Mar 2005 13:33:37 -0000
@@ -1,98 +1,561 @@
-<PART ID="services-profile-gprof">
-<TITLE>Application profiling</TITLE>
-<PARTINTRO>
-<PARA>
-The profile_gprof package provides a mechanism to measure the
-runtime performance of an application.  This is done by gathering
-an execution histogram.
-</PARA>
-<para>
-When profiling is started on the target device, a 
-<function>TFTP</function> 
-server will be started
-which exports the single file
-<filename>PROFILE.DAT</filename>
-This analysis data can then be fetched
-by connecting to the target with a <function>TFTP</function> 
-client program
-and then be processed by the 
-<function>gprof</function>
-utility program.
-<note><title>NOTE</title>
-<para>
-Be sure and specify binary mode transfers for this data file,
-which may not be the default with on some <function>TFTP</function>
-client programs.
-</para>
-</note>
-<note><title>NOTE</title>
-<para>
-The port used for this <function>TFTP</function> server is
-configurable.  The default will be the IETF standard port
-of 69/UDP, but it may be changed to any UDP port via the
-<literal>CYGNUM_PROFILE_TFTP_PORT</literal> CDL option.
-</para>
-</note>
-</para>
-</PARTINTRO>
-<CHAPTER id="profile-functions">
-<TITLE>Profiling functions</TITLE>
-<SECT1 id="services-profile-api">
-<title> API </title>
-<para>
-In order for profile data to be gathered for an application, the
-program has to initiate the process.
-Once started, execution histogram data will be collected in a
-dynamic memory buffer.
-This data can be uploaded to a host using <emphasis>TFTP</emphasis>.
-A side effect of the upload of the data is that the histogram
-is reset.
-This is useful, especially for high resolution histograms, since
-the histogram data are collected as 16-bit counters which can be quickly
-saturated.
-For example, if the histogram is being collected at a rate of 10,000
-samples per second, a hot spot in the program could saturate after
-only 6.5 seconds.
-</para>
-<para> The API for the application profiling functions can be
-found in the file <filename>&lt;cyg/profile/profile.h&gt;</filename>.
-</para>
-<sect2 id="services-profile-api-profile-on">
-<title>profile_on</title>
-<para>
-This function is used to initiate the gathering of the
-runtime execution histogram data.
-</para>
-<programlisting>
-void profile_on(void *start, void *end, int bucket_size, int resolution);
-</programlisting>
-<para>
-Calling this function will initiate execution profiling.
-An execution histogram is collected at the rate of
-<parameter>resolution</parameter> times per second.
-The area between <parameter>start</parameter> and <parameter>end</parameter>
-will be divided up into a number of buckets, each representing 
-<parameter>bucket_size</parameter> 
-program bytes in length.  Using statistical sampling (via a high speed timer), when
-the program counter is found to be within the range 
-<parameter>start</parameter>..<parameter>end</parameter>, the appropriate
-bucket (histogram entry) will be incremented.
-</para>
-<para>
-The choice of <parameter>resolution</parameter> and <parameter>bucket_size</parameter>
-control how large the data gathered will be, as well as how much overhead is 
-encumbered for gathering the histogram.
-Smaller values for <parameter>bucket_size</parameter> will garner better
-results (<function>gprof</function> can more closely align the data with
-actual function names) at the expense of a larger data buffer.
-</para>
-<note><title>NOTE</title>
-<para>
-The value of <parameter>bucket_size</parameter> will be rounded up to a power of two.
-</para>
-</note>
-</sect2>
-</SECT1>
-</CHAPTER>
-</PART>
+<!-- DOCTYPE part  PUBLIC "-//OASIS//DTD DocBook V3.1//EN" -->
 
+<!-- {{{ Banner                         -->
+
+<!-- =============================================================== -->
+<!--                                                                 -->
+<!--     profile.sgml                                                -->
+<!--                                                                 -->
+<!--     gprof profiling documentation.                              -->
+<!--                                                                 -->
+<!-- =============================================================== -->
+<!-- ####COPYRIGHTBEGIN####                                          -->
+<!--                                                                 -->
+<!-- =============================================================== -->
+<!-- Copyright (C) 2003, 2005 eCosCentric Ltd.                       -->
+<!-- This material may be distributed only subject to the terms      -->
+<!-- and conditions set forth in the Open Publication License, v1.0  -->
+<!-- or later (the latest version is presently available at          -->
+<!-- http://www.opencontent.org/openpub/)                            -->
+<!-- Distribution of the work or derivative of the work in any       -->
+<!-- standard (paper) book form is prohibited unless prior           -->
+<!-- permission obtained from the copyright holder                   -->
+<!-- =============================================================== -->
+<!--                                                                 -->      
+<!-- ####COPYRIGHTEND####                                            -->
+<!-- =============================================================== -->
+<!-- =============================================================== -->
+<!-- #####DESCRIPTIONBEGIN####                                       -->
+<!--                                                                 -->
+<!-- Author(s):   bartv                                              -->
+<!-- Date:        2003/09/01                                         -->
+<!-- Version:     0.01                                               -->
+<!--                                                                 -->
+<!-- ####DESCRIPTIONEND####                                          -->
+<!-- =============================================================== -->
+
+<!-- }}} -->
+
+<part id="services-profile-gprof"><title>gprof Profiling Support</title> 
+
+<refentry id="gprof">
+  <refmeta>
+    <refentrytitle>Profiling</refentrytitle>
+  </refmeta>
+  <refnamediv>
+    <refname><varname>CYGPKG_PROFILE_GPROF</varname></refname>
+    <refpurpose>eCos Support for the gprof profiling tool</refpurpose>
+  </refnamediv>
+
+  <refsect1 id="gprof-description"><title>Description</title>
+    <para>
+The GNU gprof tool provides profiling support. After a test run it can
+be used to find where the application spent most of its time, and that
+information can then be used to guide optimization effort. Typical
+gprof output will look something like this:
+    </para>
+    <screen>
+Each sample counts as 0.003003 seconds.
+  %   cumulative   self              self     total           
+ time   seconds   seconds    calls  us/call  us/call  name    
+ 14.15      1.45     1.45   120000    12.05    12.05  Proc_7
+ 11.55      2.63     1.18   120000     9.84     9.84  Func_1
+  8.04      3.45     0.82                             main
+  7.60      4.22     0.78    40000    19.41    86.75  Proc_1
+  6.89      4.93     0.70    40000    17.60    28.99  Proc_6
+  6.77      5.62     0.69    40000    17.31    27.14  Func_2
+  6.62      6.30     0.68    40000    16.92    16.92  Proc_8
+  5.94      6.90     0.61                             strcmp
+  5.58      7.47     0.57    40000    14.26    26.31  Proc_3
+  5.01      7.99     0.51    40000    12.79    12.79  Proc_4
+  4.46      8.44     0.46    40000    11.39    11.39  Func_3
+  3.68      8.82     0.38    40000     9.40     9.40  Proc_5
+  3.32      9.16     0.34    40000     8.48     8.48  Proc_2
+&hellip;
+    </screen>
+    <para>
+This output is known as the flat profile. The data is obtained by
+having a hardware timer generate regular interrupts. The interrupt
+handler stores the program counter of the interrupted code. gprof
+performs a statistical analysis of the resulting data and works out
+where the time was spent.
+    </para>
+    <para>
+gprof can also provide information about the call graph, for example:
+    </para>
+    <screen>
+index % time    self  children    called     name
+&hellip;
+                0.78    2.69   40000/40000       main [1]
+[2]     34.0    0.78    2.69   40000         Proc_1 [2]
+                0.70    0.46   40000/40000       Proc_6 [5]
+                0.57    0.48   40000/40000       Proc_3 [7]
+                0.48    0.00   40000/120000      Proc_7 [3]
+    </screen>
+    <para>
+This shows that function <function>Proc_1</function> was called only
+from <function>main</function>, and <function>Proc_1</function> in
+turn called three other functions. Callgraph information is obtained
+only if the application code is compiled with the <option>-pg</option>
+option. This causes the compiler to insert extra code into each
+compiled function, specifically a call to <function>mcount</function>,
+and the implementation of <function>mcount</function> stores away the
+data for subsequent processing by gprof.
+    </para>
+    <caution><para>
+There are a number of reasons why the output will not be 100%
+accurate. Collecting the flat profile typically involves timer
+interrupts so any code that runs with interrupts disabled will not
+appear. The current host-side gprof implementation maps program
+counter values onto symbols using a bin mechanism. When a bin spans
+the end of one function and the start of the next, gprof may report the
+wrong function. This is especially likely on architectures with
+single-byte instructions such as an x86. When examining gprof output
+it may prove useful to look at a linker map or program disassembly.
+    </para></caution>
+    <para>
+The eCos profiling package requires some additional support from the
+HAL packages, and this may not be available on all platforms:
+    </para>
+    <orderedlist>
+      <listitem><para>
+There must be an implementation of the profiling timer. Typically this
+is provided by the variant or platform HAL using one of the hardware
+timers. If there is no implementation then the configuration tools
+will report an unresolved conflict related to
+<varname>CYGINT_PROFILE_HAL_TIMER</varname> and profiling is not
+possible. Some implementations overload the system clock, which means
+that profiling is only possible in configurations containing the eCos
+kernel and <varname>CYGVAR_KERNEL_COUNTERS_CLOCK</varname>.
+      </para></listitem>
+      <listitem><para>
+There should be a hardware-specific implementation of
+<function>mcount</function>, which in turn will call the generic
+functionality provided by this package. It is still possible to do
+some profiling without <function>mcount</function> but the resulting
+data will be less useful. To check whether or not
+<function>mcount</function> is available, look at the current value of
+the CDL interface <varname>CYGINT_PROFILE_HAL_MCOUNT</varname> in the
+graphical configuration tool or in an <filename>ecos.ecc</filename>
+save file.
+      </para></listitem>
+    </orderedlist>
+    <para>
+This document only describes the eCos profiling support. Full details
+of gprof functionality and output formats can be found in the gprof
+documentation. However it should be noted that that documentation
+describes some functionality which cannot be implemented using current
+versions of the gcc compiler: the section on annotated source listings
+is not relevant, and neither are associated command line options like
+<option>-A</option> and <option>-y</option>.
+    </para>
+  </refsect1>
+
+  <refsect1 id="gprof-process"><title>Building Applications for Profiling</title>
+    <para>
+To perform application profiling the gprof package
+<varname>CYGPKG_PROFILE_GPROF</varname> must first be added to the
+eCos configuration. On the command line this can be achieved using:
+    </para>
+    <screen>
+$ ecosconfig add profile_gprof
+$ ecosconfig tree
+$ make
+    </screen>
+    <para>
+Alternatively the same steps can be performed using the graphical
+configuration tool.
+    </para>
+    <para>
+If the HAL packages implement <function>mcount</function> for the
+target platform then usually application code should be compiled with
+<option>-pg</option>. Optionally eCos itself can also be compiled with
+this option by modifying the configuration option
+<varname>CYGBLD_GLOBAL_CFLAGS</varname>. Compiling with
+<option>-pg</option> is optional but gives more complete profiling
+data.
+    </para>
+    <note><para>
+The profiling package itself must not be compiled with
+<option>-pg</option> because that could lead to infinite recursion
+when doing <function>mcount</function> processing. This is handled
+automatically by the package's CDL.
+    </para></note>
+    <para>
+Profiling does not happen automatically. Instead it must be started
+explicitly by the application, using a call to
+<function>profile_on</function>. A typical example would be:
+    </para>
+    <programlisting>
+#include &lt;pkgconf/system.h&gt;
+#ifdef CYGPKG_PROFILE_GPROF
+# include &lt;cyg/profile/profile.h&gt;
+#endif
+&hellip;
+int
+main(int argc, char** argv)
+{
+    &hellip;
+#ifdef CYGPKG_PROFILE_GPROF
+    {
+        extern char _stext[], _etext[];
+        profile_on(_stext, _etext, 16, 3500);
+    }
+#endif
+    &hellip;
+}
+    </programlisting>
+    <para>
+The <function>profile_on</function> function takes four arguments:
+    </para>
+    <variablelist>
+      <varlistentry>
+        <term><literal>start address</literal></term>
+        <term><literal>end address</literal></term>
+        <listitem><para>
+These specify the range of addresses that will be profiled. Usually
+profiling should cover the entire application. On most targets the
+linker script will export symbols <varname>_stext</varname> and
+<varname>_etext</varname> corresponding to the beginning and end of
+code, so these can be used as the addresses. It is possible to
+perform profiling on a subset of the code if that code is
+located contiguously in memory.
+        </para></listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><literal>bucket size</literal></term>
+        <listitem><para>
+<function>profile_on</function> divides the range of addresses into a
+number of buckets of this size. It then allocates a single array of
+16-bit counters with one entry for each bucket. When the profiling
+timer interrupts, the interrupt handler will examine the program
+counter of the interrupted code and, assuming it is within the range
+of valid addresses, find the containing bucket and increment the
+appropriate counter.
+        </para>
+        <para>
+The size of the array of counters is determined by the range of addresses
+being profiled and by the bucket size. For a bucket size of 16, one
+counter is needed for every 16 bytes of code. For an application with
+say 512K of code that means dynamically allocating a 64K array. If the
+target hardware is low on memory then this may be unacceptable, and
+the requirements can be reduced by increasing the bucket size. However
+this will affect the accuracy of the results and gprof is more likely
+to report the wrong function. It also increases the risk of a counter
+overflow.
+        </para>
+        <para>
+For the sake of run-time efficiency the bucket size must be a power of
+2, and it will be adjusted if necessary.
+        </para></listitem>
+      </varlistentry>
+      <varlistentry>
+        <term><literal>time interval</literal></term>
+        <listitem><para>
+The final argument specifies the interval between profile timer
+interrupts, in units of microseconds. Increasing the interrupt
+frequency gives more accurate profiling results, but at the cost of
+higher run-time overheads and a greater risk of a counter overflow.
+The HAL package may modify this interval because of hardware
+restrictions, and the generated profile data will contain the actual
+interval that was used. Usually it is a good idea to use an interval
+that is not a simple fraction of the system clock period, which is
+typically 10000 microseconds. Otherwise there is a risk that the
+profiling timer will disproportionately sample code that runs only in
+response to the system clock.
+        </para></listitem>
+      </varlistentry>
+    </variablelist>
+    <para>
+If the eCos configuration includes a TCP/IP stack and if a tftp daemon
+will be used to <link linkend="gprof-extract">extract</link> the data
+from the target then the call to <function>profile_on</function>
+should happen after the network is up. <function>profile_on</function>
+will attempt to start a tftp daemon thread, and this will fail if
+networking has not yet been enabled.
+    </para>
+    <programlisting>
+int
+main(int argc, char** argv)
+{
+    &hellip;
+    init_all_network_interfaces();
+    &hellip;
+#ifdef CYGPKG_PROFILE_GPROF
+    {
+        extern char _stext[], _etext[];
+        profile_on(_stext, _etext, 16, 3000);
+    }
+#endif
+    &hellip;
+}
+    </programlisting>
+    <para>
+The application can then be linked and run as usual.
+    </para>
+    <informalfigure PgWide=1>
+      <mediaobject>
+        <imageobject>
+          <imagedata fileref="gprofrun.png" Scalefit=1 Align="Center">
+        </imageobject>
+      </mediaobject>
+    </informalfigure>
+    <para>
+When gprof is used for native development rather than for embedded
+targets the profiling data will automatically be written out to a file
+<filename>gmon.out</filename> when the program exits. This is not
+possible on an embedded target because the code has no direct access
+to the host's file system. Instead the <filename>gmon.out</filename>
+file has to be <link linkend="gprof-extract">extracted</link> from
+the target as described below. gprof can then be invoked normally:
+    </para>
+    <screen>
+$ gprof dhrystone
+Flat profile:
+ 
+Each sample counts as 0.003003 seconds.
+  %   cumulative   self              self     total
+ time   seconds   seconds    calls  us/call  us/call  name
+ 14.15      1.45     1.45   120000    12.05    12.05  Proc_7
+ 11.55      2.63     1.18   120000     9.84     9.84  Func_1
+  8.04      3.45     0.82                             main
+&hellip;
+    </screen>
+    <para>
+If <filename>gmon.out</filename> does not contain call graph data,
+either because <function>mcount</function> is not supported or because
+this functionality was explicitly disabled, then the
+<option>--no-graph</option> option must be used.
+    </para>
+    <screen>
+$ gprof --no-graph dhrystone
+Flat profile:
+ 
+Each sample counts as 0.003003 seconds.
+  %   cumulative   self              self     total
+ time   seconds   seconds    calls  us/call  us/call  name
+ 14.15      1.45     1.45                             Proc_7
+ 11.55      2.63     1.18                             Func_1
+  8.04      3.45     0.82                             main
+&hellip;
+    </screen>
+  </refsect1>
+
+  <refsect1 id="gprof-extract"><title>Extracting the Data</title>
+    <para>
+By default gprof expects to find the profiling data in a file
+<filename>gmon.out</filename> in the current directory. This package
+provides two ways of extracting data: a gdb macro or tftp transfers.
+Using tftp is faster but requires a TCP/IP stack on the target. It
+also consumes some additional target-side resources, including an
+extra tftp daemon thread and its stack. The gdb macro can be used even
+when the eCos configuration does not include a TCP/IP stack. However
+it is much slower, typically taking tens of seconds to retrieve all
+the data for a non-trivial application.
+    </para>
+    <para>
+The gdb macro is called <command>gprof_dump</command>, and can be
+found in the file <filename>gprof.gdb</filename> in the <filename
+class="directory">host</filename> subdirectory of this package. A
+typical way of using this macro is:
+    </para>
+    <screen>
+(gdb) source &lt;repo&gt;/services/profile/gprof/&lt;version&gt;/host/gprof.gdb
+(gdb) gprof_dump
+    </screen>
+    <para>
+This macro can be used any time after the call to
+<function>profile_on</function>. It will store the profiling data
+accumulated so far to the file <filename>gmon.out</filename> in the
+current directory, and then reset all counts. gprof uses only a 16-bit
+counter for every bucket of code. These counters can easily saturate
+if the profiling run goes on for a long time, or if the application
+code spends nearly all its time in just a few tight inner loops. The
+counters will not actually wrap around back to zero, instead they will
+stick at 0xFFFF, but this will still affect the accuracy of the gprof
+output. Hence it is desirable to reset the counters once the profiling
+data has been extracted.
+    </para>
+    <para>
+The file <filename>gprof.gdb</filename> contains two other macros
+which may prove useful. <command>gprof_fetch</command> extracts the
+profiling data and generates the file <filename>gmon.out</filename>,
+but does not reset the counters. <command>gprof_reset</command> only
+resets the counters, without extracting the data or overwriting
+<filename>gmon.out</filename>.
+    </para>
+    <para>
+If the configuration includes a TCP/IP stack then the profiling data
+can be extracted using tftp instead. There are two relevant
+configuration options. <varname>CYGPKG_PROFILE_TFTP</varname>
+controls whether or not tftp is supported. It is enabled by default if
+the configuration includes a TCP/IP stack, but can be disabled to save
+target-side resources.
+<varname>CYGNUM_PROFILE_TFTP_PORT</varname> controls the UDP
+port which will be used. This port cannot be shared with other tftp
+daemons. If neither application code nor any other package (for
+example the gcov test coverage package) provides a tftp service then
+the default port can be used. Otherwise it will be necessary to assign
+unique ports to each daemon.
+    </para>
+    <para>
+If enabled the tftp daemon will be started automatically by
+<function>profile_on</function>. This should only happen once the
+network is up and running, typically after the call to
+<function>init_all_network_interfaces</function>.
+    </para>
+    <para>
+The data can then be retrieved using a standard tftp client. There are
+a number of such clients available with very different interfaces, but
+a typical session might look something like this:
+    </para>
+    <screen>
+$ tftp
+tftp&gt; connect 10.1.1.134
+tftp&gt; binary
+tftp&gt; get gmon.out
+Received 64712 bytes in 0.9 seconds
+tftp&gt; quit
+    </screen>
+    <para>
+The address <literal>10.1.1.134</literal> should be replaced with the
+target's IP address. Extracting the profiling data by tftp will
+automatically reset the counters.
+    </para>
+  </refsect1>
+
+  <refsect1 id="gprof-configuration"><title>Configuration Options</title>
+    <para>
+This package contains a number of configuration options. Two of these,
+<varname>CYGPKG_PROFILE_TFTP</varname> and
+<varname>CYGNUM_PROFILE_TFTP_PORT</varname>, relate to support for
+<link linkend="gprof-extract">tftp transfers</link> and have already
+been described.
+    </para>
+    <para>
+Support for collecting the call graph data via
+<function>mcount</function> is optional and can be controlled via
+<varname>CYGPKG_PROFILE_CALLGRAPH</varname>. This option will only be
+active if the HAL provides the underlying <function>mcount</function>
+support and implements <varname>CYGINT_PROFILE_HAL_MCOUNT</varname>.
+The call graph data allows gprof to produce more useful output, but at
+the cost of extra run-time and memory overheads. If this option is
+disabled then the <option>-pg</option> compiler flag should not be used.
+    </para>
+    <para>
+If <varname>CYGPKG_PROFILE_CALLGRAPH</varname> is enabled then there
+are two further options which can be used to control memory
+requirements. Collecting the data requires two blocks of memory, a
+simple hash table and an array of arc records. The
+<function>mcount</function> code uses the program counter address to
+index into the hash table, giving the first element of a singly linked
+list. The array of arc records contains the various linked lists for
+each hash slot. The required number of arc records depends on the
+number of function calls in the application. For example if a function
+<function>Proc_7</function> is called from three different places in
+the application then three arc records will be needed.
+    </para>
+    <para>
+<varname>CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT</varname> controls the
+size of the hash table. The default value of 8 means that the program
+counter is shifted right by eight places to give a hash table index.
+Hence each hash table slot corresponds to 256 bytes of code, and for
+an application with say 512K of code <function>profile_on</function>
+will dynamically allocate an 8K hash table. Increasing the shift size
+reduces the memory requirement, but means that each hash table slot
+will correspond to more code and hence <function>mcount</function>
+will need to traverse a longer linked list of arc records.
+    </para>
+    <para>
+<varname>CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE</varname> controls
+how much memory <function>profile_on</function> will allocate for the
+arc records. This uses a simple heuristic, a percentage of the overall
+code size. By default the amount of arc record space allocated will be
+5% of the code size, so for a 512K executable that requires
+approximately 26K. This default should suffice for most applications.
+In exceptional cases it may be insufficient and a diagnostic will be
+generated when the profiling data is extracted.
+    </para>
+  </refsect1>
+
+  <refsect1 id="gprof-hal"><title>Implementing the HAL Support</title>
+    <para>
+The profiling package requires two lots of HAL support: a function
+<function>hal_enable_profile_timer</function> and an implementation
+of <function>mcount</function>. The profile timer is required.
+Typically it will be implemented by the variant or platform HAL
+using a spare hardware timer, and that HAL package will also
+implement the CDL interface
+<varname>CYGINT_PROFILE_HAL_TIMER</varname>. Support for
+<function>mcount</function> is optional but very desirable. Typically
+it will be implemented by the architectural HAL, which will also
+implement the CDL interface
+<varname>CYGINT_PROFILE_HAL_MCOUNT</varname>. 
+    </para>
+    <programlisting>
+#include &lt;pkgconf/system.h&gt;
+#ifdef CYGPKG_PROFILE_GPROF
+# include &lt;cyg/profile/profile.h&gt;
+#endif
+
+int
+hal_enable_profile_timer(int resolution)
+{
+    &hellip;
+    return actual_resolution;
+}
+    </programlisting>
+    <para>
+This function takes a single argument, a time interval in
+microseconds. It should arrange for a timer interrupt to go off
+after every interval. The timer VSR or ISR should then determine the
+program counter of the interrupted code and register this with the
+profiling package:
+    </para>
+    <programlisting>
+    &hellip;
+    __profile_hit(interrupted_pc);
+    &hellip;
+    </programlisting>
+    <para>
+The exact details of how this is achieved, especially obtaining the
+interrupted PC, are left to the HAL implementor. The HAL is allowed to
+modify the requested time interval because of hardware constraints,
+and should return the interval that is actually used.
+    </para>
+    <para>
+<function>mcount</function> can be more difficult. The calls to
+<function>mcount</function> are generated internally by the compiler
+and the details depend on the target architecture. In fact
+<function>mcount</function> may not use the standard calling
+conventions at all. Typically implementing <function>mcount</function>
+requires looking at the code that is actually generated, and possibly
+at the sources of the appropriate compiler back end.
+    </para>
+    <para>
+The HAL <function>mcount</function> function should call into the
+profiling package using standard calling conventions:
+    </para>
+    <programlisting>
+    &hellip;
+    __profile_mcount((CYG_ADDRWORD) caller_pc, (CYG_ADDRWORD) callee_pc);
+    &hellip;
+    </programlisting>
+    <para>
+If <function>mcount</function> was invoked because
+<function>main</function> called <function>Proc_1</function> then the
+caller pc should be an address inside <function>main</function>,
+typically corresponding to the return location, and the callee pc
+should be an address inside <function>Proc_1</function>, usually near
+the start of the function.
+    </para>
+    <para>
+For some targets the compiler does additional work, for example
+automatically allocating a per-function word of memory to eliminate
+the need for the hash table. This is too target-specific and hence
+cannot easily be used by the generic profiling package.
+    </para>
+  </refsect1>
+
+</refentry>
+</part>
Index: services/profile/gprof/current/host/Makefile.am
===================================================================
RCS file: services/profile/gprof/current/host/Makefile.am
diff -N services/profile/gprof/current/host/Makefile.am
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ services/profile/gprof/current/host/Makefile.am	21 Mar 2005 13:33:37 -0000
@@ -0,0 +1,48 @@
+## Process this file with automake to produce Makefile.in
+## =====================================================================
+##
+##     Makefile.am
+##
+##     Build/install the host-side of the gprof support package
+##
+## =====================================================================
+######COPYRIGHTBEGIN####
+#                                                                          
+# ----------------------------------------------------------------------------
+# Copyright (C) 2003, 2005 eCosCentric Limited
+#
+# This file is part of the eCos gprof support.
+#
+# This program is free software; you can redistribute it and/or modify it 
+# under the terms of the GNU General Public License as published by the Free 
+# Software Foundation; either version 2 of the License, or (at your option) 
+# any later version.
+# 
+# This program is distributed in the hope that it will be useful, but WITHOUT 
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
+# more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+# ----------------------------------------------------------------------------
+#                                                                          
+######COPYRIGHTEND####
+## =====================================================================
+#######DESCRIPTIONBEGIN####
+##
+## Author(s):	bartv
+## Date:	2003/09/01
+## Version:	0.01
+##
+######DESCRIPTIONEND####
+## =====================================================================
+
+AUTOMAKE_OPTIONS = 1.3 foreign
+
+## There is just a set of gdb macros
+gdbdir			= @datadir@/ecos/gdbscripts
+dist_gdb_DATA		= gprof.gdb
+
+
Index: services/profile/gprof/current/host/acinclude.m4
===================================================================
RCS file: services/profile/gprof/current/host/acinclude.m4
diff -N services/profile/gprof/current/host/acinclude.m4
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ services/profile/gprof/current/host/acinclude.m4	21 Mar 2005 13:33:37 -0000
@@ -0,0 +1,44 @@
+dnl Process this file with aclocal to get an aclocal.m4 file. Then
+dnl process that with autoconf.
+dnl ====================================================================
+dnl
+dnl     acinclude.m4
+dnl
+dnl ====================================================================
+dnl####COPYRIGHTBEGIN####
+dnl                                                                         
+dnl ----------------------------------------------------------------------------
+dnl Copyright (C) 2003 eCosCentric Ltd
+dnl
+dnl This file is part of the eCos host tools.
+dnl
+dnl This program is free software; you can redistribute it and/or modify it 
+dnl under the terms of the GNU General Public License as published by the Free 
+dnl Software Foundation; either version 2 of the License, or (at your option) 
+dnl any later version.
+dnl 
+dnl This program is distributed in the hope that it will be useful, but WITHOUT 
+dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+dnl FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
+dnl more details.
+dnl 
+dnl You should have received a copy of the GNU General Public License along with
+dnl this program; if not, write to the Free Software Foundation, Inc., 
+dnl 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+dnl ----------------------------------------------------------------------------
+dnl                                                                          
+dnl####COPYRIGHTEND####
+dnl ====================================================================
+dnl#####DESCRIPTIONBEGIN####
+dnl
+dnl Author(s):	bartv
+dnl Date:	2003/09/01
+dnl Version:	0.01
+dnl
+dnl####DESCRIPTIONEND####
+dnl ====================================================================
+
+dnl Access shared macros.
+dnl AM_CONDITIONAL needs to be mentioned here or else aclocal does not
+dnl incorporate the macro into aclocal.m4
+sinclude(../../../../../../acsupport/acinclude.m4)
Index: services/profile/gprof/current/host/configure.in
===================================================================
RCS file: services/profile/gprof/current/host/configure.in
diff -N services/profile/gprof/current/host/configure.in
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ services/profile/gprof/current/host/configure.in	21 Mar 2005 13:33:51 -0000
@@ -0,0 +1,53 @@
+dnl Process this file with autoconf to produce a configure script.
+dnl ====================================================================
+dnl
+dnl     configure.in
+dnl
+dnl     configure script for gprof host-side support
+dnl
+dnl ====================================================================
+dnl####COPYRIGHTBEGIN####
+dnl                                                                         
+dnl ----------------------------------------------------------------------------
+dnl Copyright (C) 2003 eCosCentric Ltd
+dnl
+dnl This file is part of the eCos gprof support
+dnl
+dnl This program is free software; you can redistribute it and/or modify it 
+dnl under the terms of the GNU General Public License as published by the Free 
+dnl Software Foundation; either version 2 of the License, or (at your option) 
+dnl any later version.
+dnl 
+dnl This program is distributed in the hope that it will be useful, but WITHOUT 
+dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+dnl FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
+dnl more details.
+dnl 
+dnl You should have received a copy of the GNU General Public License along with
+dnl this program; if not, write to the Free Software Foundation, Inc., 
+dnl 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+dnl ----------------------------------------------------------------------------
+dnl                                                                          
+dnl####COPYRIGHTEND####
+dnl ====================================================================
+dnl#####DESCRIPTIONBEGIN####
+dnl
+dnl Author(s):	bartv
+dnl Date:	2003/09/01
+dnl Version:	0.01
+dnl
+dnl####DESCRIPTIONEND####
+dnl ====================================================================
+
+
+AC_INIT(gprof.gdb)
+
+dnl Pick up the support files from the top-level acsupport directory.
+AC_CONFIG_AUX_DIR(../../../../../../acsupport)
+
+ECOS_CHECK_BUILD_ne_SRC
+AC_CANONICAL_HOST
+AM_INIT_AUTOMAKE(eCos_gprof,0.1,0)
+AM_MAINTAINER_MODE
+
+AC_OUTPUT(Makefile:Makefile.in)
Index: services/profile/gprof/current/host/gprof.gdb
===================================================================
RCS file: services/profile/gprof/current/host/gprof.gdb
diff -N services/profile/gprof/current/host/gprof.gdb
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ services/profile/gprof/current/host/gprof.gdb	21 Mar 2005 13:33:51 -0000
@@ -0,0 +1,115 @@
+# =====================================================================
+#
+#     gprof.gdb
+#
+# =====================================================================
+######ECOSHOSTGPLCOPYRIGHTBEGIN####
+# ----------------------------------------------------------------------------
+# Copyright (C) 2003 eCosCentric Ltd.
+#
+# This file is part of the eCos host tools.
+#
+# This program is free software; you can redistribute it and/or modify it 
+# under the terms of the GNU General Public License as published by the Free 
+# Software Foundation; either version 2 of the License, or (at your option) 
+# any later version.
+# 
+# This program is distributed in the hope that it will be useful, but WITHOUT 
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
+# more details.
+# 
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 
+# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+# ----------------------------------------------------------------------------
+######ECOSHOSTGPLCOPYRIGHTEND####
+# =====================================================================
+######DESCRIPTIONBEGIN####
+#
+# Author(s):	bartv
+# Date:		2003-09-01
+# Purpose:	extract gprof data from the target
+#
+#####DESCRIPTIONEND####
+# =====================================================================
+
+define gprof_fetch
+  if 0 == profile_hist_data
+    echo Target-side profiling has not been initialized.\n
+  else
+    if 0 != profile_reset_pending
+      echo No profiling data has accumulated since the last reset.\n
+    else
+      dump   binary memory gmon.out &profile_gmon_hdr ((char*)&profile_gmon_hdr + sizeof(struct gmon_hdr))
+      append binary memory gmon.out &profile_tags[0] &profile_tags[1]
+      append binary memory gmon.out &profile_hist_hdr ((char*)&profile_hist_hdr + sizeof(struct gmon_hist_hdr))
+      append binary memory gmon.out profile_hist_data (profile_hist_data + profile_hist_hdr.hist_size)
+      if 0 != profile_arc_records
+        if 0 != profile_arc_overflow
+          echo Warning: the table of callgraph arcs has overflowed\n
+          echo   This can be avoided by increasing CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE\n
+        end
+        # This loop is slow, but there is not much that can be done about it.
+        # The gmon.out file should contain 13 bytes per arc record. Keeping
+        # all the data in a packed array would involve non-aligned data,
+        # which would add significantly to the run-time overheads.
+        set $profile_i = 1
+        set $profile_count = profile_arc_next
+        while $profile_i != $profile_count
+          append binary memory gmon.out &(profile_arc_records[$profile_i].tags[3]) &(profile_arc_records[$profile_i + 1])
+          set $profile_i = $profile_i + 1
+        end
+      end
+    end
+  end
+end
+
+define gprof_reset
+  if 0 == profile_hist_data
+    echo Target-side profiling has not been initialized.\n
+  else
+    # An initial attempt to implement this by modifying profile_hist_data
+    # and profile_arc_hashtable was not satisfactory. gdb does not have
+    # commands to clear whole arrays, and doing it a slot at a time proved
+    # too slow. An alternative approach is to call a target-side reset
+    # function but that only works if the target is in a sensible state.
+    # The final implementation is to just set a flag and let the target
+    # sort things out.
+    set profile_reset_pending = 1
+    set profile_enabled = 0
+  end
+end
+
+define gprof_dump
+  if 0 == profile_hist_data
+    echo Target-side profiling has not been initialized.\n
+  else
+    gprof_fetch
+    gprof_reset
+  end
+end
+
+document gprof_fetch
+Extract gprof profiling data from the target and write it to a file
+gmon.out in the current directory. This macro should only be used
+with eCos configurations that contain the profiling package, and
+only after the target has called the profile_on() initialization
+routine.
+end
+
+document gprof_reset
+Reset all gprof profiling data in the target. This macro should only
+be used with eCos configurations that contain the profiling package,
+and only after the target has called the profile_on() initialization
+routine.
+end
+
+document gprof_dump
+Extract gprof profiling data from the target and write it to a file
+gmon.out in the current directory. The profiling data is then reset.
+This macro should only be used with eCos configurations that contain
+the profiling package, and only after the target has called the
+profile_on() initialization routine.
+end
Index: services/profile/gprof/current/include/gmon_out.h
===================================================================
RCS file: /cvs/ecos/ecos/packages/services/profile/gprof/current/include/gmon_out.h,v
retrieving revision 1.2
diff -u -r1.2 gmon_out.h
--- services/profile/gprof/current/include/gmon_out.h	15 Nov 2002 18:36:28 -0000	1.2
+++ services/profile/gprof/current/include/gmon_out.h	21 Mar 2005 13:33:51 -0000
@@ -14,6 +14,8 @@
 #ifndef gmon_out_h
 #define gmon_out_h
 
+#include <cyg/infra/cyg_type.h>
+
 #define	GMON_MAGIC	"gmon"	/* magic cookie */
 #define GMON_VERSION	1	/* version number */
 
@@ -21,34 +23,57 @@
  * Raw header as it appears on file (without padding):
  */
 struct gmon_hdr
-  {
+{
     char cookie[4];
-    char version[4];
+    char version[4];    // a cyg_uint32, target-side endianness
     char spare[3 * 4];
-  };
+};
 
 /* types of records in this file: */
 typedef enum
-  {
+{
     GMON_TAG_TIME_HIST = 0, GMON_TAG_CG_ARC = 1, GMON_TAG_BB_COUNT = 2
-  }
+}
 GMON_Record_Tag;
 
+/* The histogram tag is followed by this header, and then an array of       */
+/* cyg_uint16's for the actual counts.                                      */
+
 struct gmon_hist_hdr
-  {
-    char low_pc[sizeof (char*)];	/* base pc address of sample buffer */
-    char high_pc[sizeof (char*)];	/* max pc address of sampled buffer */
-    char hist_size[4];			/* size of sample buffer */
-    char prof_rate[4];			/* profiling clock rate */
-    char dimen[15];			/* phys. dim., usually "seconds" */
-    char dimen_abbrev;			/* usually 's' for "seconds" */
-  };
+{
+    /* host-side gprof adapts to sizeof(void*) and endianness.              */
+    /* It is assumed that the compiler does not insert padding around the   */
+    /* cyg_uint32's or the char arrays.                                     */
+    void*       low_pc;             /* base pc address of sample buffer     */
+    void*       high_pc;            /* max pc address of sampled buffer     */
+    cyg_uint32  hist_size;          /* size of sample buffer                */
+    cyg_uint32  prof_rate;          /* profiling clock rate                 */
+    char        dimen[15];			/* phys. dim., usually "seconds"        */
+    char        dimen_abbrev;		/* usually 's' for "seconds"            */
+};
+
+/* An arc tag is followed by a single arc record. self_pc corresponds to    */
+/* the location of an mcount() call, at the start of a function. from_pc    */
+/* corresponds to the return address, i.e. where the function was called    */
+/* from. count is the number of calls.                                      */
 
 struct gmon_cg_arc_record
-  {
-    char from_pc[sizeof (char*)];	/* address within caller's body */
-    char self_pc[sizeof (char*)];	/* address within callee's body */
-    char count[4];			/* number of arc traversals */
-  };
+{
+    void*       from_pc;            /* address within caller's body         */
+    void*       self_pc;        	/* address within callee's body         */
+    cyg_uint32  count;              /* number of arc traversals             */
+};
+
+/* In theory gprof can also process basic block counts, as per the          */
+/* compiler's -fprofile-arcs flag. The compiler-generated basic block       */
+/* structure should contain a table of addresses and a table of counts,     */
+/* and the compiled code updates those counts. Current versions of the      */
+/* compiler (~3.2.1) do not output the table of addresses, and without      */
+/* that table gprof cannot process the counts. Possibly gprof should read   */
+/* in the .bb and .bbg files generated for gcov processing, but that does   */
+/* not happen at the moment.                                                */
+/*                                                                          */
+/* So for now gmon.out does not contain basic block counts and gprof        */
+/* operations that depend on it, e.g. --annotated-source, won't work.       */
 
 #endif /* gmon_out_h */
Index: services/profile/gprof/current/include/profile.h
===================================================================
RCS file: /cvs/ecos/ecos/packages/services/profile/gprof/current/include/profile.h,v
retrieving revision 1.2
diff -u -r1.2 profile.h
--- services/profile/gprof/current/include/profile.h	15 Nov 2002 14:32:33 -0000	1.2
+++ services/profile/gprof/current/include/profile.h	21 Mar 2005 13:33:51 -0000
@@ -11,6 +11,7 @@
 //####ECOSGPLCOPYRIGHTBEGIN####
 // -------------------------------------------
 // This file is part of eCos, the Embedded Configurable Operating System.
+// Copyright (C) 2003 eCosCentric Limited
 // Copyright (C) 2002 Gary Thomas
 //
 // eCos is free software; you can redistribute it and/or modify it under
@@ -44,7 +45,7 @@
 //#####DESCRIPTIONBEGIN####
 //
 // Author(s):    gthomas
-// Contributors: 
+// Contributors: bartv
 // Date:         2002-11-14
 // Purpose:      Define profiling support
 // Description:  
@@ -65,9 +66,12 @@
                           int bucket_size, int sample_resolution);
 
 // Callback used by timer routine
-__externC void __profile_hit(unsigned long pc);
+__externC void __profile_hit(CYG_ADDRWORD pc);
+
+// Callgraph support
+__externC void __profile_mcount(CYG_ADDRWORD /* caller_pc */, CYG_ADDRWORD /* callee_pc */);
 
 // Timer setup routine, used when enabling profiling
-__externC void hal_enable_profile_timer(int resolution);
+__externC int hal_enable_profile_timer(int resolution);
 
 #endif // CYGONCE_PROFILE_H
Index: services/profile/gprof/current/src/profile.c
===================================================================
RCS file: /cvs/ecos/ecos/packages/services/profile/gprof/current/src/profile.c,v
retrieving revision 1.4
diff -u -r1.4 profile.c
--- services/profile/gprof/current/src/profile.c	27 Apr 2003 21:55:13 -0000	1.4
+++ services/profile/gprof/current/src/profile.c	21 Mar 2005 13:33:53 -0000
@@ -8,6 +8,7 @@
 //####ECOSGPLCOPYRIGHTBEGIN####
 // -------------------------------------------
 // This file is part of eCos, the Embedded Configurable Operating System.
+// Copyright (C) 2003 eCosCentric Ltd.
 // Copyright (C) 2002 Gary Thomas
 //
 // eCos is free software; you can redistribute it and/or modify it under
@@ -41,7 +42,7 @@
 //#####DESCRIPTIONBEGIN####
 //
 // Author(s):    Gary Thomas
-// Contributors: 
+// Contributors: Bart Veer
 // Date:         2002-11-14
 // Purpose:      Application profiling support
 // Description:  
@@ -49,198 +50,498 @@
 //####DESCRIPTIONEND####
 //
 //===========================================================================
+
+#include <pkgconf/system.h>
 #include <pkgconf/profile_gprof.h>
 
 #include <stdlib.h>
 #include <string.h>
+#include <cyg/infra/cyg_type.h>
 #include <cyg/infra/diag.h>
-#include <network.h>
-#include <tftp_support.h>
 #include <cyg/profile/profile.h>
 #include <cyg/profile/gmon_out.h>
 
-static int num_buckets;
-static unsigned short *profile;
-static int bucket_size, bucket_shift;
-static unsigned long start_addr, end_addr;
-static int enabled;
-static int tftp_server_id;
-static int prof_rate;
-
-static int profile_open(const char *, int);
-static int profile_close(int);
-static int profile_write(int, const void *, int);
-static int profile_read(int, void *, int);
-
-struct tftpd_fileops profile_fileops = {
-    profile_open, profile_close, profile_write, profile_read
+#ifdef CYGPKG_PROFILE_TFTP
+# include <network.h>
+# include <tftp_support.h>
+#endif
+
+// ----------------------------------------------------------------------------
+// A gmon.out file starts with a struct gmon_hdr containing a cookie
+// "gmon", a format version number, and some spare bytes. The structure
+// is initialized by the profile_on() entry point so that it does not
+// get garbage collected by the linker and hence a gdb script can
+// always access it.
+static struct gmon_hdr  profile_gmon_hdr;
+
+// The header is followed by data blocks. Each data block consists of a
+// one-byte tag (HIST, ARC, or BB_COUNT), followed by data in a specific
+// format.
+static unsigned char    profile_tags[3];
+
+// The profiling data always contains histogram data. Typically an
+// extra hardware timer is made to interrupt at the desired rate
+// and stores the interrupted pc.
+static struct gmon_hist_hdr    profile_hist_hdr;
+
+// The actual histogram counts. The file format only allows for 16-bit
+// counts, which means overflow is a real possibility.
+static cyg_uint16*      profile_hist_data;
+
+// Each slot in the histogram data covers a range of pc addresses,
+// allowing a trade off between memory requirements and precision.
+static int              bucket_shift;
+
+// Profiling is disabled on start-up and while a tftp transfer takes place.
+static int              profile_enabled;
+
+// This is used by the gdb script to reset the profile data.
+static int              profile_reset_pending;
+
+// The callgraph data. There is no header for this. Instead each non-zero
+// entry is output separately, prefixed by an ARC tag. The data is accessed
+// via a hash table/linked list combination. The tag is part of the
+// structure to reduce the number of I/O operations needed for writing
+// gmon.out.
+struct profile_arc {
+    cyg_uint32                  next;
+    unsigned char               tags[4];
+    struct gmon_cg_arc_record   record;
 };
 
-struct _file {
-    unsigned char      *pos, *eof, *data;
-    int                 flags;
-    int                 mode;
-};
-#define FILE_OPEN 0x0001
+static struct profile_arc*  profile_arc_records;
 
-#define NUM_FILES 1
-static struct _file files[NUM_FILES];
+// The next free slot in the arc_records table.
+static int                  profile_arc_next    = 1;
 
-static inline struct _file *
-profile_fp(int fd)
+#ifdef CYGPKG_PROFILE_CALLGRAPH
+// The callgraph is accessed via a hash table. The hashing function is
+// trivial, it just involves shifting an address an appropriate number
+// of places.
+static int*         profile_arc_hashtable;
+
+// The sizes of these tables
+static int          profile_arc_hash_count;
+static int          profile_arc_records_count;
+
+// Is the table of arc records too small? Used for diagnostics.
+static int          profile_arc_overflow;
+#endif
+
+// Reset current profiling data.
+static void
+profile_reset(void)
 {
-    struct _file *fp;
-    if ((fd < 0) || (fd >= NUM_FILES)) return (struct _file *)0;
-    fp = &files[fd];
-    if (!(fp->flags & FILE_OPEN)) return (struct _file *)0;
-    return fp;
+    memset(profile_hist_data, 0, profile_hist_hdr.hist_size * sizeof(cyg_uint16));
+
+#ifdef CYGPKG_PROFILE_CALLGRAPH
+    // Zeroing the callgraph can be achieved by zeroing the hash
+    // table and resetting the next field used for indexing into
+    // the arc data itself. Whenever an arc data slot is allocated
+    // the count and addresses are reset.
+    memset(profile_arc_hashtable, 0, profile_arc_hash_count * sizeof(int));
+    profile_arc_next     = 1;
+    profile_arc_overflow = 0;
+#endif
 }
 
+// ----------------------------------------------------------------------------
+// Accumulate profiling data.
+
+// __profile_hit() will be called by HAL-specific code, typically in an ISR
+// associated with a timer.
+
+void
+__profile_hit(CYG_ADDRWORD pc)
+{
+    int bucket;
+    if (! profile_enabled ) {
+        if (! profile_reset_pending) {
+            return;
+        }
+        // reset_pending can be set by the gdb script to request resetting
+        // the data. It avoids having to do lots of memory updates via the
+        // gdb protocol, which is too slow.
+        profile_reset_pending   = 0;
+        profile_reset();
+        profile_enabled         = 1;
+    }
+    
+    if ((pc >= (CYG_ADDRWORD)profile_hist_hdr.low_pc) && (pc <= (CYG_ADDRWORD)profile_hist_hdr.high_pc)) {
+        bucket = (pc - (CYG_ADDRWORD)profile_hist_hdr.low_pc) >> bucket_shift;
+        if (profile_hist_data[bucket] < (unsigned short)0xFFFF) {
+            profile_hist_data[bucket]++;
+        }
+    }
+}
+
+#ifdef CYGPKG_PROFILE_CALLGRAPH
+// __profile_mcount() will be called by the HAL-specific mcount() routine.
+// When code is compiled with -pg the compiler inserts calls to mcount()
+// at the start of each function. Typically mcount() will not use standard
+// calling conventions so it has to be provided by the HAL.
+//
+// The caller_pc/callee_pc pair should end up in profile_arc_records. A hash
+// table maps a PC into a list chained through the records array. The hash
+// function is a simple shift, so a range of PC addresses (usually 256 bytes)
+// maps onto a single linked list of arc records.
+//
+// We can hash on either the caller_pc, the callee_pc, or some combination.
+// The caller PC will typically be in the middle of some function. The
+// number of arcs that hash into the same list will depend on the number of
+// function calls within a 256-byte region of code, multiplied by the
+// number of different functions called at each location. The latter will
+// be 1 unless the code uses changing function pointers. The callee pc
+// is near the start of a function, and the number of hash collisions will
+// depend on the number of places that function is called from. Usually this
+// will be small, but some utility functions may be called from many different
+// places.
+//
+// Hashing on the caller PC should give more deterministic results.
+//
+// On some targets the compiler does additional work. For example on
+// the 68K in theory there is no need for a hash table because the
+// compiler provides a word with each callee for the head of the
+// linked list. It is not easy to cope with that in generic code, so
+// for now this code ignores such compiler assistance.
+//
+// It is assumed that __profile_mcount() will be called with interrupts
+// disabled. 
+
+void
+__profile_mcount(CYG_ADDRWORD caller_pc, CYG_ADDRWORD callee_pc)
+{
+    int                 hash_index;
+    struct profile_arc* current;
+
+    // mcount() may be called at any time, even before profile_on() has
+    // allocated the callgraph tables. There is an assumption here that
+    // .bss has been zeroed before the first call into C code, i.e. by
+    // the initial assembler start-up, so profile_enabled starts as 0.
+    if (!profile_enabled) {
+        if (! profile_reset_pending) {
+            return;
+        }
+        profile_reset_pending   = 0;
+        profile_reset();
+        profile_enabled         = 1;
+    }
+
+    // Check the caller_pc because that is what is used to index the
+    // hash table. Checking the callee_pc is optional and depends on
+    // exactly how you interpret the start and end addresses passed to
+    // profile_on().
+    if ((caller_pc < (CYG_ADDRWORD)profile_hist_hdr.low_pc) ||
+        (caller_pc > (CYG_ADDRWORD)profile_hist_hdr.high_pc)) {
+        return;
+    }
+
+    hash_index = (int) ((caller_pc - (CYG_ADDRWORD)profile_hist_hdr.low_pc) >> CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT);
+    if (0 == profile_arc_hashtable[hash_index]) {
+        if (profile_arc_next == profile_arc_records_count) {
+            profile_arc_overflow = 1;
+        } else {
+            profile_arc_hashtable[hash_index]   = profile_arc_next;
+            current = &(profile_arc_records[profile_arc_next]);
+            profile_arc_next++;
+            current->next   = 0;
+            current->record.from_pc = (void*) caller_pc;
+            current->record.self_pc = (void*) callee_pc;
+            current->record.count   = 1;
+        }
+    } else {
+        current = &(profile_arc_records[profile_arc_hashtable[hash_index]]);
+        while (1) {
+            if ((current->record.from_pc == (void*) caller_pc) && (current->record.self_pc == (void*) callee_pc)) {
+                current->record.count++;
+                break;
+            } else if (0 == current->next) {
+                if (profile_arc_next == profile_arc_records_count) {
+                    profile_arc_overflow = 1;
+                } else {
+                    current->next   = profile_arc_next;
+                    current         = &(profile_arc_records[profile_arc_next]);
+                    profile_arc_next++;
+                    current->next   = 0;
+                    current->record.from_pc = (void*) caller_pc;
+                    current->record.self_pc = (void*) callee_pc;
+                    current->record.count   = 1;
+                }
+                break;
+            } else {
+                current = &(profile_arc_records[current->next]);
+            }
+        }
+    }
+}
+#endif
+
+#ifdef CYGPKG_PROFILE_TFTP
+// ----------------------------------------------------------------------------
+// TFTP support
+//
+// To keep things simple this code only supports one open file at a time,
+// and only gmon.out is supported.
+
+static int              profile_tftp_next_index     = 0;
+static unsigned char*   profile_tftp_current_block  = (unsigned char*) 0;
+static int              profile_tftp_current_len    = 0;
+static int              profile_tftp_is_open        = 0;
+
 static int
-profile_open(const char *fn, int flags)
+profile_tftp_open(const char *filename, int flags)
 {
-    int fd = 0;
-    struct _file *fp;
-    int len = sizeof(struct gmon_hdr) + sizeof(struct gmon_hist_hdr) + (num_buckets*2) + 1;
-    struct gmon_hdr _hdr;
-    struct gmon_hist_hdr _hist_hdr;
-    unsigned char *bp;
-    int version = GMON_VERSION;
-    int hist_size = num_buckets;
-    int file_type = -1;
-
-    fp = files;
-    if (fp->flags & FILE_OPEN) {
-        // File already open
+    // Only allow one open file for now.
+    if (profile_tftp_is_open) {
         return -1;
     }
-    if (!(flags & O_RDONLY)) {
-        // Only read supported
+    // Only read-only access is supported.
+    if ((0 != (flags & ~O_RDONLY)) || (0 == (flags & O_RDONLY))) {
         return -1;
     }
-    if (strcmp(fn, "PROFILE.DAT") == 0) {
-        file_type = 0;
-    }
-    if (file_type < 0) {
-        // No supported file
+    // Only gmon.out can be retrieved using this tftp daemon
+    if (0 != strcmp(filename, "gmon.out")) {
         return -1;
     }
-    // Set up file info
-    enabled = 0;
-    switch (file_type) { // In case another type ever supported
-    case 0: // profile data
-        fp->data = malloc(len);
-        if (!fp->data) {
-            diag_printf("Can't allocate buffer for profile data!\n");
-            return -1;
-        }
-        fp->flags = FILE_OPEN;
-        fp->mode = flags;
-        fp->pos = fp->data;
-        fp->eof = fp->pos + len;
-        // Fill in profile data
-        bp = fp->data;
-        memset(&_hdr, 0, sizeof(_hdr));
-        strcpy(_hdr.cookie, GMON_MAGIC);
-        memcpy(_hdr.version, &version, sizeof(version));
-        memcpy(bp, &_hdr, sizeof(_hdr));
-        bp += sizeof(_hdr);
-        memcpy(&_hist_hdr.low_pc, &start_addr, sizeof(start_addr));
-        memcpy(&_hist_hdr.high_pc, &end_addr, sizeof(end_addr));    
-        memcpy(&_hist_hdr.hist_size, &hist_size, sizeof(hist_size));    
-        memcpy(&_hist_hdr.prof_rate, &prof_rate, sizeof(prof_rate));    
-        strcpy(_hist_hdr.dimen, "seconds");
-        _hist_hdr.dimen_abbrev = 's';
-        *bp++ = GMON_TAG_TIME_HIST;
-        memcpy(bp, &_hist_hdr, sizeof(_hist_hdr));
-        bp += sizeof(_hist_hdr);
-        memcpy(bp, profile, (num_buckets*2));
-        memset(profile, 0, (num_buckets*2));
-        break;    
+    // Everything is in order. Prepare for the first read. Profiling
+    // is suspended while the tftp transfer is in progress to avoid
+    // inconsistent results.
+    profile_enabled             = 0;
+    profile_tftp_is_open        = 1;
+    profile_tftp_next_index     = 0;
+    profile_tftp_current_len    = 0;
+
+    // Report any callgraph overflows. This is best done when retrieving
+    // the results, either in the gdb script or at tftp open time.
+#ifdef CYGPKG_PROFILE_CALLGRAPH
+    if (profile_arc_overflow) {
+        diag_printf("Profiling: warning, the table of callgraph arcs has overflowed\n");
+        diag_printf("This can be avoided by increasing CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE\n");
     }
-    return fd;
+#endif
+    
+    return 1;
 }
 
 static int
-profile_close(int fd)
+profile_tftp_close(int fd)
 {
-    struct _file *fp = profile_fp(fd);
-    if (!fp) return -1;
-    if (fp->data) free(fp->data);
-    fp->flags = 0;  // No longer open
-    enabled = 1;
+    if (! profile_tftp_is_open) {
+        return -1;
+    }
+    profile_tftp_is_open = 0;
+
+    // The histogram counters are only 16 bits, so can easily overflow
+    // during a long run. Resetting the counters here makes it possible
+    // to examine profile data during different parts of the run with
+    // a reduced risk of overflow.
+    profile_reset();
+
+    // Profiling was disabled in the open() call
+    profile_enabled     = 1;
     return 0;
 }
 
+// gmon.out can only be read, not written.
 static int 
-profile_write(int fd, const void *buf, int len)
+profile_tftp_write(int fd, const void *buf, int len)
 {
     return -1;
 }
 
-static int
-profile_read(int fd, void *buf, int len)
+// The data that should go into gmon.out is spread all over memory.
+// This utility is used to move from one block to the next.
+static void
+profile_tftp_read_next(void)
 {
-    struct _file *fp = profile_fp(fd);
-    int res;
-    if (!fp) return -1;
-    res = fp->eof - fp->pos;  // Number of bytes left in "file"
-    if (res > len) res = len;
-    if (res <= 0) return 0;  // End of file
-    bcopy(fp->pos, buf, res);
-    fp->pos += res;
-    return res;
+    switch (profile_tftp_next_index) {
+      case 0 :      // The current block is the gmon hdr
+        profile_tftp_current_block  = (unsigned char*) &profile_gmon_hdr;
+        profile_tftp_current_len    = sizeof(struct gmon_hdr);
+        break;
+      case 1 :      // The histogram tag
+        profile_tftp_current_block  = &(profile_tags[0]);
+        profile_tftp_current_len    = 1;
+        break;
+      case 2 :      // The histogram header
+        profile_tftp_current_block  = (unsigned char*) &profile_hist_hdr;
+        profile_tftp_current_len    = sizeof(struct gmon_hist_hdr);
+        break;
+      case 3 :      // The histogram data
+        profile_tftp_current_block  = (unsigned char*) profile_hist_data;
+        profile_tftp_current_len    = profile_hist_hdr.hist_size * sizeof(cyg_uint16);
+        break;
+      default :     // One of the arc records. These start at array offset 1.
+        {
+            int arc_index    = profile_tftp_next_index - 3;
+            if (arc_index >= profile_arc_next) {
+                profile_tftp_current_block  = (unsigned char*) 0;
+                profile_tftp_current_len    = 0;
+            } else {
+                // gmon.out should contain a 1 byte tag followed by each
+                // arc record.
+                profile_tftp_current_block  = (unsigned char*) &(profile_arc_records[arc_index].tags[3]);
+                profile_tftp_current_len    = sizeof(struct gmon_cg_arc_record) + 1;
+            }
+            break;
+        }
+    }
+    profile_tftp_next_index++;
 }
-
-void
-__profile_hit(unsigned long pc)
+    
+// Read the next block of data. There is no seek operation so no need
+// to worry about the current position. State from the previous reads
+// is held in profile_tftp_current_block and profile_tftp_current_len
+static int
+profile_tftp_read(int fd, void *buf_arg, int len)
 {
-    int bucket;
-    if (enabled) {
-        if ((pc >= start_addr) && (pc <= end_addr)) {
-            bucket = (pc - start_addr) >> bucket_shift;
-            if (profile[bucket] < (unsigned short)0xFFFF) {
-                profile[bucket]++;
+    unsigned char*  buf     = (unsigned char*) buf_arg;
+    int             read    = 0;
+    
+    if ( ! profile_tftp_is_open ) {
+        return -1;
+    }
+
+    while (len > 0) {
+        if (0 == profile_tftp_current_len) {
+            profile_tftp_read_next();
+            if (0 == profile_tftp_current_len) {
+                break;
             }
         }
+        if (profile_tftp_current_len >= len) {
+            // The request can be satisfied by the current block
+            memcpy(&(buf[read]), profile_tftp_current_block, len);
+            profile_tftp_current_block += len;
+            profile_tftp_current_len   -= len;
+            read += len;
+            break;
+        } else {
+            memcpy(&(buf[read]), profile_tftp_current_block, profile_tftp_current_len);
+            len  -= profile_tftp_current_len;
+            read += profile_tftp_current_len;
+            profile_tftp_current_len = 0;
+        }
     }
+    return read;
 }
 
+static struct tftpd_fileops profile_tftp_fileops = {
+    &profile_tftp_open,
+    &profile_tftp_close,
+    &profile_tftp_write,
+    &profile_tftp_read
+};
+#endif
+
+// ----------------------------------------------------------------------------
+// profile_on() has to be called by application code to start profiling.
+// Application code will determine the start and end addresses, usually
+// _stext and _etext, but it is possible to limit profiling to only
+// some of the code. The bucket size controls how many PC addresses
+// will be treated as a single hit: a smaller bucket increases precision
+// but requires more memory. The resolution is used to initialize the
+// profiling timer: more frequent interrupts means more accurate results
+// but increases the risk of an overflow.
+
 void 
 profile_on(void *_start, void *_end, int _bucket_size, int resolution)
-{    
-    start_addr = (unsigned long)_start;
-    end_addr = (unsigned long)_end;
-    // Adjust bucket size to be a power of 2
-    bucket_size = 1;
-    bucket_shift = 0;
+{
+    int             bucket_size;
+    cyg_uint32      version     = GMON_VERSION;
+    CYG_ADDRWORD    text_size   = (CYG_ADDRWORD)_end - (CYG_ADDRWORD)_start;
+
+    // Initialize statics. This also ensures that they won't be
+    // garbage collected by the linker so a gdb script can safely
+    // reference them.
+    memcpy(profile_gmon_hdr.cookie, GMON_MAGIC, 4);
+    memcpy(profile_gmon_hdr.version, &version, 4);
+    profile_tags[0] = GMON_TAG_TIME_HIST;
+    profile_tags[1] = GMON_TAG_CG_ARC;
+    profile_tags[2] = GMON_TAG_BB_COUNT;
+    strcpy(profile_hist_hdr.dimen, "seconds");
+    profile_hist_hdr.dimen_abbrev   = 's';
+
+    // The actual bucket size. For efficiency this should be a power of 2.
+    bucket_size             = 1;
+    bucket_shift            = 0;
     while (bucket_size < _bucket_size) {
-        bucket_size <<= 1;
-        bucket_shift++;
+        bucket_size     <<= 1;
+        bucket_shift    += 1;
     }
-    if (bucket_size != _bucket_size) {
-        bucket_size <<= 1;
-        bucket_shift++;
-    }
-    // Calculate number of buckets
-    num_buckets = ((end_addr - start_addr) + (bucket_size - 1)) / bucket_size;
-    // Adjust end address so calculations come out
-    end_addr = start_addr + (bucket_size * num_buckets);
-    prof_rate = 1000000 / resolution;
-    // Allocate buffer for profile data
-    if (!(profile = malloc(num_buckets*2))) {
-        diag_printf("Can't allocate profile buffer - ignored\n");
+
+    // The gprof documentation claims that this should be the size in
+    // bytes. The implementation treats it as a count.
+    profile_hist_hdr.hist_size  = (cyg_uint32) ((text_size + bucket_size - 1) / bucket_size);
+    profile_hist_hdr.low_pc     = _start;
+    profile_hist_hdr.high_pc    = (void*)((cyg_uint8*)_end - 1);
+    // The prof_rate is the frequency in Hz. The resolution argument is
+    // an interval in microseconds.
+    profile_hist_hdr.prof_rate  = 1000000 / resolution;
+        
+    // Now allocate a buffer for the histogram data.
+    profile_hist_data = (cyg_uint16*) malloc(profile_hist_hdr.hist_size * sizeof(cyg_uint16));
+    if ((cyg_uint16*)0 == profile_hist_data) {
+        diag_printf("profile_on(): cannot allocate histogram buffer - ignored\n");
         return;
     }
-    memset(profile, 0, (num_buckets*2));
-    enabled = 1;
-    diag_printf("Profile from %p..%p[%p], in %d buckets of size %d\n", 
-                start_addr, end_addr, _end, num_buckets, bucket_size);
+    memset(profile_hist_data, 0, profile_hist_hdr.hist_size * sizeof(cyg_uint16));
+
+#ifdef CYGPKG_PROFILE_CALLGRAPH
+    // Two arrays are needed for keeping track of the callgraph. The
+    // first is a hash table. The second holds the arc data. The
+    // latter array contains an extra 50 slots to cope with degenerate
+    // programs (including testcases).
+    {
+        int i;
+        
+        profile_arc_hash_count  = (int) ((text_size + (0x01 << CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT) - 1)
+                                         >> CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT);
+        profile_arc_records_count = (int)
+            (CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE * (text_size / 100)) /
+            sizeof(struct profile_arc)
+            + 50;
+
+        profile_arc_hashtable = (int*) malloc(profile_arc_hash_count * sizeof(int));
+        if ((int*)0 == profile_arc_hashtable) {
+            diag_printf("profile_on(): cannot allocate call graph hash table\n  call graph profiling disabled\n");
+        } else {
+            memset(profile_arc_hashtable, 0, profile_arc_hash_count * sizeof(int));
+            profile_arc_records = (struct profile_arc*) malloc(profile_arc_records_count * sizeof(struct profile_arc));
+            if ((struct profile_arc*)0 == profile_arc_records) {
+                diag_printf("profile_on(): cannot allocate call graph arc table\n  call graph profiling disabled\n");
+                free(profile_arc_hashtable);
+                profile_arc_hashtable = (int*) 0;
+            } else {
+                memset(profile_arc_records, 0, profile_arc_records_count * sizeof(struct profile_arc));
+                for (i = 0; i < profile_arc_records_count; i++) {
+                    profile_arc_records[i].tags[3] = GMON_TAG_CG_ARC;
+                }
+                profile_arc_next    = 1;    // slot 0 cannot be used because 0 marks an unused hash slot.
+            }
+        }
+    }
+#else
+    profile_arc_records     = (struct profile_arc*) 0;
+#endif
+    
+    diag_printf("Profile from %p..%p in %d buckets of size %d\n",
+                profile_hist_hdr.low_pc, profile_hist_hdr.high_pc,
+                profile_hist_hdr.hist_size, bucket_size);
+
+    // Activate the profiling timer, which is usually provided by the
+    // variant or target HAL. The requested resolution may not be
+    // possible on the current hardware, so the HAL is allowed to
+    // tweak it.
+    resolution = hal_enable_profile_timer(resolution);
+    profile_hist_hdr.prof_rate = 1000000 / resolution;
 
-    hal_enable_profile_timer(resolution);
+    profile_enabled = 1;
 
+#ifdef CYGPKG_PROFILE_TFTP    
     // Create a TFTP server to provide the data
-    tftp_server_id = tftpd_start(CYGNUM_PROFILE_TFTP_PORT, &profile_fileops);
+    (void) tftpd_start(CYGNUM_PROFILE_TFTP_PORT, &profile_tftp_fileops);
+#endif    
 }
 
 // EOF profile.c



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]