This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Simplistic profiling with the runtime
- From: "Spirakis, Charles" <charles dot spirakis at intel dot com>
- To: <systemtap at sources dot redhat dot com>
- Date: Wed, 24 Aug 2005 15:33:08 -0700
- Subject: Simplistic profiling with the runtime
All --
Enclosed is a simplistic example of using the runtime to generate time
based profiling data. In theory, the code will work on an SMP system
(taking interrupts on all CPU's), but I'm on a Pentium M laptop, so I
never tried that...
The system was running multiple windows, during the test run, one window
had a "spinloop" app was running to make sure the cpu was never idle
while I ran some other programs on the various other windows. Source is
below along with the corresponding output.
In dtrace, there is a distinction made between code to be run via the OS
timer (one interrupt on a random CPU) vs. code to be run via the profile
timer (one interrupt on all CPUs). (See
http://docs.sun.com/app/docs/doc/817-6223/6mlkidljh?a=view).
Has any discussion happen on what we want the systemtap translator
syntax to look like in this regard? I'd be interested in taking a stab
at what it would take to get the translator to handle the os timer
version of the interrupt if this hasn't already been done.
-- charles
[root@cspiraki-lnote profile_timer]# cat profile_timer.c
/* Use 2.6 profile timer (register_timer_hook) as an smp capable timer
*/
/* Demonstrates the beginnings of a generic framework for */
/* asynchronous probes using the runtime */
/* os includes */
#include "linux/profile.h"
/* How many strings to allocate. see strings.c. Default is 0. */
#define STP_NUM_STRINGS 1
/* maximum size for a string. default is 2048 */
#define STP_STRING_SIZE 2048
/* size of strings saved in maps */
#define MAP_STRING_LENGTH 256
/* width of histograms. Default 50 */
#define HIST_WIDTH 50
/* always include this. Put all non-map defines above it. */
#include "runtime.h"
/* @todo since we don't have aggregation maps yet, try regular maps */
#define NEED_INT64_VALS
#define KEY1_TYPE STRING
#define KEY2_TYPE INT64
#define KEY3_TYPE STRING
#define KEY4_TYPE INT64
#include "map-keys.c"
#include "map.c"
#include "stat.c"
#include "stack.c"
MODULE_DESCRIPTION("SystemTap probe: profile_timer");
MODULE_AUTHOR("Charles Spirakis <charles.spirakis@intel.com>");
Stat timing;
MAP cur_addr;
/* Would probably be better to have the kernel code do a lookup_dcookie
(ala */
/* oprofile) and save the dcookie here and then have user mode code */
/* dereference it (also, ala oprofile). But that would require changes
to */
/* the runtime (possibly a new type? dcookie?) */
/* for the moment, just do it all here as an experiment */
char *get_image_path(struct mm_struct *mm, unsigned long addr, off_t
*offset, char *full_path, u32 len)
{
struct vm_area_struct *vma;
char *ret_val;
for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
if ((addr < vma->vm_start) || (addr >= vma->vm_end)) {
continue;
}
if (vma->vm_file) {
*offset = (vma->vm_pgoff << PAGE_SHIFT) + addr -
vma->vm_start;
ret_val = d_path(vma->vm_file->f_dentry,
vma->vm_file->f_vfsmnt,
full_path,
len);
#if 0
printk(KERN_INFO "d_path: %p, %p, <%s>\n",
ret_val,
full_path,
ret_val);
#endif
if (IS_ERR(ret_val)) {
ret_val = NULL;
}
}
else {
*offset = addr;
ret_val = NULL;
}
break;
}
return ret_val;
}
/* A generic asynchorous probe entry point */
void inst_async(struct pt_regs *regs)
{
u64 start;
u64 end;
unsigned long ip;
char *path;
off_t offset;
/* This is smp safe, but the stack alloc's on each interrupt takes
time */
/* ah well, just a prototype, worry about a better way to do it
later */
char full_path[512];
rdtscll(start);
ip = REG_IP(regs);
if (user_mode(regs)) {
path = get_image_path(current->mm, ip, &offset, full_path,
sizeof(full_path));
if (path) {
_stp_map_key_str_int64_str_int64(cur_addr, current->comm,
ip, path, offset);
}
else {
_stp_map_key_str_int64_str_int64(cur_addr, current->comm,
ip, "unknown", offset);
}
}
else {
_stp_map_key_str_int64_str_int64(cur_addr, current->comm, ip,
"vmlinux", ip);
}
/* Create a map of interrupted addresses seen */
/* This is where a "write-only" mapping concept would be useful */
/* (as is done in dtrace) since the only readers of this data are */
/* going to be in user-mode doing post-processing. It would allow */
/* the runtime to do final aggregation in user space and allow */
/* us to avoid defining the size of the map ahead of time */
/* which is required now */
_stp_map_add_int64(cur_addr, 1);
/* We will want _stp_stack() and _stp_ustack() to work here */
/* at some point in the future... */
rdtscll(end);
_stp_stat_add(timing, end - start);
}
/* Helper function to convert from profile timer callback into */
/* generic asynchronous entry point form */
/* In this case, there is nothing to do and according to Frank, it */
/* isn't needed. But still here because that was they way I initially */
/* did things... It is only a sample/prototype, after all... */
static void timer_callback(struct pt_regs *regs)
{
/* Call the asynchronous probe with a ptregs struct */
inst_async(regs);
}
/* called when the module loads. */
int probe_start(void)
{
int ret;
timing = _stp_stat_init(HIST_LINEAR, 0, 5000, 250);
cur_addr = _stp_map_new_str_int64_str_int64(10000, INT64);
/* register the profile timer */
ret = register_timer_hook(timer_callback);
if (!ret) {
printk(KERN_ERR "couldn't hook timer callback\n");
}
return ret;
}
void probe_exit (void)
{
/* unregister the os_timer */
unregister_timer_hook(timer_callback);
/* print out any colledted data, etc */
_stp_stat_print (timing, "timing (cpu cycles): # calls:%C avg:%A
min:%m max:%M\n%H", 0);
_stp_print("Process\tIp\tImage\tOffset\tCount\n");
_stp_map_print (cur_addr, "%1s\t%2P\t%3s\t%4p\t%d");
_stp_map_del(cur_addr);
_stp_print_flush();
}
[root@cspiraki-lnote profile_timer]# time stpd profile_timer.ko > pt.out
real 0m7.134s
user 0m0.005s
sys 0m0.014s
[root@cspiraki-lnote profile_timer]# wc pt.out
209 1073 11804 pt.out
[root@cspiraki-lnote profile_timer]# cat pt.out
timing (cpu cycles): # calls:5949 avg:1272 min:448 max:12500
value |-------------------------------------------------- count
0 | 0
250 | 1
500 | 0
750 |@@@@@@@@@@@@@@@@ 1068
1000 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 3240
1250 |@@@@@@@@@@@@@@@@ 1073
1500 |@@ 177
1750 |@ 92
2000 |@ 71
2250 | 42
2500 | 17
2750 | 17
3000 | 15
3250 | 6
3500 | 10
3750 | 8
4000 | 7
4250 | 10
4500 | 9
4750 |@ 86
Process Ip Image Offset Count
stpd 0xc011beda : __wake_up+0x74/0x14d vmlinux c011beda
1
udev 0xc015c363 : kmem_cache_alloc+0x29/0x49 vmlinux c015c363
1
hald 0x80588bb /usr/sbin/hald 000118bb 1
default.hotplug 0x80993f4 /bin/bash 000523f4 1
default.hotplug 0x49e2a9 /lib/libc-2.3.5.so 000632a9
1
default.hotplug 0x805fcfc /bin/bash 00018cfc 1
default.hotplug 0x8084dd5 /bin/bash 0003ddd5 1
spinloop 0x804849d /home/cspiraki/sep_testing/spinloop
0000049d 709
spinloop 0x804848a /home/cspiraki/sep_testing/spinloop
0000048a 308
spinloop 0x8048496 /home/cspiraki/sep_testing/spinloop
00000496 214
spinloop 0x80484ba /home/cspiraki/sep_testing/spinloop
000004ba 304
spinloop 0x80484cd /home/cspiraki/sep_testing/spinloop
000004cd 765
spinloop 0x80484fd /home/cspiraki/sep_testing/spinloop
000004fd 744
spinloop 0x80484ea /home/cspiraki/sep_testing/spinloop
000004ea 324
spinloop 0x80484f4 /home/cspiraki/sep_testing/spinloop
000004f4 201
spinloop 0x8048464 /home/cspiraki/sep_testing/spinloop
00000464 191
spinloop 0x804846d /home/cspiraki/sep_testing/spinloop
0000046d 695
spinloop 0x8048494 /home/cspiraki/sep_testing/spinloop
00000494 214
spinloop 0x80484c4 /home/cspiraki/sep_testing/spinloop
000004c4 220
spinloop 0x804845a /home/cspiraki/sep_testing/spinloop
0000045a 291
spinloop 0x80484c6 /home/cspiraki/sep_testing/spinloop
000004c6 156
spinloop 0x80484f6 /home/cspiraki/sep_testing/spinloop
000004f6 164
spinloop 0x8048466 /home/cspiraki/sep_testing/spinloop
00000466 259
hald-addon-stor 0xc02ae2f8 : ide_find_setting_by_ioctl+0x14/0x1f
vmlinux
c02ae2f8 1
gnome-terminal 0x72b6ef /usr/lib/libgobject-2.0.so.0.600.4
000116ef 1
gnome-terminal 0x4a6bbe /lib/libc-2.3.5.so 0006bbbe
1
gnome-terminal 0x7ed29a /lib/libpthread-2.3.5.so 0000429a
2
spinloop 0xc030795d : net_rx_action+0x1a2/0x2bf vmlinux c030795d
1
Xvnc 0x81297a1 /usr/bin/Xvnc 000e37a1 1
Xvnc 0xc017c8cf : fget+0x7d/0x164 vmlinux c017c8cf 1
spinloop 0xc01282bc : __do_softirq+0x2c/0x8a vmlinux c01282bc
2
Xvnc 0x932402 unknown 00932402 2
gnome-terminal 0x30e6a8 /usr/lib/libvte.so.4.4.0 0001f6a8
1
gnome-terminal 0xc025c725 : n_tty_chars_in_buffer+0x0/0x192 vmlinux
c025c725 1
metacity 0x7209c9 /usr/lib/libgobject-2.0.so.0.600.4
000069c9 1
Xvnc 0xc036c245 : packet_rcv_spkt+0x12b/0x371 vmlinux c036c245
1
spinloop 0xc0334e46 : tcp_ack_no_tstamp+0x2b/0x3e vmlinux
c0334e46 1
metacity 0x65f410 /usr/X11R6/lib/libX11.so.6.2 0002c410
1
gnome-terminal 0x49f0db /lib/libc-2.3.5.so 000640db
1
Xvnc 0xc011aa60 : kmap_atomic+0x4e/0x66 vmlinux c011aa60
1
gnome-terminal 0xbfdf59 /usr/X11R6/lib/libXft.so.2.1.2 0000df59
1
Xvnc 0x4a66f3 /lib/libc-2.3.5.so 0006b6f3 1
Xvnc 0x81215a7 /usr/bin/Xvnc 000db5a7 1
Xvnc 0x49f033 /lib/libc-2.3.5.so 00064033 1
... and more. Truncated to keep the email reasonable ...
... Profiling can generate a lot of data ...