This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Perfmon systemtap runtime support
- From: William Cohen <wcohen at redhat dot com>
- To: "systemtap at sourceware dot org" <systemtap at sourceware dot org>
- Date: Thu, 13 Jul 2006 18:21:02 -0400
- Subject: Perfmon systemtap runtime support
Hi
I have been working on getting some performance monitoring support into
systemtap. The perfmon1.diff patch is a very simple addition to the
runtime. It just has functions to set up the perfmon monitoring hardware,
read a counter, and shut down the performance monitoring hardware. It
uses the perfmon2 kernel ABI to configure the hardware.
I have completed changes to the translator to use the runtime functions.
I took Marin's suggestion of using guru mode to allow access to the
various C functions and wrote some examples that used the runtime functions.
The cost is relatively high for accessing the counters. Below is the
output from p2x.stp, counting the number of cycles between consecutive
calls to read the cycle count:
[wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g p2x.stp
interval = 15491
[wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g p2x.stp
interval = 16317
[wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g p2x.stp
interval = 15431
[wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g p2x.stp
interval = 15392
I would appreciate any comments or feedback on this code.
-Will
? runtime/bench2/bench.stp
? runtime/bench2/itest
? runtime/bench2/stap.out
? runtime/probes/perf
? runtime/probes/os_timer/.built-in.o.cmd
? runtime/probes/os_timer/.os_timer.o.d
? runtime/probes/os_timer/.tmp_versions
? runtime/probes/os_timer/Makefile
? runtime/probes/os_timer/compile.errors
? runtime/probes/scf/.built-in.o.cmd
? runtime/probes/scf/.scf.o.d
? runtime/probes/scf/.tmp_versions
? runtime/probes/scf/Makefile
? runtime/probes/scf/compile.errors
? runtime/probes/test4/.built-in.o.cmd
? runtime/probes/test4/.test4.o.d
? runtime/probes/test4/.tmp_versions
? runtime/probes/test4/Makefile
? runtime/probes/test4/compile.errors
Index: runtime/perf.c
===================================================================
RCS file: runtime/perf.c
diff -N runtime/perf.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ runtime/perf.c 13 Jul 2006 22:13:19 -0000
@@ -0,0 +1,132 @@
+/* -*- linux-c -*-
+ * Perf Functions
+ * Copyright (C) 2006 Red Hat Inc.
+ *
+ * This file is part of systemtap, and is free software. You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#ifndef _PERF_C_
+#define _PERF_C_
+
+#include <linux/perfmon.h>
+
+#include "perf.h"
+
+/** @file perf.c
+ * @brief Implements performance monitoring hardware support
+ */
+
+/* TODO fix so this works on SMP machines
+ * Need to do context load, register setup, and start on each processor
+ *
+ * Similarly need to stop and unload on each processor
+ */
+
+/* TODO make this work with sampling. There needs to be a helper thread
+ * handling the sampling. */
+
+
+/* Write the control registers first, then the data registers; returns the
+ * first nonzero status from pfmk_write_pmcs()/pfmk_write_pmds(), else 0. */
+static int _stp_pfm_register_setup(void *desc,
+		struct pfarg_pmc pmc[], int pmc_count,
+		struct pfarg_pmd pmd[], int pmd_count)
+{
+	int status = pfmk_write_pmcs(desc, pmc, pmc_count);
+
+	if (status == 0)
+		status = pfmk_write_pmds(desc, pmd, pmd_count);
+	return status;
+}
+
+static struct completion c; /* handed to pfmk_create_context() */
+static struct pfarg_load load_args; /* handed to pfmk_load_context(); never filled in here */
+static struct pfarg_start start_args; /* handed to pfmk_start(); never filled in here */
+
+/** Configures and starts the performance monitoring hardware.
+ * Creates a perfmon context, programs the control and data registers,
+ * loads the context and starts counting. On failure the steps already
+ * completed are undone and *desc is set to NULL.
+ * @param desc pointer to void *, receives the handle used by other routines
+ * @param context pointer to context information
+ * @param pmc pointer to array describing control register setup
+ * @param pmc_count number of entries in pmc
+ * @param pmd pointer to array describing data register setup
+ * @param pmd_count number of entries in pmd
+ * @returns an int, 0 if no errors encountered during setup
+ */
+int _stp_perfmon_setup(void **desc,
+		       struct pfarg_ctx *context,
+		       struct pfarg_pmc pmc[], int pmc_count,
+		       struct pfarg_pmd pmd[], int pmd_count)
+{
+	int status;
+
+	/* step 1: create a context */
+	status = pfmk_create_context(context, NULL, 0, &c, desc, NULL);
+	if (status != 0)
+		goto fail_create;
+
+	/* step 2: program the counters */
+	status = _stp_pfm_register_setup(*desc, pmc, pmc_count, pmd, pmd_count);
+	if (status != 0)
+		goto fail_close;
+
+	/* step 3: load the context and start measuring */
+	status = pfmk_load_context(*desc, &load_args);
+	if (status != 0) {
+		printk("pfmk_load_context error\n");
+		goto fail_close;
+	}
+	status = pfmk_start(*desc, &start_args);
+	if (status == 0)
+		return 0;
+	printk("pfmk_start error\n");
+	pfmk_unload_context(*desc);	/* undo the load */
+fail_close:
+	pfmk_close(*desc);
+fail_create:
+	*desc = NULL;
+	return status;
+}
+
+/** Shuts down the performance monitoring hardware.
+ * All teardown steps run even if one fails; a NULL desc is a no-op.
+ * @param desc unique pointer to describe configuration
+ * @returns an int, 0 on success, otherwise the first error encountered
+ */
+int _stp_perfmon_shutdown(void *desc)
+{
+	int err, rc;
+	if (desc == NULL) return 0;	/* setup failed: nothing to undo */
+	err = pfmk_stop(desc);	/* stop the counters */
+	rc = pfmk_unload_context(desc);	/* still attempted if stop failed */
+	if (!err) err = rc;
+	rc = pfmk_close(desc);	/* always close so the context is not leaked */
+	return err ? err : rc;
+}
+
+/** Reads the performance counter
+ * @param desc unique pointer to describe configuration
+ * @param counter number of the data register (PMD) to read
+ * @returns an int64, raw value of counter; 0 if desc is NULL or read fails
+ */
+int64_t _stp_perfmon_read(void *desc, int counter)
+{
+	/* Zero-fill so no uninitialized stack data is handed to perfmon
+	 * or returned to the caller on the error paths. */
+	struct pfarg_pmd storage = { .reg_set = 0, .reg_num = counter };
+	if (desc == NULL)
+		return 0;
+	if (pfmk_read_pmds(desc, &storage, 1)) {
+		printk("pfm_read_pmds error\n");
+		return 0;
+	}
+	return storage.reg_value;
+}
+
+#endif /* _PERF_C_ */
+
Index: runtime/perf.h
===================================================================
RCS file: runtime/perf.h
diff -N runtime/perf.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ runtime/perf.h 13 Jul 2006 22:13:19 -0000
@@ -0,0 +1,27 @@
+/* -*- linux-c -*-
+ * Perf Header File
+ * Copyright (C) 2006 Red Hat Inc.
+ *
+ * This file is part of systemtap, and is free software. You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#ifndef _PERF_H_
+#define _PERF_H_
+
+/** @file perf.h
+ * @brief Header file for performance monitoring hardware support
+ */
+/* Creates, programs, loads and starts a perfmon context; 0 on success. */
+int _stp_perfmon_setup(void **desc,
+ struct pfarg_ctx *context,
+ struct pfarg_pmc pmc[], int pmc_count,
+ struct pfarg_pmd pmd[], int pmd_count);
+/* Stops the counters, then unloads and closes the context. */
+int _stp_perfmon_shutdown(void *desc);
+/* Returns the raw value of data register `counter`. */
+int64_t _stp_perfmon_read(void *desc, int counter);
+
+#endif /* _PERF_H_ */
Index: runtime/runtime.h
===================================================================
RCS file: /cvs/systemtap/src/runtime/runtime.h,v
retrieving revision 1.28
diff -u -r1.28 runtime.h
--- runtime/runtime.h 28 Nov 2005 22:08:39 -0000 1.28
+++ runtime/runtime.h 13 Jul 2006 22:13:19 -0000
@@ -64,6 +64,7 @@
#include "copy.c"
#include "sym.h"
#include "alloc.c"
+#include "perf.c"
/************* Module Stuff ********************/
/* stap -g p1x.stp
Make use of guru mode to check that the runtime functions are in place
This code only works on AMD64 processors.
*/
%{
/* Perfmon state shared by the embedded-C functions of this script. */
static struct pfarg_ctx context;
static void *desc;

/* set things up for AMD64
 * NOTE(review): the bit names appear to follow the AMD64 PerfEvtSel
 * fields (USR, OS, E, PC, INT, EN, INV) -- confirm against the AMD manual. */
#define USR_BIT (1 << 16)
#define OS_BIT  (1 << 17)
#define E_BIT   (1 << 18)
#define PC_BIT  (1 << 19)
#define INT_BIT (1 << 20)
#define EN_BIT  (1 << 22)
#define INV_BIT (1 << 23)

/* Data registers: a single entry, register 0, initial value 0. */
#define NUM_PMD 1
static struct pfarg_pmd pmd[] = {
	{ .reg_num = 0, .reg_value = 0 }
};
static int num_pfm_pmd = NUM_PMD;

/* Control registers: a single entry, register 0.
 * NOTE(review): 0x76 is presumably the event-select code -- confirm. */
#define NUM_PMC 1
static struct pfarg_pmc pmc[] = {
	{
		.reg_num = 0,
		.reg_value = (0x76 | USR_BIT | OS_BIT | EN_BIT | INT_BIT)
	}
};
static int num_pfm_pmc = NUM_PMC;
%}
/* Sets up a system-wide perfmon context and starts the counter.
 * Returns 0 on success, otherwise the status from _stp_perfmon_setup(). */
function cpu_pfm_init:long ()
%{
	int err = 0;

	/* set up context information */
	/* only does system-wide contexts */
	context.ctx_flags |= PFM_FL_SYSTEM_WIDE;

	err = _stp_perfmon_setup(&desc, &context,
				 pmc, num_pfm_pmc,
				 pmd, num_pfm_pmd);
	printk("err = %d, desc = 0x%p\n", err, desc);
	if (err) {
		printk("unable to set up counters\n");
	}

	/* The function is declared :long, so hand the status back to the
	 * script instead of leaving the return value unset. */
	THIS->__retvalue = err;
%}
/* Returns the raw value _stp_perfmon_read() reports for register `reg`. */
function cpu_pfm_getreg:long (reg:long)
%{
	int64_t raw = _stp_perfmon_read(desc, THIS->reg);

	THIS->__retvalue = raw;
%}
/* Shuts the perfmon context down, if one was set up.
 * Returns 0 on success or when there was nothing to shut down, otherwise
 * the status from _stp_perfmon_shutdown(). */
function cpu_pfm_cleanup:long ()
%{
	int err = 0;

	/* desc stays NULL when setup failed or never ran; do not pass a
	 * NULL handle down to the perfmon calls. */
	if (desc != NULL) {
		err = _stp_perfmon_shutdown(desc);
		if (err) printk("_stp_pfmk_shutdown error\n");
		/* Drop the handle so later reads see NULL rather than a
		 * context that has been through shutdown. */
		desc = NULL;
	}
	THIS->__retvalue = err;
%}
/* Start the counter when the script starts. */
probe begin
{
	cpu_pfm_init();
}

/* At script exit, print register 0 and then shut the counter down. */
probe end
{
	printf("pmd = %d\n", cpu_pfm_getreg(0));
	cpu_pfm_cleanup();
}
/* stap -g p2x.stp
Quick check to see how expensive the reading of the perfmon hw is.
This code only works on AMD64 processors.
*/
%{
/* Perfmon state shared by the embedded-C functions of this script. */
static struct pfarg_ctx context;
static void *desc;

/* set things up for AMD64
 * NOTE(review): the bit names appear to follow the AMD64 PerfEvtSel
 * fields (USR, OS, E, PC, INT, EN, INV) -- confirm against the AMD manual. */
#define USR_BIT (1 << 16)
#define OS_BIT  (1 << 17)
#define E_BIT   (1 << 18)
#define PC_BIT  (1 << 19)
#define INT_BIT (1 << 20)
#define EN_BIT  (1 << 22)
#define INV_BIT (1 << 23)

/* Data registers: a single entry, register 0, initial value 0. */
#define NUM_PMD 1
static struct pfarg_pmd pmd[] = {
	{ .reg_num = 0, .reg_value = 0 }
};
static int num_pfm_pmd = NUM_PMD;

/* Control registers: a single entry, register 0.
 * NOTE(review): 0x76 is presumably the event-select code -- confirm. */
#define NUM_PMC 1
static struct pfarg_pmc pmc[] = {
	{
		.reg_num = 0,
		.reg_value = (0x76 | USR_BIT | OS_BIT | EN_BIT | INT_BIT)
	}
};
static int num_pfm_pmc = NUM_PMC;
%}
/* Sets up a system-wide perfmon context and starts the counter.
 * Returns 0 on success, otherwise the status from _stp_perfmon_setup(). */
function cpu_pfm_init:long ()
%{
	int err = 0;

	/* set up context information */
	/* only does system-wide contexts */
	context.ctx_flags |= PFM_FL_SYSTEM_WIDE;

	err = _stp_perfmon_setup(&desc, &context,
				 pmc, num_pfm_pmc,
				 pmd, num_pfm_pmd);
	printk("err = %d, desc = 0x%p\n", err, desc);
	if (err) {
		printk("unable to set up counters\n");
	}

	/* The function is declared :long, so hand the status back to the
	 * script instead of leaving the return value unset. */
	THIS->__retvalue = err;
%}
/* Returns the raw value _stp_perfmon_read() reports for register `reg`. */
function cpu_pfm_getreg:long (reg:long)
%{
	int64_t raw = _stp_perfmon_read(desc, THIS->reg);

	THIS->__retvalue = raw;
%}
/* Shuts the perfmon context down, if one was set up.
 * Returns 0 on success or when there was nothing to shut down, otherwise
 * the status from _stp_perfmon_shutdown(). */
function cpu_pfm_cleanup:long ()
%{
	int err = 0;

	/* desc stays NULL when setup failed or never ran; do not pass a
	 * NULL handle down to the perfmon calls. */
	if (desc != NULL) {
		err = _stp_perfmon_shutdown(desc);
		if (err) printk("_stp_pfmk_shutdown error\n");
		/* Drop the handle so later reads see NULL rather than a
		 * context that has been through shutdown. */
		desc = NULL;
	}
	THIS->__retvalue = err;
%}
/* Two back-to-back readings of register 0, taken from script code. */
global first, second

/* Set up, read twice, print the difference, and tear down again. */
probe begin
{
	cpu_pfm_init();

	first = cpu_pfm_getreg(0);
	second = cpu_pfm_getreg(0);
	printf("interval = %d\n", second - first);

	cpu_pfm_cleanup();
}
/* stap -g p3x.stp
Quick check to see how expensive the reading of the perfmon hw is
from the C code. Use it to see how much overhead is in the stap-generated
code in p2x.stp.
This code only works on AMD64 processors.
*/
%{
/* Perfmon state shared by the embedded-C functions of this script. */
static struct pfarg_ctx context;
static void *desc;

/* set things up for AMD64
 * NOTE(review): the bit names appear to follow the AMD64 PerfEvtSel
 * fields (USR, OS, E, PC, INT, EN, INV) -- confirm against the AMD manual. */
#define USR_BIT (1 << 16)
#define OS_BIT  (1 << 17)
#define E_BIT   (1 << 18)
#define PC_BIT  (1 << 19)
#define INT_BIT (1 << 20)
#define EN_BIT  (1 << 22)
#define INV_BIT (1 << 23)

/* Data registers: a single entry, register 0, initial value 0. */
#define NUM_PMD 1
static struct pfarg_pmd pmd[] = {
	{ .reg_num = 0, .reg_value = 0 }
};
static int num_pfm_pmd = NUM_PMD;

/* Control registers: a single entry, register 0.
 * NOTE(review): 0x76 is presumably the event-select code -- confirm. */
#define NUM_PMC 1
static struct pfarg_pmc pmc[] = {
	{
		.reg_num = 0,
		.reg_value = (0x76 | USR_BIT | OS_BIT | EN_BIT | INT_BIT)
	}
};
static int num_pfm_pmc = NUM_PMC;
%}
/* Sets up a system-wide perfmon context and starts the counter.
 * Returns 0 on success, otherwise the status from _stp_perfmon_setup(). */
function cpu_pfm_init:long ()
%{
	int err = 0;

	/* set up context information */
	/* only does system-wide contexts */
	context.ctx_flags |= PFM_FL_SYSTEM_WIDE;

	err = _stp_perfmon_setup(&desc, &context,
				 pmc, num_pfm_pmc,
				 pmd, num_pfm_pmd);
	printk("err = %d, desc = 0x%p\n", err, desc);
	if (err) {
		printk("unable to set up counters\n");
	}

	/* The function is declared :long, so hand the status back to the
	 * script instead of leaving the return value unset. */
	THIS->__retvalue = err;
%}
/* Reads register 0 twice in a row from C and returns the second reading
 * minus the first. */
function cpu_pfm_getdiff:long ()
%{
	int64_t before = _stp_perfmon_read(desc, 0);
	int64_t after = _stp_perfmon_read(desc, 0);

	THIS->__retvalue = after - before;
%}
/* Shuts the perfmon context down, if one was set up.
 * Returns 0 on success or when there was nothing to shut down, otherwise
 * the status from _stp_perfmon_shutdown(). */
function cpu_pfm_cleanup:long ()
%{
	int err = 0;

	/* desc stays NULL when setup failed or never ran; do not pass a
	 * NULL handle down to the perfmon calls. */
	if (desc != NULL) {
		err = _stp_perfmon_shutdown(desc);
		if (err) printk("_stp_pfmk_shutdown error\n");
		/* Drop the handle so later reads see NULL rather than a
		 * context that has been through shutdown. */
		desc = NULL;
	}
	THIS->__retvalue = err;
%}
/* Set up, print the difference measured inside the C helper, tear down. */
probe begin
{
	cpu_pfm_init();

	delta = cpu_pfm_getdiff();
	printf("interval = %d\n", delta);

	cpu_pfm_cleanup();
}