This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[v4.1][PATCH] Framework for performance benchmarking of functions
- From: Siddhesh Poyarekar <siddhesh at redhat dot com>
- To: Richard Henderson <rth at twiddle dot net>
- Cc: libc-alpha at sourceware dot org
- Date: Thu, 14 Mar 2013 14:06:01 +0530
- Subject: [v4.1][PATCH] Framework for performance benchmarking of functions
- References: <20130108093115.GA27464@spoyarek.pnq.redhat.com><20130111065846.GC16859@spoyarek.pnq.redhat.com><511CA91C.6000306@redhat.com><20130220101719.GA26842@spoyarek.pnq.redhat.com><51257685.8000707@twiddle.net><20130221052256.GI26842@spoyarek.pnq.redhat.com><5125BA3C.6010501@twiddle.net><20130228121216.GA2358@spoyarek.pnq.redhat.com>
Hi,
I've updated the patch a bit because I found that in some cases the
function calls were eliminated since their return values were not
used. So here's v4.1
Siddhesh
* Makefile.in (bench): New target.
* Rules (bench): Likewise.
(binaries-bench): Generate binaries for functions to
benchmark.
* benchtests/Makefile: New makefile for benchmark tests.
* benchtests/bench-skeleton.c: New skeleton file for benchmark
programs.
* benchtests/exp-inputs: New input file for EXP function.
* benchtests/pow-inputs: New input file for POW function.
* scripts/bench.pl: New script to generate source files for
benchmark programs.
diff --git a/Makefile.in b/Makefile.in
index d73a78f..df75b8f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -3,7 +3,7 @@ srcdir = @srcdir@
# Uncomment the line below if you want to do parallel build.
# PARALLELMFLAGS = -j 4
-.PHONY: all install
+.PHONY: all install bench
all .DEFAULT:
$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
@@ -11,3 +11,6 @@ all .DEFAULT:
install:
LANGUAGE=C LC_ALL=C; export LANGUAGE LC_ALL; \
$(MAKE) -r PARALLELMFLAGS="$(PARALLELMFLAGS)" -C $(srcdir) objdir=`pwd` $@
+
+bench:
+ $(MAKE) -C $(srcdir)/benchtests $(PARALLELMFLAGS) objdir=`pwd` $@
diff --git a/Rules b/Rules
index 301a748..bc5dacd 100644
--- a/Rules
+++ b/Rules
@@ -83,7 +83,7 @@ common-generated += dummy.o dummy.c
# This makes all the auxiliary and test programs.
-.PHONY: others tests
+.PHONY: others tests bench
ifeq ($(multi-arch),no)
tests := $(filter-out $(tests-ifunc), $(tests))
xtests := $(filter-out $(xtests-ifunc), $(xtests))
@@ -188,6 +188,31 @@ $(objpfx)%.out: /dev/null $(objpfx)% # Make it 2nd arg for canned sequence.
$(make-test-out) > $@
endif # tests
+
+# Build and run benchmark programs.
+binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
+
+run-bench = $(test-wrapper-env) \
+ GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \
+ $($*-ENV) $(run-via-rtld-prefix) $${run}
+
+bench: $(binaries-bench)
+ if [ -f $(objpfx)bench.out ]; then \
+ mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
+ fi
+ for run in $^; do \
+ eval $(run-bench) >> $(objpfx)bench.out; \
+ done
+
+$(binaries-bench): %: %.o \
+ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
+ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
+ $(+link)
+
+$(objpfx)bench-%.c: %-inputs bench-skeleton.c
+ $(..)scripts/bench.pl $(patsubst %-inputs,%,$<) \
+ $($*-ITER) $($*-ARGLIST) $($*-RET) > $@
+
.PHONY: distclean realclean subdir_distclean subdir_realclean \
subdir_clean subdir_mostlyclean subdir_testclean
diff --git a/benchtests/Makefile b/benchtests/Makefile
new file mode 100644
index 0000000..74938b9
--- /dev/null
+++ b/benchtests/Makefile
@@ -0,0 +1,59 @@
+# Copyright (C) 2013 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# Makefile for benchmark tests. The only useful target here is `bench`.
+
+# Adding a new function `foo`:
+# ---------------------------
+
+# - Append the function name to the bench variable
+
+# - Define foo-ITER with the number of iterations you want to run. Keep it
+# high enough that the overhead of clock_gettime is only a small fraction of
+# the total run time of the test.
+
+# - Define foo-ARGLIST as a colon separated list of types of the input
+# arguments. Use `void` if function does not take any inputs. Put in quotes
+# if the input argument is a pointer, e.g.:
+
+# free-ARGLIST = "void *"
+
+# - Define foo-RET as the type the function returns. Skip if the function
+# returns void. One could even skip foo-ARGLIST if the function does not
+# take any inputs AND the function returns void.
+
+
+# - Make a file called `foo-inputs` with one input value per line, an input
+# being a comma separated list of arguments to be passed into the function.
+# See pow-inputs for an example.
+
+subdir := benchtests
+bench := exp pow
+
+exp-ITER = 100000
+exp-ARGLIST = double
+exp-RET = double
+LDFLAGS-bench-exp = -lm
+
+pow-ITER = 100000
+pow-ARGLIST = double:double
+pow-RET = double
+LDFLAGS-bench-pow = -lm
+
+include ../Makeconfig
+include ../Rules
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
new file mode 100644
index 0000000..06be376
--- /dev/null
+++ b/benchtests/bench-skeleton.c
@@ -0,0 +1,89 @@
+/* Skeleton for benchmark programs.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <inttypes.h>
+
+/* Benchmark driver.  This file is not compiled on its own: the source
+   that includes it must first define ITER, NUM_SAMPLES, FUNCNAME and
+   BENCH_FUNC (scripts/bench.pl emits those definitions).  Samples are
+   taken with CLOCK_PROCESS_CPUTIME_ID and one summary line is printed
+   on standard output.  */
+int
+main (int argc, char **argv)
+{
+  unsigned long i, j, k;
+  uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
+  struct timespec start, end;
+
+  memset (&start, 0, sizeof (start));
+  memset (&end, 0, sizeof (end));
+
+  /* Measure 1000 times the resolution of the clock.  So for a 1ns resolution
+     clock, we measure 1000 iterations of the function call at a time.
+     Measurements close to the minimum clock resolution won't make much sense,
+     but it's better than having nothing at all.  If the resolution cannot be
+     determined, assume 1ns instead of dividing by zero below.  */
+  unsigned long iters = 1000;
+  if (clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start) == 0
+      && start.tv_nsec > 0)
+    iters = 1000 * start.tv_nsec;
+
+  /* Always time at least one batch; otherwise a small ITER would leave
+     TOTAL at zero and MIN at its initial value.  */
+  unsigned long total_iters = ITER / iters;
+  if (total_iters == 0)
+    total_iters = 1;
+
+  for (i = 0; i < NUM_SAMPLES; i++)
+    {
+      for (j = 0; j < total_iters; j++)
+        {
+          clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
+          for (k = 0; k < iters; k++)
+            BENCH_FUNC (i);
+          clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
+
+          uint64_t cur = (end.tv_nsec - start.tv_nsec
+                          + ((end.tv_sec - start.tv_sec)
+                             * (uint64_t) 1000000000));
+
+          if (cur > max)
+            max = cur;
+
+          if (cur < min)
+            min = cur;
+
+          total += cur;
+        }
+    }
+
+  /* Report the number of calls actually made, which can differ from
+     ITER * NUM_SAMPLES because ITER is rounded to whole batches.  */
+  uint64_t calls = (uint64_t) iters * total_iters * NUM_SAMPLES;
+
+  printf (FUNCNAME ": ITERS:%" PRIu64 ": TOTAL:%" PRIu64 "ns, MAX:%lfns, "
+          "MIN:%lfns, %lf/ms\n", calls, total,
+          max / (double) iters, min / (double) iters,
+          calls * 1e6 / total);
+
+  return 0;
+}
diff --git a/benchtests/exp-inputs b/benchtests/exp-inputs
new file mode 100644
index 0000000..a2086ba
--- /dev/null
+++ b/benchtests/exp-inputs
@@ -0,0 +1 @@
+708.00096423260981737257679924368858
diff --git a/benchtests/pow-inputs b/benchtests/pow-inputs
new file mode 100644
index 0000000..dbb1270
--- /dev/null
+++ b/benchtests/pow-inputs
@@ -0,0 +1 @@
+1.0000000000000020, 1.5
diff --git a/scripts/bench.pl b/scripts/bench.pl
new file mode 100755
index 0000000..bb7f648
--- /dev/null
+++ b/scripts/bench.pl
@@ -0,0 +1,92 @@
+#! /usr/bin/perl -w
+# Copyright (C) 2013 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+
+use strict;
+use warnings;
+
+# Generate a benchmark source file for a given input.
+#
+# Usage: bench.pl <function> <iterations> [parameter types] [return type]
+#
+#   <function>         Name of the function to benchmark.  When it takes
+#                      arguments, one initializer per line is read from
+#                      `<function>-inputs' in the current directory.
+#   <iterations>       Value emitted as the ITER macro.
+#   [parameter types]  Colon separated list of C argument types, or `void'.
+#   [return type]      C return type of the function; defaults to `void'.
+#
+# The generated C source is written to standard output; its last line
+# includes bench-skeleton.c.
+
+if (@ARGV < 2) {
+    die "Usage: bench.pl <function> <iterations> [parameter types] [return type]\n";
+}
+
+my ($func, $iters) = @ARGV;
+my @args = @ARGV >= 3 ? split(':', $ARGV[2]) : ();
+my $ret = @ARGV >= 4 ? $ARGV[3] : "void";
+
+if (@args == 0 || $args[0] eq "void") {
+    print "extern $ret $func (void);\n";
+    print "#define CALL_BENCH_FUNC(j) $func ()\n";
+    print "#define NUM_SAMPLES (1)\n";
+}
+else {
+    my $inputs_file = "${func}-inputs";
+    open(my $inputs, '<', $inputs_file)
+        or die "bench.pl: cannot open $inputs_file: $!\n";
+
+    # Each non-blank line is a comma separated argument list and becomes
+    # one initializer of the `in' array.
+    my @samples;
+    while (my $line = <$inputs>) {
+        chomp $line;
+        next if $line =~ /^\s*$/;
+        push @samples, "{" . $line . "},\n";
+    }
+    close($inputs);
+    die "bench.pl: no inputs found in $inputs_file\n" unless @samples;
+
+    my @fields = map { "$args[$_] arg$_;" } 0 .. $#args;
+    my @actuals = map { "in[j].arg$_" } 0 .. $#args;
+
+    print "extern $ret $func (", join(", ", @args), ");\n";
+    print "#define CALL_BENCH_FUNC(j) $func (", join(", ", @actuals), ")\n";
+    print "struct args { @fields } in[] = {\n";
+    print @samples;
+    print "};\n";
+    print "#define NUM_SAMPLES (sizeof (in) / sizeof (struct args))\n";
+}
+
+# In some cases not storing a return value seems to result in the function call
+# being optimized out.  The qualifier goes after the type so that for a pointer
+# return type it is the variable itself that is volatile, and there is no
+# initializer because no single constant is valid for every return type (the
+# variable has static storage and so starts out as zero anyway).
+my $getret = "";
+if ($ret ne "void") {
+    print "static $ret volatile ret;\n";
+    $getret = "ret = ";
+}
+
+print "#define BENCH_FUNC(j) ({$getret CALL_BENCH_FUNC (j);})\n";
+
+print "#define ITER $iters\n";
+print "#define FUNCNAME \"$func\"\n";
+print "#include \"bench-skeleton.c\"\n";