#include "config.h"
+#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include "alloc.h"
#include "bench.h"
#include "bits.h"
+#include "dstr.h"
#include "linreg.h"
#include "macros.h"
/*----- Data structures ---------------------------------------------------*/
+enum { CLK, CY, NTIMER };
+
struct timer {
struct bench_timer _t;
- const struct timer_ops *clkops, *cyops; /* time and cycle measurements */
+ const struct timer_ops *ops[NTIMER]; /* subtimers for clock and cycles */
union { int fd; } u_cy; /* state for cycle measurement */
};
struct timer_ops {
+ const char *name; /* timer name */
+ unsigned f; /* flags */
+#define TF_SECRET 1u /* don't try this automatically */
+ int (*init)(struct timer */*t*/); /* initialization function */
void (*now)(struct bench_time *t_out, struct timer *t); /* read current */
void (*teardown)(struct timer *t); /* release held resources */
};
#undef FLOATK64
}
-/*----- The null clock ----------------------------------------------------*/
+/*----- The null timer ----------------------------------------------------*/
-/* This is a cycle counter which does nothing, in case we don't have any
- * better ideas.
+/* This is a timer which does nothing, in case we don't have any better
+ * ideas.
*/
+static int null_init(struct timer *t) { return (0); }
static void null_now(struct bench_time *t_out, struct timer *t) { ; }
static void null_teardown(struct timer *t) { ; }
-static const struct timer_ops null_ops = { null_now, null_teardown };
-static int null_cyinit(struct timer *t)
- { t->cyops = &null_ops; return (0); }
+static const struct timer_ops null_ops =
+ { "null", 0, null_init, null_now, null_teardown };
+#define NULL_ENT &null_ops,
+
+/*----- The broken clock --------------------------------------------------*/
+
+/* This is a cycle counter which does nothing, in case we don't have any
+ * better ideas.
+ */
-#define NULL_CYENT { "null", null_cyinit },
+static int broken_init(struct timer *t) { return (-1); }
+
+static const struct timer_ops broken_ops =
+ { "broken", TF_SECRET, broken_init, null_now, null_teardown };
+#define BROKEN_ENT &broken_ops,
/*----- Linux performance counters ----------------------------------------*/
static void perfevent_teardown(struct timer *t)
{ close(t->u_cy.fd); }
-static const struct timer_ops perfevent_ops =
- { perfevent_now, perfevent_teardown };
-
static int perfevent_init(struct timer *t)
{
struct perf_event_attr attr = { 0 };
tm.f = 0; perfevent_now(&tm, t);
if (!(tm.f&BTF_CYOK)) { close(t->u_cy.fd); return (-1); }
- t->cyops = &perfevent_ops; return (0);
+ return (0);
}
-# define PERFEVENT_CYENT { "linux-perf-event", perfevent_init },
+
+static const struct timer_ops perfevent_ops =
+ { "linux-perf-hw-cycles", 0,
+ perfevent_init, perfevent_now, perfevent_teardown };
+
+# define PERFEVENT_CYENT &perfevent_ops,
#else
# define PERFEVENT_CYENT
#endif
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define EFLAGS_ID (1u << 21)
-#define CPUID_1D_TSC (1u << 4)
-
-static uint32 set_flags(unsigned long m, unsigned long x)
-{
- unsigned long r;
-
-#ifdef __x86_64__
-# define TMP "%%rcx"
-#else
-# define TMP "%%ecx"
-#endif
-
- __asm__ ("pushf\n\t"
- "pop %0\n\t"
- "mov %0, " TMP "\n\t"
- "and %1, %0\n\t"
- "xor %2, %0\n\t"
- "push %0\n\t"
- "popf\n\t"
- "pushf\n\t"
- "pop %0\n\t"
- "push " TMP "\n\t"
- "popf"
- : "=r"(r)
- : "g"(m), "g"(x)
- : "ecx");
- return (r);
-}
+#include <cpuid.h>
-struct cpuid { uint32 a, b, c, d; };
-
-static void cpuid(struct cpuid *info_out, uint32 a, uint32 c)
-{
- __asm__ ("movl %1, %%eax\n\t"
- "movl %2, %%ecx\n\t"
- "cpuid\n\t"
- "movl %%eax, 0(%0)\n\t"
- "movl %%ebx, 4(%0)\n\t"
- "movl %%ecx, 8(%0)\n\t"
- "movl %%edx, 12(%0)\n\t"
- : /* no outputs */
- : "r"(info_out), "g"(a), "g"(c)
- : "eax", "ebx", "ecx", "edx", "cc");
-}
+#define CPUID_1D_TSC (1u << 4)
static void x86rdtsc_now(struct bench_time *t_out, struct timer *t)
-{
- uint32 lo, hi;
-
- __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
- SET64(t_out->cy, hi, lo); t_out->f |= BTF_CYOK;
-}
-
-static const struct timer_ops x86rdtsc_ops =
- { x86rdtsc_now, null_teardown };
+ { t_out->cy.i = __builtin_ia32_rdtsc(); t_out->f |= BTF_CYOK; }
static int x86rdtsc_init(struct timer *t)
{
- struct cpuid info;
-
- if ((set_flags(~EFLAGS_ID, 0)&EFLAGS_ID) ||
- !(set_flags(~EFLAGS_ID, EFLAGS_ID)&EFLAGS_ID))
- { debug("no `cpuid' instruction"); return (-1); }
- cpuid(&info, 0, 0);
- if (info.a < 1) { debug("no `cpuid' leaf 1"); return (-1); }
- cpuid(&info, 1, 0);
- if (!(info.d&CPUID_1D_TSC))
+ unsigned a, b, c, d;
+
+ if (!__get_cpuid(1, &a, &b, &c, &d) || !(d&CPUID_1D_TSC))
{ debug("no `rdtsc' instrunction"); return (-1); }
- t->cyops = &x86rdtsc_ops; return (0);
+ return (0);
}
-# define X86RDTSC_CYENT { "x86-rdtsc", x86rdtsc_init },
+static const struct timer_ops x86rdtsc_ops =
+ { "x86-rdtsc", 0, x86rdtsc_init, x86rdtsc_now, null_teardown };
+
+# define X86RDTSC_CYENT &x86rdtsc_ops,
#else
-# define X86RDTWC_CYENT
+# define X86RDTSC_CYENT
#endif
/*----- POSIX `clock_gettime' ---------------------------------------------*/
t_out->f |= BTF_TIMEOK;
}
-static const struct timer_ops gettime_ops = { gettime_now, null_teardown };
-
static int gettime_init(struct timer *t)
{
struct bench_time tm;
tm.f = 0; gettime_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1);
- t->clkops = &gettime_ops; return (0);
+ return (0);
}
-# define GETTIME_CLKENT { "posix-clock_gettime", gettime_init },
+static const struct timer_ops gettime_ops =
+ { "posix-thread-cputime", 0, gettime_init, gettime_now, null_teardown };
+
+# define GETTIME_CLKENT &gettime_ops,
#else
# define GETTIME_CLKENT
#endif
ASSIGN64(t_out->s, s); t_out->ns = ns; t_out->f |= BTF_TIMEOK;
}
-static const struct timer_ops clock_ops = { clock_now, null_teardown };
-
static int clock_init(struct timer *t)
{
struct bench_time tm;
tm.f = 0; clock_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1);
- t->clkops = &clock_ops; return (0);
+ return (0);
}
-#define CLOCK_CLKENT { "clock", clock_init },
+static const struct timer_ops clock_ops =
+ { "stdc-clock", 0, clock_init, clock_now, null_teardown };
+
+#define CLOCK_CLKENT &clock_ops,
/*----- Timing setup ------------------------------------------------------*/
/* Tables of timing sources. */
-static const struct timerent {
- const char *name;
- int (*init)(struct timer */*t*/);
-}
- clktab[] = { GETTIME_CLKENT CLOCK_CLKENT { 0, 0 } },
- cytab[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_CYENT { 0, 0 } };
+static const struct timer_ops
+ *const clktab[] = { GETTIME_CLKENT CLOCK_CLKENT BROKEN_ENT 0 },
+ *const cytab[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_ENT BROKEN_ENT 0 };
+
+static const struct timertab {
+ const char *what;
+ const char *env;
+ const struct timer_ops *const *opstab;
+} timertab[] = {
+ { "clock", "MLIB_BENCH_CLKTIMER", clktab },
+ { "cycle", "MLIB_BENCH_CYCLETIMER", cytab }
+};
/* --- @find_timer@ --- *
*
* Arguments: @const char *name@ = timer name
* @size_t sz@ = length of name
- * @const struct timerent *timers@ = table to search
- * @const char *what@ = adjective describing table
+ * @unsigned tm@ = which subtimer we're looking for
*
* Returns: The table entry matching the given name, or null if there
* isn't one.
*/
-static const struct timerent *find_timer_n(const char *name, size_t sz,
- const struct timerent *timers,
- const char *what)
+static const struct timer_ops *find_timer(const char *name, size_t sz,
+ unsigned tm)
{
- while (timers->name) {
- if (strlen(timers->name) == sz && MEMCMP(name, ==, timers->name, sz))
- return (timers);
- timers++;
+ const struct timer_ops *const *tt;
+
+ for (tt = timertab[tm].opstab; *tt; tt++) {
+ if (strlen((*tt)->name) == sz &&
+ MEMCMP(name, ==, (*tt)->name, sz))
+ return (*tt);
}
- debug("%s timer `%.*s' not found", what, (int)sz, name); return (0);
+ debug("%s timer `%.*s' not found",
+ timertab[tm].what, (int)sz, name); return (0);
}
/* --- @try_timer@ --- *
*
* Arguments: @struct timer *t@ = timer structure
- * @const struct timerent *timer@ = timer table entry
- * @const char *what@ = adjective describing table
+ * @const struct timer_ops *ops@ = timer ops
+ * @unsigned tm@ = which subtimer we're setting
*
- * Returns: Zero on success, @-1@ if timer failed.
+ * Returns: Zero on success, %$-1$% if timer failed.
*
* Use: Tries to initialize the timer @t@, reporting a debug message
* if it worked.
*/
static int try_timer(struct timer *t,
- const struct timerent *timer, const char *what)
+ const struct timer_ops *ops, unsigned tm)
{
- if (timer->init(t)) return (-1);
- debug("selected %s timer `%s'", what, timer->name); return (0);
+ if (ops->init(t)) return (-1);
+ debug("selected %s timer `%s'", timertab[tm].what, ops->name);
+ t->ops[tm] = ops; return (0);
}
/* --- @select_timer@ --- *
*
* Arguments: @struct timer *t@ = timer structure
- * @const struct timerent *timer@ = timer table
- * @const char *varname@ = environment variable to consult
- * @const char *what@ = adjective describing table
+ * @unsigned tm@ = which subtimer we're setting
+ * @const char *config@, @size_t sz@ = config string
*
- * Returns: Zero on success, @-1@ if timer failed.
+ * Returns: Zero on success, %$-1$% if timer failed.
*
* Use: Select a timer from the table. If the environment variable
* is set, then parse a comma-separated list of timer names and
* the timers in the table in order.
*/
-static int select_timer(struct timer *t, const struct timerent *timers,
- const char *varname, const char *what)
+static int select_timer(struct timer *t, unsigned tm,
+ const char *config, size_t sz)
{
- const char *p; size_t n;
- const struct timerent *timer;
+ const char *p, *l;
+ const struct timer_ops *ops, *const *tt;
- p = getenv(varname);
- if (!p) {
- while (timers->name)
- if (!try_timer(t, timers++, what)) return (0);
+ if (!config) {
+ for (tt = timertab[tm].opstab; *tt; tt++)
+ if (!((*tt)->f&TF_SECRET) && !try_timer(t, *tt, tm)) return (0);
} else {
+ l = config + sz;
for (;;) {
- n = strcspn(p, ",");
- timer = find_timer_n(p, n, timers, what);
- if (timer && !try_timer(t, timer, what)) return (0);
- if (!p[n]) break;
- p += n + 1;
+ p = memchr(config, ',', l - config); if (!p) p = l;
+ ops = find_timer(config, p - config, tm);
+ if (ops && !try_timer(t, ops, tm)) return (0);
+ if (p >= l) break;
+ config = p + 1;
}
}
- debug("no suitable %s timer found", what); return (-1);
+ debug("no suitable %s timer found", timertab[tm].what); return (-1);
}
/* Bench timer operations. */
+static void timer_describe(struct bench_timer *tm, dstr *d)
+{
+ struct timer *t = (struct timer *)tm;
+ unsigned i;
+
+ dstr_puts(d, "builtin: ");
+ for (i = 0; i < NTIMER; i++) {
+ if (i) dstr_puts(d, ", ");
+ dstr_putf(d, "%s = %s", timertab[i].what, t->ops[i]->name);
+ }
+}
+
static void timer_now(struct bench_timer *tm, struct bench_time *t_out)
{
struct timer *t = (struct timer *)tm;
+ unsigned i;
- t->clkops->now(t_out, t);
- t->cyops->now(t_out, t);
+ for (i = 0; i < NTIMER; i++) t->ops[i]->now(t_out, t);
}
+
static void timer_destroy(struct bench_timer *tm)
{
struct timer *t = (struct timer *)tm;
+ unsigned i;
if (!t) return;
- if (t->clkops) t->clkops->teardown(t);
- if (t->cyops) t->cyops->teardown(t);
+ for (i = 0; i < NTIMER; i++)
+ if (t->ops[i]) t->ops[i]->teardown(t);
xfree(t);
}
-static const struct bench_timerops timer_ops = { timer_now, timer_destroy };
+static const struct bench_timerops timer_ops =
+ { timer_describe, timer_now, timer_destroy };
/* --- @bench_createtimer@ --- *
*
- * Arguments: ---
+ * Arguments: @const char *config@ = timer configuration string
*
* Returns: A freshly constructed standard timer object.
*
* Use: Allocate a timer. Dispose of it by calling
* @tm->ops->destroy(tm)@ when you're done.
+ *
+ * Applications should not set configuration strings except as
+ * established by user action, e.g., from a command-line option,
+ * environment variable, or configuration file.
*/
-struct bench_timer *bench_createtimer(void)
+struct bench_timer *bench_createtimer(const char *config)
{
struct timer *t = 0;
struct bench_timer *ret = 0;
+ struct { const char *p; size_t sz; } tmconf[NTIMER] = { 0 };
+ const struct timer_ops *const *tt;
+ const char *p, *l; size_t n, nn;
+ unsigned i;
- t = xmalloc(sizeof(*t)); t->cyops = 0; t->clkops = 0;
- if (select_timer(t, clktab, "MLIB_BENCH_CLKTIMER", "clock")) goto end;
- if (select_timer(t, cytab, "MLIB_BENCH_CYCLETIMER", "cycle")) goto end;
+ /* Parse the configuration string. */
+ if (config) {
+
+ /* The first thing to do is find the end of the string. */
+ l = config + strlen(config);
+
+ for (;;) {
+ /* Process the whitespace-sparated words of the string one by one. */
+
+ /* Skip over any initial whitespace. If we hit the end of the string
+ * then we're done.
+ */
+ for (;;)
+ if (config >= l) goto done_config;
+ else if (!ISSPACE(*config)) break;
+ else config++;
+
+ /* There's definitely a word here. Find the end of it. */
+ for (p = config; p < l && !ISSPACE(*p); p++);
+ nn = p - config;
+
+ /* Try various simple keywords. */
+#define MATCHP(lit) (nn == sizeof(lit) - 1 && MEMCMP(config, ==, lit, nn))
+
+ if (MATCHP("list")) {
+ /* The `list' keyword requests lists of the available timer
+ * implementations.
+ */
+
+ for (i = 0; i < NTIMER; i++) {
+ printf("%s timers:", timertab[i].what);
+ for (tt = timertab[i].opstab; *tt; tt++)
+ if (!((*tt)->f)&TF_SECRET) printf(" %s", (*tt)->name);
+ putchar('\n');
+ }
+ goto next_config;
+ }
+
+#undef MATCHP
+
+ /* Otherwise it's an assignment, setting a subtimer list. */
+ p = memchr(config, '=', nn);
+ if (!p)
+ n = nn;
+ else {
+ n = p - config;
+ for (i = 0; i < NTIMER; i++)
+ if (STRNCMP(config, ==, timertab[i].what, n) &&
+ !timertab[i].what[n]) {
+ if (tmconf[i].p)
+ debug("duplicate %s timer list", timertab[i].what);
+ tmconf[i].p = config + n + 1; tmconf[i].sz = nn - n - 1;
+ goto next_config;
+ }
+ }
+ debug("unrecognized config keyword `%.*s'", (int)n, config);
+
+ /* Move on to the next word. */
+ next_config:
+ config += nn;
+ }
+ done_config:;
+ }
+
+ /* Override these settings from the environment. */
+ for (i = 0; i < NTIMER; i++) {
+ p = getenv(timertab[i].env);
+ if (p) { tmconf[i].p = p; tmconf[i].sz = strlen(p); }
+ }
+
+ /* All seems well. Allocate the timer object. */
+ t = xmalloc(sizeof(*t));
+ for (i = 0; i < NTIMER; i++) t->ops[i] = 0;
+
+ /* Try to set up the subtimers. */
+ for (i = 0; i < NTIMER; i++)
+ if (select_timer(t, i, tmconf[i].p, tmconf[i].sz)) goto end;
+
+ /* All is done. */
t->_t.ops = &timer_ops; ret = &t->_t; t = 0;
end:
if (t) timer_destroy(&t->_t);
/* --- @bench_init@ --- *
*
* Arguments: @struct bench_state *b@ = bench state to initialize
- * @struct bench_timer *tm@ = timer to attach
+ * @struct bench_timer *tm@ = timer to attach, or null
*
- * Returns: ---
+ * Returns: Zero on success, %$-1$% on failure.
*
- * Use: Initialize the benchmark state. It still needs to be
- * calibrated (use @bench_calibrate@) before it can be used, but
- * this will be done automatically by @bench_measure@ if it's
- * not done by hand earlier. The timer is now owned by the
- * benchmark state and will be destroyed by @bench_destroy@.
+ * Use: Initialize the benchmark state. On success, the timer state
+ * still needs to be calibrated (use @bench_calibrate@) before
+ * it can be used, but this will be done automatically by
+ * @bench_measure@ if it's not done by hand earlier. The timer
+ * is now owned by the benchmark state and will be destroyed by
+ * @bench_destroy@.
+ *
+ * The only reason for failure is if @tm@ was null on entry,
+ * and automatic construction of a timer failed. The state is
+ * safe to discard, but calling @bench_destroy@ is safe too.
*/
-void bench_init(struct bench_state *b, struct bench_timer *tm)
- { b->tm = tm; b->target_s = 1.0; b->f = 0; }
+int bench_init(struct bench_state *b, struct bench_timer *tm)
+{
+ int rc;
+
+ b->tm = 0;
+
+ if (!tm) {
+ tm = bench_createtimer(0);
+ if (!tm) { rc = -1; goto end; }
+ }
+
+ b->tm = tm; b->target_s = 1.0; b->f = 0; rc = 0;
+end:
+ return (rc);
+}
/* --- @bench_destroy@ --- *
*
*/
void bench_destroy(struct bench_state *b)
- { b->tm->ops->destroy(b->tm); }
+ { if (b->tm) { b->tm->ops->destroy(b->tm); b->tm = 0; } }
/* --- @do_nothing@ --- *
*
*
* Arguments: @struct bench_state *b@ = bench state
*
- * Returns: Zero on success, @-1@ if calibration failed.
+ * Returns: Zero on success, %$-1$% if calibration failed.
*
* Use: Calibrate the benchmark state, so that it can be used to
* measure performance reasonably accurately.
*/
+#define T_CLB 0.0625 /* calibration time limit */
+
int bench_calibrate(struct bench_state *b)
{
struct linreg lr_clk = LINREG_INIT, lr_cy = LINREG_INIT;
struct bench_timer *tm = b->tm;
struct bench_time t0, t1;
struct bench_timing delta;
+ double r;
bench_fn *fn = LAUNDER(&do_nothing);
unsigned f = BTF_ANY;
int rc;
*/
/* If we've already calibrated then there's nothing to do. */
- if (b->f&BTF_ANY) return (0);
+ if (b->f&BTF_CLB) return (b->f&BTF_ANY ? 0 : -1);
/* Exercise the inner loop a few times to educate the branch predictor. */
for (i = 0; i < 10; i++)
}
/* If we're done then stop. */
- if (delta.t >= b->target_s/20.0) break;
+ if (delta.t >= T_CLB) break;
if (n >= ULONG_MAX - n/3) break;
/* Update the counter and continue. */
/* Now run the linear regression to extract the constant and per-iteration
* overheads.
*/
- linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, 0);
- debug("clock overhead = (%g n + %g) s", b->clk.m, b->clk.c);
+ linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, &r);
+ debug("clock overhead = (%g n + %g) s (r = %g)", b->clk.m, b->clk.c, r);
if (f&BTF_CYOK) {
- linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, 0);
- debug("cycle overhead = (%g n + %g) cy", b->cy.m, b->cy.c);
+ linreg_fit(&lr_cy, &b->cy.m, &b->cy.c, &r);
+ debug("cycle overhead = (%g n + %g) cy (r = %g)", b->cy.m, b->cy.c, r);
}
/* We're done. */
- b->f |= f; rc = 0;
+ rc = 0;
end:
+ b->f |= f | BTF_CLB; /* no point trying again */
return (rc);
}
/* --- @bench_measure@ --- *
*
- * Arguments: @struct bench_timing *t_out@ = where to leave the timing
- * @struct bench_state *b@ = benchmark state
+ * Arguments: @struct bench_state *b@ = benchmark state
+ * @struct bench_timing *t_out@ = where to leave the timing
* @double base@ = number of internal units per call
* @bench_fn *fn@, @void *ctx@ = benchmark function to run
*
- * Returns: Zero on success, @-1@ if timing failed.
+ * Returns: Zero on success, %$-1$% if timing failed.
*
* Use: Measure a function. The function @fn@ is called adaptively
* with an iteration count @n@ set so as to run for
* performs, or the number of bytes it processes per iteration).
*/
-int bench_measure(struct bench_timing *t_out, struct bench_state *b,
+int bench_measure(struct bench_state *b, struct bench_timing *t_out,
double base, bench_fn *fn, void *ctx)
{
struct bench_timer *tm = b->tm;
struct bench_time t0, t1;
unsigned long n, nn;
- /* Make sure the state is calibrated. */
- if (bench_calibrate(b)) return (-1);
+ /* Make sure the state is calibrated and usable. */
+ if (!(b->f&BTF_CLB) && bench_calibrate(b)) return (-1);
+ if (!(b->f&BTF_TIMEOK)) return (-1);
/* Main adaptive measurement loop.
*