X-Git-Url: https://git.distorted.org.uk/~mdw/mLib/blobdiff_plain/e63124bc579bfd97cfe2f620ddd84df9f20477d8..67b5031ec6d160b5cae425466a34d1be3b211dd4:/test/bench.c diff --git a/test/bench.c b/test/bench.c index d1a00ea..9e65a6b 100644 --- a/test/bench.c +++ b/test/bench.c @@ -46,19 +46,29 @@ struct timer { struct bench_timer _t; - const struct timer_ops *clkops, *cyops; - union { int fd; } u_cy; + const struct timer_ops *clkops, *cyops; /* time and cycle measurements */ + union { int fd; } u_cy; /* state for cycle measurement */ }; struct timer_ops { - void (*now)(struct bench_time *t_out, struct timer *t); - void (*teardown)(struct timer *t); + void (*now)(struct bench_time *t_out, struct timer *t); /* read current */ + void (*teardown)(struct timer *t); /* release held resources */ }; /*----- Preliminaries -----------------------------------------------------*/ #define NS_PER_S 1000000000 +/* --- @debug@ --- * + * + * Arguments: @const char *fmt@ = format control string + * @...@ = format arguemnts + * + * Returns: --- + * + * Use: Maybe report a debugging message to standard error. + */ + static void PRINTF_LIKE(1, 2) debug(const char *fmt, ...) { const char *p; @@ -74,8 +84,58 @@ static void PRINTF_LIKE(1, 2) debug(const char *fmt, ...) } } +/* --- @timer_diff@ --- * + * + * Arguments: @struct bench_timing *delta_out@ = where to putt the result + * @const struct bench_time *t0, *t1@ = two times captured by a + * timer's @now@ function + * + * Returns: --- + * + * Use: Calculates the difference between two captured times. The + * flags are set according to whether the differences are + * meaningful; @delta_out->n@ is left unset. + */ + +static void timer_diff(struct bench_timing *delta_out, + const struct bench_time *t0, + const struct bench_time *t1) +{ + unsigned f = t0->f&t1->f; + kludge64 k; + +#ifdef HAVE_UINT64 +# define FLOATK64(k) ((double)(k).i) +#else +# define FLOATK64(k) ((double)(k).lo + 4275123318.0*(double)(k).hi) +#endif + + if (!(f&BTF_TIMEOK)) + delta_out->t = 0.0; + else { + SUB64(k, t1->s, t0->s); + delta_out->t = FLOATK64(k) - 1 + + (t1->ns + NS_PER_S - t0->ns)/(double)NS_PER_S; + } + + if (!(f&BTF_CYOK)) + delta_out->cy = 0.0; + else { + SUB64(k, t1->cy, t0->cy); + delta_out->cy = FLOATK64(k); + } + + delta_out->f = f; + +#undef FLOATK64 +} + /*----- The null clock ----------------------------------------------------*/ +/* This is a cycle counter which does nothing, in case we don't have any + * better ideas. + */ + static void null_now(struct bench_time *t_out, struct timer *t) { ; } static void null_teardown(struct timer *t) { ; } static const struct timer_ops null_ops = { null_now, null_teardown }; @@ -87,6 +147,10 @@ static int null_cyinit(struct timer *t) /*----- Linux performance counters ----------------------------------------*/ +/* This is a cycle counter which uses the Linux performance event system, + * which is probably the best choice if it's available. + */ + #if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64) #include @@ -143,6 +207,11 @@ static int perfevent_init(struct timer *t) /*----- Intel time-stamp counter ------------------------------------------*/ +/* This is a cycle counter based on the Intel `rdtsc' instruction. It's not + * really suitable for performance measurement because it gets confused by + * CPU frequency adjustments. + */ + #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) #define EFLAGS_ID (1u << 21) @@ -224,6 +293,10 @@ static int x86rdtsc_init(struct timer *t) /*----- POSIX `clock_gettime' ---------------------------------------------*/ +/* This is a real-time clock based on the POSIX time interface, with up to + * nanosecond precision. + */ + #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID) static void gettime_now(struct bench_time *t_out, struct timer *t) @@ -253,6 +326,10 @@ static int gettime_init(struct timer *t) /*----- Standard C `clock' ------------------------------------------------*/ +/* This is a real-time clock based on the C `clock' function which is + * guaranteed to be available, though it's not likely to be very good. + */ + static void clock_now(struct bench_time *t_out, struct timer *t) { clock_t now, x; @@ -290,6 +367,7 @@ static int clock_init(struct timer *t) /*----- Timing setup ------------------------------------------------------*/ +/* Tables of timing sources. */ static const struct timerent { const char *name; int (*init)(struct timer */*t*/); @@ -297,6 +375,17 @@ static const struct timerent { clktab[] = { GETTIME_CLKENT CLOCK_CLKENT { 0, 0 } }, cytab[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_CYENT { 0, 0 } }; +/* --- @find_timer@ --- * + * + * Arguments: @const char *name@ = timer name + * @size_t sz@ = length of name + * @const struct timerent *timers@ = table to search + * @const char *what@ = adjective describing table + * + * Returns: The table entry matching the given name, or null if there + * isn't one. + */ + static const struct timerent *find_timer_n(const char *name, size_t sz, const struct timerent *timers, const char *what) @@ -309,6 +398,18 @@ static const struct timerent *find_timer_n(const char *name, size_t sz, debug("%s timer `%.*s' not found", what, (int)sz, name); return (0); } +/* --- @try_timer@ --- * + * + * Arguments: @struct timer *t@ = timer structure + * @const struct timerent *timer@ = timer table entry + * @const char *what@ = adjective describing table + * + * Returns: Zero on success, @-1@ if timer failed. + * + * Use: Tries to initialize the timer @t@, reporting a debug message + * if it worked. + */ + static int try_timer(struct timer *t, const struct timerent *timer, const char *what) { @@ -316,6 +417,21 @@ static int try_timer(struct timer *t, debug("selected %s timer `%s'", what, timer->name); return (0); } +/* --- @select_timer@ --- * + * + * Arguments: @struct timer *t@ = timer structure + * @const struct timerent *timer@ = timer table + * @const char *varname@ = environment variable to consult + * @const char *what@ = adjective describing table + * + * Returns: Zero on success, @-1@ if timer failed. + * + * Use: Select a timer from the table. If the environment variable + * is set, then parse a comma-separated list of timer names and + * use the first one listed that seems to work; otherwise, try + * the timers in the table in order. + */ + static int select_timer(struct timer *t, const struct timerent *timers, const char *varname, const char *what) { @@ -338,6 +454,7 @@ static int select_timer(struct timer *t, const struct timerent *timers, debug("no suitable %s timer found", what); return (-1); } +/* Bench timer operations. */ static void timer_now(struct bench_timer *tm, struct bench_time *t_out) { struct timer *t = (struct timer *)tm; @@ -345,7 +462,6 @@ static void timer_now(struct bench_timer *tm, struct bench_time *t_out) t->clkops->now(t_out, t); t->cyops->now(t_out, t); } - static void timer_destroy(struct bench_timer *tm) { struct timer *t = (struct timer *)tm; @@ -358,6 +474,16 @@ static void timer_destroy(struct bench_timer *tm) static const struct bench_timerops timer_ops = { timer_now, timer_destroy }; +/* --- @bench_createtimer@ --- * + * + * Arguments: --- + * + * Returns: A freshly constructed standard timer object. + * + * Use: Allocate a timer. Dispose of it by calling + * @tm->ops->destroy(tm)@ when you're done. + */ + struct bench_timer *bench_createtimer(void) { struct timer *t = 0; @@ -372,46 +498,62 @@ end: return (ret); } -#ifdef HAVE_UINT64 -# define FLOATK64(k) ((double)(k).i) -#else -# define FLOATK64(k) ((double)(k).lo + 4275123318.0*(double)(k).hi) -#endif - -static void timer_diff(struct bench_timing *delta_out, - const struct bench_time *t0, - const struct bench_time *t1) -{ - delta_out->f = t0->f&t1->f; - kludge64 k; - - if (!(delta_out->f&BTF_TIMEOK)) - delta_out->t = 0.0; - else { - SUB64(k, t1->s, t0->s); - delta_out->t = FLOATK64(k) - 1 + - (t1->ns + NS_PER_S - t0->ns)/(double)NS_PER_S; - } - - if (!(delta_out->f&BTF_CYOK)) - delta_out->cy = 0.0; - else { - SUB64(k, t1->cy, t0->cy); - delta_out->cy = FLOATK64(k); - } -} +/*----- Benchmarking ------------------------------------------------------*/ -/*----- Calibration -------------------------------------------------------*/ +/* --- @bench_init@ --- * + * + * Arguments: @struct bench_state *b@ = bench state to initialize + * @struct bench_timer *tm@ = timer to attach + * + * Returns: --- + * + * Use: Initialize the benchmark state. It still needs to be + * calibrated (use @bench_calibrate@) before it can be used, but + * this will be done automatically by @bench_measure@ if it's + * not done by hand earlier. The timer is now owned by the + * benchmark state and will be destroyed by @bench_destroy@. + */ void bench_init(struct bench_state *b, struct bench_timer *tm) { b->tm = tm; b->target_s = 1.0; b->f = 0; } +/* --- @bench_destroy@ --- * + * + * Arguments: @struct bench_state *b@ = bench state + * + * Returns: --- + * + * Use: Destroy the benchmark state, releasing the resources that it + * holds. + */ + void bench_destroy(struct bench_state *b) { b->tm->ops->destroy(b->tm); } -static void do_nothing(unsigned long n, void *p) +/* --- @do_nothing@ --- * + * + * Arguments: @unsigned long n@ = iteration count + * @void *ctx@ = context pointer (ignored) + * + * Returns: --- + * + * Use: Does nothing at all for @n@ iterations. Used to calibrate + * the benchmarking state. + */ + +static void do_nothing(unsigned long n, void *ctx) { while (n--) RELAX; } +/* --- @bench_calibrate@ --- * + * + * Arguments: @struct bench_state *b@ = bench state + * + * Returns: Zero on success, @-1@ if calibration failed. + * + * Use: Calibrate the benchmark state, so that it can be used to + * measure performance reasonably accurately. + */ + int bench_calibrate(struct bench_state *b) { struct linreg lr_clk = LINREG_INIT, lr_cy = LINREG_INIT; @@ -424,17 +566,32 @@ int bench_calibrate(struct bench_state *b) unsigned f = BTF_ANY; int rc; + /* The model here is that a timing loop has a fixed overhead as we enter + * and leave (e.g., to do with the indirect branch into the code), and + * per-iteration overheads as we check the counter and loop back. We aim + * to split these apart using linear regression. + */ + + /* If we've already calibrated then there's nothing to do. */ if (b->f&BTF_ANY) return (0); + /* Exercise the inner loop a few times to educate the branch predictor. */ for (i = 0; i < 10; i++) - { tm->ops->now(tm, &t0); fn(1, 0); tm->ops->now(tm, &t1); } + { tm->ops->now(tm, &t0); fn(50, 0); tm->ops->now(tm, &t1); } + /* Now we measure idle loops until they take sufficiently long -- or we run + * out of counter. + */ debug("calibrating..."); n = 1; for (;;) { + + /* Measure @n@ iterations of the idle loop. */ tm->ops->now(tm, &t0); fn(n, 0); tm->ops->now(tm, &t1); timer_diff(&delta, &t0, &t1); f &= delta.f; if (!(f&BTF_TIMEOK)) { rc = -1; goto end; } + + /* Register the timings with the regression machinery. */ linreg_update(&lr_clk, n, delta.t); if (!(f&BTF_CYOK)) debug(" n = %10lu; t = %12g s", n, delta.t); @@ -442,33 +599,64 @@ int bench_calibrate(struct bench_state *b) linreg_update(&lr_cy, n, delta.cy); debug(" n = %10lu; t = %12g s, cy = %10.0f", n, delta.t, delta.cy); } + + /* If we're done then stop. */ if (delta.t >= b->target_s/20.0) break; if (n >= ULONG_MAX - n/3) break; + + /* Update the counter and continue. */ n += n/3 + 1; } + /* Now run the linear regression to extract the constant and per-iteration + * overheads. + */ linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, 0); debug("clock overhead = (%g n + %g) s", b->clk.m, b->clk.c); if (f&BTF_CYOK) { linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, 0); debug("cycle overhead = (%g n + %g) cy", b->cy.m, b->cy.c); } + + /* We're done. */ b->f |= f; rc = 0; end: return (rc); } +/* --- @bench_measure@ --- * + * + * Arguments: @struct bench_timing *t_out@ = where to leave the timing + * @struct bench_state *b@ = benchmark state + * @double base@ = number of internal units per call + * @bench_fn *fn@, @void *ctx@ = benchmark function to run + * + * Returns: Zero on success, @-1@ if timing failed. + * + * Use: Measure a function. The function @fn@ is called adaptively + * with an iteration count @n@ set so as to run for + * approximately @b->target_s@ seconds. + * + * The result is left in @*t_out@, with @t_out->n@ counting the + * final product of the iteration count and @base@ (which might, + * e.g., reflect the number of inner iterations the function + * performs, or the number of bytes it processes per iteration). + */ + int bench_measure(struct bench_timing *t_out, struct bench_state *b, - double base, bench_fn *fn, void *p) + double base, bench_fn *fn, void *ctx) { struct bench_timer *tm = b->tm; struct bench_time t0, t1; unsigned long n; + /* Make sure the state is calibrated. */ if (bench_calibrate(b)) return (-1); + + /* Main adaptive measurement loop. */ debug("measuring..."); n = 1; for (;;) { - tm->ops->now(tm, &t0); fn(n, p); tm->ops->now(tm, &t1); + tm->ops->now(tm, &t0); fn(n, ctx); tm->ops->now(tm, &t1); timer_diff(t_out, &t0, &t1); if (!(t_out->f&BTF_TIMEOK)) return (-1); if (!(t_out->f&BTF_CYOK)) debug(" n = %10lu; t = %12g", n, t_out->t); @@ -476,8 +664,12 @@ int bench_measure(struct bench_timing *t_out, struct bench_state *b, if (t_out->t >= 0.72*b->target_s) break; n *= 1.44*b->target_s/t_out->t; } + + /* Adjust according to the calibration. */ t_out->t -= n*b->clk.m + b->clk.c; if (t_out->f&BTF_CYOK) t_out->cy -= n*b->cy.m + b->cy.c; + + /* Report the results, if debugging. */ if (!(t_out->f&BTF_CYOK)) debug(" adjusted t' = %12g", t_out->t); else debug(" adjusted t = %12g, cy = %10.0f", t_out->t, t_out->cy); if (!(t_out->f&BTF_CYOK)) @@ -485,6 +677,8 @@ int bench_measure(struct bench_timing *t_out, struct bench_state *b, else debug(" %g s (%g cy) per op; %g ops/s", t_out->t/n, t_out->cy/n, n/t_out->t); + + /* All done. */ t_out->n = n*base; return (0); }