@@@ bench man
[mLib] / test / bench.c
CommitLineData
b64eb60f
MW
1/* -*-c-*-
2 *
3 * Benchmarking support
4 *
5 * (c) 2023 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include "config.h"
31
13ee7406 32#include <ctype.h>
b64eb60f
MW
33#include <errno.h>
34#include <stdarg.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <time.h>
39
40#include "alloc.h"
41#include "bench.h"
42#include "bits.h"
13ee7406 43#include "dstr.h"
b64eb60f
MW
44#include "linreg.h"
45#include "macros.h"
46
47/*----- Data structures ---------------------------------------------------*/
48
13ee7406
MW
49enum { CLK, CY, NTIMER };
50
b64eb60f
MW
51struct timer {
52 struct bench_timer _t;
13ee7406 53 const struct timer_ops *ops[NTIMER]; /* subtimers for clock and cycles */
67b5031e 54 union { int fd; } u_cy; /* state for cycle measurement */
b64eb60f
MW
55};
56
57struct timer_ops {
13ee7406
MW
58 const char *name; /* timer name */
59 unsigned f; /* flags */
60#define TF_SECRET 1u /* don't try this automatically */
61 int (*init)(struct timer */*t*/); /* initialization function */
67b5031e
MW
62 void (*now)(struct bench_time *t_out, struct timer *t); /* read current */
63 void (*teardown)(struct timer *t); /* release held resources */
b64eb60f
MW
64};
65
66/*----- Preliminaries -----------------------------------------------------*/
67
68#define NS_PER_S 1000000000
69
67b5031e
MW
70/* --- @debug@ --- *
71 *
72 * Arguments: @const char *fmt@ = format control string
73 * @...@ = format arguemnts
74 *
75 * Returns: ---
76 *
77 * Use: Maybe report a debugging message to standard error.
78 */
79
31d0247c 80static PRINTF_LIKE(1, 2) void debug(const char *fmt, ...)
b64eb60f
MW
81{
82 const char *p;
83 va_list ap;
84
85 p = getenv("MLIB_BENCH_DEBUG");
86 if (p && *p != 'n' && *p != '0') {
87 va_start(ap, fmt);
88 fputs("mLib BENCH: ", stderr);
89 vfprintf(stderr, fmt, ap);
90 fputc('\n', stderr);
91 va_end(ap);
92 }
93}
94
67b5031e
MW
95/* --- @timer_diff@ --- *
96 *
97 * Arguments: @struct bench_timing *delta_out@ = where to putt the result
98 * @const struct bench_time *t0, *t1@ = two times captured by a
99 * timer's @now@ function
100 *
101 * Returns: ---
102 *
103 * Use: Calculates the difference between two captured times. The
104 * flags are set according to whether the differences are
105 * meaningful; @delta_out->n@ is left unset.
106 */
107
108static void timer_diff(struct bench_timing *delta_out,
109 const struct bench_time *t0,
110 const struct bench_time *t1)
111{
112 unsigned f = t0->f&t1->f;
113 kludge64 k;
114
115#ifdef HAVE_UINT64
116# define FLOATK64(k) ((double)(k).i)
117#else
118# define FLOATK64(k) ((double)(k).lo + 4275123318.0*(double)(k).hi)
119#endif
120
121 if (!(f&BTF_TIMEOK))
122 delta_out->t = 0.0;
123 else {
124 SUB64(k, t1->s, t0->s);
125 delta_out->t = FLOATK64(k) - 1 +
126 (t1->ns + NS_PER_S - t0->ns)/(double)NS_PER_S;
127 }
128
129 if (!(f&BTF_CYOK))
130 delta_out->cy = 0.0;
131 else {
132 SUB64(k, t1->cy, t0->cy);
133 delta_out->cy = FLOATK64(k);
134 }
135
136 delta_out->f = f;
137
138#undef FLOATK64
139}
140
13ee7406 141/*----- The null timer ----------------------------------------------------*/
b64eb60f 142
13ee7406
MW
143/* This is a timer which does nothing, in case we don't have any better
144 * ideas.
67b5031e
MW
145 */
146
13ee7406 147static int null_init(struct timer *t) { return (0); }
b64eb60f
MW
148static void null_now(struct bench_time *t_out, struct timer *t) { ; }
149static void null_teardown(struct timer *t) { ; }
b64eb60f 150
13ee7406
MW
151static const struct timer_ops null_ops =
152 { "null", 0, null_init, null_now, null_teardown };
153#define NULL_ENT &null_ops,
154
155/*----- The broken clock --------------------------------------------------*/
156
157/* This is a cycle counter which does nothing, in case we don't have any
158 * better ideas.
159 */
b64eb60f 160
13ee7406
MW
161static int broken_init(struct timer *t) { return (-1); }
162
163static const struct timer_ops broken_ops =
164 { "broken", TF_SECRET, broken_init, null_now, null_teardown };
165#define BROKEN_ENT &broken_ops,
b64eb60f
MW
166
167/*----- Linux performance counters ----------------------------------------*/
168
67b5031e
MW
169/* This is a cycle counter which uses the Linux performance event system,
170 * which is probably the best choice if it's available.
171 */
172
b64eb60f
MW
173#if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64)
174
175#include <sys/types.h>
176#include <unistd.h>
177
178#include <linux/perf_event.h>
179#include <asm/unistd.h>
180
181static void perfevent_now(struct bench_time *t_out, struct timer *t)
182{
183 ssize_t n;
184
185 n = read(t->u_cy.fd, &t_out->cy.i, sizeof(t_out->cy.i));
186 if (n != sizeof(t_out->cy.i)) {
187 debug("failed to read perf-event counter: %s", strerror(errno));
188 return;
189 }
190 t_out->f |= BTF_CYOK;
191}
192
193static void perfevent_teardown(struct timer *t)
194 { close(t->u_cy.fd); }
195
b64eb60f
MW
196static int perfevent_init(struct timer *t)
197{
198 struct perf_event_attr attr = { 0 };
199 struct bench_time tm;
200
201 attr.type = PERF_TYPE_HARDWARE;
202 attr.size = sizeof(attr);
203 attr.config = PERF_COUNT_HW_CPU_CYCLES;
204 attr.disabled = 0;
205 attr.exclude_kernel = 1;
206 attr.exclude_hv = 1;
207
208 t->u_cy.fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
209 if (t->u_cy.fd < 0) {
210 debug("couldn't open perf evvent: %s", strerror(errno));
211 return (-1);
212 }
213
214 tm.f = 0; perfevent_now(&tm, t);
215 if (!(tm.f&BTF_CYOK)) { close(t->u_cy.fd); return (-1); }
216
13ee7406 217 return (0);
b64eb60f 218}
13ee7406
MW
219
220static const struct timer_ops perfevent_ops =
221 { "linux-perf-hw-cycles", 0,
222 perfevent_init, perfevent_now, perfevent_teardown };
223
224# define PERFEVENT_CYENT &perfevent_ops,
b64eb60f
MW
225#else
226# define PERFEVENT_CYENT
227#endif
228
229/*----- Intel time-stamp counter ------------------------------------------*/
230
67b5031e
MW
231/* This is a cycle counter based on the Intel `rdtsc' instruction. It's not
232 * really suitable for performance measurement because it gets confused by
233 * CPU frequency adjustments.
234 */
235
b64eb60f
MW
236#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
237
13ee7406 238#include <cpuid.h>
b64eb60f 239
13ee7406 240#define CPUID_1D_TSC (1u << 4)
b64eb60f
MW
241
242static void x86rdtsc_now(struct bench_time *t_out, struct timer *t)
13ee7406 243 { t_out->cy.i = __builtin_ia32_rdtsc(); t_out->f |= BTF_CYOK; }
b64eb60f
MW
244
245static int x86rdtsc_init(struct timer *t)
246{
13ee7406
MW
247 unsigned a, b, c, d;
248
249 if (!__get_cpuid(1, &a, &b, &c, &d) || !(d&CPUID_1D_TSC))
b64eb60f 250 { debug("no `rdtsc' instrunction"); return (-1); }
13ee7406 251 return (0);
b64eb60f
MW
252}
253
13ee7406
MW
254static const struct timer_ops x86rdtsc_ops =
255 { "x86-rdtsc", 0, x86rdtsc_init, x86rdtsc_now, null_teardown };
256
257# define X86RDTSC_CYENT &x86rdtsc_ops,
b64eb60f 258#else
13ee7406 259# define X86RDTSC_CYENT
b64eb60f
MW
260#endif
261
262/*----- POSIX `clock_gettime' ---------------------------------------------*/
263
67b5031e
MW
264/* This is a real-time clock based on the POSIX time interface, with up to
265 * nanosecond precision.
266 */
267
b64eb60f
MW
268#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID)
269
270static void gettime_now(struct bench_time *t_out, struct timer *t)
271{
272 struct timespec now;
273
274 if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now))
275 { debug("error reading POSIX clock: %s", strerror(errno)); return; }
276 ASSIGN64(t_out->s, now.tv_sec); t_out->ns = now.tv_nsec;
277 t_out->f |= BTF_TIMEOK;
278}
279
b64eb60f
MW
280static int gettime_init(struct timer *t)
281{
282 struct bench_time tm;
283
284 tm.f = 0; gettime_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1);
13ee7406 285 return (0);
b64eb60f
MW
286}
287
13ee7406
MW
288static const struct timer_ops gettime_ops =
289 { "posix-thread-cputime", 0, gettime_init, gettime_now, null_teardown };
290
291# define GETTIME_CLKENT &gettime_ops,
b64eb60f
MW
292#else
293# define GETTIME_CLKENT
294#endif
295
296/*----- Standard C `clock' ------------------------------------------------*/
297
67b5031e
MW
298/* This is a real-time clock based on the C `clock' function which is
299 * guaranteed to be available, though it's not likely to be very good.
300 */
301
b64eb60f
MW
302static void clock_now(struct bench_time *t_out, struct timer *t)
303{
304 clock_t now, x;
305 unsigned long s; uint32 ns;
306
307 now = clock();
308 if (now == (clock_t)-1) {
309 debug("error reading standard clock: %s", strerror(errno));
310 return;
311 }
312 x = now/CLOCKS_PER_SEC;
313 if (x > ULONG_MAX) { debug("standard clock out of range"); return; }
314
315 s = x; x = now - CLOCKS_PER_SEC*s;
316 if (!(NS_PER_S%CLOCKS_PER_SEC))
317 ns = x*(NS_PER_S/CLOCKS_PER_SEC);
318 else if (NS_PER_S <= ULONG_MAX/CLOCKS_PER_SEC)
319 ns = (x*NS_PER_S)/CLOCKS_PER_SEC;
320 else
321 ns = x*((NS_PER_S + 0.0)/CLOCKS_PER_SEC);
322 ASSIGN64(t_out->s, s); t_out->ns = ns; t_out->f |= BTF_TIMEOK;
323}
324
b64eb60f
MW
325static int clock_init(struct timer *t)
326{
327 struct bench_time tm;
328
329 tm.f = 0; clock_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1);
13ee7406 330 return (0);
b64eb60f
MW
331}
332
13ee7406
MW
333static const struct timer_ops clock_ops =
334 { "stdc-clock", 0, clock_init, clock_now, null_teardown };
335
336#define CLOCK_CLKENT &clock_ops,
b64eb60f
MW
337
338/*----- Timing setup ------------------------------------------------------*/
339
67b5031e 340/* Tables of timing sources. */
13ee7406
MW
341static const struct timer_ops
342 *const clktab[] = { GETTIME_CLKENT CLOCK_CLKENT BROKEN_ENT 0 },
343 *const cytab[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_ENT BROKEN_ENT 0 };
344
345static const struct timertab {
346 const char *what;
347 const char *env;
348 const struct timer_ops *const *opstab;
349} timertab[] = {
350 { "clock", "MLIB_BENCH_CLKTIMER", clktab },
351 { "cycle", "MLIB_BENCH_CYCLETIMER", cytab }
352};
b64eb60f 353
67b5031e
MW
354/* --- @find_timer@ --- *
355 *
356 * Arguments: @const char *name@ = timer name
357 * @size_t sz@ = length of name
13ee7406 358 * @unsigned tm@ = which subtimer we're looking for
67b5031e
MW
359 *
360 * Returns: The table entry matching the given name, or null if there
361 * isn't one.
362 */
363
13ee7406
MW
364static const struct timer_ops *find_timer(const char *name, size_t sz,
365 unsigned tm)
b64eb60f 366{
13ee7406
MW
367 const struct timer_ops *const *tt;
368
369 for (tt = timertab[tm].opstab; *tt; tt++) {
370 if (strlen((*tt)->name) == sz &&
371 MEMCMP(name, ==, (*tt)->name, sz))
372 return (*tt);
b64eb60f 373 }
13ee7406
MW
374 debug("%s timer `%.*s' not found",
375 timertab[tm].what, (int)sz, name); return (0);
b64eb60f
MW
376}
377
67b5031e
MW
378/* --- @try_timer@ --- *
379 *
380 * Arguments: @struct timer *t@ = timer structure
13ee7406
MW
381 * @const struct timer_ops *ops@ = timer ops
382 * @unsigned tm@ = which subtimer we're setting
67b5031e 383 *
13ee7406 384 * Returns: Zero on success, %$-1$% if timer failed.
67b5031e
MW
385 *
386 * Use: Tries to initialize the timer @t@, reporting a debug message
387 * if it worked.
388 */
389
b64eb60f 390static int try_timer(struct timer *t,
13ee7406 391 const struct timer_ops *ops, unsigned tm)
b64eb60f 392{
13ee7406
MW
393 if (ops->init(t)) return (-1);
394 debug("selected %s timer `%s'", timertab[tm].what, ops->name);
395 t->ops[tm] = ops; return (0);
b64eb60f
MW
396}
397
67b5031e
MW
398/* --- @select_timer@ --- *
399 *
400 * Arguments: @struct timer *t@ = timer structure
13ee7406
MW
401 * @unsigned tm@ = which subtimer we're setting
402 * @const char *config@, @size_t sz@ = config string
67b5031e 403 *
13ee7406 404 * Returns: Zero on success, %$-1$% if timer failed.
67b5031e
MW
405 *
406 * Use: Select a timer from the table. If the environment variable
407 * is set, then parse a comma-separated list of timer names and
408 * use the first one listed that seems to work; otherwise, try
409 * the timers in the table in order.
410 */
411
13ee7406
MW
412static int select_timer(struct timer *t, unsigned tm,
413 const char *config, size_t sz)
b64eb60f 414{
13ee7406
MW
415 const char *p, *l;
416 const struct timer_ops *ops, *const *tt;
b64eb60f 417
13ee7406
MW
418 if (!config) {
419 for (tt = timertab[tm].opstab; *tt; tt++)
420 if (!((*tt)->f&TF_SECRET) && !try_timer(t, *tt, tm)) return (0);
b64eb60f 421 } else {
13ee7406 422 l = config + sz;
b64eb60f 423 for (;;) {
13ee7406
MW
424 p = memchr(config, ',', l - config); if (!p) p = l;
425 ops = find_timer(config, p - config, tm);
426 if (ops && !try_timer(t, ops, tm)) return (0);
427 if (p >= l) break;
428 config = p + 1;
b64eb60f
MW
429 }
430 }
13ee7406 431 debug("no suitable %s timer found", timertab[tm].what); return (-1);
b64eb60f
MW
432}
433
67b5031e 434/* Bench timer operations. */
13ee7406
MW
435static void timer_describe(struct bench_timer *tm, dstr *d)
436{
437 struct timer *t = (struct timer *)tm;
438 unsigned i;
439
440 dstr_puts(d, "builtin: ");
441 for (i = 0; i < NTIMER; i++) {
442 if (i) dstr_puts(d, ", ");
443 dstr_putf(d, "%s = %s", timertab[i].what, t->ops[i]->name);
444 }
445}
446
b64eb60f
MW
447static void timer_now(struct bench_timer *tm, struct bench_time *t_out)
448{
449 struct timer *t = (struct timer *)tm;
13ee7406 450 unsigned i;
b64eb60f 451
13ee7406 452 for (i = 0; i < NTIMER; i++) t->ops[i]->now(t_out, t);
b64eb60f 453}
13ee7406 454
b64eb60f
MW
455static void timer_destroy(struct bench_timer *tm)
456{
457 struct timer *t = (struct timer *)tm;
13ee7406 458 unsigned i;
b64eb60f
MW
459
460 if (!t) return;
13ee7406
MW
461 for (i = 0; i < NTIMER; i++)
462 if (t->ops[i]) t->ops[i]->teardown(t);
b64eb60f
MW
463 xfree(t);
464}
465
13ee7406
MW
466static const struct bench_timerops timer_ops =
467 { timer_describe, timer_now, timer_destroy };
b64eb60f 468
67b5031e
MW
469/* --- @bench_createtimer@ --- *
470 *
13ee7406 471 * Arguments: @const char *config@ = timer configuration string
67b5031e
MW
472 *
473 * Returns: A freshly constructed standard timer object.
474 *
475 * Use: Allocate a timer. Dispose of it by calling
476 * @tm->ops->destroy(tm)@ when you're done.
13ee7406
MW
477 *
478 * Applications should not set configuration strings except as
479 * established by user action, e.g., from a command-line option,
480 * environment variable, or configuration file.
67b5031e
MW
481 */
482
13ee7406 483struct bench_timer *bench_createtimer(const char *config)
b64eb60f
MW
484{
485 struct timer *t = 0;
486 struct bench_timer *ret = 0;
13ee7406
MW
487 struct { const char *p; size_t sz; } tmconf[NTIMER] = { 0 };
488 const struct timer_ops *const *tt;
489 const char *p, *l; size_t n, nn;
490 unsigned i;
b64eb60f 491
13ee7406
MW
492 /* Parse the configuration string. */
493 if (config) {
494
495 /* The first thing to do is find the end of the string. */
496 l = config + strlen(config);
497
498 for (;;) {
499 /* Process the whitespace-sparated words of the string one by one. */
500
501 /* Skip over any initial whitespace. If we hit the end of the string
502 * then we're done.
503 */
504 for (;;)
505 if (config >= l) goto done_config;
506 else if (!ISSPACE(*config)) break;
507 else config++;
508
509 /* There's definitely a word here. Find the end of it. */
510 for (p = config; p < l && !ISSPACE(*p); p++);
511 nn = p - config;
512
513 /* Try various simple keywords. */
514#define MATCHP(lit) (nn == sizeof(lit) - 1 && MEMCMP(config, ==, lit, nn))
515
516 if (MATCHP("list")) {
517 /* The `list' keyword requests lists of the available timer
518 * implementations.
519 */
520
521 for (i = 0; i < NTIMER; i++) {
522 printf("%s timers:", timertab[i].what);
523 for (tt = timertab[i].opstab; *tt; tt++)
524 if (!((*tt)->f)&TF_SECRET) printf(" %s", (*tt)->name);
525 putchar('\n');
526 }
527 goto next_config;
528 }
529
530#undef MATCHP
531
532 /* Otherwise it's an assignment, setting a subtimer list. */
533 p = memchr(config, '=', nn);
534 if (!p)
535 n = nn;
536 else {
537 n = p - config;
538 for (i = 0; i < NTIMER; i++)
539 if (STRNCMP(config, ==, timertab[i].what, n) &&
540 !timertab[i].what[n]) {
541 if (tmconf[i].p)
542 debug("duplicate %s timer list", timertab[i].what);
543 tmconf[i].p = config + n + 1; tmconf[i].sz = nn - n - 1;
544 goto next_config;
545 }
546 }
547 debug("unrecognized config keyword `%.*s'", (int)n, config);
548
549 /* Move on to the next word. */
550 next_config:
551 config += nn;
552 }
553 done_config:;
554 }
555
556 /* Override these settings from the environment. */
557 for (i = 0; i < NTIMER; i++) {
558 p = getenv(timertab[i].env);
559 if (p) { tmconf[i].p = p; tmconf[i].sz = strlen(p); }
560 }
561
562 /* All seems well. Allocate the timer object. */
563 t = xmalloc(sizeof(*t));
564 for (i = 0; i < NTIMER; i++) t->ops[i] = 0;
565
566 /* Try to set up the subtimers. */
567 for (i = 0; i < NTIMER; i++)
568 if (select_timer(t, i, tmconf[i].p, tmconf[i].sz)) goto end;
569
570 /* All is done. */
b64eb60f
MW
571 t->_t.ops = &timer_ops; ret = &t->_t; t = 0;
572end:
573 if (t) timer_destroy(&t->_t);
574 return (ret);
575}
576
67b5031e 577/*----- Benchmarking ------------------------------------------------------*/
b64eb60f 578
67b5031e
MW
579/* --- @bench_init@ --- *
580 *
581 * Arguments: @struct bench_state *b@ = bench state to initialize
13ee7406 582 * @struct bench_timer *tm@ = timer to attach, or null
67b5031e 583 *
13ee7406 584 * Returns: Zero on success, %$-1$% on failure.
67b5031e 585 *
13ee7406
MW
586 * Use: Initialize the benchmark state. On success, the timer state
587 * still needs to be calibrated (use @bench_calibrate@) before
588 * it can be used, but this will be done automatically by
589 * @bench_measure@ if it's not done by hand earlier. The timer
590 * is now owned by the benchmark state and will be destroyed by
591 * @bench_destroy@.
592 *
593 * The only reason for failure is if @tm@ was null on entry,
594 * and automatic construction of a timer failed. The state is
595 * safe to discard, but calling @bench_destroy@ is safe too.
67b5031e 596 */
b64eb60f 597
13ee7406
MW
598int bench_init(struct bench_state *b, struct bench_timer *tm)
599{
600 int rc;
601
602 b->tm = 0;
603
604 if (!tm) {
605 tm = bench_createtimer(0);
606 if (!tm) { rc = -1; goto end; }
607 }
608
609 b->tm = tm; b->target_s = 1.0; b->f = 0; rc = 0;
610end:
611 return (rc);
612}
b64eb60f 613
67b5031e
MW
614/* --- @bench_destroy@ --- *
615 *
616 * Arguments: @struct bench_state *b@ = bench state
617 *
618 * Returns: ---
619 *
620 * Use: Destroy the benchmark state, releasing the resources that it
621 * holds.
622 */
623
b64eb60f 624void bench_destroy(struct bench_state *b)
13ee7406 625 { if (b->tm) { b->tm->ops->destroy(b->tm); b->tm = 0; } }
b64eb60f 626
67b5031e
MW
627/* --- @do_nothing@ --- *
628 *
629 * Arguments: @unsigned long n@ = iteration count
630 * @void *ctx@ = context pointer (ignored)
631 *
632 * Returns: ---
633 *
634 * Use: Does nothing at all for @n@ iterations. Used to calibrate
635 * the benchmarking state.
636 */
637
638static void do_nothing(unsigned long n, void *ctx)
b64eb60f
MW
639 { while (n--) RELAX; }
640
67b5031e
MW
641/* --- @bench_calibrate@ --- *
642 *
643 * Arguments: @struct bench_state *b@ = bench state
644 *
13ee7406 645 * Returns: Zero on success, %$-1$% if calibration failed.
67b5031e
MW
646 *
647 * Use: Calibrate the benchmark state, so that it can be used to
648 * measure performance reasonably accurately.
649 */
650
13ee7406
MW
651#define T_CLB 0.0625 /* calibration time limit */
652
b64eb60f
MW
653int bench_calibrate(struct bench_state *b)
654{
655 struct linreg lr_clk = LINREG_INIT, lr_cy = LINREG_INIT;
656 unsigned long n;
657 unsigned i;
658 struct bench_timer *tm = b->tm;
659 struct bench_time t0, t1;
660 struct bench_timing delta;
13ee7406 661 double r;
b64eb60f
MW
662 bench_fn *fn = LAUNDER(&do_nothing);
663 unsigned f = BTF_ANY;
664 int rc;
665
67b5031e
MW
666 /* The model here is that a timing loop has a fixed overhead as we enter
667 * and leave (e.g., to do with the indirect branch into the code), and
668 * per-iteration overheads as we check the counter and loop back. We aim
669 * to split these apart using linear regression.
670 */
671
672 /* If we've already calibrated then there's nothing to do. */
13ee7406 673 if (b->f&BTF_CLB) return (b->f&BTF_ANY ? 0 : -1);
b64eb60f 674
67b5031e 675 /* Exercise the inner loop a few times to educate the branch predictor. */
b64eb60f 676 for (i = 0; i < 10; i++)
67b5031e 677 { tm->ops->now(tm, &t0); fn(50, 0); tm->ops->now(tm, &t1); }
b64eb60f 678
67b5031e
MW
679 /* Now we measure idle loops until they take sufficiently long -- or we run
680 * out of counter.
681 */
b64eb60f
MW
682 debug("calibrating...");
683 n = 1;
684 for (;;) {
67b5031e
MW
685
686 /* Measure @n@ iterations of the idle loop. */
b64eb60f
MW
687 tm->ops->now(tm, &t0); fn(n, 0); tm->ops->now(tm, &t1);
688 timer_diff(&delta, &t0, &t1); f &= delta.f;
689 if (!(f&BTF_TIMEOK)) { rc = -1; goto end; }
67b5031e
MW
690
691 /* Register the timings with the regression machinery. */
b64eb60f
MW
692 linreg_update(&lr_clk, n, delta.t);
693 if (!(f&BTF_CYOK))
694 debug(" n = %10lu; t = %12g s", n, delta.t);
695 else {
696 linreg_update(&lr_cy, n, delta.cy);
697 debug(" n = %10lu; t = %12g s, cy = %10.0f", n, delta.t, delta.cy);
698 }
67b5031e
MW
699
700 /* If we're done then stop. */
13ee7406 701 if (delta.t >= T_CLB) break;
b64eb60f 702 if (n >= ULONG_MAX - n/3) break;
67b5031e
MW
703
704 /* Update the counter and continue. */
b64eb60f
MW
705 n += n/3 + 1;
706 }
707
67b5031e
MW
708 /* Now run the linear regression to extract the constant and per-iteration
709 * overheads.
710 */
13ee7406
MW
711 linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, &r);
712 debug("clock overhead = (%g n + %g) s (r = %g)", b->clk.m, b->clk.c, r);
b64eb60f 713 if (f&BTF_CYOK) {
13ee7406
MW
714 linreg_fit(&lr_cy, &b->cy.m, &b->cy.c, &r);
715 debug("cycle overhead = (%g n + %g) cy (r = %g)", b->cy.m, b->cy.c, r);
b64eb60f 716 }
67b5031e
MW
717
718 /* We're done. */
13ee7406 719 rc = 0;
b64eb60f 720end:
13ee7406 721 b->f |= f | BTF_CLB; /* no point trying again */
b64eb60f
MW
722 return (rc);
723}
724
67b5031e
MW
725/* --- @bench_measure@ --- *
726 *
8d372122
MW
727 * Arguments: @struct bench_state *b@ = benchmark state
728 * @struct bench_timing *t_out@ = where to leave the timing
67b5031e
MW
729 * @double base@ = number of internal units per call
730 * @bench_fn *fn@, @void *ctx@ = benchmark function to run
731 *
13ee7406 732 * Returns: Zero on success, %$-1$% if timing failed.
67b5031e
MW
733 *
734 * Use: Measure a function. The function @fn@ is called adaptively
735 * with an iteration count @n@ set so as to run for
736 * approximately @b->target_s@ seconds.
737 *
738 * The result is left in @*t_out@, with @t_out->n@ counting the
739 * final product of the iteration count and @base@ (which might,
740 * e.g., reflect the number of inner iterations the function
741 * performs, or the number of bytes it processes per iteration).
742 */
743
8d372122 744int bench_measure(struct bench_state *b, struct bench_timing *t_out,
67b5031e 745 double base, bench_fn *fn, void *ctx)
b64eb60f
MW
746{
747 struct bench_timer *tm = b->tm;
748 struct bench_time t0, t1;
c81c35df 749 unsigned long n, nn;
b64eb60f 750
8d372122
MW
751 /* Make sure the state is calibrated and usable. */
752 if (!(b->f&BTF_CLB) && bench_calibrate(b)) return (-1);
753 if (!(b->f&BTF_TIMEOK)) return (-1);
67b5031e 754
c81c35df
MW
755 /* Main adaptive measurement loop.
756 *
757 * Suppose the timer loop %$n$% iterations in %$t$% seconds. Our ideal
758 * time is %$T$% seconds. If %$t \ge T/\sqrt{2}$%, we're happy.
759 * Otherwise, we need to scale up the iteration count. The obvious next
760 * choice is %$n' = n T/t$%. Alas, rounding is a problem: if
761 * %$T/t < 1 + 1/n$% then %$\floor{n T/t} = n$% and we will make no
762 * progress. We know that %$T/t > \sqrt{2}%, so this can only happen when
763 * %$1 + 1/n > \sqrt{2}$%, i.e., when %$n < \sqrt{2} + 1$%. On the other
764 * hand, if %$T/t < 1 + 1/n$% then %$t (n + 1)/n > T$%, so just trying
765 * again with %$n' = n + 1$% iterations will very likely work.
766 */
b64eb60f
MW
767 debug("measuring..."); n = 1;
768 for (;;) {
67b5031e 769 tm->ops->now(tm, &t0); fn(n, ctx); tm->ops->now(tm, &t1);
b64eb60f
MW
770 timer_diff(t_out, &t0, &t1);
771 if (!(t_out->f&BTF_TIMEOK)) return (-1);
772 if (!(t_out->f&BTF_CYOK)) debug(" n = %10lu; t = %12g", n, t_out->t);
773 else debug(" n = %10lu; t = %12g, cy = %10.0f", n, t_out->t, t_out->cy);
c81c35df
MW
774 if (t_out->t >= 0.707*b->target_s) break;
775 nn = n*b->target_s/t_out->t;
776 if (nn > n) n = nn;
777 else n++;
b64eb60f 778 }
67b5031e
MW
779
780 /* Adjust according to the calibration. */
c7da785d
MW
781 t_out->t -= n*b->clk.m + b->clk.c;
782 if (t_out->f&BTF_CYOK) t_out->cy -= n*b->cy.m + b->cy.c;
67b5031e
MW
783
784 /* Report the results, if debugging. */
c7da785d
MW
785 if (!(t_out->f&BTF_CYOK)) debug(" adjusted t' = %12g", t_out->t);
786 else debug(" adjusted t = %12g, cy = %10.0f", t_out->t, t_out->cy);
b64eb60f
MW
787 if (!(t_out->f&BTF_CYOK))
788 debug(" %g s per op; %g ops/s", t_out->t/n, n/t_out->t);
789 else
790 debug(" %g s (%g cy) per op; %g ops/s",
791 t_out->t/n, t_out->cy/n, n/t_out->t);
67b5031e
MW
792
793 /* All done. */
e63124bc 794 t_out->n = n*base; return (0);
b64eb60f
MW
795}
796
797/*----- That's all, folks -------------------------------------------------*/