@@@ tvec doc wip
[mLib] / test / bench.c
CommitLineData
b64eb60f
MW
1/* -*-c-*-
2 *
3 * Benchmarking support
4 *
5 * (c) 2023 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of the mLib utilities library.
11 *
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
16 *
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include "config.h"
31
32#include <errno.h>
33#include <stdarg.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <time.h>
38
39#include "alloc.h"
40#include "bench.h"
41#include "bits.h"
42#include "linreg.h"
43#include "macros.h"
44
45/*----- Data structures ---------------------------------------------------*/
46
47struct timer {
48 struct bench_timer _t;
67b5031e
MW
49 const struct timer_ops *clkops, *cyops; /* time and cycle measurements */
50 union { int fd; } u_cy; /* state for cycle measurement */
b64eb60f
MW
51};
52
53struct timer_ops {
67b5031e
MW
54 void (*now)(struct bench_time *t_out, struct timer *t); /* read current */
55 void (*teardown)(struct timer *t); /* release held resources */
b64eb60f
MW
56};
57
58/*----- Preliminaries -----------------------------------------------------*/
59
60#define NS_PER_S 1000000000
61
67b5031e
MW
62/* --- @debug@ --- *
63 *
64 * Arguments: @const char *fmt@ = format control string
65 * @...@ = format arguemnts
66 *
67 * Returns: ---
68 *
69 * Use: Maybe report a debugging message to standard error.
70 */
71
31d0247c 72static PRINTF_LIKE(1, 2) void debug(const char *fmt, ...)
b64eb60f
MW
73{
74 const char *p;
75 va_list ap;
76
77 p = getenv("MLIB_BENCH_DEBUG");
78 if (p && *p != 'n' && *p != '0') {
79 va_start(ap, fmt);
80 fputs("mLib BENCH: ", stderr);
81 vfprintf(stderr, fmt, ap);
82 fputc('\n', stderr);
83 va_end(ap);
84 }
85}
86
67b5031e
MW
87/* --- @timer_diff@ --- *
88 *
89 * Arguments: @struct bench_timing *delta_out@ = where to putt the result
90 * @const struct bench_time *t0, *t1@ = two times captured by a
91 * timer's @now@ function
92 *
93 * Returns: ---
94 *
95 * Use: Calculates the difference between two captured times. The
96 * flags are set according to whether the differences are
97 * meaningful; @delta_out->n@ is left unset.
98 */
99
100static void timer_diff(struct bench_timing *delta_out,
101 const struct bench_time *t0,
102 const struct bench_time *t1)
103{
104 unsigned f = t0->f&t1->f;
105 kludge64 k;
106
107#ifdef HAVE_UINT64
108# define FLOATK64(k) ((double)(k).i)
109#else
110# define FLOATK64(k) ((double)(k).lo + 4275123318.0*(double)(k).hi)
111#endif
112
113 if (!(f&BTF_TIMEOK))
114 delta_out->t = 0.0;
115 else {
116 SUB64(k, t1->s, t0->s);
117 delta_out->t = FLOATK64(k) - 1 +
118 (t1->ns + NS_PER_S - t0->ns)/(double)NS_PER_S;
119 }
120
121 if (!(f&BTF_CYOK))
122 delta_out->cy = 0.0;
123 else {
124 SUB64(k, t1->cy, t0->cy);
125 delta_out->cy = FLOATK64(k);
126 }
127
128 delta_out->f = f;
129
130#undef FLOATK64
131}
132
b64eb60f
MW
133/*----- The null clock ----------------------------------------------------*/
134
67b5031e
MW
135/* This is a cycle counter which does nothing, in case we don't have any
136 * better ideas.
137 */
138
b64eb60f
MW
139static void null_now(struct bench_time *t_out, struct timer *t) { ; }
140static void null_teardown(struct timer *t) { ; }
141static const struct timer_ops null_ops = { null_now, null_teardown };
142
143static int null_cyinit(struct timer *t)
144 { t->cyops = &null_ops; return (0); }
145
146#define NULL_CYENT { "null", null_cyinit },
147
148/*----- Linux performance counters ----------------------------------------*/
149
67b5031e
MW
150/* This is a cycle counter which uses the Linux performance event system,
151 * which is probably the best choice if it's available.
152 */
153
b64eb60f
MW
154#if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64)
155
156#include <sys/types.h>
157#include <unistd.h>
158
159#include <linux/perf_event.h>
160#include <asm/unistd.h>
161
162static void perfevent_now(struct bench_time *t_out, struct timer *t)
163{
164 ssize_t n;
165
166 n = read(t->u_cy.fd, &t_out->cy.i, sizeof(t_out->cy.i));
167 if (n != sizeof(t_out->cy.i)) {
168 debug("failed to read perf-event counter: %s", strerror(errno));
169 return;
170 }
171 t_out->f |= BTF_CYOK;
172}
173
174static void perfevent_teardown(struct timer *t)
175 { close(t->u_cy.fd); }
176
177static const struct timer_ops perfevent_ops =
178 { perfevent_now, perfevent_teardown };
179
180static int perfevent_init(struct timer *t)
181{
182 struct perf_event_attr attr = { 0 };
183 struct bench_time tm;
184
185 attr.type = PERF_TYPE_HARDWARE;
186 attr.size = sizeof(attr);
187 attr.config = PERF_COUNT_HW_CPU_CYCLES;
188 attr.disabled = 0;
189 attr.exclude_kernel = 1;
190 attr.exclude_hv = 1;
191
192 t->u_cy.fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
193 if (t->u_cy.fd < 0) {
194 debug("couldn't open perf evvent: %s", strerror(errno));
195 return (-1);
196 }
197
198 tm.f = 0; perfevent_now(&tm, t);
199 if (!(tm.f&BTF_CYOK)) { close(t->u_cy.fd); return (-1); }
200
201 t->cyops = &perfevent_ops; return (0);
202}
203# define PERFEVENT_CYENT { "linux-perf-event", perfevent_init },
204#else
205# define PERFEVENT_CYENT
206#endif
207
208/*----- Intel time-stamp counter ------------------------------------------*/
209
67b5031e
MW
210/* This is a cycle counter based on the Intel `rdtsc' instruction. It's not
211 * really suitable for performance measurement because it gets confused by
212 * CPU frequency adjustments.
213 */
214
b64eb60f
MW
215#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
216
217#define EFLAGS_ID (1u << 21)
218#define CPUID_1D_TSC (1u << 4)
219
220static uint32 set_flags(unsigned long m, unsigned long x)
221{
222 unsigned long r;
223
224#ifdef __x86_64__
225# define TMP "%%rcx"
226#else
227# define TMP "%%ecx"
228#endif
229
230 __asm__ ("pushf\n\t"
231 "pop %0\n\t"
232 "mov %0, " TMP "\n\t"
233 "and %1, %0\n\t"
234 "xor %2, %0\n\t"
235 "push %0\n\t"
236 "popf\n\t"
237 "pushf\n\t"
238 "pop %0\n\t"
239 "push " TMP "\n\t"
240 "popf"
241 : "=r"(r)
242 : "g"(m), "g"(x)
243 : "ecx");
244 return (r);
245}
246
247struct cpuid { uint32 a, b, c, d; };
248
249static void cpuid(struct cpuid *info_out, uint32 a, uint32 c)
250{
251 __asm__ ("movl %1, %%eax\n\t"
252 "movl %2, %%ecx\n\t"
253 "cpuid\n\t"
254 "movl %%eax, 0(%0)\n\t"
255 "movl %%ebx, 4(%0)\n\t"
256 "movl %%ecx, 8(%0)\n\t"
257 "movl %%edx, 12(%0)\n\t"
258 : /* no outputs */
259 : "r"(info_out), "g"(a), "g"(c)
260 : "eax", "ebx", "ecx", "edx", "cc");
261}
262
263static void x86rdtsc_now(struct bench_time *t_out, struct timer *t)
264{
265 uint32 lo, hi;
266
267 __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
268 SET64(t_out->cy, hi, lo); t_out->f |= BTF_CYOK;
269}
270
271static const struct timer_ops x86rdtsc_ops =
272 { x86rdtsc_now, null_teardown };
273
274static int x86rdtsc_init(struct timer *t)
275{
276 struct cpuid info;
277
278 if ((set_flags(~EFLAGS_ID, 0)&EFLAGS_ID) ||
279 !(set_flags(~EFLAGS_ID, EFLAGS_ID)&EFLAGS_ID))
280 { debug("no `cpuid' instruction"); return (-1); }
281 cpuid(&info, 0, 0);
282 if (info.a < 1) { debug("no `cpuid' leaf 1"); return (-1); }
283 cpuid(&info, 1, 0);
284 if (!(info.d&CPUID_1D_TSC))
285 { debug("no `rdtsc' instrunction"); return (-1); }
286 t->cyops = &x86rdtsc_ops; return (0);
287}
288
289# define X86RDTSC_CYENT { "x86-rdtsc", x86rdtsc_init },
290#else
291# define X86RDTWC_CYENT
292#endif
293
294/*----- POSIX `clock_gettime' ---------------------------------------------*/
295
67b5031e
MW
296/* This is a real-time clock based on the POSIX time interface, with up to
297 * nanosecond precision.
298 */
299
b64eb60f
MW
300#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID)
301
302static void gettime_now(struct bench_time *t_out, struct timer *t)
303{
304 struct timespec now;
305
306 if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now))
307 { debug("error reading POSIX clock: %s", strerror(errno)); return; }
308 ASSIGN64(t_out->s, now.tv_sec); t_out->ns = now.tv_nsec;
309 t_out->f |= BTF_TIMEOK;
310}
311
312static const struct timer_ops gettime_ops = { gettime_now, null_teardown };
313
314static int gettime_init(struct timer *t)
315{
316 struct bench_time tm;
317
318 tm.f = 0; gettime_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1);
319 t->clkops = &gettime_ops; return (0);
320}
321
322# define GETTIME_CLKENT { "posix-clock_gettime", gettime_init },
323#else
324# define GETTIME_CLKENT
325#endif
326
327/*----- Standard C `clock' ------------------------------------------------*/
328
67b5031e
MW
329/* This is a real-time clock based on the C `clock' function which is
330 * guaranteed to be available, though it's not likely to be very good.
331 */
332
b64eb60f
MW
333static void clock_now(struct bench_time *t_out, struct timer *t)
334{
335 clock_t now, x;
336 unsigned long s; uint32 ns;
337
338 now = clock();
339 if (now == (clock_t)-1) {
340 debug("error reading standard clock: %s", strerror(errno));
341 return;
342 }
343 x = now/CLOCKS_PER_SEC;
344 if (x > ULONG_MAX) { debug("standard clock out of range"); return; }
345
346 s = x; x = now - CLOCKS_PER_SEC*s;
347 if (!(NS_PER_S%CLOCKS_PER_SEC))
348 ns = x*(NS_PER_S/CLOCKS_PER_SEC);
349 else if (NS_PER_S <= ULONG_MAX/CLOCKS_PER_SEC)
350 ns = (x*NS_PER_S)/CLOCKS_PER_SEC;
351 else
352 ns = x*((NS_PER_S + 0.0)/CLOCKS_PER_SEC);
353 ASSIGN64(t_out->s, s); t_out->ns = ns; t_out->f |= BTF_TIMEOK;
354}
355
356static const struct timer_ops clock_ops = { clock_now, null_teardown };
357
358static int clock_init(struct timer *t)
359{
360 struct bench_time tm;
361
362 tm.f = 0; clock_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1);
363 t->clkops = &clock_ops; return (0);
364}
365
366#define CLOCK_CLKENT { "clock", clock_init },
367
368/*----- Timing setup ------------------------------------------------------*/
369
67b5031e 370/* Tables of timing sources. */
b64eb60f
MW
371static const struct timerent {
372 const char *name;
373 int (*init)(struct timer */*t*/);
374}
375 clktab[] = { GETTIME_CLKENT CLOCK_CLKENT { 0, 0 } },
376 cytab[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_CYENT { 0, 0 } };
377
67b5031e
MW
378/* --- @find_timer@ --- *
379 *
380 * Arguments: @const char *name@ = timer name
381 * @size_t sz@ = length of name
382 * @const struct timerent *timers@ = table to search
383 * @const char *what@ = adjective describing table
384 *
385 * Returns: The table entry matching the given name, or null if there
386 * isn't one.
387 */
388
b64eb60f
MW
389static const struct timerent *find_timer_n(const char *name, size_t sz,
390 const struct timerent *timers,
391 const char *what)
392{
393 while (timers->name) {
394 if (strlen(timers->name) == sz && MEMCMP(name, ==, timers->name, sz))
395 return (timers);
396 timers++;
397 }
398 debug("%s timer `%.*s' not found", what, (int)sz, name); return (0);
399}
400
67b5031e
MW
401/* --- @try_timer@ --- *
402 *
403 * Arguments: @struct timer *t@ = timer structure
404 * @const struct timerent *timer@ = timer table entry
405 * @const char *what@ = adjective describing table
406 *
407 * Returns: Zero on success, @-1@ if timer failed.
408 *
409 * Use: Tries to initialize the timer @t@, reporting a debug message
410 * if it worked.
411 */
412
b64eb60f
MW
413static int try_timer(struct timer *t,
414 const struct timerent *timer, const char *what)
415{
416 if (timer->init(t)) return (-1);
417 debug("selected %s timer `%s'", what, timer->name); return (0);
418}
419
67b5031e
MW
420/* --- @select_timer@ --- *
421 *
422 * Arguments: @struct timer *t@ = timer structure
423 * @const struct timerent *timer@ = timer table
424 * @const char *varname@ = environment variable to consult
425 * @const char *what@ = adjective describing table
426 *
427 * Returns: Zero on success, @-1@ if timer failed.
428 *
429 * Use: Select a timer from the table. If the environment variable
430 * is set, then parse a comma-separated list of timer names and
431 * use the first one listed that seems to work; otherwise, try
432 * the timers in the table in order.
433 */
434
b64eb60f
MW
435static int select_timer(struct timer *t, const struct timerent *timers,
436 const char *varname, const char *what)
437{
438 const char *p; size_t n;
439 const struct timerent *timer;
440
441 p = getenv(varname);
442 if (!p) {
443 while (timers->name)
444 if (!try_timer(t, timers++, what)) return (0);
445 } else {
446 for (;;) {
447 n = strcspn(p, ",");
448 timer = find_timer_n(p, n, timers, what);
449 if (timer && !try_timer(t, timer, what)) return (0);
450 if (!p[n]) break;
451 p += n + 1;
452 }
453 }
454 debug("no suitable %s timer found", what); return (-1);
455}
456
67b5031e 457/* Bench timer operations. */
b64eb60f
MW
458static void timer_now(struct bench_timer *tm, struct bench_time *t_out)
459{
460 struct timer *t = (struct timer *)tm;
461
462 t->clkops->now(t_out, t);
463 t->cyops->now(t_out, t);
464}
b64eb60f
MW
465static void timer_destroy(struct bench_timer *tm)
466{
467 struct timer *t = (struct timer *)tm;
468
469 if (!t) return;
470 if (t->clkops) t->clkops->teardown(t);
471 if (t->cyops) t->cyops->teardown(t);
472 xfree(t);
473}
474
475static const struct bench_timerops timer_ops = { timer_now, timer_destroy };
476
67b5031e
MW
477/* --- @bench_createtimer@ --- *
478 *
479 * Arguments: ---
480 *
481 * Returns: A freshly constructed standard timer object.
482 *
483 * Use: Allocate a timer. Dispose of it by calling
484 * @tm->ops->destroy(tm)@ when you're done.
485 */
486
b64eb60f
MW
487struct bench_timer *bench_createtimer(void)
488{
489 struct timer *t = 0;
490 struct bench_timer *ret = 0;
491
492 t = xmalloc(sizeof(*t)); t->cyops = 0; t->clkops = 0;
493 if (select_timer(t, clktab, "MLIB_BENCH_CLKTIMER", "clock")) goto end;
494 if (select_timer(t, cytab, "MLIB_BENCH_CYCLETIMER", "cycle")) goto end;
495 t->_t.ops = &timer_ops; ret = &t->_t; t = 0;
496end:
497 if (t) timer_destroy(&t->_t);
498 return (ret);
499}
500
67b5031e 501/*----- Benchmarking ------------------------------------------------------*/
b64eb60f 502
67b5031e
MW
503/* --- @bench_init@ --- *
504 *
505 * Arguments: @struct bench_state *b@ = bench state to initialize
506 * @struct bench_timer *tm@ = timer to attach
507 *
508 * Returns: ---
509 *
510 * Use: Initialize the benchmark state. It still needs to be
511 * calibrated (use @bench_calibrate@) before it can be used, but
512 * this will be done automatically by @bench_measure@ if it's
513 * not done by hand earlier. The timer is now owned by the
514 * benchmark state and will be destroyed by @bench_destroy@.
515 */
b64eb60f
MW
516
517void bench_init(struct bench_state *b, struct bench_timer *tm)
518 { b->tm = tm; b->target_s = 1.0; b->f = 0; }
519
67b5031e
MW
520/* --- @bench_destroy@ --- *
521 *
522 * Arguments: @struct bench_state *b@ = bench state
523 *
524 * Returns: ---
525 *
526 * Use: Destroy the benchmark state, releasing the resources that it
527 * holds.
528 */
529
b64eb60f
MW
530void bench_destroy(struct bench_state *b)
531 { b->tm->ops->destroy(b->tm); }
532
67b5031e
MW
533/* --- @do_nothing@ --- *
534 *
535 * Arguments: @unsigned long n@ = iteration count
536 * @void *ctx@ = context pointer (ignored)
537 *
538 * Returns: ---
539 *
540 * Use: Does nothing at all for @n@ iterations. Used to calibrate
541 * the benchmarking state.
542 */
543
544static void do_nothing(unsigned long n, void *ctx)
b64eb60f
MW
545 { while (n--) RELAX; }
546
67b5031e
MW
547/* --- @bench_calibrate@ --- *
548 *
549 * Arguments: @struct bench_state *b@ = bench state
550 *
551 * Returns: Zero on success, @-1@ if calibration failed.
552 *
553 * Use: Calibrate the benchmark state, so that it can be used to
554 * measure performance reasonably accurately.
555 */
556
b64eb60f
MW
557int bench_calibrate(struct bench_state *b)
558{
559 struct linreg lr_clk = LINREG_INIT, lr_cy = LINREG_INIT;
560 unsigned long n;
561 unsigned i;
562 struct bench_timer *tm = b->tm;
563 struct bench_time t0, t1;
564 struct bench_timing delta;
565 bench_fn *fn = LAUNDER(&do_nothing);
566 unsigned f = BTF_ANY;
567 int rc;
568
67b5031e
MW
569 /* The model here is that a timing loop has a fixed overhead as we enter
570 * and leave (e.g., to do with the indirect branch into the code), and
571 * per-iteration overheads as we check the counter and loop back. We aim
572 * to split these apart using linear regression.
573 */
574
575 /* If we've already calibrated then there's nothing to do. */
b64eb60f
MW
576 if (b->f&BTF_ANY) return (0);
577
67b5031e 578 /* Exercise the inner loop a few times to educate the branch predictor. */
b64eb60f 579 for (i = 0; i < 10; i++)
67b5031e 580 { tm->ops->now(tm, &t0); fn(50, 0); tm->ops->now(tm, &t1); }
b64eb60f 581
67b5031e
MW
582 /* Now we measure idle loops until they take sufficiently long -- or we run
583 * out of counter.
584 */
b64eb60f
MW
585 debug("calibrating...");
586 n = 1;
587 for (;;) {
67b5031e
MW
588
589 /* Measure @n@ iterations of the idle loop. */
b64eb60f
MW
590 tm->ops->now(tm, &t0); fn(n, 0); tm->ops->now(tm, &t1);
591 timer_diff(&delta, &t0, &t1); f &= delta.f;
592 if (!(f&BTF_TIMEOK)) { rc = -1; goto end; }
67b5031e
MW
593
594 /* Register the timings with the regression machinery. */
b64eb60f
MW
595 linreg_update(&lr_clk, n, delta.t);
596 if (!(f&BTF_CYOK))
597 debug(" n = %10lu; t = %12g s", n, delta.t);
598 else {
599 linreg_update(&lr_cy, n, delta.cy);
600 debug(" n = %10lu; t = %12g s, cy = %10.0f", n, delta.t, delta.cy);
601 }
67b5031e
MW
602
603 /* If we're done then stop. */
b64eb60f
MW
604 if (delta.t >= b->target_s/20.0) break;
605 if (n >= ULONG_MAX - n/3) break;
67b5031e
MW
606
607 /* Update the counter and continue. */
b64eb60f
MW
608 n += n/3 + 1;
609 }
610
67b5031e
MW
611 /* Now run the linear regression to extract the constant and per-iteration
612 * overheads.
613 */
b64eb60f
MW
614 linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, 0);
615 debug("clock overhead = (%g n + %g) s", b->clk.m, b->clk.c);
616 if (f&BTF_CYOK) {
617 linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, 0);
618 debug("cycle overhead = (%g n + %g) cy", b->cy.m, b->cy.c);
619 }
67b5031e
MW
620
621 /* We're done. */
b64eb60f
MW
622 b->f |= f; rc = 0;
623end:
624 return (rc);
625}
626
67b5031e
MW
627/* --- @bench_measure@ --- *
628 *
629 * Arguments: @struct bench_timing *t_out@ = where to leave the timing
630 * @struct bench_state *b@ = benchmark state
631 * @double base@ = number of internal units per call
632 * @bench_fn *fn@, @void *ctx@ = benchmark function to run
633 *
634 * Returns: Zero on success, @-1@ if timing failed.
635 *
636 * Use: Measure a function. The function @fn@ is called adaptively
637 * with an iteration count @n@ set so as to run for
638 * approximately @b->target_s@ seconds.
639 *
640 * The result is left in @*t_out@, with @t_out->n@ counting the
641 * final product of the iteration count and @base@ (which might,
642 * e.g., reflect the number of inner iterations the function
643 * performs, or the number of bytes it processes per iteration).
644 */
645
b64eb60f 646int bench_measure(struct bench_timing *t_out, struct bench_state *b,
67b5031e 647 double base, bench_fn *fn, void *ctx)
b64eb60f
MW
648{
649 struct bench_timer *tm = b->tm;
650 struct bench_time t0, t1;
c81c35df 651 unsigned long n, nn;
b64eb60f 652
67b5031e 653 /* Make sure the state is calibrated. */
b64eb60f 654 if (bench_calibrate(b)) return (-1);
67b5031e 655
c81c35df
MW
656 /* Main adaptive measurement loop.
657 *
658 * Suppose the timer loop %$n$% iterations in %$t$% seconds. Our ideal
659 * time is %$T$% seconds. If %$t \ge T/\sqrt{2}$%, we're happy.
660 * Otherwise, we need to scale up the iteration count. The obvious next
661 * choice is %$n' = n T/t$%. Alas, rounding is a problem: if
662 * %$T/t < 1 + 1/n$% then %$\floor{n T/t} = n$% and we will make no
663 * progress. We know that %$T/t > \sqrt{2}%, so this can only happen when
664 * %$1 + 1/n > \sqrt{2}$%, i.e., when %$n < \sqrt{2} + 1$%. On the other
665 * hand, if %$T/t < 1 + 1/n$% then %$t (n + 1)/n > T$%, so just trying
666 * again with %$n' = n + 1$% iterations will very likely work.
667 */
b64eb60f
MW
668 debug("measuring..."); n = 1;
669 for (;;) {
67b5031e 670 tm->ops->now(tm, &t0); fn(n, ctx); tm->ops->now(tm, &t1);
b64eb60f
MW
671 timer_diff(t_out, &t0, &t1);
672 if (!(t_out->f&BTF_TIMEOK)) return (-1);
673 if (!(t_out->f&BTF_CYOK)) debug(" n = %10lu; t = %12g", n, t_out->t);
674 else debug(" n = %10lu; t = %12g, cy = %10.0f", n, t_out->t, t_out->cy);
c81c35df
MW
675 if (t_out->t >= 0.707*b->target_s) break;
676 nn = n*b->target_s/t_out->t;
677 if (nn > n) n = nn;
678 else n++;
b64eb60f 679 }
67b5031e
MW
680
681 /* Adjust according to the calibration. */
c7da785d
MW
682 t_out->t -= n*b->clk.m + b->clk.c;
683 if (t_out->f&BTF_CYOK) t_out->cy -= n*b->cy.m + b->cy.c;
67b5031e
MW
684
685 /* Report the results, if debugging. */
c7da785d
MW
686 if (!(t_out->f&BTF_CYOK)) debug(" adjusted t' = %12g", t_out->t);
687 else debug(" adjusted t = %12g, cy = %10.0f", t_out->t, t_out->cy);
b64eb60f
MW
688 if (!(t_out->f&BTF_CYOK))
689 debug(" %g s per op; %g ops/s", t_out->t/n, n/t_out->t);
690 else
691 debug(" %g s (%g cy) per op; %g ops/s",
692 t_out->t/n, t_out->cy/n, n/t_out->t);
67b5031e
MW
693
694 /* All done. */
e63124bc 695 t_out->n = n*base; return (0);
b64eb60f
MW
696}
697
698/*----- That's all, folks -------------------------------------------------*/