Commit | Line | Data |
---|---|---|
b64eb60f MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Benchmarking support | |
4 | * | |
5 | * (c) 2023 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of the mLib utilities library. | |
11 | * | |
12 | * mLib is free software: you can redistribute it and/or modify it under | |
13 | * the terms of the GNU Library General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or (at | |
15 | * your option) any later version. | |
16 | * | |
17 | * mLib is distributed in the hope that it will be useful, but WITHOUT | |
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public | |
20 | * License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU Library General Public | |
23 | * License along with mLib. If not, write to the Free Software | |
24 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
25 | * USA. | |
26 | */ | |
27 | ||
28 | /*----- Header files ------------------------------------------------------*/ | |
29 | ||
30 | #include "config.h" | |
31 | ||
13ee7406 | 32 | #include <ctype.h> |
b64eb60f MW |
33 | #include <errno.h> |
34 | #include <stdarg.h> | |
35 | #include <stdio.h> | |
36 | #include <stdlib.h> | |
37 | #include <string.h> | |
38 | #include <time.h> | |
39 | ||
40 | #include "alloc.h" | |
41 | #include "bench.h" | |
42 | #include "bits.h" | |
13ee7406 | 43 | #include "dstr.h" |
b64eb60f MW |
44 | #include "linreg.h" |
45 | #include "macros.h" | |
46 | ||
47 | /*----- Data structures ---------------------------------------------------*/ | |
48 | ||
13ee7406 MW |
49 | enum { CLK, CY, NTIMER }; |
50 | ||
b64eb60f MW |
51 | struct timer { |
52 | struct bench_timer _t; | |
13ee7406 | 53 | const struct timer_ops *ops[NTIMER]; /* subtimers for clock and cycles */ |
67b5031e | 54 | union { int fd; } u_cy; /* state for cycle measurement */ |
b64eb60f MW |
55 | }; |
56 | ||
57 | struct timer_ops { | |
13ee7406 MW |
58 | const char *name; /* timer name */ |
59 | unsigned f; /* flags */ | |
60 | #define TF_SECRET 1u /* don't try this automatically */ | |
61 | int (*init)(struct timer */*t*/); /* initialization function */ | |
67b5031e MW |
62 | void (*now)(struct bench_time *t_out, struct timer *t); /* read current */ |
63 | void (*teardown)(struct timer *t); /* release held resources */ | |
b64eb60f MW |
64 | }; |
65 | ||
66 | /*----- Preliminaries -----------------------------------------------------*/ | |
67 | ||
68 | #define NS_PER_S 1000000000 | |
69 | ||
67b5031e MW |
70 | /* --- @debug@ --- * |
71 | * | |
72 | * Arguments: @const char *fmt@ = format control string | |
73 | * @...@ = format arguemnts | |
74 | * | |
75 | * Returns: --- | |
76 | * | |
77 | * Use: Maybe report a debugging message to standard error. | |
78 | */ | |
79 | ||
31d0247c | 80 | static PRINTF_LIKE(1, 2) void debug(const char *fmt, ...) |
b64eb60f MW |
81 | { |
82 | const char *p; | |
83 | va_list ap; | |
84 | ||
85 | p = getenv("MLIB_BENCH_DEBUG"); | |
86 | if (p && *p != 'n' && *p != '0') { | |
87 | va_start(ap, fmt); | |
88 | fputs("mLib BENCH: ", stderr); | |
89 | vfprintf(stderr, fmt, ap); | |
90 | fputc('\n', stderr); | |
91 | va_end(ap); | |
92 | } | |
93 | } | |
94 | ||
67b5031e MW |
95 | /* --- @timer_diff@ --- * |
96 | * | |
97 | * Arguments: @struct bench_timing *delta_out@ = where to putt the result | |
98 | * @const struct bench_time *t0, *t1@ = two times captured by a | |
99 | * timer's @now@ function | |
100 | * | |
101 | * Returns: --- | |
102 | * | |
103 | * Use: Calculates the difference between two captured times. The | |
104 | * flags are set according to whether the differences are | |
105 | * meaningful; @delta_out->n@ is left unset. | |
106 | */ | |
107 | ||
108 | static void timer_diff(struct bench_timing *delta_out, | |
109 | const struct bench_time *t0, | |
110 | const struct bench_time *t1) | |
111 | { | |
112 | unsigned f = t0->f&t1->f; | |
113 | kludge64 k; | |
114 | ||
115 | #ifdef HAVE_UINT64 | |
116 | # define FLOATK64(k) ((double)(k).i) | |
117 | #else | |
118 | # define FLOATK64(k) ((double)(k).lo + 4275123318.0*(double)(k).hi) | |
119 | #endif | |
120 | ||
121 | if (!(f&BTF_TIMEOK)) | |
122 | delta_out->t = 0.0; | |
123 | else { | |
124 | SUB64(k, t1->s, t0->s); | |
125 | delta_out->t = FLOATK64(k) - 1 + | |
126 | (t1->ns + NS_PER_S - t0->ns)/(double)NS_PER_S; | |
127 | } | |
128 | ||
129 | if (!(f&BTF_CYOK)) | |
130 | delta_out->cy = 0.0; | |
131 | else { | |
132 | SUB64(k, t1->cy, t0->cy); | |
133 | delta_out->cy = FLOATK64(k); | |
134 | } | |
135 | ||
136 | delta_out->f = f; | |
137 | ||
138 | #undef FLOATK64 | |
139 | } | |
140 | ||
13ee7406 | 141 | /*----- The null timer ----------------------------------------------------*/ |
b64eb60f | 142 | |
13ee7406 MW |
143 | /* This is a timer which does nothing, in case we don't have any better |
144 | * ideas. | |
67b5031e MW |
145 | */ |
146 | ||
13ee7406 | 147 | static int null_init(struct timer *t) { return (0); } |
b64eb60f MW |
148 | static void null_now(struct bench_time *t_out, struct timer *t) { ; } |
149 | static void null_teardown(struct timer *t) { ; } | |
b64eb60f | 150 | |
13ee7406 MW |
151 | static const struct timer_ops null_ops = |
152 | { "null", 0, null_init, null_now, null_teardown }; | |
153 | #define NULL_ENT &null_ops, | |
154 | ||
155 | /*----- The broken clock --------------------------------------------------*/ | |
156 | ||
157 | /* This is a cycle counter which does nothing, in case we don't have any | |
158 | * better ideas. | |
159 | */ | |
b64eb60f | 160 | |
13ee7406 MW |
161 | static int broken_init(struct timer *t) { return (-1); } |
162 | ||
163 | static const struct timer_ops broken_ops = | |
164 | { "broken", TF_SECRET, broken_init, null_now, null_teardown }; | |
165 | #define BROKEN_ENT &broken_ops, | |
b64eb60f MW |
166 | |
167 | /*----- Linux performance counters ----------------------------------------*/ | |
168 | ||
67b5031e MW |
169 | /* This is a cycle counter which uses the Linux performance event system, |
170 | * which is probably the best choice if it's available. | |
171 | */ | |
172 | ||
b64eb60f MW |
173 | #if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64) |
174 | ||
175 | #include <sys/types.h> | |
176 | #include <unistd.h> | |
177 | ||
178 | #include <linux/perf_event.h> | |
179 | #include <asm/unistd.h> | |
180 | ||
181 | static void perfevent_now(struct bench_time *t_out, struct timer *t) | |
182 | { | |
183 | ssize_t n; | |
184 | ||
185 | n = read(t->u_cy.fd, &t_out->cy.i, sizeof(t_out->cy.i)); | |
186 | if (n != sizeof(t_out->cy.i)) { | |
187 | debug("failed to read perf-event counter: %s", strerror(errno)); | |
188 | return; | |
189 | } | |
190 | t_out->f |= BTF_CYOK; | |
191 | } | |
192 | ||
193 | static void perfevent_teardown(struct timer *t) | |
194 | { close(t->u_cy.fd); } | |
195 | ||
b64eb60f MW |
196 | static int perfevent_init(struct timer *t) |
197 | { | |
198 | struct perf_event_attr attr = { 0 }; | |
199 | struct bench_time tm; | |
200 | ||
201 | attr.type = PERF_TYPE_HARDWARE; | |
202 | attr.size = sizeof(attr); | |
203 | attr.config = PERF_COUNT_HW_CPU_CYCLES; | |
204 | attr.disabled = 0; | |
205 | attr.exclude_kernel = 1; | |
206 | attr.exclude_hv = 1; | |
207 | ||
208 | t->u_cy.fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); | |
209 | if (t->u_cy.fd < 0) { | |
210 | debug("couldn't open perf evvent: %s", strerror(errno)); | |
211 | return (-1); | |
212 | } | |
213 | ||
214 | tm.f = 0; perfevent_now(&tm, t); | |
215 | if (!(tm.f&BTF_CYOK)) { close(t->u_cy.fd); return (-1); } | |
216 | ||
13ee7406 | 217 | return (0); |
b64eb60f | 218 | } |
13ee7406 MW |
219 | |
220 | static const struct timer_ops perfevent_ops = | |
221 | { "linux-perf-hw-cycles", 0, | |
222 | perfevent_init, perfevent_now, perfevent_teardown }; | |
223 | ||
224 | # define PERFEVENT_CYENT &perfevent_ops, | |
b64eb60f MW |
225 | #else |
226 | # define PERFEVENT_CYENT | |
227 | #endif | |
228 | ||
229 | /*----- Intel time-stamp counter ------------------------------------------*/ | |
230 | ||
67b5031e MW |
231 | /* This is a cycle counter based on the Intel `rdtsc' instruction. It's not |
232 | * really suitable for performance measurement because it gets confused by | |
233 | * CPU frequency adjustments. | |
234 | */ | |
235 | ||
b64eb60f MW |
236 | #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) |
237 | ||
13ee7406 | 238 | #include <cpuid.h> |
b64eb60f | 239 | |
13ee7406 | 240 | #define CPUID_1D_TSC (1u << 4) |
b64eb60f MW |
241 | |
242 | static void x86rdtsc_now(struct bench_time *t_out, struct timer *t) | |
13ee7406 | 243 | { t_out->cy.i = __builtin_ia32_rdtsc(); t_out->f |= BTF_CYOK; } |
b64eb60f MW |
244 | |
245 | static int x86rdtsc_init(struct timer *t) | |
246 | { | |
13ee7406 MW |
247 | unsigned a, b, c, d; |
248 | ||
249 | if (!__get_cpuid(1, &a, &b, &c, &d) || !(d&CPUID_1D_TSC)) | |
b64eb60f | 250 | { debug("no `rdtsc' instrunction"); return (-1); } |
13ee7406 | 251 | return (0); |
b64eb60f MW |
252 | } |
253 | ||
13ee7406 MW |
254 | static const struct timer_ops x86rdtsc_ops = |
255 | { "x86-rdtsc", 0, x86rdtsc_init, x86rdtsc_now, null_teardown }; | |
256 | ||
257 | # define X86RDTSC_CYENT &x86rdtsc_ops, | |
b64eb60f | 258 | #else |
13ee7406 | 259 | # define X86RDTSC_CYENT |
b64eb60f MW |
260 | #endif |
261 | ||
262 | /*----- POSIX `clock_gettime' ---------------------------------------------*/ | |
263 | ||
67b5031e MW |
264 | /* This is a real-time clock based on the POSIX time interface, with up to |
265 | * nanosecond precision. | |
266 | */ | |
267 | ||
b64eb60f MW |
268 | #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID) |
269 | ||
270 | static void gettime_now(struct bench_time *t_out, struct timer *t) | |
271 | { | |
272 | struct timespec now; | |
273 | ||
274 | if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now)) | |
275 | { debug("error reading POSIX clock: %s", strerror(errno)); return; } | |
276 | ASSIGN64(t_out->s, now.tv_sec); t_out->ns = now.tv_nsec; | |
277 | t_out->f |= BTF_TIMEOK; | |
278 | } | |
279 | ||
b64eb60f MW |
280 | static int gettime_init(struct timer *t) |
281 | { | |
282 | struct bench_time tm; | |
283 | ||
284 | tm.f = 0; gettime_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1); | |
13ee7406 | 285 | return (0); |
b64eb60f MW |
286 | } |
287 | ||
13ee7406 MW |
288 | static const struct timer_ops gettime_ops = |
289 | { "posix-thread-cputime", 0, gettime_init, gettime_now, null_teardown }; | |
290 | ||
291 | # define GETTIME_CLKENT &gettime_ops, | |
b64eb60f MW |
292 | #else |
293 | # define GETTIME_CLKENT | |
294 | #endif | |
295 | ||
296 | /*----- Standard C `clock' ------------------------------------------------*/ | |
297 | ||
67b5031e MW |
298 | /* This is a real-time clock based on the C `clock' function which is |
299 | * guaranteed to be available, though it's not likely to be very good. | |
300 | */ | |
301 | ||
b64eb60f MW |
302 | static void clock_now(struct bench_time *t_out, struct timer *t) |
303 | { | |
304 | clock_t now, x; | |
305 | unsigned long s; uint32 ns; | |
306 | ||
307 | now = clock(); | |
308 | if (now == (clock_t)-1) { | |
309 | debug("error reading standard clock: %s", strerror(errno)); | |
310 | return; | |
311 | } | |
312 | x = now/CLOCKS_PER_SEC; | |
313 | if (x > ULONG_MAX) { debug("standard clock out of range"); return; } | |
314 | ||
315 | s = x; x = now - CLOCKS_PER_SEC*s; | |
316 | if (!(NS_PER_S%CLOCKS_PER_SEC)) | |
317 | ns = x*(NS_PER_S/CLOCKS_PER_SEC); | |
318 | else if (NS_PER_S <= ULONG_MAX/CLOCKS_PER_SEC) | |
319 | ns = (x*NS_PER_S)/CLOCKS_PER_SEC; | |
320 | else | |
321 | ns = x*((NS_PER_S + 0.0)/CLOCKS_PER_SEC); | |
322 | ASSIGN64(t_out->s, s); t_out->ns = ns; t_out->f |= BTF_TIMEOK; | |
323 | } | |
324 | ||
b64eb60f MW |
325 | static int clock_init(struct timer *t) |
326 | { | |
327 | struct bench_time tm; | |
328 | ||
329 | tm.f = 0; clock_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1); | |
13ee7406 | 330 | return (0); |
b64eb60f MW |
331 | } |
332 | ||
13ee7406 MW |
333 | static const struct timer_ops clock_ops = |
334 | { "stdc-clock", 0, clock_init, clock_now, null_teardown }; | |
335 | ||
336 | #define CLOCK_CLKENT &clock_ops, | |
b64eb60f MW |
337 | |
338 | /*----- Timing setup ------------------------------------------------------*/ | |
339 | ||
67b5031e | 340 | /* Tables of timing sources. */ |
13ee7406 MW |
341 | static const struct timer_ops |
342 | *const clktab[] = { GETTIME_CLKENT CLOCK_CLKENT BROKEN_ENT 0 }, | |
343 | *const cytab[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_ENT BROKEN_ENT 0 }; | |
344 | ||
345 | static const struct timertab { | |
346 | const char *what; | |
347 | const char *env; | |
348 | const struct timer_ops *const *opstab; | |
349 | } timertab[] = { | |
350 | { "clock", "MLIB_BENCH_CLKTIMER", clktab }, | |
351 | { "cycle", "MLIB_BENCH_CYCLETIMER", cytab } | |
352 | }; | |
b64eb60f | 353 | |
67b5031e MW |
354 | /* --- @find_timer@ --- * |
355 | * | |
356 | * Arguments: @const char *name@ = timer name | |
357 | * @size_t sz@ = length of name | |
13ee7406 | 358 | * @unsigned tm@ = which subtimer we're looking for |
67b5031e MW |
359 | * |
360 | * Returns: The table entry matching the given name, or null if there | |
361 | * isn't one. | |
362 | */ | |
363 | ||
13ee7406 MW |
364 | static const struct timer_ops *find_timer(const char *name, size_t sz, |
365 | unsigned tm) | |
b64eb60f | 366 | { |
13ee7406 MW |
367 | const struct timer_ops *const *tt; |
368 | ||
369 | for (tt = timertab[tm].opstab; *tt; tt++) { | |
370 | if (strlen((*tt)->name) == sz && | |
371 | MEMCMP(name, ==, (*tt)->name, sz)) | |
372 | return (*tt); | |
b64eb60f | 373 | } |
13ee7406 MW |
374 | debug("%s timer `%.*s' not found", |
375 | timertab[tm].what, (int)sz, name); return (0); | |
b64eb60f MW |
376 | } |
377 | ||
67b5031e MW |
378 | /* --- @try_timer@ --- * |
379 | * | |
380 | * Arguments: @struct timer *t@ = timer structure | |
13ee7406 MW |
381 | * @const struct timer_ops *ops@ = timer ops |
382 | * @unsigned tm@ = which subtimer we're setting | |
67b5031e | 383 | * |
13ee7406 | 384 | * Returns: Zero on success, %$-1$% if timer failed. |
67b5031e MW |
385 | * |
386 | * Use: Tries to initialize the timer @t@, reporting a debug message | |
387 | * if it worked. | |
388 | */ | |
389 | ||
b64eb60f | 390 | static int try_timer(struct timer *t, |
13ee7406 | 391 | const struct timer_ops *ops, unsigned tm) |
b64eb60f | 392 | { |
13ee7406 MW |
393 | if (ops->init(t)) return (-1); |
394 | debug("selected %s timer `%s'", timertab[tm].what, ops->name); | |
395 | t->ops[tm] = ops; return (0); | |
b64eb60f MW |
396 | } |
397 | ||
67b5031e MW |
398 | /* --- @select_timer@ --- * |
399 | * | |
400 | * Arguments: @struct timer *t@ = timer structure | |
13ee7406 MW |
401 | * @unsigned tm@ = which subtimer we're setting |
402 | * @const char *config@, @size_t sz@ = config string | |
67b5031e | 403 | * |
13ee7406 | 404 | * Returns: Zero on success, %$-1$% if timer failed. |
67b5031e MW |
405 | * |
406 | * Use: Select a timer from the table. If the environment variable | |
407 | * is set, then parse a comma-separated list of timer names and | |
408 | * use the first one listed that seems to work; otherwise, try | |
409 | * the timers in the table in order. | |
410 | */ | |
411 | ||
13ee7406 MW |
412 | static int select_timer(struct timer *t, unsigned tm, |
413 | const char *config, size_t sz) | |
b64eb60f | 414 | { |
13ee7406 MW |
415 | const char *p, *l; |
416 | const struct timer_ops *ops, *const *tt; | |
b64eb60f | 417 | |
13ee7406 MW |
418 | if (!config) { |
419 | for (tt = timertab[tm].opstab; *tt; tt++) | |
420 | if (!((*tt)->f&TF_SECRET) && !try_timer(t, *tt, tm)) return (0); | |
b64eb60f | 421 | } else { |
13ee7406 | 422 | l = config + sz; |
b64eb60f | 423 | for (;;) { |
13ee7406 MW |
424 | p = memchr(config, ',', l - config); if (!p) p = l; |
425 | ops = find_timer(config, p - config, tm); | |
426 | if (ops && !try_timer(t, ops, tm)) return (0); | |
427 | if (p >= l) break; | |
428 | config = p + 1; | |
b64eb60f MW |
429 | } |
430 | } | |
13ee7406 | 431 | debug("no suitable %s timer found", timertab[tm].what); return (-1); |
b64eb60f MW |
432 | } |
433 | ||
67b5031e | 434 | /* Bench timer operations. */ |
13ee7406 MW |
435 | static void timer_describe(struct bench_timer *tm, dstr *d) |
436 | { | |
437 | struct timer *t = (struct timer *)tm; | |
438 | unsigned i; | |
439 | ||
440 | dstr_puts(d, "builtin: "); | |
441 | for (i = 0; i < NTIMER; i++) { | |
442 | if (i) dstr_puts(d, ", "); | |
443 | dstr_putf(d, "%s = %s", timertab[i].what, t->ops[i]->name); | |
444 | } | |
445 | } | |
446 | ||
b64eb60f MW |
447 | static void timer_now(struct bench_timer *tm, struct bench_time *t_out) |
448 | { | |
449 | struct timer *t = (struct timer *)tm; | |
13ee7406 | 450 | unsigned i; |
b64eb60f | 451 | |
13ee7406 | 452 | for (i = 0; i < NTIMER; i++) t->ops[i]->now(t_out, t); |
b64eb60f | 453 | } |
13ee7406 | 454 | |
b64eb60f MW |
455 | static void timer_destroy(struct bench_timer *tm) |
456 | { | |
457 | struct timer *t = (struct timer *)tm; | |
13ee7406 | 458 | unsigned i; |
b64eb60f MW |
459 | |
460 | if (!t) return; | |
13ee7406 MW |
461 | for (i = 0; i < NTIMER; i++) |
462 | if (t->ops[i]) t->ops[i]->teardown(t); | |
b64eb60f MW |
463 | xfree(t); |
464 | } | |
465 | ||
13ee7406 MW |
466 | static const struct bench_timerops timer_ops = |
467 | { timer_describe, timer_now, timer_destroy }; | |
b64eb60f | 468 | |
67b5031e MW |
469 | /* --- @bench_createtimer@ --- * |
470 | * | |
13ee7406 | 471 | * Arguments: @const char *config@ = timer configuration string |
67b5031e MW |
472 | * |
473 | * Returns: A freshly constructed standard timer object. | |
474 | * | |
475 | * Use: Allocate a timer. Dispose of it by calling | |
476 | * @tm->ops->destroy(tm)@ when you're done. | |
13ee7406 MW |
477 | * |
478 | * Applications should not set configuration strings except as | |
479 | * established by user action, e.g., from a command-line option, | |
480 | * environment variable, or configuration file. | |
67b5031e MW |
481 | */ |
482 | ||
13ee7406 | 483 | struct bench_timer *bench_createtimer(const char *config) |
b64eb60f MW |
484 | { |
485 | struct timer *t = 0; | |
486 | struct bench_timer *ret = 0; | |
13ee7406 MW |
487 | struct { const char *p; size_t sz; } tmconf[NTIMER] = { 0 }; |
488 | const struct timer_ops *const *tt; | |
489 | const char *p, *l; size_t n, nn; | |
490 | unsigned i; | |
b64eb60f | 491 | |
13ee7406 MW |
492 | /* Parse the configuration string. */ |
493 | if (config) { | |
494 | ||
495 | /* The first thing to do is find the end of the string. */ | |
496 | l = config + strlen(config); | |
497 | ||
498 | for (;;) { | |
499 | /* Process the whitespace-sparated words of the string one by one. */ | |
500 | ||
501 | /* Skip over any initial whitespace. If we hit the end of the string | |
502 | * then we're done. | |
503 | */ | |
504 | for (;;) | |
505 | if (config >= l) goto done_config; | |
506 | else if (!ISSPACE(*config)) break; | |
507 | else config++; | |
508 | ||
509 | /* There's definitely a word here. Find the end of it. */ | |
510 | for (p = config; p < l && !ISSPACE(*p); p++); | |
511 | nn = p - config; | |
512 | ||
513 | /* Try various simple keywords. */ | |
514 | #define MATCHP(lit) (nn == sizeof(lit) - 1 && MEMCMP(config, ==, lit, nn)) | |
515 | ||
516 | if (MATCHP("list")) { | |
517 | /* The `list' keyword requests lists of the available timer | |
518 | * implementations. | |
519 | */ | |
520 | ||
521 | for (i = 0; i < NTIMER; i++) { | |
522 | printf("%s timers:", timertab[i].what); | |
523 | for (tt = timertab[i].opstab; *tt; tt++) | |
524 | if (!((*tt)->f)&TF_SECRET) printf(" %s", (*tt)->name); | |
525 | putchar('\n'); | |
526 | } | |
527 | goto next_config; | |
528 | } | |
529 | ||
530 | #undef MATCHP | |
531 | ||
532 | /* Otherwise it's an assignment, setting a subtimer list. */ | |
533 | p = memchr(config, '=', nn); | |
534 | if (!p) | |
535 | n = nn; | |
536 | else { | |
537 | n = p - config; | |
538 | for (i = 0; i < NTIMER; i++) | |
539 | if (STRNCMP(config, ==, timertab[i].what, n) && | |
540 | !timertab[i].what[n]) { | |
541 | if (tmconf[i].p) | |
542 | debug("duplicate %s timer list", timertab[i].what); | |
543 | tmconf[i].p = config + n + 1; tmconf[i].sz = nn - n - 1; | |
544 | goto next_config; | |
545 | } | |
546 | } | |
547 | debug("unrecognized config keyword `%.*s'", (int)n, config); | |
548 | ||
549 | /* Move on to the next word. */ | |
550 | next_config: | |
551 | config += nn; | |
552 | } | |
553 | done_config:; | |
554 | } | |
555 | ||
556 | /* Override these settings from the environment. */ | |
557 | for (i = 0; i < NTIMER; i++) { | |
558 | p = getenv(timertab[i].env); | |
559 | if (p) { tmconf[i].p = p; tmconf[i].sz = strlen(p); } | |
560 | } | |
561 | ||
562 | /* All seems well. Allocate the timer object. */ | |
563 | t = xmalloc(sizeof(*t)); | |
564 | for (i = 0; i < NTIMER; i++) t->ops[i] = 0; | |
565 | ||
566 | /* Try to set up the subtimers. */ | |
567 | for (i = 0; i < NTIMER; i++) | |
568 | if (select_timer(t, i, tmconf[i].p, tmconf[i].sz)) goto end; | |
569 | ||
570 | /* All is done. */ | |
b64eb60f MW |
571 | t->_t.ops = &timer_ops; ret = &t->_t; t = 0; |
572 | end: | |
573 | if (t) timer_destroy(&t->_t); | |
574 | return (ret); | |
575 | } | |
576 | ||
67b5031e | 577 | /*----- Benchmarking ------------------------------------------------------*/ |
b64eb60f | 578 | |
67b5031e MW |
579 | /* --- @bench_init@ --- * |
580 | * | |
581 | * Arguments: @struct bench_state *b@ = bench state to initialize | |
13ee7406 | 582 | * @struct bench_timer *tm@ = timer to attach, or null |
67b5031e | 583 | * |
13ee7406 | 584 | * Returns: Zero on success, %$-1$% on failure. |
67b5031e | 585 | * |
13ee7406 MW |
586 | * Use: Initialize the benchmark state. On success, the timer state |
587 | * still needs to be calibrated (use @bench_calibrate@) before | |
588 | * it can be used, but this will be done automatically by | |
589 | * @bench_measure@ if it's not done by hand earlier. The timer | |
590 | * is now owned by the benchmark state and will be destroyed by | |
591 | * @bench_destroy@. | |
592 | * | |
593 | * The only reason for failure is if @tm@ was null on entry, | |
594 | * and automatic construction of a timer failed. The state is | |
595 | * safe to discard, but calling @bench_destroy@ is safe too. | |
67b5031e | 596 | */ |
b64eb60f | 597 | |
13ee7406 MW |
598 | int bench_init(struct bench_state *b, struct bench_timer *tm) |
599 | { | |
600 | int rc; | |
601 | ||
602 | b->tm = 0; | |
603 | ||
604 | if (!tm) { | |
605 | tm = bench_createtimer(0); | |
606 | if (!tm) { rc = -1; goto end; } | |
607 | } | |
608 | ||
609 | b->tm = tm; b->target_s = 1.0; b->f = 0; rc = 0; | |
610 | end: | |
611 | return (rc); | |
612 | } | |
b64eb60f | 613 | |
67b5031e MW |
614 | /* --- @bench_destroy@ --- * |
615 | * | |
616 | * Arguments: @struct bench_state *b@ = bench state | |
617 | * | |
618 | * Returns: --- | |
619 | * | |
620 | * Use: Destroy the benchmark state, releasing the resources that it | |
621 | * holds. | |
622 | */ | |
623 | ||
b64eb60f | 624 | void bench_destroy(struct bench_state *b) |
13ee7406 | 625 | { if (b->tm) { b->tm->ops->destroy(b->tm); b->tm = 0; } } |
b64eb60f | 626 | |
67b5031e MW |
627 | /* --- @do_nothing@ --- * |
628 | * | |
629 | * Arguments: @unsigned long n@ = iteration count | |
630 | * @void *ctx@ = context pointer (ignored) | |
631 | * | |
632 | * Returns: --- | |
633 | * | |
634 | * Use: Does nothing at all for @n@ iterations. Used to calibrate | |
635 | * the benchmarking state. | |
636 | */ | |
637 | ||
638 | static void do_nothing(unsigned long n, void *ctx) | |
b64eb60f MW |
639 | { while (n--) RELAX; } |
640 | ||
67b5031e MW |
641 | /* --- @bench_calibrate@ --- * |
642 | * | |
643 | * Arguments: @struct bench_state *b@ = bench state | |
644 | * | |
13ee7406 | 645 | * Returns: Zero on success, %$-1$% if calibration failed. |
67b5031e MW |
646 | * |
647 | * Use: Calibrate the benchmark state, so that it can be used to | |
648 | * measure performance reasonably accurately. | |
649 | */ | |
650 | ||
13ee7406 MW |
651 | #define T_CLB 0.0625 /* calibration time limit */ |
652 | ||
b64eb60f MW |
653 | int bench_calibrate(struct bench_state *b) |
654 | { | |
655 | struct linreg lr_clk = LINREG_INIT, lr_cy = LINREG_INIT; | |
656 | unsigned long n; | |
657 | unsigned i; | |
658 | struct bench_timer *tm = b->tm; | |
659 | struct bench_time t0, t1; | |
660 | struct bench_timing delta; | |
13ee7406 | 661 | double r; |
b64eb60f MW |
662 | bench_fn *fn = LAUNDER(&do_nothing); |
663 | unsigned f = BTF_ANY; | |
664 | int rc; | |
665 | ||
67b5031e MW |
666 | /* The model here is that a timing loop has a fixed overhead as we enter |
667 | * and leave (e.g., to do with the indirect branch into the code), and | |
668 | * per-iteration overheads as we check the counter and loop back. We aim | |
669 | * to split these apart using linear regression. | |
670 | */ | |
671 | ||
672 | /* If we've already calibrated then there's nothing to do. */ | |
13ee7406 | 673 | if (b->f&BTF_CLB) return (b->f&BTF_ANY ? 0 : -1); |
b64eb60f | 674 | |
67b5031e | 675 | /* Exercise the inner loop a few times to educate the branch predictor. */ |
b64eb60f | 676 | for (i = 0; i < 10; i++) |
67b5031e | 677 | { tm->ops->now(tm, &t0); fn(50, 0); tm->ops->now(tm, &t1); } |
b64eb60f | 678 | |
67b5031e MW |
679 | /* Now we measure idle loops until they take sufficiently long -- or we run |
680 | * out of counter. | |
681 | */ | |
b64eb60f MW |
682 | debug("calibrating..."); |
683 | n = 1; | |
684 | for (;;) { | |
67b5031e MW |
685 | |
686 | /* Measure @n@ iterations of the idle loop. */ | |
b64eb60f MW |
687 | tm->ops->now(tm, &t0); fn(n, 0); tm->ops->now(tm, &t1); |
688 | timer_diff(&delta, &t0, &t1); f &= delta.f; | |
689 | if (!(f&BTF_TIMEOK)) { rc = -1; goto end; } | |
67b5031e MW |
690 | |
691 | /* Register the timings with the regression machinery. */ | |
b64eb60f MW |
692 | linreg_update(&lr_clk, n, delta.t); |
693 | if (!(f&BTF_CYOK)) | |
694 | debug(" n = %10lu; t = %12g s", n, delta.t); | |
695 | else { | |
696 | linreg_update(&lr_cy, n, delta.cy); | |
697 | debug(" n = %10lu; t = %12g s, cy = %10.0f", n, delta.t, delta.cy); | |
698 | } | |
67b5031e MW |
699 | |
700 | /* If we're done then stop. */ | |
13ee7406 | 701 | if (delta.t >= T_CLB) break; |
b64eb60f | 702 | if (n >= ULONG_MAX - n/3) break; |
67b5031e MW |
703 | |
704 | /* Update the counter and continue. */ | |
b64eb60f MW |
705 | n += n/3 + 1; |
706 | } | |
707 | ||
67b5031e MW |
708 | /* Now run the linear regression to extract the constant and per-iteration |
709 | * overheads. | |
710 | */ | |
13ee7406 MW |
711 | linreg_fit(&lr_clk, &b->clk.m, &b->clk.c, &r); |
712 | debug("clock overhead = (%g n + %g) s (r = %g)", b->clk.m, b->clk.c, r); | |
b64eb60f | 713 | if (f&BTF_CYOK) { |
13ee7406 MW |
714 | linreg_fit(&lr_cy, &b->cy.m, &b->cy.c, &r); |
715 | debug("cycle overhead = (%g n + %g) cy (r = %g)", b->cy.m, b->cy.c, r); | |
b64eb60f | 716 | } |
67b5031e MW |
717 | |
718 | /* We're done. */ | |
13ee7406 | 719 | rc = 0; |
b64eb60f | 720 | end: |
13ee7406 | 721 | b->f |= f | BTF_CLB; /* no point trying again */ |
b64eb60f MW |
722 | return (rc); |
723 | } | |
724 | ||
67b5031e MW |
725 | /* --- @bench_measure@ --- * |
726 | * | |
8d372122 MW |
727 | * Arguments: @struct bench_state *b@ = benchmark state |
728 | * @struct bench_timing *t_out@ = where to leave the timing | |
67b5031e MW |
729 | * @double base@ = number of internal units per call |
730 | * @bench_fn *fn@, @void *ctx@ = benchmark function to run | |
731 | * | |
13ee7406 | 732 | * Returns: Zero on success, %$-1$% if timing failed. |
67b5031e MW |
733 | * |
734 | * Use: Measure a function. The function @fn@ is called adaptively | |
735 | * with an iteration count @n@ set so as to run for | |
736 | * approximately @b->target_s@ seconds. | |
737 | * | |
738 | * The result is left in @*t_out@, with @t_out->n@ counting the | |
739 | * final product of the iteration count and @base@ (which might, | |
740 | * e.g., reflect the number of inner iterations the function | |
741 | * performs, or the number of bytes it processes per iteration). | |
742 | */ | |
743 | ||
8d372122 | 744 | int bench_measure(struct bench_state *b, struct bench_timing *t_out, |
67b5031e | 745 | double base, bench_fn *fn, void *ctx) |
b64eb60f MW |
746 | { |
747 | struct bench_timer *tm = b->tm; | |
748 | struct bench_time t0, t1; | |
c81c35df | 749 | unsigned long n, nn; |
b64eb60f | 750 | |
8d372122 MW |
751 | /* Make sure the state is calibrated and usable. */ |
752 | if (!(b->f&BTF_CLB) && bench_calibrate(b)) return (-1); | |
753 | if (!(b->f&BTF_TIMEOK)) return (-1); | |
67b5031e | 754 | |
c81c35df MW |
755 | /* Main adaptive measurement loop. |
756 | * | |
757 | * Suppose the timer loop %$n$% iterations in %$t$% seconds. Our ideal | |
758 | * time is %$T$% seconds. If %$t \ge T/\sqrt{2}$%, we're happy. | |
759 | * Otherwise, we need to scale up the iteration count. The obvious next | |
760 | * choice is %$n' = n T/t$%. Alas, rounding is a problem: if | |
761 | * %$T/t < 1 + 1/n$% then %$\floor{n T/t} = n$% and we will make no | |
762 | * progress. We know that %$T/t > \sqrt{2}%, so this can only happen when | |
763 | * %$1 + 1/n > \sqrt{2}$%, i.e., when %$n < \sqrt{2} + 1$%. On the other | |
764 | * hand, if %$T/t < 1 + 1/n$% then %$t (n + 1)/n > T$%, so just trying | |
765 | * again with %$n' = n + 1$% iterations will very likely work. | |
766 | */ | |
b64eb60f MW |
767 | debug("measuring..."); n = 1; |
768 | for (;;) { | |
67b5031e | 769 | tm->ops->now(tm, &t0); fn(n, ctx); tm->ops->now(tm, &t1); |
b64eb60f MW |
770 | timer_diff(t_out, &t0, &t1); |
771 | if (!(t_out->f&BTF_TIMEOK)) return (-1); | |
772 | if (!(t_out->f&BTF_CYOK)) debug(" n = %10lu; t = %12g", n, t_out->t); | |
773 | else debug(" n = %10lu; t = %12g, cy = %10.0f", n, t_out->t, t_out->cy); | |
c81c35df MW |
774 | if (t_out->t >= 0.707*b->target_s) break; |
775 | nn = n*b->target_s/t_out->t; | |
776 | if (nn > n) n = nn; | |
777 | else n++; | |
b64eb60f | 778 | } |
67b5031e MW |
779 | |
780 | /* Adjust according to the calibration. */ | |
c7da785d MW |
781 | t_out->t -= n*b->clk.m + b->clk.c; |
782 | if (t_out->f&BTF_CYOK) t_out->cy -= n*b->cy.m + b->cy.c; | |
67b5031e MW |
783 | |
784 | /* Report the results, if debugging. */ | |
c7da785d MW |
785 | if (!(t_out->f&BTF_CYOK)) debug(" adjusted t' = %12g", t_out->t); |
786 | else debug(" adjusted t = %12g, cy = %10.0f", t_out->t, t_out->cy); | |
b64eb60f MW |
787 | if (!(t_out->f&BTF_CYOK)) |
788 | debug(" %g s per op; %g ops/s", t_out->t/n, n/t_out->t); | |
789 | else | |
790 | debug(" %g s (%g cy) per op; %g ops/s", | |
791 | t_out->t/n, t_out->cy/n, n/t_out->t); | |
67b5031e MW |
792 | |
793 | /* All done. */ | |
e63124bc | 794 | t_out->n = n*base; return (0); |
b64eb60f MW |
795 | } |
796 | ||
797 | /*----- That's all, folks -------------------------------------------------*/ |