Commit | Line | Data |
---|---|---|
b64eb60f MW |
1 | /* -*-c-*- |
2 | * | |
3 | * Benchmarking support | |
4 | * | |
5 | * (c) 2023 Straylight/Edgeware | |
6 | */ | |
7 | ||
8 | /*----- Licensing notice --------------------------------------------------* | |
9 | * | |
10 | * This file is part of the mLib utilities library. | |
11 | * | |
12 | * mLib is free software: you can redistribute it and/or modify it under | |
13 | * the terms of the GNU Library General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or (at | |
15 | * your option) any later version. | |
16 | * | |
17 | * mLib is distributed in the hope that it will be useful, but WITHOUT | |
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public | |
20 | * License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU Library General Public | |
23 | * License along with mLib. If not, write to the Free Software | |
24 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
25 | * USA. | |
26 | */ | |
27 | ||
28 | #ifndef MLIB_BENCH_H | |
29 | #define MLIB_BENCH_H | |
30 | ||
31 | #ifdef __cplusplus | |
32 | extern "C" { | |
33 | #endif | |
34 | ||
35 | /*----- Header files ------------------------------------------------------*/ | |
36 | ||
b1a20bee | 37 | #include <limits.h> |
6e683a79 MW |
38 | #include <time.h> |
39 | ||
b64eb60f MW |
40 | #ifndef MLIB_BITS_H |
41 | # include "bits.h" | |
42 | #endif | |
43 | ||
b1a20bee MW |
44 | #ifndef MLIB_CONTROL_H |
45 | # include "control.h" | |
46 | #endif | |
47 | ||
13ee7406 MW |
48 | #ifndef MLIB_DSTR_H |
49 | # include "dstr.h" | |
50 | #endif | |
51 | ||
289651a7 MW |
52 | #ifndef MLIB_MACROS_H |
53 | # include "macros.h" | |
54 | #endif | |
55 | ||
b1a20bee MW |
56 | #ifndef MLIB_GPRINTF_H |
57 | # include "gprintf.h" | |
58 | #endif | |
59 | ||
289651a7 | 60 | /*----- Timers ------------------------------------------------------------*/ |
b64eb60f MW |
61 | |
62 | struct bench_time { | |
67b5031e MW |
63 | unsigned f; /* flags */ |
64 | #define BTF_TIMEOK 1u /* @s@ ad @ns@ slots are value */ | |
65 | #define BTF_CYOK 2u /* @cy@ slot is valid */ | |
66 | #define BTF_ANY (BTF_TIMEOK | BTF_CYOK) /* some part is useful */ | |
6e683a79 MW |
67 | union { |
68 | struct { kludge64 s; uint32 ns; } ts; /* @struct timespec@-ish */ | |
69 | clock_t clk; /* @clock@ */ | |
70 | kludge64 rawns; /* raw nanosecond count */ | |
71 | } t; /* time */ | |
c91413e6 | 72 | kludge64 cy; /* count of CPU cycles */ |
b64eb60f MW |
73 | }; |
74 | ||
75 | struct bench_timing { | |
c91413e6 | 76 | unsigned f; /* flags (@BTF_...@) */ |
67b5031e | 77 | double n, t, cy; /* count, time, and cycles */ |
b64eb60f MW |
78 | }; |
79 | ||
289651a7 MW |
80 | struct bench_timer { |
81 | const struct bench_timerops *ops; | |
82 | unsigned ref; | |
83 | }; | |
b64eb60f MW |
84 | |
85 | struct bench_timerops { | |
13ee7406 MW |
86 | void (*describe)(struct bench_timer */*bt*/, dstr */*d*/); |
87 | /* Write a description of the timer to @d@. */ | |
88 | ||
b1a20bee MW |
89 | int (*preflight)(struct bench_timer */*bt*/); |
90 | /* Return zero if timer is ready to go, or %$-1$% otherwise. The @now@ | |
91 | * function will only be called following a successful @preflight@ on the | |
92 | * same thread. | |
93 | */ | |
94 | ||
6e683a79 MW |
95 | int (*now)(struct bench_timer */*bt*/, struct bench_time */*t_out*/, |
96 | unsigned /*f*/); | |
97 | #define BTF_T0 0u /* fetching first time of a pair */ | |
98 | #define BTF_T1 1u /* fetching second time of a pair */ | |
99 | /* Fill in @*t_out@ with the current time. Return zero on success | |
100 | * %%\emph{or} permanent failure; return %$-1$% on temporary failure. | |
101 | */ | |
102 | ||
103 | void (*diff)(struct bench_timer */*bt*/, | |
104 | struct bench_timing */*delta_out*/, | |
105 | const struct bench_time */*t0*/, | |
106 | const struct bench_time */*t1*/); | |
107 | /* Subtract the time @t0@ from the time @t1@, leaving the result in | |
108 | * @*delta_out@, setting flags as appropriate. | |
109 | */ | |
67b5031e | 110 | |
b64eb60f | 111 | void (*destroy)(struct bench_timer */*bt*/); |
67b5031e | 112 | /* Release the timer and any resources it holds. */ |
b64eb60f MW |
113 | }; |
114 | ||
289651a7 MW |
115 | /* --- @bench_createtimer@ --- * |
116 | * | |
117 | * Arguments: @const char *config@ = user-supplied configuration string | |
118 | * | |
119 | * Returns: A freshly constructed standard timer object, or null on | |
120 | * failure. | |
121 | * | |
122 | * Use: Allocate a timer. Dispose of it by calling | |
123 | * @tm->ops->destroy(tm)@ when you're done. | |
124 | * | |
125 | * Applications should not set configuration strings except as | |
126 | * established by user action, e.g., from a command-line option, | |
127 | * environment variable, or configuration file. | |
128 | */ | |
b1a20bee | 129 | |
289651a7 | 130 | extern struct bench_timer *bench_createtimer(const char *config); |
b1a20bee MW |
131 | |
132 | /* --- @BENCH_TIMELOOP_DECLS@ --- * | |
133 | * | |
134 | * Arguments: --- | |
135 | * | |
136 | * Use: Expands to the declarations needed by @BENCH_TIMELOOP_TAG@. | |
137 | * These should be at block scope, not at toplevel. | |
138 | */ | |
139 | ||
140 | #define BENCH_TIMELOOP_DECLS \ | |
141 | struct bench_timer *_bench_tm; \ | |
142 | struct bench_time _bench_t0, _bench_t1; \ | |
143 | unsigned long _bench_n, _bench_n1 | |
144 | ||
145 | /* --- @BENCH_TIMELOOP_TAG@ --- * | |
146 | * | |
147 | * Arguments: @tag@ = control structure tag | |
289651a7 | 148 | * @struct bench_state *tm@ = benchmark timer |
b1a20bee MW |
149 | * @struct bench_timing *delta_out@ = where to put the result |
150 | * @double n@ = number of iterations | |
151 | * | |
152 | * Returns: --- | |
153 | * | |
289651a7 | 154 | * Use: @BENCH_TIMELOOP_TAG(tag, tm, delta_out, n) stmt@ runs @stmt@ |
b1a20bee MW |
155 | * @n@ times, capturing the time and/or CPU cycles taken in |
156 | * @*delta_out@. The iteration count must be no more than the | |
157 | * %%\emph{square}%% of @ULONG_MAX@. If @stmt@ executes a free | |
289651a7 | 158 | * @break@ statement, then the containing loop -- which must |
b1a20bee MW |
159 | * exist -- is exited. |
160 | * | |
161 | * This macro is not intended to be used directly by users: it | |
162 | * is used internally by @bench_calibrate@ and @BENCH_MEASURE@. | |
163 | */ | |
164 | ||
289651a7 | 165 | #define BENCH_TIMELOOP_TAG(tag, tm, delta_out, n, onbreak) \ |
b1a20bee MW |
166 | MC_BEFORE(tag##__benchtl_setup, { \ |
167 | double _R = ULONG_MAX; double _n = (n); \ | |
168 | \ | |
289651a7 | 169 | _bench_tm = (tm); \ |
b1a20bee MW |
170 | if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; } \ |
171 | else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; } \ | |
172 | }) \ | |
173 | MC_TARGET(tag##__benchtl_break, { break; }) \ | |
174 | MC_AFTER(tag##__benchtl_end, { \ | |
175 | _bench_tm->ops->diff(_bench_tm, (delta_out), &_bench_t0, &_bench_t1); \ | |
176 | }) \ | |
177 | MC_DOWHILE(tag##__benchtl_countdown, _bench_n1--) \ | |
178 | MC_AFTER(tag##__benchtl_post, { _bench_n = ULONG_MAX; }) \ | |
179 | MC_DOWHILE(tag##__benchtl_t1, \ | |
180 | _bench_tm->ops->now(_bench_tm, &_bench_t1, BTF_T1)) \ | |
181 | MC_WRAP(tag##__benchtl_t0, \ | |
182 | { while (_bench_tm->ops->now(_bench_tm, &_bench_t0, BTF_T0)) ; }, \ | |
183 | { ; }, \ | |
184 | { MC_GOTARGET(tag##__benchtl_break); }) | |
185 | ||
289651a7 | 186 | /*----- Measuring ---------------------------------------------------------*/ |
b64eb60f | 187 | |
289651a7 MW |
188 | struct bench_state { |
189 | struct bench_timer *tm; /* a timer */ | |
190 | double target_s; /* target time to run benchmarks */ | |
191 | unsigned f; /* calibration flags (@BTF_...@) */ | |
192 | #define BTF_CLB 0x0100 /* tried calibrating */ | |
193 | struct { double m, c; } clk, cy; /* calculated overheads */ | |
194 | }; | |
67b5031e | 195 | |
289651a7 MW |
196 | typedef void bench_fn(unsigned long /*n*/, void */*ctx*/); |
197 | /* Run the benchmark @n@ times, given a context pointer @ctx@. */ | |
b64eb60f | 198 | |
67b5031e MW |
199 | /* --- @bench_init@ --- * |
200 | * | |
201 | * Arguments: @struct bench_state *b@ = bench state to initialize | |
13ee7406 | 202 | * @struct bench_timer *tm@ = timer to attach, or null |
67b5031e | 203 | * |
13ee7406 MW |
204 | * Returns: Zero on success, %$-1$% on failure. |
205 | * | |
206 | * Use: Initialize the benchmark state. On success, the timer state | |
207 | * still needs to be calibrated (use @bench_calibrate@) before | |
208 | * it can be used, but this will be done automatically by | |
209 | * @bench_measure@ if it's not done by hand earlier. The timer | |
210 | * is now owned by the benchmark state and will be destroyed by | |
211 | * @bench_destroy@. | |
67b5031e | 212 | * |
13ee7406 MW |
213 | * The only reason for failure is if @tm@ was null on entry, |
214 | * and automatic construction of a timer failed. The state is | |
215 | * safe to discard, but calling @bench_destroy@ is safe too. | |
67b5031e MW |
216 | */ |
217 | ||
13ee7406 | 218 | extern int bench_init(struct bench_state */*b*/, struct bench_timer */*tm*/); |
b64eb60f | 219 | |
67b5031e MW |
220 | /* --- @bench_destroy@ --- * |
221 | * | |
222 | * Arguments: @struct bench_state *b@ = bench state | |
223 | * | |
224 | * Returns: --- | |
225 | * | |
226 | * Use: Destroy the benchmark state, releasing the resources that it | |
227 | * holds. | |
228 | */ | |
229 | ||
e63124bc | 230 | extern void bench_destroy(struct bench_state */*b*/); |
b64eb60f | 231 | |
67b5031e MW |
232 | /* --- @bench_calibrate@ --- * |
233 | * | |
234 | * Arguments: @struct bench_state *b@ = bench state | |
b1a20bee | 235 | * @unsigned f@ = calibration flags |
67b5031e | 236 | * |
13ee7406 | 237 | * Returns: Zero on success, %$-1$% if calibration failed. |
67b5031e MW |
238 | * |
239 | * Use: Calibrate the benchmark state, so that it can be used to | |
240 | * measure performance reasonably accurately. | |
b1a20bee MW |
241 | * |
242 | * Calibration will take into account how the subject code is | |
243 | * going to be located. If you're going to use @BENCH_MEASURE@ | |
244 | * to measure a piece of literal code, then leave @f@ zero. If | |
245 | * the code to be measured is going to be executed via an | |
246 | * indirect branch, e.g., through the @measure@ function, then | |
247 | * set @BTF_INDIRECT@. | |
67b5031e MW |
248 | */ |
249 | ||
b1a20bee MW |
250 | #define BTF_INDIRECT 1u |
251 | extern int bench_calibrate(struct bench_state */*b*/, unsigned /*f*/); | |
252 | ||
253 | /* --- @bench_preflight@ --- * | |
254 | * | |
255 | * Arguments: @struct bench_state *b@ = benchmark state | |
256 | * | |
257 | * Returns: Zero on success, %$-1$% on failure. | |
258 | * | |
259 | * Use: Prepares for benchmarking on the current thread. Current | |
260 | * checks are that the timer is calibrated and that it can | |
261 | * successfully measure time; the timer preflight is also run. | |
262 | * | |
263 | * Users are unlikely to find this function useful: it's called | |
264 | * automatically by the @BENCH_MEASURE@ macro and the | |
265 | * @bench_measure@ function. | |
266 | */ | |
267 | ||
268 | extern int bench_preflight(struct bench_state */*b*/); | |
269 | ||
270 | /* --- @bench_adapt@ --- * | |
271 | * | |
289651a7 MW |
272 | * Arguments: @double *n_inout@ = number of iterations, updated |
273 | * @double target_s@ = target time in seconds | |
b1a20bee MW |
274 | * @const struct bench_timing *t@ = timing from the previous run |
275 | * | |
276 | * Returns: Nonzero if the measurement is sufficient; zero to run again. | |
277 | * | |
278 | * Use: This function determines a suitable number of iterations of a | |
279 | * benchmark function to perform next. It is used in a loop | |
280 | * such as the following. | |
281 | * | |
289651a7 MW |
282 | * @BENCH_TIMELOOP_DECLS;@ |
283 | * @struct bench_timer *tm;@ | |
b1a20bee | 284 | * @struct bench_timing t;@ |
289651a7 | 285 | * @double n = 1.0, target_s = 1.0;@ |
b1a20bee MW |
286 | * |
287 | * @do {@ | |
289651a7 MW |
288 | * @BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })@ |
289 | * (do @_bench_n@ iterations of some computation)@;@ | |
290 | * @} while (!bench_adapt(&n, target_s, &t));@ | |
b1a20bee MW |
291 | * |
292 | * On entry, @*n_inout@ should be the number of iterations | |
293 | * performed by the previous pass, and @*t@ the resulting time; | |
289651a7 MW |
294 | * the @BTF_TIMEOK@ flag must be set in @t->f@. If the timing |
295 | * is sufficient -- @t->t@ is sufficiently close to @target_s@ | |
b1a20bee MW |
296 | * -- then the function returns nonzero to indicate that |
297 | * measurement is complete. Otherwise, it sets @*n_inout@ to a | |
298 | * new, larger iteration count and returns zero to indicate that | |
299 | * a further pass is necessary. | |
300 | * | |
301 | * Users are unlikely to find this function useful: it's called | |
302 | * automatically by the @BENCH_MEASURE@ macro and the | |
303 | * @bench_measure@ function. | |
304 | */ | |
305 | ||
289651a7 | 306 | extern int bench_adapt(double */*n_inout*/, double /*target_s*/, |
b1a20bee MW |
307 | const struct bench_timing */*t*/); |
308 | ||
309 | /* --- @bench_adjust@ --- * | |
310 | * | |
311 | * Arguments: @struct bench_state *b@ = benchmark state | |
312 | * @struct bench_timing *t_inout@ = timing to adjust | |
313 | * @double n@ = number of external iterations performed | |
314 | * @double base@ = number of internal operations per external | |
315 | * iteration | |
316 | * | |
317 | * Returns: --- | |
318 | * | |
289651a7 | 319 | * Use: Adjusts a raw timing, as captured by @BENCH_TIMELOOP_TAG@, |
b1a20bee MW |
320 | * according to the calibration data captured in @b@. |
321 | * On exit, the timing data is updated, and @t->n@ is set to the | |
322 | * product @n*base@. | |
323 | * | |
324 | * Users are unlikely to find this function useful: it's called | |
325 | * automatically by the @BENCH_MEASURE@ macro and the | |
326 | * @bench_measure@ function. | |
327 | */ | |
328 | ||
329 | extern void bench_adjust(struct bench_state */*b*/, | |
330 | struct bench_timing */*t_inout*/, | |
331 | double /*n*/, double /*base*/); | |
b64eb60f | 332 | |
289651a7 MW |
333 | /* --- @BENCH_MEASURE_DECLS@ --- * |
334 | * | |
335 | * Arguments: --- | |
336 | * | |
337 | * Use: Expands to the declarations needed by @BENCH_MEASURE@. | |
338 | * These should be at block scope, not at toplevel. | |
339 | */ | |
340 | ||
341 | #define BENCH_MEASURE_DECLS \ | |
342 | struct bench_state *_bench_b; \ | |
343 | struct bench_timing *_bench_t; \ | |
344 | double _bench_nn; \ | |
345 | BENCH_TIMELOOP_DECLS | |
346 | ||
347 | /* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- * | |
348 | * | |
349 | * Arguments: @tag@ = control structure tag | |
350 | * @struct bench_state *b@ = benchmark state | |
351 | * @int &rc@ = where to put the result code (zero on success, | |
352 | * %$-1$% on failure) | |
353 | * @struct bench_timing *t_out@ = where to put the result | |
354 | * @double base@ = number of operations per external iteration | |
355 | * | |
356 | * Returns: --- | |
357 | * | |
358 | * Use: @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the | |
359 | * execution of @stmt@. | |
360 | * | |
361 | * The statement should perform @_bench_n@ iterations of some | |
362 | * operation. It will be invoked multiple times, with varying | |
363 | * iteration counts, so as to run for approximately | |
364 | * @b->target_s@ seconds. | |
365 | * | |
366 | * On success, the resulting timing is left in @*t_out@, with | |
367 | * @t_out->n@ holding the product of the final iteration count | |
368 | * and @base@, and @rc@ is set to zero. If the timer fails, or | |
369 | * if @stmt@ invokes a free @break@ statement, then @rc@ is set | |
370 | * to %$-1$%. | |
371 | * | |
372 | * The macro @BENCH_MEASURE_TAG@ is the same, except that it | |
373 | * allows an explicit control-structure tag to be set so that it | |
374 | * can be used within larger control structure macros. | |
375 | */ | |
376 | ||
377 | #define BENCH_MEASURE_TAG(tag, b, rc, t_out, base) \ | |
378 | MC_BEFORE(tag##__benchmsr_setup, { \ | |
379 | _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0; \ | |
380 | if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail); \ | |
381 | }) \ | |
382 | MC_TARGET(tag##__benchmsr_done, \ | |
383 | { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \ | |
384 | MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; }) \ | |
385 | for (;;) \ | |
386 | MC_WRAP(tag##__benchmsr_loop, \ | |
387 | { ; }, \ | |
388 | { _bench_t->f &= _bench_b->f; \ | |
389 | if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \ | |
390 | if (bench_adapt(&_bench_nn, _bench_b->target_s, _bench_t)) \ | |
391 | MC_GOTARGET(tag##__benchmsr_done); \ | |
392 | }, \ | |
393 | { MC_GOTARGET(tag##__benchmsr_fail); }) \ | |
394 | BENCH_TIMELOOP_TAG(tag##__benchmsr_time, \ | |
395 | _bench_b->tm, _bench_t, _bench_nn, { break; }) | |
396 | ||
397 | #define BENCH_MEASURE(b, rc, t_out, base) \ | |
398 | BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base) | |
399 | ||
67b5031e MW |
400 | /* --- @bench_measure@ --- * |
401 | * | |
8d372122 MW |
402 | * Arguments: @struct bench_state *b@ = benchmark state |
403 | * @struct bench_timing *t_out@ = where to leave the timing | |
67b5031e MW |
404 | * @double base@ = number of internal units per call |
405 | * @bench_fn *fn@, @void *ctx@ = benchmark function to run | |
406 | * | |
13ee7406 | 407 | * Returns: Zero on success, %$-1$% if timing failed. |
67b5031e MW |
408 | * |
409 | * Use: Measure a function. The function @fn@ is called adaptively | |
410 | * with an iteration count @n@ set so as to run for | |
411 | * approximately @b->target_s@ seconds. | |
412 | * | |
413 | * The result is left in @*t_out@, with @t_out->n@ counting the | |
414 | * final product of the iteration count and @base@ (which might, | |
415 | * e.g., reflect the number of inner iterations the function | |
416 | * performs, or the number of bytes it processes per iteration). | |
417 | */ | |
418 | ||
8d372122 MW |
419 | extern int bench_measure(struct bench_state */*b*/, |
420 | struct bench_timing */*t_out*/, | |
67b5031e | 421 | double /*base*/, bench_fn */*fn*/, void */*ctx*/); |
b64eb60f | 422 | |
b1a20bee MW |
423 | /*----- Reporting ---------------------------------------------------------*/ |
424 | ||
289651a7 MW |
425 | enum { |
426 | BTU_OP, /* counting operations of some kind */ | |
427 | BTU_BYTE, /* counting bytes (@rbuf >= 0@) */ | |
428 | BTU_LIMIT /* (number of units) */ | |
429 | }; | |
430 | ||
b1a20bee MW |
431 | /* --- @bench_report@ --- * |
432 | * | |
433 | * Arguments: @const struct gprintf_ops *gops, void *gp@ = output formatter | |
434 | * @unsigned unit@ = unit processed by the benchmark function | |
435 | * @const struct bench_timing *t@ = benchmark result | |
436 | * | |
437 | * Returns: --- | |
438 | * | |
439 | * Use: Format, to the output identified by @gops@ and @go@, a | |
440 | * human-readable report of the benchmarking result @t@. No | |
441 | * newline is appended. | |
442 | * | |
443 | * The output format is subject to change in later versions. | |
444 | */ | |
445 | ||
446 | extern void bench_report(const struct gprintf_ops */*gops*/, void */*go*/, | |
447 | unsigned /*unit*/, | |
448 | const struct bench_timing */*t*/); | |
449 | ||
b64eb60f MW |
450 | /*----- That's all, folks -------------------------------------------------*/ |
451 | ||
452 | #ifdef __cplusplus | |
453 | } | |
454 | #endif | |
455 | ||
456 | #endif |