Commit | Line | Data |
---|---|---|
d056fbdf | 1 | .\" -*-nroff-*- |
c4ccbbf9 MW |
2 | .\" |
3 | .\" Manual for benchmarking core | |
4 | .\" | |
5 | .\" (c) 2024 Straylight/Edgeware | |
6 | .\" | |
7 | . | |
8 | .\"----- Licensing notice --------------------------------------------------- | |
9 | .\" | |
10 | .\" This file is part of the mLib utilities library. | |
11 | .\" | |
12 | .\" mLib is free software: you can redistribute it and/or modify it under | |
13 | .\" the terms of the GNU Library General Public License as published by | |
14 | .\" the Free Software Foundation; either version 2 of the License, or (at | |
15 | .\" your option) any later version. | |
16 | .\" | |
17 | .\" mLib is distributed in the hope that it will be useful, but WITHOUT | |
18 | .\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
19 | .\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public | |
20 | .\" License for more details. | |
21 | .\" | |
22 | .\" You should have received a copy of the GNU Library General Public | |
23 | .\" License along with mLib. If not, write to the Free Software | |
24 | .\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
25 | .\" USA. | |
26 | . | |
27 | .\"-------------------------------------------------------------------------- | |
28 | .so ../defs.man \" @@@PRE@@@ | |
29 | . | |
30 | .\"-------------------------------------------------------------------------- | |
31 | .TH bench 3mLib "9 March 2024" "Straylight/Edgeware" "mLib utilities library" | |
d056fbdf MW |
32 | .\" @bench_createtimer |
33 | .\" @bench_init | |
34 | .\" @bench_destroy | |
35 | .\" @bench_calibrate | |
36 | .\" @bench_measure | |
37 | . | |
c4ccbbf9 MW |
38 | .\"-------------------------------------------------------------------------- |
39 | .SH NAME | |
40 | bench \- low-level benchmarking tools | |
41 | . | |
42 | .\"-------------------------------------------------------------------------- | |
d056fbdf | 43 | .SH SYNOPSIS |
c4ccbbf9 | 44 | . |
d056fbdf MW |
45 | .nf |
46 | .B "#include <mLib/bench.h>" | |
47 | .PP | |
48 | .ta 2n | |
49 | .B "struct bench_time {" | |
50 | .B " unsigned f;" | |
51 | .B " kludge64 s;" | |
52 | .B " uint32 ns;" | |
53 | .B " kludge64 cy;" | |
54 | .B "};" | |
55 | .PP | |
56 | .B "struct bench_timing {" | |
57 | .B " unsigned f;" | |
58 | .B " double n;" | |
59 | .B " double t;" | |
60 | .B " double cy;" | |
61 | .B "};" | |
62 | .PP | |
63 | .B "struct bench_timerops {" | |
64 | .BI " void (*describe)(struct bench_timer *" bt ", dstr *" d ); | |
65 | .BI " void (*now)(struct bench_timer *" bt ", struct bench_time *" t_out ); | |
66 | .BI " void (*destroy)(struct bench_timer *" bt ); | |
67 | .B "};" | |
68 | .B "struct bench_timer {" | |
69 | .B " const struct bench_timerops *ops;" | |
70 | .B "};" | |
71 | .PP | |
72 | .B "struct bench_state {" | |
73 | .B " unsigned f;" | |
74 | .B " double target_s;" | |
75 | .B " ..." | |
76 | .B "};" | |
77 | .PP | |
78 | .BI "typedef void bench_fn(unsigned long " n ", void *" ctx ); | |
79 | .PP | |
80 | .B "#define BTF_TIMEOK ..." | |
81 | .B "#define BTF_CYOK ..." | |
82 | .B "#define BTF_CLB ..." | |
83 | .B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)" | |
84 | .PP | |
85 | .BI "struct bench_timer *bench_createtimer(const char *" config ); | |
86 | .PP | |
87 | .BI "int bench_init(struct bench_state *" b ", struct bench_timer *" tm ); | |
88 | .BI "void bench_destroy(struct bench_state *" b ); | |
89 | .BI "int bench_calibrate(struct bench_state *" b ); | |
90 | .ta \w'\fBint bench_measure('u | |
91 | .BI "int bench_measure(struct bench_state *" b ", struct bench_timing *" t_out , | |
92 | .BI " double " base ", bench_fn *" fn ", void *" ctx ); | |
93 | .fi | |
94 | . | |
c4ccbbf9 | 95 | .\"-------------------------------------------------------------------------- |
d056fbdf | 96 | .SH DESCRIPTION |
c4ccbbf9 | 97 | . |
d056fbdf MW |
98 | The header file |
99 | .B "<mLib/bench.h>" | |
100 | provides declarations and definitions | |
101 | for performing low-level benchmarks. | |
102 | .PP | |
103 | The `main event' is | |
104 | .BR bench_measure . | |
105 | This function will be described in detail later, | |
106 | but, in brief, | |
107 | it calls a caller-provided function, | |
108 | instructing it to run adaptively chosen numbers of iterations, | |
109 | in order to get a reasonably reliable measurement of its running time, | |
110 | and then reports its results by filling in a structure. | |
111 | .PP | |
112 | With understanding this function as our objective, | |
113 | we must examine all of the pieces involved in making it work. | |
114 | . | |
115 | .SS Timers in general | |
116 | A | |
117 | .I timer | |
118 | is a gadget which is capable of reporting the current time, | |
119 | in seconds (ideally precise to tiny fractions of a second), | |
120 | and/or in CPU cycles. | |
121 | A timer is represented by a pointer to an object of type | |
122 | .BR "struct bench_timer" . | |
123 | This structure has a single member, | |
124 | .BR ops , | |
125 | pointing to a | |
126 | .BR "struct bench_timerops" , | |
127 | which is a table of function pointers; | |
128 | typically, a timer has more data following this, | |
129 | but this fact is not exposed to applications. | |
130 | .PP | |
131 | The function pointers in | |
132 | .B "struct bench_timerops" | |
133 | are as follows. | |
134 | The first argument, | |
135 | named | |
136 | .IR tm , | |
137 | must always point to the timer object itself. | |
138 | .TP | |
139 | .IB tm ->ops->describe( tm ", " d ) | |
140 | Write a description of the timer to the dynamic string | |
141 | .IR d . | |
142 | .TP | |
143 | .IB tm ->ops->now( tm ", " t_out ) | |
144 | Store the current time in | |
145 | .IR t_out . | |
146 | The | |
147 | .B struct bench_time | |
148 | used to represent the time reported by a timer | |
149 | is described in detail below. | |
150 | .TP | |
151 | .IB tm ->ops->destroy( tm ) | |
152 | Destroy the timer, | |
153 | releasing all of the resources that it holds. | |
154 | .PP | |
155 | A time, as reported by a timer, is represented by the | |
156 | .BR "struct bench_time" . | |
157 | A passage-of-time measurement is stored in the | |
158 | .B s | |
159 | and | |
160 | .B ns | |
161 | members, holding seconds and nanoseconds respectively. | |
162 | (A timer need not have nanosecond precision. | |
163 | The exact interpretation of the time \(en | |
164 | e.g., whether it measures wallclock time, | |
165 | user-mode CPU time, | |
166 | or total thread CPU time \(en | |
167 | is a matter for the specific timer implementation.) | |
168 | A cycle count is stored in the | |
169 | .B cy | |
170 | member. | |
171 | The | |
172 | .B f | |
173 | member stores flags: | |
174 | .B BTF_TIMEOK | |
175 | is set if the passage-of-time measurement | |
176 | .B s | |
177 | and | |
178 | .B ns | |
179 | are valid; and | |
180 | .B BTF_CYOK | |
181 | is set if the cycle count | |
182 | .B cy | |
183 | is valid. | |
184 | Neither the time nor the cycle count need be measured | |
185 | relative to any particular origin. | |
186 | The mask | |
187 | .B BTF_ANY | |
188 | covers the | |
189 | .B BTF_TIMEOK | |
190 | and | |
191 | .B BTF_CYOK | |
192 | bits: | |
193 | hence, | |
194 | .IB f &BTF_ANY | |
195 | is nonzero (true) | |
196 | if the timer returned any valid timing information. | |
197 | . | |
198 | .SS The built-in timer | |
199 | The function | |
200 | .B bench_createtimer | |
201 | constructs and returns a timer. | |
202 | It takes a single argument, | |
203 | a string | |
204 | .IR config , | |
205 | from which it reads configuration information. | |
206 | If | |
207 | .B bench_createtimer | |
208 | fails, it returns a null pointer. | |
209 | .PP | |
210 | The | |
211 | .I config | |
212 | pointer may safely be null, | |
213 | in which case a default configuration will be used. | |
214 | Applications | |
215 | .I should only | |
216 | set this pointer to a value supplied by a user, | |
217 | e.g., through a command-line argument, | |
218 | environment variable, or | |
219 | configuration file. | |
220 | .PP | |
221 | The built-in timer makes use of one or two | |
222 | .IR subtimers : | |
223 | a `clock' subtimer to measure the passage of time, | |
224 | and possibly a `cycle' subtimer to count CPU cycles. | |
225 | .PP | |
226 | The configuration string consists of a sequence of words | |
227 | separated by whitespace. | |
228 | There may be additional whitespace at the start and end of the string. | |
229 | The words recognized are as follows. | |
230 | .TP | |
231 | .B list | |
232 | Prints a list of the available clock and cycle subtimers | |
233 | to standard output. | |
234 | .TP | |
235 | .BI clock= t , ... | |
236 | Use the first of the listed clock subtimers | |
237 | to initialize successfully | |
238 | as the clock subtimer. | |
239 | If none of the subtimers can be initialized, | |
240 | then construction of the timer as a whole fails. | |
241 | .TP | |
242 | .BI cycle= t , ... | |
243 | Use the first of the listed subtimers | |
244 | to initialize successfully | |
245 | as the cycle subtimer. | |
246 | If none of the subtimers can be initialized, | |
247 | then construction of the timer as a whole fails. | |
248 | .PP | |
249 | The clock subtimers are as follows. | |
250 | Not all of them will be available on every platform. | |
251 | .TP | |
252 | .B posix-thread-cputime | |
253 | Measures the passage of time using | |
254 | .BR clock_gettime (2), | |
255 | specifying the | |
256 | .B CLOCK_\%THREAD_\%CPUTIME_\%ID | |
257 | clock. | |
258 | .TP | |
259 | .B stdc-clock | |
260 | Measures the passage of time using | |
261 | .BR clock (3). | |
262 | Since | |
263 | .BR clock (3) | |
264 | is part of the original ANSI\ C standard, | |
265 | this subtimer should always be available. | |
266 | However, it may produce unhelpful results | |
267 | if other threads are running. | |
268 | .PP | |
269 | The cycle subtimers are as follows. | |
270 | Not all of them will be available on every platform. | |
271 | .TP | |
272 | .B linux-perf-event | |
273 | Counts CPU cycles using the Linux-specific | |
274 | .BR perf_event_open (2) | |
275 | function to read the | |
276 | .BR PERF_\%COUNT_\%HW_\%CPU_\%CYCLES | |
277 | counter. | |
278 | Only available on Linux. | |
279 | It will fail to initialize | |
280 | if access to performance counters is restricted, | |
281 | e.g., because the | |
282 | .B /proc/sys/kernel/perf_event_paranoid | |
283 | level is too high. | |
284 | .TP | |
285 | .B x86-rdtsc | |
286 | Counts CPU cycles using the x86 | |
287 | .B rdtsc | |
288 | instruction. | |
289 | This instruction is not really suitable for performance measurement: | |
290 | it gives misleading results on CPUs with variable clock frequency. | |
291 | .TP | |
292 | .B null | |
293 | A dummy cycle counter, | |
294 | which will initialize successfully | |
295 | and then fail to report cycle counts. | |
296 | This is a reasonable fallback in many situations. | |
297 | .PP | |
298 | The built-in preference order for clock subtimers, | |
299 | from most to least preferred, is | |
300 | .B posix-thread-cputime | |
301 | followed by | |
302 | .BR stdc-clock . | |
303 | The built-in preference order for cycle subtimers, | |
304 | from most to least preferred, is | |
305 | .B linux-perf-event | |
306 | followed by | |
307 | .BR x86-rdtsc , | |
308 | and then | |
309 | .BR null . | |
310 | . | |
311 | .SS The benchmark state | |
312 | A | |
313 | .I benchmark state | |
314 | tracks the information needed to measure performance of functions. | |
315 | It is represented by a | |
316 | .B struct bench_state | |
317 | structure. | |
318 | .PP | |
319 | The benchmark state is initialized by calling | |
320 | .BR bench_init , | |
321 | passing the address of the state structure to be initialized, | |
322 | and a pointer to a timer. | |
323 | If | |
324 | .B bench_init | |
325 | is called with a non-null timer pointer, | |
326 | then it will not fail; | |
327 | the benchmark state will be initialized, | |
328 | and the function returns zero. | |
329 | If the timer pointer is null, | |
330 | then | |
331 | .B bench_init | |
332 | attempts to construct a timer for itself | |
333 | by calling | |
334 | .BR bench_createtimer . | |
335 | If this succeeds, | |
336 | then the benchmark state will be initialized, | |
337 | and the function returns zero. | |
338 | In both cases, | |
339 | the timer becomes owned by the benchmark state: | |
340 | calling | |
341 | .B bench_destroy | |
342 | on the benchmark state will destroy the timer. | |
343 | If | |
344 | .B bench_init | |
345 | is called with a null timer pointer, | |
346 | and its attempt to create a timer for itself fails, | |
347 | then | |
348 | .B bench_init | |
349 | returns \-1; | |
350 | the benchmark state is not initialized | |
351 | and can safely be discarded; | |
352 | calling | |
353 | .B bench_destroy | |
354 | on the unsuccessfully initialized benchmark state | |
355 | is safe and has no effect. | |
356 | .PP | |
357 | Calling | |
358 | .B bench_destroy | |
359 | on a benchmark state | |
360 | releases any resources it holds, | |
361 | most notably its timer, if any. | |
362 | .PP | |
363 | Although | |
364 | .B struct bench_state | |
365 | is defined in the header file, | |
366 | only two members are available for use by applications. | |
367 | .TP | |
368 | .B f | |
369 | A word containing flags. | |
370 | .TP | |
371 | .B target_s | |
372 | The target time for which to try to run a benchmark, in seconds. | |
373 | After initialization, this is set to 1.0, | |
374 | though applications can override it. | |
375 | .PP | |
376 | Before the benchmark state can be used in measurements, | |
377 | it must be | |
378 | .IR calibrated . | |
379 | This is performed by calling | |
380 | .B bench_calibrate | |
381 | on the benchmark state. | |
382 | Calibration takes a noticeable amount of time | |
383 | (currently about 0.25\*,s), | |
384 | so it makes sense to defer it until it's known to be necessary. | |
385 | .PP | |
386 | Calibration is carried out separately, but in parallel, | |
387 | for the timer's passage-of-time measurement and cycle counter. | |
388 | Either or both of these calibrations can succeed or fail; | |
389 | if passage-of-time calibration fails, | |
390 | then cycle count calibration is impossible. | |
391 | .PP | |
392 | When it completes, | |
393 | .B bench_calibrate | |
394 | sets flags in the benchmark state's | |
395 | .B f | |
396 | member: | |
397 | if passage-of-time calibration succeeded, | |
398 | .B BTF_TIMEOK | |
399 | is set; | |
400 | if cycle-count calibration succeeded, | |
401 | .B BTF_CYOK | |
402 | is set; | |
403 | and the flag | |
404 | .B BTF_CLB | |
405 | is set unconditionally, | |
406 | as a persistent indication that calibration has been attempted. | |
407 | .PP | |
408 | The | |
409 | .B bench_calibrate | |
410 | function returns zero if it successfully calibrated | |
411 | at least the passage-of-time measurement; | |
412 | otherwise, it returns \-1. | |
413 | If | |
414 | .B bench_calibrate | |
415 | is called for a second or subsequent time on the same benchmark state, | |
416 | it returns immediately, | |
417 | either returning 0 or \-1 | |
418 | according to whether passage-of-time had previously been calibrated. | |
419 | . | |
420 | .SS Timing functions | |
421 | A | |
422 | .I benchmark function | |
423 | has the signature | |
424 | .IP | |
425 | .BI "void " fn "(unsigned long " n ", void *" ctx ); | |
426 | .PP | |
427 | When called, it should perform the operation to be measured | |
428 | .I n | |
429 | times. | |
430 | The | |
431 | .I ctx | |
432 | argument is a pointer passed into | |
433 | .B bench_measure | |
434 | for the benchmark function's own purposes. | |
435 | .PP | |
436 | The function | |
437 | .B bench_measure | |
438 | receives five arguments. | |
439 | .TP | |
440 | .I b | |
441 | points to the benchmark state to be used. | |
442 | .TP | |
443 | .I t_out | |
444 | is the address of a | |
445 | .B "struct bench_timing" | |
446 | in which the measurement should be left. | |
447 | This structure is described below. | |
448 | .TP | |
449 | .I base | |
450 | is a count of the number of operations performed | |
451 | by each iteration of the benchmark function. | |
452 | .TP | |
453 | .I fn | |
454 | is a benchmark function, described above. | |
455 | .TP | |
456 | .I ctx | |
457 | is a pointer to be passed to the benchmark function. | |
458 | .B bench_measure | |
459 | does not interpret this pointer in any way. | |
460 | .PP | |
461 | The | |
462 | .B bench_measure | |
463 | function calls its benchmark function repeatedly | |
464 | with different iteration counts | |
465 | .IR n , | |
466 | with the objective that the call take approximately | |
467 | .B target_s | |
468 | seconds, as established in the benchmark state. | |
469 | (Currently, if | |
470 | .B target_s | |
471 | holds the value | |
472 | .IR t , | |
473 | then | |
474 | .B bench_measure | |
475 | is satisfied when a call takes at least | |
476 | .IR t /\(sr2\*,s.) | |
477 | Once the function finds a satisfactory number of iterations, | |
478 | it stores the results in | |
479 | .BI * t_out \fR. | |
480 | If measurement succeeds, then | |
481 | .B bench_measure | |
482 | returns zero. | |
483 | If it fails \(en | |
484 | most likely because the timer failed \(en | |
485 | then it returns \-1. | |
486 | .PP | |
487 | A | |
488 | .B bench_timing | |
489 | structure reports the outcome of a successful measurement. | |
490 | It has four members. | |
491 | .TP | |
492 | .B f | |
493 | A flags word. | |
494 | .B BTF_TIMEOK | |
495 | is set if the passage-of-time measurement in | |
496 | .B t | |
497 | is valid; | |
498 | .B BTF_CYOK | |
499 | is set if the cycle count in | |
500 | .B cy | |
501 | is valid. | |
502 | .TP | |
503 | .B n | |
504 | The number of iterations performed by the benchmark function | |
505 | on its satisfactory run, | |
506 | multiplied by | |
507 | .IR base . | |
508 | .TP | |
509 | .B t | |
510 | The time taken for the satisfactory run of the benchmark function, | |
511 | in seconds. | |
512 | Only valid if | |
513 | .B BTF_TIMEOK | |
514 | is set in | |
515 | .BR f . | |
516 | .TP | |
517 | .B cy | |
518 | The number of CPU cycles used | |
519 | in the satisfactory run | |
520 | of the benchmark function. | |
521 | Only valid if | |
522 | .B BTF_CYOK | |
523 | is set in | |
524 | .BR f . | |
525 | . | |
c4ccbbf9 | 526 | .\"-------------------------------------------------------------------------- |
d056fbdf | 527 | .SH "SEE ALSO" |
c4ccbbf9 MW |
528 | . |
529 | .BR tvec-bench (3), | |
d056fbdf MW |
530 | .BR mLib (3). |
531 | . | |
c4ccbbf9 | 532 | .\"-------------------------------------------------------------------------- |
d056fbdf | 533 | .SH AUTHOR |
c4ccbbf9 | 534 | . |
d056fbdf | 535 | Mark Wooding, <mdw@distorted.org.uk> |
c4ccbbf9 MW |
536 | . |
537 | .\"----- That's all, folks -------------------------------------------------- |