@@@ much mess, mostly manpages
[mLib] / test / bench.3.in
CommitLineData
d056fbdf 1.\" -*-nroff-*-
c4ccbbf9
MW
2.\"
3.\" Manual for benchmarking core
4.\"
5.\" (c) 2024 Straylight/Edgeware
6.\"
7.
8.\"----- Licensing notice ---------------------------------------------------
9.\"
10.\" This file is part of the mLib utilities library.
11.\"
12.\" mLib is free software: you can redistribute it and/or modify it under
13.\" the terms of the GNU Library General Public License as published by
14.\" the Free Software Foundation; either version 2 of the License, or (at
15.\" your option) any later version.
16.\"
17.\" mLib is distributed in the hope that it will be useful, but WITHOUT
18.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20.\" License for more details.
21.\"
22.\" You should have received a copy of the GNU Library General Public
23.\" License along with mLib. If not, write to the Free Software
24.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25.\" USA.
26.
27.\"--------------------------------------------------------------------------
28.so ../defs.man \" @@@PRE@@@
29.
30.\"--------------------------------------------------------------------------
31.TH bench 3mLib "9 March 2024" "Straylight/Edgeware" "mLib utilities library"
d056fbdf
MW
32.\" @bench_createtimer
33.\" @bench_init
34.\" @bench_destroy
35.\" @bench_calibrate
36.\" @bench_measure
37.
c4ccbbf9
MW
38.\"--------------------------------------------------------------------------
39.SH NAME
40bench \- low-level benchmarking tools
41.
42.\"--------------------------------------------------------------------------
d056fbdf 43.SH SYNOPSIS
c4ccbbf9 44.
d056fbdf
MW
45.nf
46.B "#include <mLib/bench.h>"
47.PP
48.ta 2n
49.B "struct bench_time {"
50.B " unsigned f;"
51.B " kludge64 s;"
52.B " uint32 ns;"
53.B " kludge64 cy;"
54.B "};"
55.PP
56.B "struct bench_timing {"
57.B " unsigned f;"
58.B " double n;"
59.B " double t;"
60.B " double cy;"
61.B "};"
62.PP
63.B "struct bench_timerops {"
64.BI " void (*describe)(struct bench_timer *" bt ", dstr *" d );
65.BI " void (*now)(struct bench_timer *" bt ", struct bench_time *" t_out );
66.BI " void (*destroy)(struct bench_timer *" bt );
67.B "};"
68.B "struct bench_timer {"
69.B " const struct bench_timerops *ops;"
70.B "};"
71.PP
72.B "struct bench_state {"
73.B " unsigned f;"
74.B " double target_s;"
75.B " ..."
76.B "};"
77.PP
78.BI "typedef void bench_fn(unsigned long " n ", void *" ctx );
79.PP
80.B "#define BTF_TIMEOK ..."
81.B "#define BTF_CYOK ..."
82.B "#define BTF_CLB ..."
83.B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)"
84.PP
85.BI "struct bench_timer *bench_createtimer(const char *" config );
86.PP
87.BI "int bench_init(struct bench_state *" b ", struct bench_timer *" tm );
88.BI "void bench_destroy(struct bench_state *" b );
89.BI "int bench_calibrate(struct bench_state *" b );
90.ta \w'\fBint bench_measure('u
91.BI "int bench_measure(struct bench_state *" b ", struct bench_timing *" t_out ,
92.BI " double " base ", bench_fn *" fn ", void *" ctx );
93.fi
94.
c4ccbbf9 95.\"--------------------------------------------------------------------------
d056fbdf 96.SH DESCRIPTION
c4ccbbf9 97.
d056fbdf
MW
98The header file
99.B "<mLib/bench.h>"
100provides declarations and definitions
101for performing low-level benchmarks.
102.PP
103The `main event' is
104.BR bench_measure .
105This function will be described in detail later,
106but, in brief,
107it calls a caller-provided function,
108instructing it to run adaptively chosen numbers of iterations,
109in order to get a reasonably reliable measurement of its running time,
110and then reports its results by filling in a structure.
111.PP
112With understanding this function as our objective,
113we must examine all of the pieces involved in making it work.
114.
115.SS Timers in general
116A
117.I timer
118is a gadget which is capable of reporting the current time,
119in seconds (ideally precise to tiny fractions of a second),
120and/or in CPU cycles.
121A timer is represented by a pointer to an object of type
122.BR "struct bench_timer" .
123This structure has a single member,
124.BR ops ,
125pointing to a
126.BR "struct bench_timerops" ,
127which is a table of function pointers;
128typically, a timer has more data following this,
129but this fact is not exposed to applications.
130.PP
131The function pointers in
132.B "struct bench_timerops"
133are as follows.
134The first argument,
135named
136.IR tm ,
137must always point to the timer object itself.
138.TP
139.IB tm ->ops->describe( tm ", " d )
140Write a description of the timer to the dynamic string
141.IR d .
142.TP
143.IB tm ->ops->now( tm ", " t_out )
144Store the current time in
145.IR t_out .
146The
147.B struct bench_time
148used to represent the time reported by a timer
149is described in detail below.
150.TP
151.IB tm ->ops->destroy( tm )
152Destroy the timer,
153releasing all of the resources that it holds.
154.PP
155A time, as reported by a timer, is represented by the
156.BR "struct bench_time" .
157A passage-of-time measurement is stored in the
158.B s
159and
160.B ns
161members, holding seconds and nanoseconds respectively.
162(A timer need not have nanosecond precision.
163The exact interpretation of the time \(en
164e.g., whether it measures wallclock time,
165user-mode CPU time,
166or total thread CPU time \(en
167is a matter for the specific timer implementation.)
168A cycle count is stored in the
169.B cy
170member.
171The
172.B f
173member stores flags:
174.B BTF_TIMEOK
175is set if the passage-of-time measurement
176.B s
177and
178.B ns
179are valid; and
180.B BTF_CYOK
181is set if the cycle count
182.B cy
183is valid.
184Neither the time nor the cycle count need be measured
185relative to any particular origin.
186The mask
187.B BTF_ANY
188covers the
189.B BTF_TIMEOK
190and
191.B BTF_CYOK
192bits:
193hence,
194.IB f &BTF_ANY
195is nonzero (true)
196if the timer returned any valid timing information.
197.
198.SS The built-in timer
199The function
200.B bench_createtimer
201constructs and returns a timer.
202It takes a single argument,
203a string
204.IR config ,
205from which it reads configuration information.
206If
207.B bench_createtimer
208fails, it returns a null pointer.
209.PP
210The
211.I config
212pointer may safely be null,
213in which case a default configuration will be used.
214Applications
215.I should only
216set this pointer to a value supplied by a user,
217e.g., through a command-line argument,
218environment variable, or
219configuration file.
220.PP
221The built-in timer makes use of one or two
222.IR subtimers :
223a `clock' subtimer to measure the passage of time,
224and possibly a `cycle' subtimer to count CPU cycles.
225.PP
226The configuration string consists of a sequence of words
227separated by whitespace.
228There may be additional whitespace at the start and end of the string.
229The words recognized are as follows.
230.TP
231.B list
232Prints a list of the available clock and cycle subtimers
233to standard output.
234.TP
235.BI clock= t , ...
236Use the first of the listed clock subtimers
237to initialize successfully
238as the clock subtimer.
239If none of the subtimers can be initialized,
240then construction of the timer as a whole fails.
241.TP
242.BI cycle= t , ...
243Use the first of the listed subtimers
244to initialize successfully
245as the cycle subtimer.
246If none of the subtimers can be initialized,
247then construction of the timer as a whole fails.
248.PP
249The clock subtimers are as follows.
250Not all of them will be available on every platform.
251.TP
252.B posix-thread-cputime
253Measures the passage of time using
254.BR clock_gettime (2),
255specifying the
256.B CLOCK_\%THREAD_\%CPUTIME_\%ID
257clock.
258.TP
259.B stdc-clock
260Measures the passage of time using
261.BR clock (3).
262Since
263.BR clock (3)
264is part of the original ANSI\ C standard,
265this subtimer should always be available.
266However, it may produce unhelpful results
267if other threads are running.
268.PP
269The cycle subtimers are as follows.
270Not all of them will be available on every platform.
271.TP
272.B linux-perf-event
273Counts CPU cycles using the Linux-specific
274.BR perf_event_open (2)
275function to read the
276.BR PERF_\%COUNT_\%HW_\%CPU_\%CYCLES
277counter.
278Only available on Linux.
279It will fail to initialize
280if access to performance counters is restricted,
281e.g., because the
282.B /proc/sys/kernel/perf_event_paranoid
283level is too high.
284.TP
285.B x86-rdtsc
286Counts CPU cycles using the x86
287.B rdtsc
288instruction.
289This instruction is not really suitable for performance measurement:
290it gives misleading results on CPUs with variable clock frequency.
291.TP
292.B null
293A dummy cycle counter,
294which will initialize successfully
295and then fail to report cycle counts.
296This is a reasonable fallback in many situations.
297.PP
298The built-in preference order for clock subtimers,
299from most to least preferred, is
300.B posix-thread-cputime
301followed by
302.BR stdc-clock .
303The built-in preference order for cycle subtimers,
304from most to least preferred, is
305.B linux-perf-event
306followed by
307.BR x86-rdtsc ,
308and then
309.BR null .
310.
311.SS The benchmark state
312A
313.I benchmark state
314tracks the information needed to measure performance of functions.
315It is represented by a
316.B struct bench_state
317structure.
318.PP
319The benchmark state is initialized by calling
320.BR bench_init ,
321passing the address of the state structure to be initialized,
322and a pointer to a timer.
323If
324.B bench_init
325is called with a non-null timer pointer,
326then it will not fail;
327the benchmark state will be initialized,
328and the function returns zero.
329If the timer pointer is null,
330then
331.B bench_init
332attempts to construct a timer for itself
333by calling
334.BR bench_createtimer .
335If this succeeds,
336then the benchmark state will be initialized,
337and the function returns zero.
338In both cases,
339the timer becomes owned by the benchmark state:
340calling
341.B bench_destroy
342on the benchmark state will destroy the timer.
343If
344.B bench_init
345is called with a null timer pointer,
346and its attempt to create a timer for itself fails,
347then
348.B bench_init
349returns \-1;
350the benchmark state is not initialized
351and can safely be discarded;
352calling
353.B bench_destroy
354on the unsuccessfully initialized benchmark state
355is safe and has no effect.
356.PP
357Calling
358.B bench_destroy
359on a benchmark state
360releases any resources it holds,
361most notably its timer, if any.
362.PP
363Although
364.B struct bench_state
365is defined in the header file,
366only two members are available for use by applications.
367.TP
368.B f
369A word containing flags.
370.TP
371.B target_s
372The target time for which to try to run a benchmark, in seconds.
373After initialization, this is set to 1.0,
374though applications can override it.
375.PP
376Before the benchmark state can be used in measurements,
377it must be
378.IR calibrated .
379This is performed by calling
380.B bench_calibrate
381on the benchmark state.
382Calibration takes a noticeable amount of time
383(currently about 0.25\*,s),
384so it makes sense to defer it until it's known to be necessary.
385.PP
386Calibration is carried out separately, but in parallel,
387for the timer's passage-of-time measurement and cycle counter.
388Either or both of these calibrations can succeed or fail;
389if passage-of-time calibration fails,
390then cycle count calibration is impossible.
391.PP
392When it completes,
393.B bench_calibrate
394sets flags in the benchmark state's
395.B f
396member:
397if passage-of-time calibration succeeded,
398.B BTF_TIMEOK
399is set;
400if cycle-count calibration succeeded,
401.B BTF_CYOK
402is set;
403and the flag
404.B BTF_CLB
405is set unconditionally,
406as a persistent indication that calibration has been attempted.
407.PP
408The
409.B bench_calibrate
410function returns zero if it successfully calibrated
411at least the passage-of-time measurement;
412otherwise, it returns \-1.
413If
414.B bench_calibrate
415is called for a second or subsequent time on the same benchmark state,
416it returns immediately,
417either returning 0 or \-1
418according to whether passage-of-time had previously been calibrated.
419.
420.SS Timing functions
421A
422.I benchmark function
423has the signature
424.IP
425.BI "void " fn "(unsigned long " n ", void *" ctx );
426.PP
427When called, it should perform the operation to be measured
428.I n
429times.
430The
431.I ctx
432argument is a pointer passed into
433.B bench_measure
434for the benchmark function's own purposes.
435.PP
436The function
437.B bench_measure
438receives five arguments.
439.TP
440.I b
441points to the benchmark state to be used.
442.TP
443.I t_out
444is the address of a
445.B "struct bench_timing"
446in which the measurement should be left.
447This structure is described below.
448.TP
449.I base
450is a count of the number of operations performed
451by each iteration of the benchmark function.
452.TP
453.I fn
454is a benchmark function, described above.
455.TP
456.I ctx
457is a pointer to be passed to the benchmark function.
458.B bench_measure
459does not interpret this pointer in any way.
460.PP
461The
462.B bench_measure
463function calls its benchmark function repeatedly
464with different iteration counts
465.IR n ,
466with the objective that the call take approximately
467.B target_s
468seconds, as established in the benchmark state.
469(Currently, if
470.B target_s
471holds the value
472.IR t ,
473then
474.B bench_measure
475is satisfied when a call takes at least
476.IR t /\(sr2\*,s.)
477Once the function finds a satisfactory number of iterations,
478it stores the results in
479.BI * t_out \fR.
480If measurement succeeds, then
481.B bench_measure
482returns zero.
483If it fails \(en
484most likely because the timer failed \(en
485then it returns \-1.
486.PP
487A
488.B bench_timing
489structure reports the outcome of a successful measurement.
490It has four members.
491.TP
492.B f
493A flags word.
494.B BTF_TIMEOK
495is set if the passage-of-time measurement in
496.B t
497is valid;
498.B BTF_CYOK
499is set if the cycle count in
500.B cy
501is valid.
502.TP
503.B n
504The number of iterations performed by the benchmark function
505on its satisfactory run,
506multiplied by
507.IR base .
508.TP
509.B t
510The time taken for the satisfactory run of the benchmark function,
511in seconds.
512Only valid if
513.B BTF_TIMEOK
514is set in
515.BR f .
516.TP
517.B cy
518The number of CPU cycles used
519in the satisfactory run
520of the benchmark function.
521Only valid if
522.B BTF_CYOK
523is set in
524.BR f .
525.
c4ccbbf9 526.\"--------------------------------------------------------------------------
d056fbdf 527.SH "SEE ALSO"
c4ccbbf9
MW
528.
529.BR tvec-bench (3),
d056fbdf
MW
530.BR mLib (3).
531.
c4ccbbf9 532.\"--------------------------------------------------------------------------
d056fbdf 533.SH AUTHOR
c4ccbbf9 534.
d056fbdf 535Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9
MW
536.
537.\"----- That's all, folks --------------------------------------------------