@@@ doc wip
[mLib] / test / bench.3.in
CommitLineData
d056fbdf 1.\" -*-nroff-*-
c4ccbbf9
MW
2.\"
3.\" Manual for benchmarking core
4.\"
5.\" (c) 2024 Straylight/Edgeware
6.\"
7.
8.\"----- Licensing notice ---------------------------------------------------
9.\"
10.\" This file is part of the mLib utilities library.
11.\"
12.\" mLib is free software: you can redistribute it and/or modify it under
13.\" the terms of the GNU Library General Public License as published by
14.\" the Free Software Foundation; either version 2 of the License, or (at
15.\" your option) any later version.
16.\"
17.\" mLib is distributed in the hope that it will be useful, but WITHOUT
18.\" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19.\" FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20.\" License for more details.
21.\"
22.\" You should have received a copy of the GNU Library General Public
23.\" License along with mLib. If not, write to the Free Software
24.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25.\" USA.
26.
27.\"--------------------------------------------------------------------------
28.so ../defs.man \" @@@PRE@@@
29.
30.\"--------------------------------------------------------------------------
31.TH bench 3mLib "9 March 2024" "Straylight/Edgeware" "mLib utilities library"
d056fbdf 32.\" @bench_createtimer
289651a7
MW
33.\" @BENCH_TIMELOOP_DECLS
34.\" @BENCH_TIMELOOP_TAG
35.
d056fbdf
MW
36.\" @bench_init
37.\" @bench_destroy
38.\" @bench_calibrate
289651a7
MW
39.\" @bench_preflight
40.\" @bench_adapt
41.\" @bench_adjust
42.\" @BENCH_MEASURE_DECLS
43.\" @BENCH_MEASURE_TAG
44.\" @BENCH_MEASURE
d056fbdf
MW
45.\" @bench_measure
46.
289651a7
MW
47.\" @bench_report
48.
c4ccbbf9
MW
49.\"--------------------------------------------------------------------------
50.SH NAME
51bench \- low-level benchmarking tools
52.
53.\"--------------------------------------------------------------------------
d056fbdf 54.SH SYNOPSIS
c4ccbbf9 55.
d056fbdf
MW
56.nf
57.B "#include <mLib/bench.h>"
58.PP
289651a7
MW
59.B "#define BTF_TIMEOK ..."
60.B "#define BTF_CYOK ..."
61.B "#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)"
62.PP
6e683a79 63.ta 2n +2n +2n
d056fbdf
MW
64.B "struct bench_time {"
65.B " unsigned f;"
6e683a79
MW
66.B " union {"
67.B " struct { kludge64 s; uint32 ns; } ts;"
68.B " clock_t clk;"
69.B " kludge64 rawns;"
70.B " } t;"
d056fbdf
MW
71.B " kludge64 cy;"
72.B "};"
73.PP
74.B "struct bench_timing {"
75.B " unsigned f;"
76.B " double n;"
77.B " double t;"
78.B " double cy;"
79.B "};"
80.PP
6e683a79
MW
81.B "#define BTF_T0 0u"
82.B "#define BTF_T1 ..."
d056fbdf
MW
83.B "struct bench_timerops {"
84.BI " void (*describe)(struct bench_timer *" bt ", dstr *" d );
b1a20bee 85.BI " int (*preflight)(struct bench_timer *" bt );
6e683a79
MW
86.ta 2n +\w'\fBint (*now)('u
87.BI " int (*now)(struct bench_timer *" bt ,
88.BI " struct bench_time *" t_out ", unsigned " f );
289651a7 89.ta 2n +\w'\fBvoid (*diff)('u
6e683a79
MW
90.BI " void (*diff)(struct bench_timer *" bt ,
91.BI " struct bench_timing *" delta_out ,
92.BI " const struct bench_time *" t0 ,
93.BI " const struct bench_time *" t1 );
d056fbdf
MW
94.BI " void (*destroy)(struct bench_timer *" bt );
95.B "};"
96.B "struct bench_timer {"
97.B " const struct bench_timerops *ops;"
289651a7 98.B " unsigned ref;"
d056fbdf
MW
99.B "};"
100.PP
289651a7
MW
101.BI "struct bench_timer *bench_createtimer(const char *" config );
102.B "BENCH_TIMELOOP_DECLS;"
103.ta 2n \w'\fBBENCH_TIMELOOP_TAG('u
104.BI "BENCH_TIMELOOP_TAG(" tag ", struct bench_timer *" tm ,
105.BI " struct bench_timing *" delta_out ", double " n ,
106.BI " " onbreak )
107.BI " " stmt
108.PP
109.B "#define BTF_CLB ..."
110.B "#define BTF_INDIRECT ..."
111.PP
112.ta 2n
d056fbdf
MW
113.B "struct bench_state {"
114.B " unsigned f;"
115.B " double target_s;"
116.B " ..."
117.B "};"
118.PP
119.BI "typedef void bench_fn(unsigned long " n ", void *" ctx );
120.PP
d056fbdf
MW
121.BI "int bench_init(struct bench_state *" b ", struct bench_timer *" tm );
122.BI "void bench_destroy(struct bench_state *" b );
289651a7
MW
123.BI "int bench_calibrate(struct bench_state *" b ", unsigned " f );
124.BI "int bench_preflight(struct bench_state *" b );
125.ta \w'\fBint bench_adapt('u
126.BI "int bench_adapt(struct bench_state *" b ", double *" n_inout ,
127.BI " const struct bench_timing *" t );
128.ta \w'\fBint bench_adjust('u
129.BI "int bench_adjust(struct bench_state *" b ", struct bench_timing *" t_inout ,
130.BI " double " n ", double " base );
131.B "BENCH_MEASURE_DECLS;"
132.ta 2n \w'\fBBENCH_MEASURE_TAG('u
133.BI "BENCH_MEASURE_TAG(" tag ", struct bench_state *" b ,
134.BI " int &" rc ", struct bench_timing *" t_out ", double " base )
135.BI " " stmt
136.ta 2n \w'\fBBENCH_MEASURE('u
137.BI "BENCH_MEASURE(struct bench_state *" b ,
138.BI " int &" rc ", struct bench_timing *" t_out ", double " base )
139.BI " " stmt
d056fbdf
MW
140.ta \w'\fBint bench_measure('u
141.BI "int bench_measure(struct bench_state *" b ", struct bench_timing *" t_out ,
142.BI " double " base ", bench_fn *" fn ", void *" ctx );
289651a7
MW
143.PP
144.ta 2n
145.B "enum {"
146.B " BTU_OP = 0,"
147.B " BTU_BYTE = 1,"
148.B " ..."
149.BI " BTU_LIMIT = " n
150.B "};"
151.ta \w'\fBvoid bench_report('u
152.BI "void bench_report(const struct gprintf_ops *" gops ", void *" go ,
153.BI " unsigned " unit ", const struct bench_timing *" t );
154.PP
d056fbdf
MW
155.fi
156.
c4ccbbf9 157.\"--------------------------------------------------------------------------
d056fbdf 158.SH DESCRIPTION
c4ccbbf9 159.
d056fbdf
MW
160The header file
161.B "<mLib/bench.h>"
162provides declarations and definitions
163for performing low-level benchmarks.
164.PP
289651a7
MW
165The `main event' are the
166.B BENCH_MEASURE
167macro and
168.B bench_measure
169function.
170These will be described in detail later,
d056fbdf 171but, in brief,
289651a7 172they execute a caller-provided piece of code
d056fbdf
MW
173instructing it to run adaptively chosen numbers of iterations,
174in order to get a reasonably reliable measurement of its running time,
289651a7 175and then report the results by filling in a structure.
d056fbdf 176.PP
289651a7
MW
177With understanding these as our objective,
178we must examine all of the pieces involved in making them work.
d056fbdf
MW
179.
180.SS Timers in general
181A
182.I timer
183is a gadget which is capable of reporting the current time,
184in seconds (ideally precise to tiny fractions of a second),
185and/or in CPU cycles.
186A timer is represented by a pointer to an object of type
187.BR "struct bench_timer" .
289651a7 188This structure has two members:
d056fbdf
MW
189.BR ops ,
190pointing to a
191.BR "struct bench_timerops" ,
289651a7
MW
192which is a table of function pointers,
193and
194.BR ref ,
195which is a simple reference count;
d056fbdf
MW
196typically, a timer has more data following this,
197but this fact is not exposed to applications.
198.PP
199The function pointers in
200.B "struct bench_timerops"
201are as follows.
202The first argument,
203named
204.IR tm ,
205must always point to the timer object itself.
206.TP
207.IB tm ->ops->describe( tm ", " d )
208Write a description of the timer to the dynamic string
209.IR d .
289651a7 210.TP
b1a20bee
MW
211.IB tm ->ops->preflight( tm )
212Ensure that the timer is in working order,
213and perform any necessary per-thread or per-process setup.
214Return zero if the
215.B now
216function is likely to work properly
217when called from the same thread
218in the near future;
219otherwise return \-1.
d056fbdf 220.TP
6e683a79 221.IB tm ->ops->now( tm ", " t_out ", " f )
d056fbdf 222Store the current time in
6e683a79 223.BI * t_out \fR.
d056fbdf 224The
6e683a79
MW
225.B BTF_T1
226flag is set in
227.I f
228to indicate that this is the second call in a pair;
229leave it clear for the first call.
230(A fake
231.B BTF_T0
232flag is defined to be zero for symmetry.)
233Return zero on success
234.I or
235permanent failure;
236return \-1 if timing failed but
237trying again immediately has a reasonable chance of success.
238.TP
239.IB tm ->ops->diff( tm ", " delta_out ", " t0 ", " t1 )
240Store in
241.BI * delta_out
242the difference between the two times
243.I t0
244and
245.IR t1 .
d056fbdf
MW
246.TP
247.IB tm ->ops->destroy( tm )
248Destroy the timer,
249releasing all of the resources that it holds.
250.PP
289651a7
MW
251In a freshly-created timer, the
252.B ref
253member is 1.
254Applications are expected to handle the reference count themselves;
255the
256.B destroy
257function does not check or decrement the count.
258Code for destroying the timer when it's no longer needed
259might look like this.
260.VS
261if (!--tm->ref) tm->ops->destroy(tm);
262.VE
6e683a79
MW
263A
264.B bench_timing
265structure reports the difference between two times,
266as determined by a timer's
267.B diff
268function.
269It has four members.
270.TP
d056fbdf 271.B f
6e683a79 272A flags word.
d056fbdf 273.B BTF_TIMEOK
6e683a79
MW
274is set if the passage-of-time measurement in
275.B t
276is valid;
d056fbdf 277.B BTF_CYOK
6e683a79 278is set if the cycle count in
d056fbdf
MW
279.B cy
280is valid.
d056fbdf
MW
281The mask
282.B BTF_ANY
283covers the
284.B BTF_TIMEOK
285and
286.B BTF_CYOK
287bits:
288hence,
6e683a79 289.B f&BTF_ANY
d056fbdf
MW
290is nonzero (true)
291if the timer returned any valid timing information.
6e683a79
MW
292.TP
293.B n
289651a7 294The number of units processed by the benchmark computation
6e683a79 295on its satisfactory run,
289651a7
MW
296multiplied by a given
297.IR base
298\(en see
299.BR BENCH_MEASURE ,
300.BR bench_measure ,
301and
302.BR bench_adjust .
6e683a79
MW
303.TP
304.B t
305The time taken for the satisfactory run of the benchmark function,
306in seconds.
307Only valid if
308.B BTF_TIMEOK
309is set in
310.BR f .
311.TP
312.B cy
313The number of CPU cycles used
314in the satisfactory run
315of the benchmark function.
316Only valid if
317.B BTF_CYOK
318is set in
319.BR f .
320.PP
321A
322.B "struct bench_time"
5c0f2e08 323represents a single instant in time,
6e683a79
MW
324as captured by a timer's
325.B now
326function.
327The use of this structure is a private matter for the timer:
328the only hard requirement is that the
329.B diff
330function should be able to compute the difference between two times.
331However, the intent is that
332a passage-of-time measurement is stored in the
333.B t
334union,
335a cycle count is stored in the
336.B cy
337member, and
338the
339.B f
340member stores flags
341.B BTF_TIMEOK
342and/or
343.B BTF_CYOK
344if the passage-of-time or cycle count respectively are valid.
289651a7
MW
345.PP
346The
347.B BENCH_TIMELOOP_TAG
348macro uses a timer to measure a number of iterations of a computation.
349It requires the declarations made by
350.B BENCH_TIMELOOP_DECLS
351to be in scope,
352ideally within an enclosing block
353(rather than at top-level,
354where they'll have static storage duration,
355and take longer to access).
356The macro's expansion is syntactically a statement head;
357see
358.BR control (3)
359for details about the underlying machinery.
360In more detail, the macro is invoked as
361.IP
362.nf
363.ta 2n
364.BI "BENCH_TIMELOOP_TAG(" tag ", " tm ", " delta_out ", " n ", " onbreak )
365.BI " " stmt
366.fi
367.PP
368The
369.I tag
370argument is used to distinguish
371the labels used internally by the macro:
372see
373.BR control (3)
374for details about tags.
375The macro calls on the timer
376.I tm
377to determine the initial time and cycle counts,
378performs
379.I n
380iterations of some computation,
381and calls on the timer a second time
382to determine the final time and cycle counts,
383and to store the difference in
384.BI * delta_out \fR.
385The
386.I stmt
387may be any C statement:
388when it is executed,
389the variable
390.BR _bench_n ,
391of type
392.BR "unsigned long" ,
393is in scope.
394The statement should perform
395.B _bench_n
396iterations of the computation to be measured
397\(en and do as little else as possible.
398The argument
399.I n
400to the macro
401may be larger than
402.BR ULONG_MAX :
403the macro will execute
404.I stmt
405multiple times if necessary.
406The statement is allowed to clobber
407.BR _bench_n .
408Note that
409.B BENCH_TIMELOOP_TAG
410does
411.I not
412call the timer's
413.B preflight
414function.
415If the
416.I stmt
417executes a free
418.B break
419statement
420then the statement
421.I onbreak
422is executed;
423a free
424.B continue
425statement within
426.I stmt
427currently does not have a useful behaviour.
428Free
429.B break
430and
431.B continue
432statements within
433.I onbreak
434behave normally.
435(See
436.BR control (3)
437for a definition of
438`free'
439.B break
440and
441.B continue
442statements.)
d056fbdf
MW
443.
444.SS The built-in timer
445The function
446.B bench_createtimer
447constructs and returns a timer.
448It takes a single argument,
449a string
450.IR config ,
451from which it reads configuration information.
452If
453.B bench_createtimer
454fails, it returns a null pointer.
455.PP
456The
457.I config
458pointer may safely be null,
459in which case a default configuration will be used.
460Applications
461.I should only
462set this pointer to a value supplied by a user,
463e.g., through a command-line argument,
464environment variable, or
465configuration file.
466.PP
467The built-in timer makes use of one or two
468.IR subtimers :
469a `clock' subtimer to measure the passage of time,
470and possibly a `cycle' subtimer to count CPU cycles.
471.PP
472The configuration string consists of a sequence of words
473separated by whitespace.
474There may be additional whitespace at the start and end of the string.
475The words recognized are as follows.
476.TP
477.B list
478Prints a list of the available clock and cycle subtimers
479to standard output.
480.TP
481.BI clock= t , ...
482Use the first of the listed clock subtimers
483to initialize successfully
484as the clock subtimer.
485If none of the subtimers can be initialized,
486then construction of the timer as a whole fails.
487.TP
488.BI cycle= t , ...
489Use the first of the listed subtimers
490to initialize successfully
491as the cycle subtimer.
492If none of the subtimers can be initialized,
493then construction of the timer as a whole fails.
494.PP
495The clock subtimers are as follows.
496Not all of them will be available on every platform.
497.TP
6e683a79
MW
498.B linux-x86-perf-rdpmc-hw-cycles
499This is a dummy companion to the similarly named cycle subtimer;
500see its description below.
501.TP
d056fbdf
MW
502.B posix-thread-cputime
503Measures the passage of time using
504.BR clock_gettime (2),
505specifying the
506.B CLOCK_\%THREAD_\%CPUTIME_\%ID
507clock.
508.TP
509.B stdc-clock
510Measures the passage of time using
511.BR clock (3).
512Since
513.BR clock (3)
514is part of the original ANSI\ C standard,
515this subtimer should always be available.
516However, it may produce unhelpful results
517if other threads are running.
518.PP
519The cycle subtimers are as follows.
520Not all of them will be available on every platform.
521.TP
6e683a79
MW
522.B linux-perf-read-hw-cycles
523Counts CPU cycles using the Linux-specific
d056fbdf
MW
524.BR perf_event_open (2)
525function to read the
526.BR PERF_\%COUNT_\%HW_\%CPU_\%CYCLES
527counter.
528Only available on Linux.
529It will fail to initialize
530if access to performance counters is restricted,
531e.g., because the
532.B /proc/sys/kernel/perf_event_paranoid
533level is too high.
534.TP
6e683a79
MW
535.B linux-x86-perf-rdpmc-hw-cycles
536Counts CPU cycles using the Linux-specific
537.BR perf_event_open (2)
538function,
539as for
540.B linux-perf-read-hw-cycles
541above,
542except that it additionally uses the i386/AMD64
d056fbdf 543.B rdtsc
6e683a79
MW
544and
545.B rdpmc
546instructions,
547together with information provided by the kernel
548through a memory-mapped page
549to do its measurements without any system call overheads.
550It does passage-of-time and cycle counting in a single operation,
551so no separate clock subtimer is required:
552the similarly-named clock subtimer does nothing
553except check that the
554.B linux-x86-perf-rdpmc-hw-cycles
555cycle subtimer has been selected.
289651a7
MW
556This is almost certainly the best choice if it's available.
557It is, however, not compatible with (at least some versions of)
558.BR valgrind (1);
559it will detect that it is running under
560.B valgrind
561and fail to initialize.
6e683a79
MW
562.TP
563.B x86-rdtscp
564Counts CPU cycles using the x86
565.B rdtscp
d056fbdf
MW
566instruction.
567This instruction is not really suitable for performance measurement:
568it gives misleading results on CPUs with variable clock frequency.
569.TP
6e683a79
MW
570.B x86-rdtsc
571Counts CPU cycles using the x86
572.B rdtsc
573instruction.
574This has the downsides of
575.B rdtscp
576above,
577but also fails to detect when the thread has been suspended
578or transferred to a different CPU core
579and gives misleading answers in this case.
580Not really recommended.
581.TP
d056fbdf
MW
582.B null
583A dummy cycle counter,
584which will initialize successfully
585and then fail to report cycle counts.
586This is a reasonable fallback in many situations.
587.PP
588The built-in preference order for clock subtimers,
589from most to least preferred, is
6e683a79 590.BR linux-x86-perf-rdpmc-hw-cycles ,
d056fbdf 591followed by
6e683a79
MW
592.BR posix-thread-cputime ,
593and finally
d056fbdf
MW
594.BR stdc-clock .
595The built-in preference order for cycle subtimers,
596from most to least preferred, is
6e683a79
MW
597.BR linux-x86-perf-rdpmc-hw-cycles ,
598then
599.BR linux-perf-read-hw-cycles ,
d056fbdf 600followed by
6e683a79
MW
601.BR x86-rdtscp ,
602and
d056fbdf 603.BR x86-rdtsc ,
6e683a79 604and finally
d056fbdf
MW
605.BR null .
606.
607.SS The benchmark state
608A
609.I benchmark state
610tracks the information needed to measure performance of functions.
611It is represented by a
612.B struct bench_state
613structure.
614.PP
615The benchmark state is initialized by calling
616.BR bench_init ,
617passing the address of the state structure to be initialized,
618and a pointer to a timer.
619If
620.B bench_init
621is called with a non-null timer pointer,
622then it will not fail;
623the benchmark state will be initialized,
289651a7
MW
624and the function returns zero;
625the timer's reference count is
626.I not
627incremented.
d056fbdf
MW
628If the timer pointer is null,
629then
630.B bench_init
631attempts to construct a timer for itself
632by calling
633.BR bench_createtimer .
634If this succeeds,
635then the benchmark state will be initialized,
636and the function returns zero.
637In both cases,
289651a7 638the timer reference becomes owned by the benchmark state:
d056fbdf
MW
639calling
640.B bench_destroy
289651a7
MW
641on the benchmark state will decrement the timer's reference count,
642and destroy it unless it has additional outstanding references.
d056fbdf
MW
643If
644.B bench_init
645is called with a null timer pointer,
646and its attempt to create a timer for itself fails,
647then
648.B bench_init
289651a7 649returns \-1:
d056fbdf 650the benchmark state is not initialized
289651a7 651and can safely be discarded.
d056fbdf
MW
652.PP
653Calling
654.B bench_destroy
655on a benchmark state
656releases any resources it holds,
657most notably its timer, if any.
289651a7
MW
658Calling
659.B bench_destroy
660on an unsuccessfully initialized benchmark state
661is safe but has no effect.
d056fbdf
MW
662.PP
663Although
664.B struct bench_state
665is defined in the header file,
666only two members are available for use by applications.
667.TP
668.B f
669A word containing flags.
670.TP
671.B target_s
672The target time for which to try to run a benchmark, in seconds.
673After initialization, this is set to 1.0,
674though applications can override it.
675.PP
676Before the benchmark state can be used in measurements,
677it must be
678.IR calibrated .
679This is performed by calling
680.B bench_calibrate
681on the benchmark state.
682Calibration takes a noticeable amount of time
683(currently about 0.25\*,s),
684so it makes sense to defer it until it's known to be necessary.
685.PP
686Calibration is carried out separately, but in parallel,
687for the timer's passage-of-time measurement and cycle counter.
688Either or both of these calibrations can succeed or fail;
689if passage-of-time calibration fails,
690then cycle count calibration is impossible.
691.PP
289651a7
MW
692The benchmarking state must be calibrated differently
693for different kinds of timing loop;
694this is controlled by the flags passed as the
695.I f
696argument to
697.BR bench_calibrate .
698The main difference lies in whether the code to be measured
699is called
700.IR indirectly ,
701i.e., via a function pointer.
702Set
703.B BTF_INDIRECT
704if the code is to be called indirectly;
705leave this flag clear if the code is called directly.
706The
707.B bench_measure
708function always makes indirect calls;
709the
710.B BENCH_MEASURE
711macro does not itself make indirect calls.
712Usually, a program needs only one or the other;
713if both are necessary for some reason,
714the best approach is just to set up two benchmarking states
715sharing the same timer,
716and calibrate them separately.
717.PP
d056fbdf
MW
718When it completes,
719.B bench_calibrate
289651a7 720sets flags in the benchmark state's
d056fbdf
MW
721.B f
722member:
723if passage-of-time calibration succeeded,
724.B BTF_TIMEOK
725is set;
726if cycle-count calibration succeeded,
727.B BTF_CYOK
728is set;
729and the flag
730.B BTF_CLB
731is set unconditionally,
732as a persistent indication that calibration has been attempted.
733.PP
734The
735.B bench_calibrate
736function returns zero if it successfully calibrated
737at least the passage-of-time measurement;
738otherwise, it returns \-1.
739If
740.B bench_calibrate
741is called for a second or subsequent time on the same benchmark state,
742it returns immediately,
743either returning 0 or \-1
744according to whether passage-of-time had previously been calibrated.
289651a7
MW
745.PP
746The
747.B BENCH_MEASURE
748macro measures the performance of a computation.
749It requires the declarations made by
750.B BENCH_MEASURE_DECLS
751to be in scope,
752ideally within an enclosing block
753(rather than at top-level,
754where they'll have static storage duration,
755and take longer to access).
756The macro's expansion is syntactically a statement head;
757see
758.BR control (3)
759for details about the underlying machinery.
760In more detail, the macro is invoked as
761.IP
762.nf
763.ta 2n
764.BI "BENCH_MEASURE(" b ", " rc ", " t_out ", " base )
765.BI " " stmt
766.fi
767.PP
768The
769.I stmt
770can be any C statement;
771it should perform
772.B _bench_n
773iterations of the computation to be measured.
774(The variable
775.B _bench_n
776is declared as part of
777.B BENCH_MEASURE_DECLS
778and has type
779.BR "unsigned long" .)
780Before commencing measurement proper,
781the macro calls
782.BR bench_preflight ,
783described below,
784to check that everything is set up properly
785for measurements on the current thread;
786if this fails, then the macro sets
787.I rc
788to \-1.
789Otherwise, the macro executes
790.I stmt
791one or more times,
792with the objective of finding an iteration count
793.I n
794such that
795.I n
796iterations of the computation take more than
797.IB b ->target_s "" \fR/\(sr2
798seconds.
799If measurement fails,
800then
801.I rc
802is set to \-1;
803otherwise,
804.I rc
805is set to zero, and
806.BI * t_out
807is filled in with the measurement;
808.IB t_out ->n
809is set to
810.IR n "\ \(mu\ " base .
811.PP
812The
813.B BENCH_MEASURE_TAG
814macro works just like
815.B BENCH_MEASURE
816except that it takes an additional
817.I tag
818argument used to distinguish the internal labels
819used by the macro's implementation;
820this makes it possible to use
821.B BENCH_MEASURE_TAG
822as a component in more complex macros.
823See
824.BR control (3)
825for details about control-structure macros
826and the meaning and format of tags.
827.PP
828The function
829.B bench_measure
830is similar,
831except that it calls a
d056fbdf 832.I benchmark function
289651a7
MW
833to perform the computation.
834A benchmark function has the signature
d056fbdf
MW
835.IP
836.BI "void " fn "(unsigned long " n ", void *" ctx );
837.PP
838When called, it should perform the operation to be measured
839.I n
840times.
841The
842.I ctx
843argument is a pointer passed into
844.B bench_measure
845for the benchmark function's own purposes.
289651a7 846The
d056fbdf 847.B bench_measure
289651a7
MW
848function returns zero on success,
849or \-1 on failure.
850Note that
d056fbdf 851.B bench_measure
289651a7
MW
852invokes the benchmark indirectly,
853so the benchmark state should have been calibrated with
854.BR BTF_INDIRECT .
855.
856.SS Measurement utilities
857The following functions are primarily exported for the benefit of the
858.B BENCH_MEASURE
859macro,
860but are documented here in case they are useful.
d056fbdf
MW
861.PP
862The
289651a7
MW
863.B bench_preflight
864function prepares a benchmarking state for use.
865It checks that the timer is calibrated
866and suitable for measuring passage-of-time;
867it also calls the timer's
868.B preflight
869function to prepare it for measurements on the current thread.
870If these checks succeed, then
871.B bench_preflight
872returns zero;
873otherwise it returns \-1
874and the caller should not proceed with measurements.
875.PP
876The
877.B bench_adapt
878function is used to determine iteration counts.
879It is used in a loop such as the following.
880.IP
881.nf
882.ta 2n +2n
883.B "BENCH_TIMELOOP_DECLS;"
884.B "struct bench_timer *tm;"
885.B "struct bench_timing t;"
886.B "double n = 1.0, target_s = 1.0;"
887.IP
888.B "do {"
889.B " BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })"
890.BI " " "(do " _bench_n " iterations of some computation)" ;
891.B "} while (!bench_adapt(&n, target_s, &t));"
892.fi
893.PP
894On entry,
895.BI * n_inout
896should be the number of iterations performed by the previous step,
897and
898.BI * t
899the resulting time;
900the
901.B BTF_TIMEOK
902flag must be set in
903.IB t ->f \fR.
904If the timing is sufficient \(en if
905.IR t\fB->t "\ \*(>=\ " target_s /\(sr2
906\(en then
907.B bench_adapt
908returns a nonzero value to indicate that measurement is complete.
909Otherwise, it sets
910.BI * n_inout
911to a new, larger iteration count
912and returns zero to indicate that a further pass is necessary.
913.PP
914The
915.B bench_adjust
916function adjusts a raw timing,
917as captured by
918.BR BENCH_TIMELOOP_TAG ,
919according to the calibration data captured in
920.IR b .
921On exit, the timing data is updated,
922and
923.IB t_inout ->n
924is set to the product
925.IR n "\ \(mu\ " base .
926.
927.SS Reporting results
928The
929.B bench_report
930function formats a measurement result
931into a human-readable string.
932The function writes its output using the
933generalized output formatting operations
934.I gops
935and output pointer
936.IR go ;
937see
938.BR gprintf (3)
939for details on generalized output formatting.
940The
941.I unit
942argument describes the unit of activity being measured:
943.TP
944.B BTU_OP
945counts operations of some unspecified nature, while
946.TP
947.B BTU_BYTE
948counts a number of bytes processed.
949.PP
950These are presented differently
951\(em in particular,
952quantities of bytes are expressed using binary scaling rather than decimal.
953The timing to report is given by the
954.I t
955argument;
956.IB t ->n
957gives the number of units processed.
958.
959.\"--------------------------------------------------------------------------
960.SH EXAMPLE
d056fbdf 961.
289651a7
MW
962The following macros offer a fairly simple example of
963how the benchmarking functions and macros can be used.
964.VS
965.ta 2n +2n +2n 2n+\w'\fBBENCH_MEASURE_TAG('u \n(.lu-\n(.iu-4n
966#define BENCHMARK_DECLS \e
967 struct bench_timing _bmark_t; \e
968 int _bmark_rc; \e
969 BENCH_MEASURE_DECLS
970.VP
971#define BENCHMARK_TAG(tag, b, unit, base) \e
972 MC_BEFORE(tag##__benchmark_before, { fflush(stdout); }) \e
973 MC_AFTER(tag##__benchmark_after, { \e
974 if (_bmark_rc) \e
c752173d 975 puts(": FAILED"); \e
289651a7
MW
976 else { \e
977 fputs(": ", stdout); \e
978 bench_report(&file_printops, stdout, (unit), &_bmark_t);\ \e
c752173d 979 putchar('\en'); \e
289651a7
MW
980 } \e
981 }) \e
982 BENCH_MEASURE_TAG(tag##__benchmark_measure, \e
983 (b), _bmark_rc, &_bmark_t, (base))
984#define BENCHMARK(b, unit, base) \e
985 BENCHMARK_TAG(bench, b, unit, base)
986.VE
987
c4ccbbf9 988.\"--------------------------------------------------------------------------
d056fbdf 989.SH "SEE ALSO"
c4ccbbf9 990.
289651a7
MW
991.BR control (3),
992.BR macros (3),
c4ccbbf9 993.BR tvec-bench (3),
d056fbdf
MW
994.BR mLib (3).
995.
c4ccbbf9 996.\"--------------------------------------------------------------------------
d056fbdf 997.SH AUTHOR
c4ccbbf9 998.
d056fbdf 999Mark Wooding, <mdw@distorted.org.uk>
c4ccbbf9
MW
1000.
1001.\"----- That's all, folks --------------------------------------------------