[mLib] / test / bench.h

/* -*-c-*-
 *
 * Benchmarking support
 *
 * (c) 2023 Straylight/Edgeware
 */

/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of the mLib utilities library.
 *
 * mLib is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * mLib is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with mLib.  If not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
 * USA.
 */

#ifndef MLIB_BENCH_H
#define MLIB_BENCH_H

#ifdef __cplusplus
  extern "C" {
#endif

/*----- Header files ------------------------------------------------------*/

#include <limits.h>
#include <time.h>

#ifndef MLIB_BITS_H
#  include "bits.h"
#endif

#ifndef MLIB_CONTROL_H
#  include "control.h"
#endif

#ifndef MLIB_DSTR_H
#  include "dstr.h"
#endif

#ifndef MLIB_MACROS_H
#  include "macros.h"
#endif

#ifndef MLIB_GPRINTF_H
#  include "gprintf.h"
#endif

/*----- Timers ------------------------------------------------------------*/

struct bench_time {
  unsigned f;				/* flags */
#define BTF_TIMEOK 1u			/*   @s@ ad @ns@ slots are value */
#define BTF_CYOK 2u			/*   @cy@ slot is valid */
#define BTF_ANY (BTF_TIMEOK | BTF_CYOK)	/*   some part is useful */
  union {
    struct { kludge64 s; uint32 ns; } ts; /* @struct timespec@-ish */
    clock_t clk;			/* @clock@ */
    kludge64 rawns;			/* raw nanosecond count */
  } t;					/* time */
  kludge64 cy;				/* count of CPU cycles */
};

struct bench_timing {
  unsigned f;				/* flags (@BTF_...@) */
  double n, t, cy;			/* count, time, and cycles */
};

struct bench_timer {
  const struct bench_timerops *ops;
  unsigned ref;
};

struct bench_timerops {
  void (*describe)(struct bench_timer */*bt*/, dstr */*d*/);
    /* Write a description of the timer to @d@. */

  int (*preflight)(struct bench_timer */*bt*/);
    /* Return zero if timer is ready to go, or %$-1$% otherwise.  The @now@
     * function will only be called following a successful @preflight@ on the
     * same thread.
     */

  int (*now)(struct bench_timer */*bt*/, struct bench_time */*t_out*/,
	      unsigned /*f*/);
#define BTF_T0 0u			/* fetching first time of a pair */
#define BTF_T1 1u			/* fetching second time of a pair */
    /* Fill in @*t_out@ with the current time.  Return zero on success
     * %%\emph{or} permanent failure; return %$-1$% on temporary failure.
     */

  void (*diff)(struct bench_timer */*bt*/,
	       struct bench_timing */*delta_out*/,
	       const struct bench_time */*t0*/,
	       const struct bench_time */*t1*/);
    /* Subtract the time @t0@ from the time @t1@, leaving the result in
     * @*delta_out@, setting flags as appropriate.
     */

  void (*destroy)(struct bench_timer */*bt*/);
    /* Release the timer and any resources it holds. */
};

/* --- @bench_createtimer@ --- *
 *
 * Arguments:	@const char *config@ = user-supplied configuration string
 *
 * Returns:	A freshly constructed standard timer object, or null on
 *		failure.
 *
 * Use:		Allocate a timer.  Dispose of it by calling
 *		@tm->ops->destroy(tm)@ when you're done.
 *
 *		Applications should not set configuration strings except as
 *		established by user action, e.g., from a command-line option,
 *		environment variable, or configuration file.
 */

extern struct bench_timer *bench_createtimer(const char *config);

/* --- @BENCH_TIMELOOP_DECLS@ --- *
 *
 * Arguments:	---
 *
 * Use:		Expands to the declarations needed by @BENCH_TIMELOOP_TAG@.
 *		These should be at block scope, not at toplevel.
 */

#define BENCH_TIMELOOP_DECLS						\
  struct bench_timer *_bench_tm;					\
  struct bench_time _bench_t0, _bench_t1;				\
  unsigned long _bench_n, _bench_n1

/* --- @BENCH_TIMELOOP_TAG@ --- *
 *
 * Arguments:	@tag@ = control structure tag
 *		@struct bench_state *tm@ = benchmark timer
 *		@struct bench_timing *delta_out@ = where to put the result
 *		@double n@ = number of iterations
 *
 * Returns:	---
 *
 * Use:		@BENCH_TIMELOOP_TAG(tag, tm, delta_out, n) stmt@ runs @stmt@
 *		@n@ times, capturing the time and/or CPU cycles taken in
 *		@*delta_out@.  The iteration count must be no more than the
 *		%%\emph{square}%% of @ULONG_MAX@.  If @stmt@ executes a free
 *		@break@ statement, then the containing loop -- which must
 *		exist -- is exited.
 *
 *		This macro is not intended to be used directly by users: it
 *		is used internally by @bench_calibrate@ and @BENCH_MEASURE@.
 */

#define BENCH_TIMELOOP_TAG(tag, tm, delta_out, n, onbreak)		\
  MC_BEFORE(tag##__benchtl_setup, {					\
    double _R = ULONG_MAX; double _n = (n);				\
									\
    _bench_tm = (tm);							\
    if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; }			\
    else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; }		\
  })									\
  MC_TARGET(tag##__benchtl_break, { break; })				\
  MC_AFTER(tag##__benchtl_end, {					\
    _bench_tm->ops->diff(_bench_tm, (delta_out), &_bench_t0, &_bench_t1); \
  })									\
  MC_DOWHILE(tag##__benchtl_countdown, _bench_n1--)			\
    MC_AFTER(tag##__benchtl_post, { _bench_n = ULONG_MAX; })		\
    MC_DOWHILE(tag##__benchtl_t1,					\
	       _bench_tm->ops->now(_bench_tm, &_bench_t1, BTF_T1))	\
      MC_WRAP(tag##__benchtl_t0,					\
	{ while (_bench_tm->ops->now(_bench_tm, &_bench_t0, BTF_T0)) ; }, \
	{ ; },								\
	{ MC_GOTARGET(tag##__benchtl_break); })

/*----- Measuring ---------------------------------------------------------*/

struct bench_state {
  struct bench_timer *tm;		/* a timer */
  double target_s;			/* target time to run benchmarks */
  unsigned f;				/* calibration flags (@BTF_...@) */
#define BTF_CLB 0x0100			/*   tried calibrating */
  struct { double m, c; } clk, cy;	/* calculated overheads */
};

typedef void bench_fn(unsigned long /*n*/, void */*ctx*/);
  /* Run the benchmark @n@ times, given a context pointer @ctx@. */

/* --- @bench_init@ --- *
 *
 * Arguments:	@struct bench_state *b@ = bench state to initialize
 *		@struct bench_timer *tm@ = timer to attach, or null
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Initialize the benchmark state.  On success, the timer state
 *		still needs to be calibrated (use @bench_calibrate@) before
 *		it can be used, but this will be done automatically by
 *		@bench_measure@ if it's not done by hand earlier.  The timer
 *		is now owned by the benchmark state and will be destroyed by
 *		@bench_destroy@.
 *
 *		The only reason for failure is if @tm@ was null on entry,
 *		and automatic construction of a timer failed.  The state is
 *		safe to discard, but calling @bench_destroy@ is safe too.
 */

extern int bench_init(struct bench_state */*b*/, struct bench_timer */*tm*/);

/* --- @bench_destroy@ --- *
 *
 * Arguments:	@struct bench_state *b@ = bench state
 *
 * Returns:	---
 *
 * Use:		Destroy the benchmark state, releasing the resources that it
 *		holds.
 */

extern void bench_destroy(struct bench_state */*b*/);

/* --- @bench_calibrate@ --- *
 *
 * Arguments:	@struct bench_state *b@ = bench state
 *		@unsigned f@ = calibration flags
 *
 * Returns:	Zero on success, %$-1$% if calibration failed.
 *
 * Use:		Calibrate the benchmark state, so that it can be used to
 *		measure performance reasonably accurately.
 *
 *		Calibration will take into account how the subject code is
 *		going to be located.  If you're going to use @BENCH_MEASURE@
 *		to measure a piece of literal code, then leave @f@ zero.  If
 *		the code to be measured is going to be executed via an
 *		indirect branch, e.g., through the @measure@ function, then
 *		set @BTF_INDIRECT@.
 */

#define BTF_INDIRECT 1u
extern int bench_calibrate(struct bench_state */*b*/, unsigned /*f*/);

/* --- @bench_preflight@ --- *
 *
 * Arguments:	@struct bench_state *b@ = benchmark state
 *
 * Returns:	Zero on success, %$-1$% on failure.
 *
 * Use:		Prepares for benchmarking on the current thread.  Current
 *		checks are that the timer is calibrated and that it can
 *		successfully measure time; the timer preflight is also run.
 *
 *		Users are unlikely to find this function useful: it's called
 *		automatically by the @BENCH_MEASURE@ macro and the
 *		@bench_measure@ function.
 */

extern int bench_preflight(struct bench_state */*b*/);

/* --- @bench_adapt@ --- *
 *
 * Arguments:	@double *n_inout@ = number of iterations, updated
 *		@double target_s@ = target time in seconds
 *		@const struct bench_timing *t@ = timing from the previous run
 *
 * Returns:	Nonzero if the measurement is sufficient; zero to run again.
 *
 * Use:		This function determines a suitable number of iterations of a
 *		benchmark function to perform next.  It is used in a loop
 *		such as the following.
 *
 *			@BENCH_TIMELOOP_DECLS;@
 *			@struct bench_timer *tm;@
 *			@struct bench_timing t;@
 *			@double n = 1.0, target_s = 1.0;@
 *
 *			@do {@
 *			  @BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })@
 *			    (do @_bench_n@ iterations of some computation)@;@
 *			@} while (!bench_adapt(&n, target_s, &t));@
 *
 *		On entry, @*n_inout@ should be the number of iterations
 *		performed by the previous pass, and @*t@ the resulting time;
 *		the @BTF_TIMEOK@ flag must be set in @t->f@.  If the timing
 *		is sufficient -- @t->t@ is sufficiently close to @target_s@
 *		-- then the function returns nonzero to indicate that
 *		measurement is complete.  Otherwise, it sets @*n_inout@ to a
 *		new, larger iteration count and returns zero to indicate that
 *		a further pass is necessary.
 *
 *		Users are unlikely to find this function useful: it's called
 *		automatically by the @BENCH_MEASURE@ macro and the
 *		@bench_measure@ function.
 */

extern int bench_adapt(double */*n_inout*/, double /*target_s*/,
		       const struct bench_timing */*t*/);

/* --- @bench_adjust@ --- *
 *
 * Arguments:	@struct bench_state *b@ = benchmark state
 *		@struct bench_timing *t_inout@ = timing to adjust
 *		@double n@ = number of external iterations performed
 *		@double base@ = number of internal operations per external
 *			iteration
 *
 * Returns:	---
 *
 * Use:		Adjusts a raw timing, as captured by @BENCH_TIMELOOP_TAG@,
 *		according to the calibration data captured in @b@.
 *		On exit, the timing data is updated, and @t->n@ is set to the
 *		product @n*base@.
 *
 *		Users are unlikely to find this function useful: it's called
 *		automatically by the @BENCH_MEASURE@ macro and the
 *		@bench_measure@ function.
 */

extern void bench_adjust(struct bench_state */*b*/,
			 struct bench_timing */*t_inout*/,
			 double /*n*/, double /*base*/);

/* --- @BENCH_MEASURE_DECLS@ --- *
 *
 * Arguments:	---
 *
 * Use:		Expands to the declarations needed by @BENCH_MEASURE@.
 *		These should be at block scope, not at toplevel.
 */

#define BENCH_MEASURE_DECLS						\
  struct bench_state *_bench_b;						\
  struct bench_timing *_bench_t;					\
  double _bench_nn;							\
  BENCH_TIMELOOP_DECLS

/* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
 *
 * Arguments:	@tag@ = control structure tag
 *		@struct bench_state *b@ = benchmark state
 *		@int &rc@ = where to put the result code (zero on success,
 *			%$-1$% on failure)
 *		@struct bench_timing *t_out@ = where to put the result
 *		@double base@ = number of operations per external iteration
 *
 * Returns:	---
 *
 * Use:		@BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
 *		execution of @stmt@.
 *
 *		The statement should perform @_bench_n@ iterations of some
 *		operation.  It will be invoked multiple times, with varying
 *		iteration counts, so as to run for approximately
 *		@b->target_s@ seconds.
 *
 *		On success, the resulting timing is left in @*t_out@, with
 *		@t_out->n@ holding the product of the final iteration count
 *		and @base@, and @rc@ is set to zero.  If the timer fails, or
 *		if @stmt@ invokes a free @break@ statement, then @rc@ is set
 *		to %$-1$%.
 *
 *		The macro @BENCH_MEASURE_TAG@ is the same, except that it
 *		allows an explicit control-structure tag to be set so that it
 *		can be used within larger control structure macros.
 */

#define BENCH_MEASURE_TAG(tag, b, rc, t_out, base)			\
  MC_BEFORE(tag##__benchmsr_setup, {					\
    _bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0;		\
    if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail);	\
  })									\
  MC_TARGET(tag##__benchmsr_done,					\
    { bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
  MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; })			\
  for (;;)								\
    MC_WRAP(tag##__benchmsr_loop,					\
      { ; },								\
      { _bench_t->f &= _bench_b->f;					\
	if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
	if (bench_adapt(&_bench_nn, _bench_b->target_s, _bench_t))	\
	  MC_GOTARGET(tag##__benchmsr_done);				\
      },								\
      { MC_GOTARGET(tag##__benchmsr_fail); })				\
    BENCH_TIMELOOP_TAG(tag##__benchmsr_time,				\
		       _bench_b->tm, _bench_t, _bench_nn, { break; })

#define BENCH_MEASURE(b, rc, t_out, base)				\
  BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)

/* --- @bench_measure@ --- *
 *
 * Arguments:	@struct bench_state *b@ = benchmark state
 *		@struct bench_timing *t_out@ = where to leave the timing
 *		@double base@ = number of internal units per call
 *		@bench_fn *fn@, @void *ctx@ = benchmark function to run
 *
 * Returns:	Zero on success, %$-1$% if timing failed.
 *
 * Use:		Measure a function.  The function @fn@ is called adaptively
 *		with an iteration count @n@ set so as to run for
 *		approximately @b->target_s@ seconds.
 *
 *		The result is left in @*t_out@, with @t_out->n@ counting the
 *		final product of the iteration count and @base@ (which might,
 *		e.g., reflect the number of inner iterations the function
 *		performs, or the number of bytes it processes per iteration).
 */

extern int bench_measure(struct bench_state */*b*/,
			 struct bench_timing */*t_out*/,
			 double /*base*/, bench_fn */*fn*/, void */*ctx*/);

/*----- Reporting ---------------------------------------------------------*/

enum {
  BTU_OP,			       /* counting operations of some kind */
  BTU_BYTE,				/* counting bytes (@rbuf >= 0@) */
  BTU_LIMIT				/* (number of units) */
};

/* --- @bench_report@ --- *
 *
 * Arguments:	@const struct gprintf_ops *gops, void *gp@ = output formatter
 *		@unsigned unit@ = unit processed by the benchmark function
 *		@const struct bench_timing *t@ = benchmark result
 *
 * Returns:	---
 *
 * Use:		Format, to the output identified by @gops@ and @go@, a
 *		human-readable report of the benchmarking result @t@.  No
 *		newline is appended.
 *
 *		The output format is subject to change in later versions.
 */

extern void bench_report(const struct gprintf_ops */*gops*/, void */*go*/,
			 unsigned /*unit*/,
			 const struct bench_timing */*t*/);

/*----- That's all, folks -------------------------------------------------*/

#ifdef __cplusplus
  }
#endif

#endif
Commit	Line	Data
b64eb60f MW	1	/* --c--
	2	*
	3	* Benchmarking support
	4	*
	5	* (c) 2023 Straylight/Edgeware
	6	*/
	7
	8	/----- Licensing notice --------------------------------------------------
	9	*
	10	* This file is part of the mLib utilities library.
	11	*
	12	* mLib is free software: you can redistribute it and/or modify it under
	13	* the terms of the GNU Library General Public License as published by
	14	* the Free Software Foundation; either version 2 of the License, or (at
	15	* your option) any later version.
	16	*
	17	* mLib is distributed in the hope that it will be useful, but WITHOUT
	18	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	19	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
	20	* License for more details.
	21	*
	22	* You should have received a copy of the GNU Library General Public
	23	* License along with mLib. If not, write to the Free Software
	24	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
	25	* USA.
	26	*/
	27
	28	#ifndef MLIB_BENCH_H
	29	#define MLIB_BENCH_H
	30
	31	#ifdef __cplusplus
	32	extern "C" {
	33	#endif
	34
	35	/----- Header files ------------------------------------------------------/
	36
b1a20bee	37	#include <limits.h>
6e683a79 MW	38	#include <time.h>
6e683a79 MW	39
b64eb60f MW	40	#ifndef MLIB_BITS_H
	41	# include "bits.h"
	42	#endif
	43
b1a20bee MW	44	#ifndef MLIB_CONTROL_H
	45	# include "control.h"
	46	#endif
	47
13ee7406 MW	48	#ifndef MLIB_DSTR_H
	49	# include "dstr.h"
	50	#endif
	51
289651a7 MW	52	#ifndef MLIB_MACROS_H
	53	# include "macros.h"
	54	#endif
	55
b1a20bee MW	56	#ifndef MLIB_GPRINTF_H
	57	# include "gprintf.h"
	58	#endif
	59
289651a7	60	/----- Timers ------------------------------------------------------------/
b64eb60f MW	61
b64eb60f MW	62	struct bench_time {
67b5031e MW	63	unsigned f; /* flags */
	64	#define BTF_TIMEOK 1u /* @s@ ad @ns@ slots are value */
	65	#define BTF_CYOK 2u /* @cy@ slot is valid */
	66	#define BTF_ANY (BTF_TIMEOK \| BTF_CYOK) /* some part is useful */
6e683a79 MW	67	union {
	68	struct { kludge64 s; uint32 ns; } ts; /* @struct timespec@-ish */
	69	clock_t clk; /* @clock@ */
	70	kludge64 rawns; /* raw nanosecond count */
	71	} t; /* time */
c91413e6	72	kludge64 cy; /* count of CPU cycles */
b64eb60f MW	73	};
	74
	75	struct bench_timing {
c91413e6	76	unsigned f; /* flags (@BTF_...@) */
67b5031e	77	double n, t, cy; /* count, time, and cycles */
b64eb60f MW	78	};
b64eb60f MW	79
289651a7 MW	80	struct bench_timer {
	81	const struct bench_timerops *ops;
	82	unsigned ref;
	83	};
b64eb60f MW	84
b64eb60f MW	85	struct bench_timerops {
13ee7406 MW	86	void (describe)(struct bench_timer /bt/, dstr /d*/);
	87	/* Write a description of the timer to @d@. */
	88
b1a20bee MW	89	int (preflight)(struct bench_timer /bt/);
	90	/* Return zero if timer is ready to go, or %$-1$% otherwise. The @now@
	91	* function will only be called following a successful @preflight@ on the
	92	* same thread.
	93	*/
	94
6e683a79 MW	95	int (now)(struct bench_timer /bt/, struct bench_time /t_out*/,
	96	unsigned /f/);
	97	#define BTF_T0 0u /* fetching first time of a pair */
	98	#define BTF_T1 1u /* fetching second time of a pair */
	99	/* Fill in @*t_out@ with the current time. Return zero on success
	100	* %%\emph{or} permanent failure; return %$-1$% on temporary failure.
	101	*/
	102
	103	void (diff)(struct bench_timer /bt/,
	104	struct bench_timing /delta_out*/,
	105	const struct bench_time /t0*/,
	106	const struct bench_time /t1*/);
	107	/* Subtract the time @t0@ from the time @t1@, leaving the result in
	108	* @*delta_out@, setting flags as appropriate.
	109	*/
67b5031e	110
b64eb60f	111	void (destroy)(struct bench_timer /bt/);
67b5031e	112	/* Release the timer and any resources it holds. */
b64eb60f MW	113	};
b64eb60f MW	114
289651a7 MW	115	/* --- @bench_createtimer@ --- *
	116	*
	117	* Arguments: @const char *config@ = user-supplied configuration string
	118	*
	119	* Returns: A freshly constructed standard timer object, or null on
	120	* failure.
	121	*
	122	* Use: Allocate a timer. Dispose of it by calling
	123	* @tm->ops->destroy(tm)@ when you're done.
	124	*
	125	* Applications should not set configuration strings except as
	126	* established by user action, e.g., from a command-line option,
	127	* environment variable, or configuration file.
	128	*/
b1a20bee	129
289651a7	130	extern struct bench_timer bench_createtimer(const char config);
b1a20bee MW	131
	132	/* --- @BENCH_TIMELOOP_DECLS@ --- *
	133	*
	134	* Arguments: ---
	135	*
	136	* Use: Expands to the declarations needed by @BENCH_TIMELOOP_TAG@.
	137	* These should be at block scope, not at toplevel.
	138	*/
	139
	140	#define BENCH_TIMELOOP_DECLS \
	141	struct bench_timer *_bench_tm; \
	142	struct bench_time _bench_t0, _bench_t1; \
	143	unsigned long _bench_n, _bench_n1
	144
	145	/* --- @BENCH_TIMELOOP_TAG@ --- *
	146	*
	147	* Arguments: @tag@ = control structure tag
289651a7	148	* @struct bench_state *tm@ = benchmark timer
b1a20bee MW	149	* @struct bench_timing *delta_out@ = where to put the result
	150	* @double n@ = number of iterations
	151	*
	152	* Returns: ---
	153	*
289651a7	154	* Use: @BENCH_TIMELOOP_TAG(tag, tm, delta_out, n) stmt@ runs @stmt@
b1a20bee MW	155	* @n@ times, capturing the time and/or CPU cycles taken in
	156	* @*delta_out@. The iteration count must be no more than the
	157	* %%\emph{square}%% of @ULONG_MAX@. If @stmt@ executes a free
289651a7	158	* @break@ statement, then the containing loop -- which must
b1a20bee MW	159	* exist -- is exited.
	160	*
	161	* This macro is not intended to be used directly by users: it
	162	* is used internally by @bench_calibrate@ and @BENCH_MEASURE@.
	163	*/
	164
289651a7	165	#define BENCH_TIMELOOP_TAG(tag, tm, delta_out, n, onbreak) \
b1a20bee MW	166	MC_BEFORE(tag##__benchtl_setup, { \
	167	double _R = ULONG_MAX; double _n = (n); \
	168	\
289651a7	169	_bench_tm = (tm); \
b1a20bee MW	170	if (_n <= _R) { _bench_n1 = 0; _bench_n = _n; } \
	171	else { _bench_n1 = _n/_R; _bench_n = _n - _R*_bench_n1; } \
	172	}) \
	173	MC_TARGET(tag##__benchtl_break, { break; }) \
	174	MC_AFTER(tag##__benchtl_end, { \
	175	_bench_tm->ops->diff(_bench_tm, (delta_out), &_bench_t0, &_bench_t1); \
	176	}) \
	177	MC_DOWHILE(tag##__benchtl_countdown, _bench_n1--) \
	178	MC_AFTER(tag##__benchtl_post, { _bench_n = ULONG_MAX; }) \
	179	MC_DOWHILE(tag##__benchtl_t1, \
	180	_bench_tm->ops->now(_bench_tm, &_bench_t1, BTF_T1)) \
	181	MC_WRAP(tag##__benchtl_t0, \
	182	{ while (_bench_tm->ops->now(_bench_tm, &_bench_t0, BTF_T0)) ; }, \
	183	{ ; }, \
	184	{ MC_GOTARGET(tag##__benchtl_break); })
	185
289651a7	186	/----- Measuring ---------------------------------------------------------/
b64eb60f	187
289651a7 MW	188	struct bench_state {
	189	struct bench_timer tm; / a timer */
	190	double target_s; /* target time to run benchmarks */
	191	unsigned f; /* calibration flags (@BTF_...@) */
	192	#define BTF_CLB 0x0100 /* tried calibrating */
	193	struct { double m, c; } clk, cy; /* calculated overheads */
	194	};
67b5031e	195
289651a7 MW	196	typedef void bench_fn(unsigned long /n/, void /ctx*/);
289651a7 MW	197	/* Run the benchmark @n@ times, given a context pointer @ctx@. */
b64eb60f	198
67b5031e MW	199	/* --- @bench_init@ --- *
	200	*
	201	* Arguments: @struct bench_state *b@ = bench state to initialize
13ee7406	202	* @struct bench_timer *tm@ = timer to attach, or null
67b5031e	203	*
13ee7406 MW	204	* Returns: Zero on success, %$-1$% on failure.
	205	*
	206	* Use: Initialize the benchmark state. On success, the timer state
	207	* still needs to be calibrated (use @bench_calibrate@) before
	208	* it can be used, but this will be done automatically by
	209	* @bench_measure@ if it's not done by hand earlier. The timer
	210	* is now owned by the benchmark state and will be destroyed by
	211	* @bench_destroy@.
67b5031e	212	*
13ee7406 MW	213	* The only reason for failure is if @tm@ was null on entry,
	214	* and automatic construction of a timer failed. The state is
	215	* safe to discard, but calling @bench_destroy@ is safe too.
67b5031e MW	216	*/
67b5031e MW	217
13ee7406	218	extern int bench_init(struct bench_state /b/, struct bench_timer /tm/);
b64eb60f	219
67b5031e MW	220	/* --- @bench_destroy@ --- *
	221	*
	222	* Arguments: @struct bench_state *b@ = bench state
	223	*
	224	* Returns: ---
	225	*
	226	* Use: Destroy the benchmark state, releasing the resources that it
	227	* holds.
	228	*/
	229
e63124bc	230	extern void bench_destroy(struct bench_state /b*/);
b64eb60f	231
67b5031e MW	232	/* --- @bench_calibrate@ --- *
	233	*
	234	* Arguments: @struct bench_state *b@ = bench state
b1a20bee	235	* @unsigned f@ = calibration flags
67b5031e	236	*
13ee7406	237	* Returns: Zero on success, %$-1$% if calibration failed.
67b5031e MW	238	*
	239	* Use: Calibrate the benchmark state, so that it can be used to
	240	* measure performance reasonably accurately.
b1a20bee MW	241	*
	242	* Calibration will take into account how the subject code is
	243	* going to be located. If you're going to use @BENCH_MEASURE@
	244	* to measure a piece of literal code, then leave @f@ zero. If
	245	* the code to be measured is going to be executed via an
	246	* indirect branch, e.g., through the @measure@ function, then
	247	* set @BTF_INDIRECT@.
67b5031e MW	248	*/
67b5031e MW	249
b1a20bee MW	250	#define BTF_INDIRECT 1u
	251	extern int bench_calibrate(struct bench_state /b/, unsigned /f*/);
	252
	253	/* --- @bench_preflight@ --- *
	254	*
	255	* Arguments: @struct bench_state *b@ = benchmark state
	256	*
	257	* Returns: Zero on success, %$-1$% on failure.
	258	*
	259	* Use: Prepares for benchmarking on the current thread. Current
	260	* checks are that the timer is calibrated and that it can
	261	* successfully measure time; the timer preflight is also run.
	262	*
	263	* Users are unlikely to find this function useful: it's called
	264	* automatically by the @BENCH_MEASURE@ macro and the
	265	* @bench_measure@ function.
	266	*/
	267
	268	extern int bench_preflight(struct bench_state /b*/);
	269
	270	/* --- @bench_adapt@ --- *
	271	*
289651a7 MW	272	* Arguments: @double *n_inout@ = number of iterations, updated
289651a7 MW	273	* @double target_s@ = target time in seconds
b1a20bee MW	274	* @const struct bench_timing *t@ = timing from the previous run
	275	*
	276	* Returns: Nonzero if the measurement is sufficient; zero to run again.
	277	*
	278	* Use: This function determines a suitable number of iterations of a
	279	* benchmark function to perform next. It is used in a loop
	280	* such as the following.
	281	*
289651a7 MW	282	* @BENCH_TIMELOOP_DECLS;@
289651a7 MW	283	* @struct bench_timer *tm;@
b1a20bee	284	* @struct bench_timing t;@
289651a7	285	* @double n = 1.0, target_s = 1.0;@
b1a20bee MW	286	*
b1a20bee MW	287	* @do {@
289651a7 MW	288	* @BENCH_TIMELOOP_TAG(time, tm, &t, n, { break; })@
	289	* (do @_bench_n@ iterations of some computation)@;@
	290	* @} while (!bench_adapt(&n, target_s, &t));@
b1a20bee MW	291	*
	292	* On entry, @*n_inout@ should be the number of iterations
	293	* performed by the previous pass, and @*t@ the resulting time;
289651a7 MW	294	* the @BTF_TIMEOK@ flag must be set in @t->f@. If the timing
289651a7 MW	295	* is sufficient -- @t->t@ is sufficiently close to @target_s@
b1a20bee MW	296	* -- then the function returns nonzero to indicate that
	297	* measurement is complete. Otherwise, it sets @*n_inout@ to a
	298	* new, larger iteration count and returns zero to indicate that
	299	* a further pass is necessary.
	300	*
	301	* Users are unlikely to find this function useful: it's called
	302	* automatically by the @BENCH_MEASURE@ macro and the
	303	* @bench_measure@ function.
	304	*/
	305
289651a7	306	extern int bench_adapt(double /n_inout/, double /target_s*/,
b1a20bee MW	307	const struct bench_timing /t*/);
	308
	309	/* --- @bench_adjust@ --- *
	310	*
	311	* Arguments: @struct bench_state *b@ = benchmark state
	312	* @struct bench_timing *t_inout@ = timing to adjust
	313	* @double n@ = number of external iterations performed
	314	* @double base@ = number of internal operations per external
	315	* iteration
	316	*
	317	* Returns: ---
	318	*
289651a7	319	* Use: Adjusts a raw timing, as captured by @BENCH_TIMELOOP_TAG@,
b1a20bee MW	320	* according to the calibration data captured in @b@.
	321	* On exit, the timing data is updated, and @t->n@ is set to the
	322	* product @n*base@.
	323	*
	324	* Users are unlikely to find this function useful: it's called
	325	* automatically by the @BENCH_MEASURE@ macro and the
	326	* @bench_measure@ function.
	327	*/
	328
	329	extern void bench_adjust(struct bench_state /b*/,
	330	struct bench_timing /t_inout*/,
	331	double /n/, double /base/);
b64eb60f	332
289651a7 MW	333	/* --- @BENCH_MEASURE_DECLS@ --- *
	334	*
	335	* Arguments: ---
	336	*
	337	* Use: Expands to the declarations needed by @BENCH_MEASURE@.
	338	* These should be at block scope, not at toplevel.
	339	*/
	340
	341	#define BENCH_MEASURE_DECLS \
	342	struct bench_state *_bench_b; \
	343	struct bench_timing *_bench_t; \
	344	double _bench_nn; \
	345	BENCH_TIMELOOP_DECLS
	346
	347	/* --- @BENCH_MEASURE@, @BENCH_MEASURE_TAG@ --- *
	348	*
	349	* Arguments: @tag@ = control structure tag
	350	* @struct bench_state *b@ = benchmark state
	351	* @int &rc@ = where to put the result code (zero on success,
	352	* %$-1$% on failure)
	353	* @struct bench_timing *t_out@ = where to put the result
	354	* @double base@ = number of operations per external iteration
	355	*
	356	* Returns: ---
	357	*
	358	* Use: @BENCH_MEASURE(b, rc, delta_out, n) stmt@ measures the
	359	* execution of @stmt@.
	360	*
	361	* The statement should perform @_bench_n@ iterations of some
	362	* operation. It will be invoked multiple times, with varying
	363	* iteration counts, so as to run for approximately
	364	* @b->target_s@ seconds.
	365	*
	366	* On success, the resulting timing is left in @*t_out@, with
	367	* @t_out->n@ holding the product of the final iteration count
	368	* and @base@, and @rc@ is set to zero. If the timer fails, or
	369	* if @stmt@ invokes a free @break@ statement, then @rc@ is set
	370	* to %$-1$%.
	371	*
	372	* The macro @BENCH_MEASURE_TAG@ is the same, except that it
	373	* allows an explicit control-structure tag to be set so that it
	374	* can be used within larger control structure macros.
	375	*/
	376
	377	#define BENCH_MEASURE_TAG(tag, b, rc, t_out, base) \
	378	MC_BEFORE(tag##__benchmsr_setup, { \
	379	_bench_b = (b); _bench_t = (t_out); _bench_nn = 1.0; \
	380	if (bench_preflight(_bench_b)) MC_GOTARGET(tag##__benchmsr_fail); \
	381	}) \
	382	MC_TARGET(tag##__benchmsr_done, \
	383	{ bench_adjust(_bench_b, _bench_t, _bench_nn, (base)); (rc) = 0; }) \
	384	MC_TARGET(tag##__benchmsr_fail, { (rc) = -1; }) \
	385	for (;;) \
	386	MC_WRAP(tag##__benchmsr_loop, \
	387	{ ; }, \
	388	{ _bench_t->f &= _bench_b->f; \
	389	if (!(_bench_t->f&BTF_TIMEOK)) MC_GOTARGET(tag##__benchmsr_fail); \
	390	if (bench_adapt(&_bench_nn, _bench_b->target_s, _bench_t)) \
	391	MC_GOTARGET(tag##__benchmsr_done); \
	392	}, \
	393	{ MC_GOTARGET(tag##__benchmsr_fail); }) \
	394	BENCH_TIMELOOP_TAG(tag##__benchmsr_time, \
	395	_bench_b->tm, _bench_t, _bench_nn, { break; })
	396
397	#define BENCH_MEASURE(b, rc, t_out, base) \
398	BENCH_MEASURE_TAG(bench_measure, b, rc, t_out, base)
399
67b5031e MW	400	/* --- @bench_measure@ --- *
67b5031e MW	401	*
8d372122 MW	402	* Arguments: @struct bench_state *b@ = benchmark state
8d372122 MW	403	* @struct bench_timing *t_out@ = where to leave the timing
67b5031e MW	404	* @double base@ = number of internal units per call
	405	* @bench_fn fn@, @void ctx@ = benchmark function to run
	406	*
13ee7406	407	* Returns: Zero on success, %$-1$% if timing failed.
67b5031e MW	408	*
	409	* Use: Measure a function. The function @fn@ is called adaptively
	410	* with an iteration count @n@ set so as to run for
	411	* approximately @b->target_s@ seconds.
	412	*
	413	* The result is left in @*t_out@, with @t_out->n@ counting the
	414	* final product of the iteration count and @base@ (which might,
	415	* e.g., reflect the number of inner iterations the function
	416	* performs, or the number of bytes it processes per iteration).
	417	*/
	418
8d372122 MW	419	extern int bench_measure(struct bench_state /b*/,
8d372122 MW	420	struct bench_timing /t_out*/,
67b5031e	421	double /base/, bench_fn /fn/, void /ctx/);
b64eb60f	422
b1a20bee MW	423	/----- Reporting ---------------------------------------------------------/
b1a20bee MW	424
289651a7 MW	425	enum {
	426	BTU_OP, /* counting operations of some kind */
	427	BTU_BYTE, /* counting bytes (@rbuf >= 0@) */
	428	BTU_LIMIT /* (number of units) */
	429	};
	430
b1a20bee MW	431	/* --- @bench_report@ --- *
	432	*
	433	* Arguments: @const struct gprintf_ops gops, void gp@ = output formatter
	434	* @unsigned unit@ = unit processed by the benchmark function
	435	* @const struct bench_timing *t@ = benchmark result
	436	*
	437	* Returns: ---
	438	*
	439	* Use: Format, to the output identified by @gops@ and @go@, a
	440	* human-readable report of the benchmarking result @t@. No
	441	* newline is appended.
	442	*
	443	* The output format is subject to change in later versions.
	444	*/
	445
	446	extern void bench_report(const struct gprintf_ops /gops/, void /go/,
	447	unsigned /unit/,
	448	const struct bench_timing /t*/);
	449
b64eb60f MW	450	/----- That's all, folks -------------------------------------------------/
	451
	452	#ifdef __cplusplus
	453	}
	454	#endif
	455
	456	#endif