5 * (c) 2023 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of the mLib utilities library.
12 * mLib is free software: you can redistribute it and/or modify it under
13 * the terms of the GNU Library General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or (at
15 * your option) any later version.
17 * mLib is distributed in the hope that it will be useful, but WITHOUT
18 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
20 * License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License along with mLib. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
28 /*----- Header files ------------------------------------------------------*/
45 /*----- Data structures ---------------------------------------------------*/
48 struct bench_timer _t
;
49 const struct timer_ops
*clkops
, *cyops
;
50 union { int fd
; } u_cy
;
/* Operations table implemented by each clock or cycle-counter backend. */
struct timer_ops {
  /* Take a reading: store it in T_OUT and set the matching `BTF_...OK'
   * flag in `t_out->f' on success; leave the flag alone on failure.
   */
  void (*now)(struct bench_time *t_out, struct timer *t);

  /* Release whatever the backend's init function acquired in T. */
  void (*teardown)(struct timer *t);
};
58 /*----- Preliminaries -----------------------------------------------------*/
/* Number of nanoseconds in one second. */
#define NS_PER_S 1000000000
62 static void PRINTF_LIKE(1, 2) debug(const char *fmt
, ...)
67 p
= getenv("MLIB_BENCH_DEBUG");
68 if (p
&& *p
!= 'n' && *p
!= '0') {
70 fputs("mLib BENCH: ", stderr
);
71 vfprintf(stderr
, fmt
, ap
);
77 /*----- The null clock ----------------------------------------------------*/
/* Reading function for the null clock: records nothing, and in particular
 * sets no flag in `t_out->f'.
 */
static void null_now(struct bench_time *t_out, struct timer *t)
{
  /* Deliberately empty. */
}
/* Teardown function for backends which hold no resources: does nothing. */
static void null_teardown(struct timer *t)
{
  /* Deliberately empty. */
}
81 static const struct timer_ops null_ops
= { null_now
, null_teardown
};
83 static int null_cyinit(struct timer
*t
)
84 { t
->cyops
= &null_ops
; return (0); }
/* Cycle-timer table entry for the null clock, including its trailing
 * comma so that entries can simply be juxtaposed in the table initializer.
 */
#define NULL_CYENT { "null", null_cyinit },
88 /*----- Linux performance counters ----------------------------------------*/
90 #if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64)
92 #include <sys/types.h>
95 #include <linux/perf_event.h>
96 #include <asm/unistd.h>
98 static void perfevent_now(struct bench_time
*t_out
, struct timer
*t
)
102 n
= read(t
->u_cy
.fd
, &t_out
->cy
.i
, sizeof(t_out
->cy
.i
));
103 if (n
!= sizeof(t_out
->cy
.i
)) {
104 debug("failed to read perf-event counter: %s", strerror(errno
));
107 t_out
->f
|= BTF_CYOK
;
110 static void perfevent_teardown(struct timer
*t
)
111 { close(t
->u_cy
.fd
); }
113 static const struct timer_ops perfevent_ops
=
114 { perfevent_now
, perfevent_teardown
};
116 static int perfevent_init(struct timer
*t
)
118 struct perf_event_attr attr
= { 0 };
119 struct bench_time tm
;
121 attr
.type
= PERF_TYPE_HARDWARE
;
122 attr
.size
= sizeof(attr
);
123 attr
.config
= PERF_COUNT_HW_CPU_CYCLES
;
125 attr
.exclude_kernel
= 1;
128 t
->u_cy
.fd
= syscall(__NR_perf_event_open
, &attr
, 0, -1, -1, 0);
129 if (t
->u_cy
.fd
< 0) {
130 debug("couldn't open perf evvent: %s", strerror(errno
));
134 tm
.f
= 0; perfevent_now(&tm
, t
);
135 if (!(tm
.f
&BTF_CYOK
)) { close(t
->u_cy
.fd
); return (-1); }
137 t
->cyops
= &perfevent_ops
; return (0);
139 # define PERFEVENT_CYENT { "linux-perf-event", perfevent_init },
141 # define PERFEVENT_CYENT
144 /*----- Intel time-stamp counter ------------------------------------------*/
146 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
/* Bit 21 of the flags register, tested by `x86rdtsc_init' to decide
 * whether the `cpuid' instruction exists.
 */
#define EFLAGS_ID (1u << 21)

/* Bit 4 of the `d' output of `cpuid' leaf 1, tested by `x86rdtsc_init' to
 * decide whether the `rdtsc' instruction exists.
 */
#define CPUID_1D_TSC (1u << 4)
/* Manipulate the processor flags register and report the result.
 *
 * `x86rdtsc_init' calls this with M = `~EFLAGS_ID' and X either zero or
 * `EFLAGS_ID', and tests the `EFLAGS_ID' bit of the value returned in
 * order to decide whether the `cpuid' instruction is available.
 *
 * NOTE(review): presumably the flags are masked with M and then combined
 * with X before being written back and re-read -- the assembler body
 * should be checked to confirm the exact operation.
 */
151 static uint32
set_flags(unsigned long m
, unsigned long x
)
163 "mov %0, " TMP
"\n\t"
178 struct cpuid
{ uint32 a
, b
, c
, d
; };
/* Execute `cpuid' and store the resulting registers in INFO_OUT.
 *
 * A is loaded into %eax before the instruction runs.  Afterwards, %eax,
 * %ebx, %ecx and %edx are stored at byte offsets 0, 4, 8 and 12 from
 * INFO_OUT, i.e., into the `a', `b', `c' and `d' members of
 * `struct cpuid'.
 *
 * NOTE(review): C is passed as an input operand; presumably it is loaded
 * into %ecx as the subleaf selector -- confirm against the full asm.
 *
 * NOTE(review): the asm stores through operand %0 but the clobber list
 * visible here names only registers and `cc', with no "memory" clobber,
 * and the statement is not marked `__volatile__'; confirm that the
 * compiler can't cache or discard the stores to `*info_out'.
 */
180 static void cpuid(struct cpuid
*info_out
, uint32 a
, uint32 c
)
182 __asm__ ("movl %1, %%eax\n\t"
/* Spill the four result registers into the output structure. */
185 "movl %%eax, 0(%0)\n\t"
186 "movl %%ebx, 4(%0)\n\t"
187 "movl %%ecx, 8(%0)\n\t"
188 "movl %%edx, 12(%0)\n\t"
/* Inputs: %0 = output pointer (in a register), %1 = A, %2 = C. */
190 : "r"(info_out
), "g"(a
), "g"(c
)
191 : "eax", "ebx", "ecx", "edx", "cc");
194 static void x86rdtsc_now(struct bench_time
*t_out
, struct timer
*t
)
198 __asm__
__volatile__ ("rdtsc" : "=a"(lo
), "=d"(hi
));
199 SET64(t_out
->cy
, hi
, lo
); t_out
->f
|= BTF_CYOK
;
202 static const struct timer_ops x86rdtsc_ops
=
203 { x86rdtsc_now
, null_teardown
};
205 static int x86rdtsc_init(struct timer
*t
)
209 if ((set_flags(~EFLAGS_ID
, 0)&EFLAGS_ID
) ||
210 !(set_flags(~EFLAGS_ID
, EFLAGS_ID
)&EFLAGS_ID
))
211 { debug("no `cpuid' instruction"); return (-1); }
213 if (info
.a
< 1) { debug("no `cpuid' leaf 1"); return (-1); }
215 if (!(info
.d
&CPUID_1D_TSC
))
216 { debug("no `rdtsc' instrunction"); return (-1); }
217 t
->cyops
= &x86rdtsc_ops
; return (0);
220 # define X86RDTSC_CYENT { "x86-rdtsc", x86rdtsc_init },
222 # define X86RDTWC_CYENT
225 /*----- POSIX `clock_gettime' ---------------------------------------------*/
227 #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_THREAD_CPUTIME_ID)
229 static void gettime_now(struct bench_time
*t_out
, struct timer
*t
)
233 if (clock_gettime(CLOCK_THREAD_CPUTIME_ID
, &now
))
234 { debug("error reading POSIX clock: %s", strerror(errno
)); return; }
235 ASSIGN64(t_out
->s
, now
.tv_sec
); t_out
->ns
= now
.tv_nsec
;
236 t_out
->f
|= BTF_TIMEOK
;
239 static const struct timer_ops gettime_ops
= { gettime_now
, null_teardown
};
241 static int gettime_init(struct timer
*t
)
243 struct bench_time tm
;
245 tm
.f
= 0; gettime_now(&tm
, t
); if (!tm
.f
&BTF_TIMEOK
) return (-1);
246 t
->clkops
= &gettime_ops
; return (0);
249 # define GETTIME_CLKENT { "posix-clock_gettime", gettime_init },
251 # define GETTIME_CLKENT
254 /*----- Standard C `clock' ------------------------------------------------*/
256 static void clock_now(struct bench_time
*t_out
, struct timer
*t
)
259 unsigned long s
; uint32 ns
;
262 if (now
== (clock_t)-1) {
263 debug("error reading standard clock: %s", strerror(errno
));
266 x
= now
/CLOCKS_PER_SEC
;
267 if (x
> ULONG_MAX
) { debug("standard clock out of range"); return; }
269 s
= x
; x
= now
- CLOCKS_PER_SEC
*s
;
270 if (!(NS_PER_S
%CLOCKS_PER_SEC
))
271 ns
= x
*(NS_PER_S
/CLOCKS_PER_SEC
);
272 else if (NS_PER_S
<= ULONG_MAX
/CLOCKS_PER_SEC
)
273 ns
= (x
*NS_PER_S
)/CLOCKS_PER_SEC
;
275 ns
= x
*((NS_PER_S
+ 0.0)/CLOCKS_PER_SEC
);
276 ASSIGN64(t_out
->s
, s
); t_out
->ns
= ns
; t_out
->f
|= BTF_TIMEOK
;
279 static const struct timer_ops clock_ops
= { clock_now
, null_teardown
};
281 static int clock_init(struct timer
*t
)
283 struct bench_time tm
;
285 tm
.f
= 0; clock_now(&tm
, t
); if (!tm
.f
&BTF_TIMEOK
) return (-1);
286 t
->clkops
= &clock_ops
; return (0);
/* Clock table entry for the standard C `clock' backend, including its
 * trailing comma so that entries can simply be juxtaposed in the table
 * initializer.
 */
#define CLOCK_CLKENT { "clock", clock_init },
291 /*----- Timing setup ------------------------------------------------------*/
293 static const struct timerent
{
295 int (*init
)(struct timer */
*t*/
);
297 clktab
[] = { GETTIME_CLKENT CLOCK_CLKENT
{ 0, 0 } },
298 cytab
[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_CYENT
{ 0, 0 } };
300 static const struct timerent
*find_timer_n(const char *name
, size_t sz
,
301 const struct timerent
*timers
,
304 while (timers
->name
) {
305 if (strlen(timers
->name
) == sz
&& MEMCMP(name
, ==, timers
->name
, sz
))
309 debug("%s timer `%.*s' not found", what
, (int)sz
, name
); return (0);
312 static int try_timer(struct timer
*t
,
313 const struct timerent
*timer
, const char *what
)
315 if (timer
->init(t
)) return (-1);
316 debug("selected %s timer `%s'", what
, timer
->name
); return (0);
/* Choose and initialize a timer backend for T from the table TIMERS.
 *
 * WHAT describes the kind of timer, for diagnostics.  Returns zero as
 * soon as some backend has been set up successfully by `try_timer';
 * otherwise logs a diagnostic and returns -1.
 *
 * NOTE(review): VARNAME is presumably the name of an environment variable
 * listing preferred backend names (the callers pass `MLIB_BENCH_CLKTIMER'
 * and `MLIB_BENCH_CYCLETIMER') -- confirm how it is read and how the list
 * is delimited.
 */
319 static int select_timer(struct timer
*t
, const struct timerent
*timers
,
320 const char *varname
, const char *what
)
322 const char *p
; size_t n
;
323 const struct timerent
*timer
;
/* Walk the table in order, settling for the first backend that works. */
328 if (!try_timer(t
, timers
++, what
)) return (0);
/* Look up the backend named by the N bytes at P, and try it if found. */
332 timer
= find_timer_n(p
, n
, timers
, what
);
333 if (timer
&& !try_timer(t
, timer
, what
)) return (0);
/* Nothing worked. */
338 debug("no suitable %s timer found", what
); return (-1);
341 static void timer_now(struct bench_timer
*tm
, struct bench_time
*t_out
)
343 struct timer
*t
= (struct timer
*)tm
;
345 t
->clkops
->now(t_out
, t
);
346 t
->cyops
->now(t_out
, t
);
/* Implementation of the `destroy' operation for the built-in timer.
 *
 * Tears down whichever of the clock and cycle backends have been set up.
 * Either may still be null if `bench_createtimer' gave up part-way
 * through, which is why both are checked.
 *
 * NOTE(review): `bench_createtimer' allocates the timer with `xmalloc'
 * and relies on this function for cleanup, so presumably the storage is
 * released here too -- confirm.
 */
349 static void timer_destroy(struct bench_timer
*tm
)
/* TM is the base member at the start of our `struct timer'. */
351 struct timer
*t
= (struct timer
*)tm
;
354 if (t
->clkops
) t
->clkops
->teardown(t
);
355 if (t
->cyops
) t
->cyops
->teardown(t
);
359 static const struct bench_timerops timer_ops
= { timer_now
, timer_destroy
};
361 struct bench_timer
*bench_createtimer(void)
364 struct bench_timer
*ret
= 0;
366 t
= xmalloc(sizeof(*t
)); t
->cyops
= 0; t
->clkops
= 0;
367 if (select_timer(t
, clktab
, "MLIB_BENCH_CLKTIMER", "clock")) goto end
;
368 if (select_timer(t
, cytab
, "MLIB_BENCH_CYCLETIMER", "cycle")) goto end
;
369 t
->_t
.ops
= &timer_ops
; ret
= &t
->_t
; t
= 0;
371 if (t
) timer_destroy(&t
->_t
);
/* Convert a 64-bit counter K to `double'.
 *
 * With a native 64-bit type the value is in the `i' member.  Otherwise
 * it's held as two 32-bit halves, `hi' and `lo', and the high half must be
 * scaled by 2^32 = 4294967296.
 */
#ifdef HAVE_UINT64
#  define FLOATK64(k) ((double)(k).i)
#else
#  define FLOATK64(k) ((double)(k).lo + 4294967296.0*(double)(k).hi)
#endif
381 static void timer_diff(struct bench_timing
*delta_out
,
382 const struct bench_time
*t0
,
383 const struct bench_time
*t1
)
385 delta_out
->f
= t0
->f
&t1
->f
;
388 if (!(delta_out
->f
&BTF_TIMEOK
))
391 SUB64(k
, t1
->s
, t0
->s
);
392 delta_out
->t
= FLOATK64(k
) - 1 +
393 (t1
->ns
+ NS_PER_S
- t0
->ns
)/(double)NS_PER_S
;
396 if (!(delta_out
->f
&BTF_CYOK
))
399 SUB64(k
, t1
->cy
, t0
->cy
);
400 delta_out
->cy
= FLOATK64(k
);
404 /*----- Calibration -------------------------------------------------------*/
406 void bench_init(struct bench_state
*b
, struct bench_timer
*tm
)
407 { b
->tm
= tm
; b
->target_s
= 1.0; b
->f
= 0; }
409 void bench_destroy(struct bench_state
*b
)
410 { b
->tm
->ops
->destroy(b
->tm
); }
412 static void do_nothing(unsigned long n
, void *p
)
413 { while (n
--) RELAX
; }
415 int bench_calibrate(struct bench_state
*b
)
417 struct linreg lr_clk
= LINREG_INIT
, lr_cy
= LINREG_INIT
;
420 struct bench_timer
*tm
= b
->tm
;
421 struct bench_time t0
, t1
;
422 struct bench_timing delta
;
423 bench_fn
*fn
= LAUNDER(&do_nothing
);
424 unsigned f
= BTF_ANY
;
427 if (b
->f
&BTF_ANY
) return (0);
429 for (i
= 0; i
< 10; i
++)
430 { tm
->ops
->now(tm
, &t0
); fn(1, 0); tm
->ops
->now(tm
, &t1
); }
432 debug("calibrating...");
435 tm
->ops
->now(tm
, &t0
); fn(n
, 0); tm
->ops
->now(tm
, &t1
);
436 timer_diff(&delta
, &t0
, &t1
); f
&= delta
.f
;
437 if (!(f
&BTF_TIMEOK
)) { rc
= -1; goto end
; }
438 linreg_update(&lr_clk
, n
, delta
.t
);
440 debug(" n = %10lu; t = %12g s", n
, delta
.t
);
442 linreg_update(&lr_cy
, n
, delta
.cy
);
443 debug(" n = %10lu; t = %12g s, cy = %10.0f", n
, delta
.t
, delta
.cy
);
445 if (delta
.t
>= b
->target_s
/20.0) break;
446 if (n
>= ULONG_MAX
- n
/3) break;
450 linreg_fit(&lr_clk
, &b
->clk
.m
, &b
->clk
.c
, 0);
451 debug("clock overhead = (%g n + %g) s", b
->clk
.m
, b
->clk
.c
);
453 linreg_fit(&lr_clk
, &b
->clk
.m
, &b
->clk
.c
, 0);
454 debug("cycle overhead = (%g n + %g) cy", b
->cy
.m
, b
->cy
.c
);
461 int bench_measure(struct bench_timing
*t_out
, struct bench_state
*b
,
462 bench_fn
*fn
, void *p
)
464 struct bench_timer
*tm
= b
->tm
;
465 struct bench_time t0
, t1
;
468 if (bench_calibrate(b
)) return (-1);
469 debug("measuring..."); n
= 1;
471 tm
->ops
->now(tm
, &t0
); fn(n
, p
); tm
->ops
->now(tm
, &t1
);
472 timer_diff(t_out
, &t0
, &t1
);
473 if (!(t_out
->f
&BTF_TIMEOK
)) return (-1);
474 if (!(t_out
->f
&BTF_CYOK
)) debug(" n = %10lu; t = %12g", n
, t_out
->t
);
475 else debug(" n = %10lu; t = %12g, cy = %10.0f", n
, t_out
->t
, t_out
->cy
);
476 if (t_out
->t
>= 0.72*b
->target_s
) break;
477 n
*= 1.44*b
->target_s
/t_out
->t
;
479 if (!(t_out
->f
&BTF_CYOK
))
480 debug(" %g s per op; %g ops/s", t_out
->t
/n
, n
/t_out
->t
);
482 debug(" %g s (%g cy) per op; %g ops/s",
483 t_out
->t
/n
, t_out
->cy
/n
, n
/t_out
->t
);
484 t_out
->n
= n
; return (0);
487 /*----- That's all, folks -------------------------------------------------*/