From 6e683a79101025ee0d371f0b9bece811856edd8d Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Thu, 14 Mar 2024 09:15:46 +0000 Subject: [PATCH] @@@ misc mess --- struct/buf.3.in | 2 +- struct/dstr.3.in | 2 +- test/bench.3.in | 235 +++++++++++++-------- test/bench.c | 577 +++++++++++++++++++++++++++++++++++++++++----------- test/bench.h | 25 ++- test/tvec-bench.c | 3 +- test/tvec-core.c | 14 +- test/tvec-env.3.in | 8 +- test/tvec-remote.c | 8 +- test/tvec-timeout.c | 4 +- test/tvec.3.in | 12 ++ test/tvec.h | 33 +-- utils/macros.3.in | 30 ++- utils/macros.h | 254 ++++++++++++++++++++--- 14 files changed, 939 insertions(+), 268 deletions(-) diff --git a/struct/buf.3.in b/struct/buf.3.in index c39d412..083c599 100644 --- a/struct/buf.3.in +++ b/struct/buf.3.in @@ -1097,7 +1097,7 @@ or the macro equivalent these leave the buffer in the state established by initialization: the buffer holds no resources, but is ready for immediate use. .PP -A dynamic buffer contains an +A dynamic buffer contains a .B buf buffer, called its diff --git a/struct/dstr.3.in b/struct/dstr.3.in index 919bf09..e7b527e 100644 --- a/struct/dstr.3.in +++ b/struct/dstr.3.in @@ -365,7 +365,7 @@ functions are implemented using The output operations table is exposed as .BR dstr_printops ; the functions expect the output pointer to be the address of the output -.BR dstr +.BR dstr . .PP The function .B dstr_putd diff --git a/test/bench.3.in b/test/bench.3.in index d5953ad..ef0b4db 100644 --- a/test/bench.3.in +++ b/test/bench.3.in @@ -45,11 +45,14 @@ bench \- low-level benchmarking tools .nf .B "#include " .PP -.ta 2n +.ta 2n +2n +2n .B "struct bench_time {" .B " unsigned f;" -.B " kludge64 s;" -.B " uint32 ns;" +.B " union {" +.B " struct { kludge64 s; uint32 ns; } ts;" +.B " clock_t clk;" +.B " kludge64 rawns;" +.B " } t;" .B " kludge64 cy;" .B "};" .PP @@ -60,9 +63,18 @@ bench \- low-level benchmarking tools .B " double cy;" .B "};" .PP +.B "#define BTF_T0 0u" +.B "#define BTF_T1 ..." 
.B "struct bench_timerops {" .BI " void (*describe)(struct bench_timer *" bt ", dstr *" d ); -.BI " void (*now)(struct bench_timer *" bt ", struct bench_time *" t_out ); +.ta 2n +\w'\fBint (*now)('u +.BI " int (*now)(struct bench_timer *" bt , +.BI " struct bench_time *" t_out ", unsigned " f ); +.ta 2n +\w'\fBvoid (*diff)('u +.BI " void (*diff)(struct bench_timer *" bt , +.BI " struct bench_timing *" delta_out , +.BI " const struct bench_time *" t0 , +.BI " const struct bench_time *" t1 ); .BI " void (*destroy)(struct bench_timer *" bt ); .B "};" .B "struct bench_timer {" @@ -140,49 +152,54 @@ must always point to the timer object itself. Write a description of the timer to the dynamic string .IR d . .TP -.IB tm ->ops->now( tm ", " t_out) +.IB tm ->ops->now( tm ", " t_out ", " f ) Store the current time in -.IR t_out . +.BI * t_out \fR. The -.B struct bench_time -used to represent the time reported by a timer -is described in detail below. +.B BTF_T1 +flag is set in +.I f +to indicate that this is the second call in a pair; +leave it clear for the first call. +(A fake +.B BTF_T0 +flag is defined to be zero for symmetry.) +Return zero on success +.I or +permanent failure; +return \-1 if timing failed but +trying again immediately has a reasonable chance of success. +.TP +.IB tm ->ops->diff( tm ", " delta_out ", " t0 ", " t1 ) +Store in +.BI * delta_out +the difference between the two times +.I t0 +and +.IR t1 . .TP .IB tm ->ops->destroy( tm ) Destroy the timer, releasing all of the resources that it holds. .PP -A time, a reported by a timer, is represented by the -.BR "struct bench_time" . -A passage-of-time measurement is stored in the -.B s -and -.B ns -members, holding seconds and nanoseconds respectively. -(A timer need not have nanosecond precision. -The exact interpretation of the time \(en -e.g., whether it measures wallclock time, -user-mode CPU time, -or total thread CPU time \(en -is a matter for the specific timer implementation.)
-A cycle count is stored in the .B cy member. -The +A +.B bench_timing +structure reports the difference between two times, +as determined by a timer's +.B diff +function. +It has four members. +.TP .B f -member stores flags: +A flags word. .B BTF_TIMEOK -is set if the passage-of-time measurement -.B s -and -.B ns -are valid; and +is set if the passage-of-time measurement in +.B t +is valid; .B BTF_CYOK -is set if the cycle count +is set if the cycle count in .B cy is valid. -Neither the time nor the cycle count need be measured -relative to any particular origin. The mask .B BTF_ANY covers the @@ -191,9 +208,57 @@ and .B BTF_CYOK bits: hence, -.IB f &BTF_ANY +.B f&BTF_ANY is nonzero (true) if the timer returned any valid timing information. +.TP +.B n +The number of iterations performed by the benchmark function +on its satisfactory run, +multiplied by +.IR base . +.TP +.B t +The time taken for the satisfactory run of the benchmark function, +in seconds. +Only valid if +.B BTF_TIMEOK +is set in +.BR f . +.TP +.B cy +The number of CPU cycles used +in the satisfactory run +of the benchmark function. +Only valid if +.B BTF_CYOK +is set in +.BR f . +.PP +A +.B "struct bench_time" +represents a single instant in time, +as captured by a timer's +.B now +function. +The use of this structure is a private matter for the timer: +the only hard requirement is that the +.B diff +function should be able to compute the difference between two times. +However, the intent is that +a passage-of-time measurement is stored in the +.B t +union, +a cycle count is stored in the +.B cy +member, and +the +.B f +member stores flags +.B BTF_TIMEOK +and/or +.B BTF_CYOK +if the passage-of-time or cycle count respectively are valid. . .SS The built-in timer The function @@ -249,6 +314,10 @@ then construction of the timer as a whole fails. The clock subtimers are as follows. Not all of them will be available on every platform.
.TP +.B linux-x86-perf-rdpmc-hw-cycles +This is a dummy companion to the similarly named cycle subtimer; +see its description below. +.TP .B posix-thread-cputime Measures the passage of time using .BR clock_gettime (2), @@ -269,8 +338,8 @@ if other threads are running. The cycle subtimers are as follows. Not all of them will be available on every platform. .TP -.B linux-perf-event -Counts CPU cycles using the Linux-specific +.B linux-perf-read-hw-cycles +Counts CPU cycles using the Linux-specific .BR perf_event_open (2) function to read the .BR PERF_\%COUNT_\%HW_\%CPU_\%CYCLES @@ -282,13 +351,48 @@ e.g., because the .B /proc/sys/kernel/perf_event_paranoid level is too high. .TP -.B x86-rdtsc -Counts CPU cycles using the x86 +.B linux-x86-perf-rdpmc-hw-cycles +Counts CPU cycles using the Linux-specific +.BR perf_event_open (2) +function, +as for +.B linux-perf-read-hw-cycles +above, +except that it additionally uses the i386/AMD64 .B rdtsc +and +.B rdpmc +instructions, +together with information provided by the kernel +through a memory-mapped page +to do its measurements without any system call overheads. +It does passage-of-time and cycle counting in a single operation, +so no separate clock subtimer is required: +the similarly-named clock subtimer does nothing +except check that the +.B linux-x86-perf-rdpmc-hw-cycles +cycle subtimer has been selected. +This is almost certainly the best choice if it's available. +.TP +.B x86-rdtscp +Counts CPU cycles using the x86 +.B rdtscp instruction. This instruction is not really suitable for performance measurement: it gives misleading results on CPUs with variable clock frequency. .TP +.B x86-rdtsc +Counts CPU cycles using the x86 +.B rdtsc +instruction. +This has the downsides of +.B rdtscp +above, +but also fails to detect when the thread has been suspended +or transferred to a different CPU core +and gives misleading answers in this case. +Not really recommended.
+.TP .B null A dummy cycle counter, which will initialize successfully @@ -297,15 +401,21 @@ This is a reasonable fallback in many situations. .PP The built-in preference order for clock subtimers, from most to least preferred, is -.B posix-thread-cputime +.BR linux-x86-perf-rdpmc-hw-cycles , followed by +.BR posix-thread-cputime , +and finally .BR stdc-clock . The built-in preference order for cycle subtimers, from most to least preferred, is -.B linux-perf-event +.BR linux-x86-perf-rdpmc-hw-cycles +then +.BR linux-perf-read-hw-cycles , followed by +.BR x86-rdtscp , +and .BR x86-rdtsc , -and then +and finally .BR null . . .SS The benchmark state @@ -483,45 +593,6 @@ returns zero. If it fails \(en most likely because the timer failed \(en then it returns \-1. -.PP -A -.B bench_timing -structure reports the outcome of a successful measurement. -It has four members. -.TP -.B f -A flags word. -.B BTF_TIMEOK -is set if the passage-of-time measurement in -.B t -is valid; -.B BTF_CYOK -is set if the cycle count in -.B cy -is valid. -.TP -.B n -The number of iterations performed by the benchmark function -on its satisfactory run, -multiplied by -.IR base . -.TP -.B t -The time taken for the satisfactory run of the benchmark function, -in seconds. -Only valid if -.B BTF_TIMEOK -is set in -.BR f . -.TP -.B cy -The number of CPU cycles used -in the satisfactory run of the benchmark function, -in seconds. -Only valid if -.B BTF_CYOK -is set in -.BR f . .
.\"-------------------------------------------------------------------------- .SH "SEE ALSO" diff --git a/test/bench.c b/test/bench.c index 7fedc9d..9cb84e5 100644 --- a/test/bench.c +++ b/test/bench.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -44,6 +45,22 @@ #include "linreg.h" #include "macros.h" +#if GCC_VERSION_P(4, 5) && (defined(__i386__) || defined(__x86_64__)) +# include +# define CPUID_1D_TSC (1u << 4) +# define CPUID_1xD_TSCP (1u << 27) +#endif + +#if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64) +# include +# include +# include +# include +# if GCC_VERSION_P(4, 5) && (defined(__i386__) || defined(__x86_64__)) +# include +# endif +#endif + /*----- Data structures ---------------------------------------------------*/ enum { CLK, CY, NTIMER }; @@ -51,7 +68,12 @@ enum { CLK, CY, NTIMER }; struct timer { struct bench_timer _t; const struct timer_ops *ops[NTIMER]; /* subtimers for clock and cycles */ - union { int fd; } u_cy; /* state for cycle measurement */ + union { + unsigned tscaux; /* `ia32_tsc_aux' for `ldtscp' */ + int fd; /* vanilla `perf_event_open' */ + struct { const volatile void *map; size_t sz; } pmc; /* `perf_event_open' + * with `rdpmc' */ + } u_cy; /* state for cycle measurement */ }; struct timer_ops { @@ -59,8 +81,13 @@ struct timer_ops { unsigned f; /* flags */ #define TF_SECRET 1u /* don't try this automatically */ int (*init)(struct timer */*t*/); /* initialization function */ - void (*now)(struct bench_time *t_out, struct timer *t); /* read current */ - void (*teardown)(struct timer *t); /* release held resources */ + int (*now)(struct timer */*t*/, /* read current */ + struct bench_time */*t_out*/, unsigned /*f*/); + void (*diff)(struct timer */*t*/, /* difference */ + struct bench_timing */*t_inout*/, + const struct bench_time */*t0*/, + const struct bench_time */*t1*/); + void (*teardown)(struct timer */*t*/); /* release held resources */ }; /*----- Preliminaries 
-----------------------------------------------------*/ @@ -92,52 +119,75 @@ static PRINTF_LIKE(1, 2) void debug(const char *fmt, ...) } } -/* --- @timer_diff@ --- * +/*----- Difference utilities ----------------------------------------------*/ + +#ifdef HAVE_UINT64 +# define FLOATK64(k) ((double)(k).i) +#else +# define FLOATK64(k) ((double)(k).lo + 4294967296.0*(double)(k).hi) +#endif + +/* --- @diff_ts@ --- * * - * Arguments: @struct bench_timing *delta_out@ = where to putt the result - * @const struct bench_time *t0, *t1@ = two times captured by a - * timer's @now@ function + * Arguments: @struct timer *t@ = timer structure + * @struct bench_timing *delta_inout@ = where to put the result + * @const struct time *t0, *t1@ = two input times * * Returns: --- * - * Use: Calculates the difference between two captured times. The - * flags are set according to whether the differences are - * meaningful; @delta_out->n@ is left unset. + * Use: Calculates a time difference for timers using the + * @struct timespec@-like time format. */ -static void timer_diff(struct bench_timing *delta_out, - const struct bench_time *t0, - const struct bench_time *t1) +static void diff_ts(struct timer *t, struct bench_timing *delta_inout, + const struct bench_time *t0, const struct bench_time *t1) { unsigned f = t0->f&t1->f; kludge64 k; -#ifdef HAVE_UINT64 -# define FLOATK64(k) ((double)(k).i) -#else -# define FLOATK64(k) ((double)(k).lo + 4275123318.0*(double)(k).hi) -#endif + if (f&BTF_TIMEOK) { - if (!(f&BTF_TIMEOK)) - delta_out->t = 0.0; - else { - SUB64(k, t1->s, t0->s); - delta_out->t = FLOATK64(k) - 1 + - (t1->ns + NS_PER_S - t0->ns)/(double)NS_PER_S; - } + /* Calculate the integer difference in seconds. */ + SUB64(k, t1->t.ts.s, t0->t.ts.s); - if (!(f&BTF_CYOK)) - delta_out->cy = 0.0; - else { - SUB64(k, t1->cy, t0->cy); - delta_out->cy = FLOATK64(k); + /* And apply the nanoseconds difference. To prevent underflow, + * pre-emptively borrow one from the integer difference. 
+ */ + delta_inout->t = + FLOATK64(k) - 1.0 + + (t1->t.ts.ns + NS_PER_S - t0->t.ts.ns)/(double)NS_PER_S; + + /* Done. */ + delta_inout->f |= BTF_TIMEOK; } +} - delta_out->f = f; +/* --- @diff_cycles@ --- * + * + * Arguments: @struct timer *t@ = timer structure + * @struct bench_timing *delta_inout@ = where to put the result + * @const struct time *t0, *t1@ = two input times + * + * Returns: --- + * + * Use: Calculates a time difference for cycle-counting timers. + */ -#undef FLOATK64 +static void diff_cycles(struct timer *t, struct bench_timing *delta_inout, + const struct bench_time *t0, + const struct bench_time *t1) +{ + unsigned f = t0->f&t1->f; + kludge64 k; + + if (f&BTF_CYOK) { + SUB64(k, t1->cy, t0->cy); delta_inout->cy = FLOATK64(k); + delta_inout->f |= BTF_CYOK; + } } +#undef FLOATK64 + /*----- The null timer ----------------------------------------------------*/ /* This is a timer which does nothing, in case we don't have any better @@ -145,11 +195,16 @@ static void timer_diff(struct bench_timing *delta_out, */ static int null_init(struct timer *t) { return (0); } -static void null_now(struct bench_time *t_out, struct timer *t) { ; } +static int null_now(struct timer *t, struct bench_time *t_out, unsigned f) + { return (0); } +static void null_diff(struct timer *t, struct bench_timing *delta_inout, + const struct bench_time *t0, + const struct bench_time *t1) + { ; } static void null_teardown(struct timer *t) { ; } static const struct timer_ops null_ops = - { "null", 0, null_init, null_now, null_teardown }; + { "null", 0, null_init, null_now, null_diff, null_teardown }; #define NULL_ENT &null_ops, /*----- The broken clock --------------------------------------------------*/ @@ -161,7 +216,7 @@ static const struct timer_ops null_ops = static int broken_init(struct timer *t) { return (-1); } static const struct timer_ops broken_ops = - { "broken", TF_SECRET, broken_init, null_now, null_teardown }; + { "broken", TF_SECRET, broken_init, null_now, null_diff, 
null_teardown }; #define BROKEN_ENT &broken_ops, /*----- Linux performance counters ----------------------------------------*/ @@ -172,22 +227,48 @@ static const struct timer_ops broken_ops = #if defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64) -#include -#include +/* --- @perfevent_open@ --- * + * + * Arguments: --- + * + * Returns: File descriptor, or %$-1$%. + * + * Use: Open a performance measurement descriptor set up to count CPU + * cycles. + */ -#include -#include +static int perfevent_open(void) +{ + struct perf_event_attr attr = { 0 }; + int fd; -static void perfevent_now(struct bench_time *t_out, struct timer *t) + attr.type = PERF_TYPE_HARDWARE; + attr.size = sizeof(attr); + attr.config = PERF_COUNT_HW_CPU_CYCLES; + attr.disabled = 0; + attr.exclude_kernel = 1; + attr.exclude_hv = 1; + + fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); + if (fd < 0) { + debug("couldn't open perf event: %s", strerror(errno)); + return (-1); + } + + return (fd); +} + +static int perfevent_now(struct timer *t, + struct bench_time *t_out, unsigned f) { ssize_t n; n = read(t->u_cy.fd, &t_out->cy.i, sizeof(t_out->cy.i)); if (n != sizeof(t_out->cy.i)) { debug("failed to read perf-event counter: %s", strerror(errno)); - return; + return (0); } - t_out->f |= BTF_CYOK; + t_out->f |= BTF_CYOK; return (0); } static void perfevent_teardown(struct timer *t) @@ -195,34 +276,201 @@ static void perfevent_teardown(struct timer *t) static int perfevent_init(struct timer *t) { - struct perf_event_attr attr = { 0 }; struct bench_time tm; + int fd = -1, rc; - attr.type = PERF_TYPE_HARDWARE; - attr.size = sizeof(attr); - attr.config = PERF_COUNT_HW_CPU_CYCLES; - attr.disabled = 0; - attr.exclude_kernel = 1; - attr.exclude_hv = 1; + fd = perfevent_open(); if (!fd) { rc = -1; goto end; } - t->u_cy.fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); - if (t->u_cy.fd < 0) { - debug("couldn't open perf evvent: %s", strerror(errno)); - return (-1); + t->u_cy.fd = fd; tm.f = 0; 
perfevent_now(t, &tm, 0); + if (!(tm.f&BTF_CYOK)) { rc = -1; goto end; } + fd = -1; rc = 0; +end: + if (fd != -1) close(fd); + return (rc); +} + +static const struct timer_ops perfevent_ops = + { "linux-perf-read-hw-cycles", 0, + perfevent_init, perfevent_now, diff_cycles, perfevent_teardown }; +#define PERFEVENT_VANILLA_CYENT &perfevent_ops, + +# if GCC_VERSION_P(4, 5) && (defined(__i386__) || defined(__x86_64__)) + +/* Special syscall-free version for x86 using `rdpmc' instruction. * + * + * This is a bit weird because it does both kinds of measurement in a single + * operation. + */ + +static int perfevrdpmc_now(struct timer *t, + struct bench_time *t_out, unsigned f) +{ + const volatile struct perf_event_mmap_page *map = t->u_cy.pmc.map; + unsigned long long tsc = tsc, toff = toff, tenb = tenb; + unsigned long long cy = cy, cyoff = cyoff; + unsigned long long m, hi, lo; + unsigned tshift = tshift, tmult = tmult, q0, q1, ff; + + /* Repeat until we can complete this job without the buffer changing in the + * middle. + */ + q0 = map->lock; + __atomic_thread_fence(__ATOMIC_ACQ_REL); + for (;;) { + ff = 0; + + /* Read the passage-of-time information. */ + if (map->cap_user_time) { + tenb = map->time_enabled; + tsc = __builtin_ia32_rdtsc(); + tshift = map->time_shift; + tmult = map->time_mult; + toff = map->time_offset; + ff |= BTF_TIMEOK; + } + + /* Read the performance-counter information. */ + if (map->cap_user_rdpmc) { + cy = __builtin_ia32_rdpmc(map->index - 1); + cyoff = map->offset; + ff |= BTF_CYOK; + } + + /* Check the sequence number again. */ + __atomic_thread_fence(__ATOMIC_ACQ_REL); + q1 = map->lock; + if (q0 == q1) break; + q0 = q1; + } + + if (ff&BTF_TIMEOK) { + /* We have a raw reference-cycle count %$n$% (@tsc@), and parameters + * %$a$%, %$w$% and %$t_0$%, such that %$a n/2^w + t_0$% gives a time in + * nanoseconds. 
+ */ + + m = (1ull << tshift) - 1; + hi = tsc >> tshift; lo = tsc&m; + t_out->t.rawns.i = hi*tmult + (lo*tmult >> tshift) + toff + tenb; + t_out->f |= BTF_TIMEOK; } - tm.f = 0; perfevent_now(&tm, t); - if (!(tm.f&BTF_CYOK)) { close(t->u_cy.fd); return (-1); } + if (ff&BTF_CYOK) { + /* We have the cycle count. */ + t_out->cy.i = cy + cyoff; + t_out->f |= BTF_CYOK; + } return (0); } -static const struct timer_ops perfevent_ops = - { "linux-perf-hw-cycles", 0, - perfevent_init, perfevent_now, perfevent_teardown }; +static void perfevrdpmc_diff(struct timer *t, + struct bench_timing *delta_inout, + const struct bench_time *t0, + const struct bench_time *t1) +{ + unsigned f = t0->f&t1->f; -# define PERFEVENT_CYENT &perfevent_ops, + if (f&BTF_TIMEOK) { + delta_inout->t = (t1->t.rawns.i - t0->t.rawns.i)/(double)NS_PER_S; + delta_inout->f |= BTF_TIMEOK; + } + + if (f&BTF_CYOK) { + delta_inout->cy = t1->cy.i - t0->cy.i; + delta_inout->f |= BTF_CYOK; + } +} + +static void perfevrdpmc_teardown(struct timer *t) + { munmap((/*unconst unvolatile*/ void *)t->u_cy.pmc.map, t->u_cy.pmc.sz); } + +static int perfevrdpmc_cyinit(struct timer *t) +{ + const volatile struct perf_event_mmap_page *map = 0; + unsigned a, b, c, d, q0, q1, f; + int pgsz, mapsz, fd = -1, rc; + + /* We need `rdtsc' to do the passage-of-time measurement. */ + if (!__get_cpuid(1, &a, &b, &c, &d) || !(d&CPUID_1D_TSC)) + { debug("no `rdtsc' instrunction"); return (-1); } + + /* The rules say we must allocate %$1 + 2^n$% pages, so we need to know how + * big a page is. + */ + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz < 0) { + debug("failed to discover page size!: %s", strerror(errno)); + rc = -1; goto end; + } + + /* Open the measurement descriptor and map it. 
*/ + fd = perfevent_open(); if (!fd) return (-1); + mapsz = 2*pgsz; + map = mmap(0, mapsz, PROT_READ, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + debug("failed to map perf event: %s", strerror(errno)); + return (-1); + } + + /* Check that it's revealed the necessary information. */ + q0 = map->lock; + __atomic_thread_fence(__ATOMIC_ACQ_REL); + for (;;) { + f = 0; + if (map->cap_user_time) f |= BTF_TIMEOK; + if (map->cap_user_rdpmc) f |= BTF_CYOK; + __atomic_thread_fence(__ATOMIC_ACQ_REL); + q1 = map->lock; + if (q0 == q1) break; + q0 = q1; + } + if (!(f&BTF_TIMEOK)) + { debug("kernel refused user time measurement"); rc = -1; goto end; } + if (!(f&BTF_TIMEOK)) + { debug("kernel refused user cycle measurement"); rc = -1; goto end; } + + /* All done. We can close the descriptor here: the mapping will keep the + * performance-measurement machinery alive. + */ + t->u_cy.pmc.map = map; t->u_cy.pmc.sz = mapsz; map = 0; rc = 0; +end: + if (fd != -1) close(fd); + if (map) munmap((/*unconst unvolatile*/ void *)map, mapsz); + return (rc); +} + +static const struct timer_ops perfevrdpmc_cyops = + { "linux-x86-perf-rdpmc-hw-cycles", 0, + perfevrdpmc_cyinit, perfevrdpmc_now, + perfevrdpmc_diff, perfevrdpmc_teardown }; + +static int perfevrdpmc_clkinit(struct timer *t) +{ + if (t->ops[CLK] != &perfevrdpmc_cyops) { + debug("linux-x86-perf-rdpmc-hw-cycles not set as cycle subtimer"); + return(-1); + } + return (0); +} + +static const struct timer_ops perfevrdpmc_clkops = + { "linux-x86-perf-rdpmc-hw-cycles", 0, + perfevrdpmc_clkinit, null_now, + null_diff, null_teardown }; + +# define PERFEVENT_RDPMC_CLKENT &perfevrdpmc_clkops, +# define PERFEVENT_RDPMC_CYENT &perfevrdpmc_cyops, + +# else +# define PERFEVENT_RDPMC_CLKENT +# define PERFEVENT_RDPMC_CYENT +# endif + +# define PERFEVENT_CLKENT PERFEVENT_RDPMC_CLKENT +# define PERFEVENT_CYENT PERFEVENT_RDPMC_CYENT PERFEVENT_VANILLA_CYENT #else +# define PERFEVENT_CLKENT # define PERFEVENT_CYENT #endif @@ -233,14 +481,11 @@ static 
const struct timer_ops perfevent_ops = * CPU frequency adjustments. */ -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#if GCC_VERSION_P(4, 5) && (defined(__i386__) || defined(__x86_64__)) -#include - -#define CPUID_1D_TSC (1u << 4) - -static void x86rdtsc_now(struct bench_time *t_out, struct timer *t) - { t_out->cy.i = __builtin_ia32_rdtsc(); t_out->f |= BTF_CYOK; } +static int x86rdtsc_now(struct timer *t, + struct bench_time *t_out, unsigned f) + { t_out->cy.i = __builtin_ia32_rdtsc(); t_out->f |= BTF_CYOK; return (0); } static int x86rdtsc_init(struct timer *t) { @@ -248,13 +493,44 @@ static int x86rdtsc_init(struct timer *t) if (!__get_cpuid(1, &a, &b, &c, &d) || !(d&CPUID_1D_TSC)) { debug("no `rdtsc' instrunction"); return (-1); } + t->u_cy.tscaux = ~0u; + return (0); +} + +static int x86rdtscp_now(struct timer *t, + struct bench_time *t_out, unsigned f) +{ + unsigned tscaux; + unsigned long long n; + + n = __builtin_ia32_rdtscp(&tscaux); + if (!(f&BTF_T1)) + t->u_cy.tscaux = tscaux; + else if (t->u_cy.tscaux != tscaux) { + debug("tscaux mismatch: new 0x%08x /= old 0x%08x", + tscaux, t->u_cy.tscaux); + return (-1); + } + t_out->cy.i = n; t_out->f |= BTF_CYOK; return (0); +} + +static int x86rdtscp_init(struct timer *t) +{ + unsigned a, b, c, d; + + if (!__get_cpuid(0x80000001, &a, &b, &c, &d) || !(d&CPUID_1xD_TSCP)) + { debug("no `rdtscp' instrunction"); return (-1); } return (0); } static const struct timer_ops x86rdtsc_ops = - { "x86-rdtsc", 0, x86rdtsc_init, x86rdtsc_now, null_teardown }; + { "x86-rdtsc", 0, + x86rdtsc_init, x86rdtsc_now, diff_cycles, null_teardown }; +static const struct timer_ops x86rdtscp_ops = + { "x86-rdtscp", 0, + x86rdtscp_init, x86rdtscp_now, diff_cycles, null_teardown }; -# define X86RDTSC_CYENT &x86rdtsc_ops, +# define X86RDTSC_CYENT &x86rdtscp_ops, &x86rdtsc_ops, #else # define X86RDTSC_CYENT #endif @@ -267,26 +543,27 @@ static const struct timer_ops x86rdtsc_ops = #if defined(HAVE_CLOCK_GETTIME) && 
defined(CLOCK_THREAD_CPUTIME_ID) -static void gettime_now(struct bench_time *t_out, struct timer *t) +static int gettime_now(struct timer *t, struct bench_time *t_out, unsigned f) { struct timespec now; if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now)) - { debug("error reading POSIX clock: %s", strerror(errno)); return; } - ASSIGN64(t_out->s, now.tv_sec); t_out->ns = now.tv_nsec; - t_out->f |= BTF_TIMEOK; + { debug("error reading POSIX clock: %s", strerror(errno)); return (0); } + ASSIGN64(t_out->t.ts.s, now.tv_sec); t_out->t.ts.ns = now.tv_nsec; + t_out->f |= BTF_TIMEOK; return (0); } static int gettime_init(struct timer *t) { struct bench_time tm; - tm.f = 0; gettime_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1); + tm.f = 0; gettime_now(t, &tm, 0); if (!tm.f&BTF_TIMEOK) return (-1); return (0); } static const struct timer_ops gettime_ops = - { "posix-thread-cputime", 0, gettime_init, gettime_now, null_teardown }; + { "posix-thread-cputime", 0, + gettime_init, gettime_now, diff_ts, null_teardown }; # define GETTIME_CLKENT &gettime_ops, #else @@ -299,39 +576,40 @@ static const struct timer_ops gettime_ops = * guaranteed to be available, though it's not likely to be very good. 
*/ -static void clock_now(struct bench_time *t_out, struct timer *t) +static int clock_now(struct timer *t, struct bench_time *t_out, unsigned f) { - clock_t now, x; - unsigned long s; uint32 ns; + clock_t now; now = clock(); if (now == (clock_t)-1) { debug("error reading standard clock: %s", strerror(errno)); - return; + return (0); } - x = now/CLOCKS_PER_SEC; - if (x > ULONG_MAX) { debug("standard clock out of range"); return; } - - s = x; x = now - CLOCKS_PER_SEC*s; - if (!(NS_PER_S%CLOCKS_PER_SEC)) - ns = x*(NS_PER_S/CLOCKS_PER_SEC); - else if (NS_PER_S <= ULONG_MAX/CLOCKS_PER_SEC) - ns = (x*NS_PER_S)/CLOCKS_PER_SEC; - else - ns = x*((NS_PER_S + 0.0)/CLOCKS_PER_SEC); - ASSIGN64(t_out->s, s); t_out->ns = ns; t_out->f |= BTF_TIMEOK; + t_out->t.clk = now; t_out->f |= BTF_TIMEOK; return (0); +} + +static void clock_diff(struct timer *t, struct bench_timing *delta_inout, + const struct bench_time *t0, + const struct bench_time *t1) +{ + unsigned f = t0->f&t1->f; + + if (f&BTF_TIMEOK) { + delta_inout->t = (t1->t.clk - t0->t.clk)/(double)CLOCKS_PER_SEC; + delta_inout->f |= BTF_TIMEOK; + } } static int clock_init(struct timer *t) { struct bench_time tm; - tm.f = 0; clock_now(&tm, t); if (!tm.f&BTF_TIMEOK) return (-1); + tm.f = 0; clock_now(t, &tm, 0); if (!tm.f&BTF_TIMEOK) return (-1); return (0); } static const struct timer_ops clock_ops = - { "stdc-clock", 0, clock_init, clock_now, null_teardown }; + { "stdc-clock", 0, clock_init, clock_now, clock_diff, null_teardown }; #define CLOCK_CLKENT &clock_ops, @@ -339,8 +617,16 @@ static const struct timer_ops clock_ops = /* Tables of timing sources. 
*/ static const struct timer_ops - *const clktab[] = { GETTIME_CLKENT CLOCK_CLKENT BROKEN_ENT 0 }, - *const cytab[] = { PERFEVENT_CYENT X86RDTSC_CYENT NULL_ENT BROKEN_ENT 0 }; + *const clktab[] = { PERFEVENT_CLKENT + GETTIME_CLKENT + CLOCK_CLKENT + BROKEN_ENT + 0 }, + *const cytab[] = { PERFEVENT_CYENT + X86RDTSC_CYENT + NULL_ENT + BROKEN_ENT + 0 }; static const struct timertab { const char *what; @@ -444,12 +730,27 @@ static void timer_describe(struct bench_timer *tm, dstr *d) } } -static void timer_now(struct bench_timer *tm, struct bench_time *t_out) +static int timer_now(struct bench_timer *tm, + struct bench_time *t_out, unsigned f) { struct timer *t = (struct timer *)tm; unsigned i; - for (i = 0; i < NTIMER; i++) t->ops[i]->now(t_out, t); + t_out->f = 0; + for (i = 0; i < NTIMER; i++) if (t->ops[i]->now(t, t_out, f)) return (-1); + return (0); +} + +static void timer_diff(struct bench_timer *tm, + struct bench_timing *t_out, + const struct bench_time *t0, + const struct bench_time *t1) +{ + struct timer *t = (struct timer *)tm; + unsigned i; + + t_out->f = 0; + for (i = 0; i < NTIMER; i++) t->ops[i]->diff(t, t_out, t0, t1); } static void timer_destroy(struct bench_timer *tm) @@ -464,7 +765,7 @@ static void timer_destroy(struct bench_timer *tm) } static const struct bench_timerops timer_ops = - { timer_describe, timer_now, timer_destroy }; + { timer_describe, timer_now, timer_diff, timer_destroy }; /* --- @bench_createtimer@ --- * * @@ -564,7 +865,7 @@ struct bench_timer *bench_createtimer(const char *config) for (i = 0; i < NTIMER; i++) t->ops[i] = 0; /* Try to set up the subtimers. */ - for (i = 0; i < NTIMER; i++) + for (i = NTIMER; i--; ) if (select_timer(t, i, tmconf[i].p, tmconf[i].sz)) goto end; /* All is done. 
*/ @@ -638,6 +939,48 @@ void bench_destroy(struct bench_state *b) static void do_nothing(unsigned long n, void *ctx) { while (n--) RELAX; } +/* --- @measure@ --- * + * + * Arguments: @struct bench_state *b@ = bench state + * @struct bench_timing *delta_out@ = where to leave the timing + * @bench_fn *fn@ = function to measure + * @void *ctx@ = context for the function + * @double n@ = number of iterations + * + * Returns: --- + * + * Use: Run the function @n@ times, and report how long it took. + * + * This function deals with retrying the measurements if the + * timer reports a temporary failure, and all of the + * difficulties if @n@ is too large to fit in a machine integer. + */ + +static void measure(struct bench_state *b, struct bench_timing *delta_out, + bench_fn *fn, void *ctx, double n) +{ + struct bench_timer *tm = b->tm; + struct bench_time t0, t1; + unsigned long n0, n1; + double R = ULONG_MAX; + + if (n <= R) { + n0 = n; + do { + while (tm->ops->now(tm, &t0, BTF_T0)); + fn(n0, ctx); + } while (tm->ops->now(tm, &t1, BTF_T1)); + } else { + n1 = n/R; n0 = n - n1*R; + do { + while (tm->ops->now(tm, &t0, BTF_T0)); + while (n1--) fn(ULONG_MAX, ctx); + fn(n0, ctx); + } while (tm->ops->now(tm, &t1, BTF_T1)); + } + tm->ops->diff(tm, delta_out, &t0, &t1); +} + /* --- @bench_calibrate@ --- * * * Arguments: @struct bench_state *b@ = bench state @@ -653,14 +996,10 @@ static void do_nothing(unsigned long n, void *ctx) int bench_calibrate(struct bench_state *b) { struct linreg lr_clk = LINREG_INIT, lr_cy = LINREG_INIT; - unsigned long n; - unsigned i; - struct bench_timer *tm = b->tm; - struct bench_time t0, t1; struct bench_timing delta; - double r; + double n, r; bench_fn *fn = LAUNDER(&do_nothing); - unsigned f = BTF_ANY; + unsigned i, f = BTF_ANY; int rc; /* The model here is that a timing loop has a fixed overhead as we enter @@ -673,28 +1012,26 @@ int bench_calibrate(struct bench_state *b) if (b->f&BTF_CLB) return (b->f&BTF_ANY ? 
0 : -1); /* Exercise the inner loop a few times to educate the branch predictor. */ - for (i = 0; i < 10; i++) - { tm->ops->now(tm, &t0); fn(50, 0); tm->ops->now(tm, &t1); } + for (i = 0; i < 50; i++) measure(b, &delta, fn, 0, 10000); /* Now we measure idle loops until they take sufficiently long -- or we run * out of counter. */ debug("calibrating..."); - n = 1; + n = 1.0; for (;;) { /* Measure @n@ iterations of the idle loop. */ - tm->ops->now(tm, &t0); fn(n, 0); tm->ops->now(tm, &t1); - timer_diff(&delta, &t0, &t1); f &= delta.f; + measure(b, &delta, fn, 0, n); f &= delta.f; if (!(f&BTF_TIMEOK)) { rc = -1; goto end; } /* Register the timings with the regression machinery. */ linreg_update(&lr_clk, n, delta.t); if (!(f&BTF_CYOK)) - debug(" n = %10lu; t = %12g s", n, delta.t); + debug(" n = %10.0f; t = %12g s", n, delta.t); else { linreg_update(&lr_cy, n, delta.cy); - debug(" n = %10lu; t = %12g s, cy = %10.0f", n, delta.t, delta.cy); + debug(" n = %10.0f; t = %12g s, cy = %10.0f", n, delta.t, delta.cy); } /* If we're done then stop. */ @@ -702,7 +1039,7 @@ int bench_calibrate(struct bench_state *b) if (n >= ULONG_MAX - n/3) break; /* Update the counter and continue. */ - n += n/3 + 1; + n += n/3.0 + 1.0; } /* Now run the linear regression to extract the constant and per-iteration @@ -744,9 +1081,7 @@ end: int bench_measure(struct bench_state *b, struct bench_timing *t_out, double base, bench_fn *fn, void *ctx) { - struct bench_timer *tm = b->tm; - struct bench_time t0, t1; - unsigned long n, nn; + double n, nn; /* Make sure the state is calibrated and usable. */ if (!(b->f&BTF_CLB) && bench_calibrate(b)) return (-1); @@ -764,16 +1099,18 @@ int bench_measure(struct bench_state *b, struct bench_timing *t_out, * hand, if %$T/t < 1 + 1/n$% then %$t (n + 1)/n > T$%, so just trying * again with %$n' = n + 1$% iterations will very likely work. 
*/ - debug("measuring..."); n = 1; + debug("measuring..."); n = 1.0; for (;;) { - tm->ops->now(tm, &t0); fn(n, ctx); tm->ops->now(tm, &t1); - timer_diff(t_out, &t0, &t1); + measure(b, t_out, fn, ctx, n); t_out->f &= b->f; if (!(t_out->f&BTF_TIMEOK)) return (-1); - if (!(t_out->f&BTF_CYOK)) debug(" n = %10lu; t = %12g", n, t_out->t); - else debug(" n = %10lu; t = %12g, cy = %10.0f", n, t_out->t, t_out->cy); + if (!(t_out->f&BTF_CYOK)) + debug(" n = %10.0f; t = %12g", n, t_out->t); + else + debug(" n = %10.0f; t = %12g, cy = %10.0f", n, t_out->t, t_out->cy); + if (t_out->t >= 0.707*b->target_s) break; nn = n*b->target_s/t_out->t; - if (nn > n) n = nn; + if (n > ULONG_MAX || nn > (unsigned long)n + 1) n = nn; else n++; } diff --git a/test/bench.h b/test/bench.h index 0645068..2484584 100644 --- a/test/bench.h +++ b/test/bench.h @@ -34,6 +34,8 @@ /*----- Header files ------------------------------------------------------*/ +#include + #ifndef MLIB_BITS_H # include "bits.h" #endif @@ -49,7 +51,11 @@ struct bench_time { #define BTF_TIMEOK 1u /* @s@ ad @ns@ slots are value */ #define BTF_CYOK 2u /* @cy@ slot is valid */ #define BTF_ANY (BTF_TIMEOK | BTF_CYOK) /* some part is useful */ - kludge64 s; uint32 ns; /* real time, seconds and nanos */ + union { + struct { kludge64 s; uint32 ns; } ts; /* @struct timespec@-ish */ + clock_t clk; /* @clock@ */ + kludge64 rawns; /* raw nanosecond count */ + } t; /* time */ kludge64 cy; /* count of CPU cycles */ }; @@ -64,8 +70,21 @@ struct bench_timerops { void (*describe)(struct bench_timer */*bt*/, dstr */*d*/); /* Write a description of the timer to @d@. */ - void (*now)(struct bench_timer */*bt*/, struct bench_time */*t_out*/); - /* Fill in @*t_out@ with the current time. v*/ + int (*now)(struct bench_timer */*bt*/, struct bench_time */*t_out*/, + unsigned /*f*/); +#define BTF_T0 0u /* fetching first time of a pair */ +#define BTF_T1 1u /* fetching second time of a pair */ + /* Fill in @*t_out@ with the current time. 
Return zero on success + * %%\emph{or} permanent failure; return %$-1$% on temporary failure. + */ + + void (*diff)(struct bench_timer */*bt*/, + struct bench_timing */*delta_out*/, + const struct bench_time */*t0*/, + const struct bench_time */*t1*/); + /* Subtract the time @t0@ from the time @t1@, leaving the result in + * @*delta_out@, setting flags as appropriate. + */ void (*destroy)(struct bench_timer */*bt*/); /* Release the timer and any resources it holds. */ diff --git a/test/tvec-bench.c b/test/tvec-bench.c index bff3c3a..f61d5e1 100644 --- a/test/tvec-bench.c +++ b/test/tvec-bench.c @@ -287,7 +287,7 @@ static int setvar(struct tvec_state *tv, const char *var, struct tvec_benchctx *bc = ctx; if (STRCMP(var, ==, "@target")) { - if (bc->f&TVBF_SETTRG) return (tvec_dupreg(tv, var)); + if (bc->f&TVBF_SETTRG) return (tvec_dupregerr(tv, var)); bc->bst->target_s = rv->f; bc->f |= TVBF_SETTRG; } else assert("unknown var"); return (0); @@ -385,6 +385,7 @@ void tvec_benchrun(struct tvec_state *tv, tvec_testfn *fn, void *ctx) rd->ty->dump(&TVEC_REG(tv, in, rd->i)->v, rd, TVSF_COMPACT, &dstr_printops, &d); } + DPUTZ(&d); /* Run the benchmark. */ o->ops->bbench(o, d.buf, unit); diff --git a/test/tvec-core.c b/test/tvec-core.c index e4f9f84..fc1b413 100644 --- a/test/tvec-core.c +++ b/test/tvec-core.c @@ -318,7 +318,7 @@ int tvec_syntax_v(struct tvec_state *tv, int ch, dstr_destroy(&d); return (-1); } -/* --- @tvec_unkreg@ --- * +/* --- @tvec_unkregerr@ --- * * * Arguments: @struct tvec_state *tv@ = test-vector state * @const char *name@ = register or pseudoregister name @@ -329,13 +329,13 @@ int tvec_syntax_v(struct tvec_state *tv, int ch, * unrecognized. 
*/ -int tvec_unkreg(struct tvec_state *tv, const char *name) +int tvec_unkregerr(struct tvec_state *tv, const char *name) { return (tvec_error(tv, "unknown special register `%s' for test `%s'", name, tv->test->name)); } -/* --- @tvec_dupreg@ --- * +/* --- @tvec_dupregerr@ --- * * * Arguments: @struct tvec_state *tv@ = test-vector state * @const char *name@ = register or pseudoregister name @@ -346,7 +346,7 @@ int tvec_unkreg(struct tvec_state *tv, const char *name) * assigned already in the current test. */ -int tvec_dupreg(struct tvec_state *tv, const char *name) +int tvec_dupregerr(struct tvec_state *tv, const char *name) { return (tvec_error(tv, "register `%s' is already set", name)); } /* --- @tvec_skipspc@ --- * @@ -956,7 +956,7 @@ static int core_setvar(struct tvec_state *tv, const char *name, struct groupstate *g = ctx; if (STRCMP(name, ==, "@outcome")) { - if (g->f&GRPF_SETOUTC) return (tvec_dupreg(tv, name)); + if (g->f&GRPF_SETOUTC) return (tvec_dupregerr(tv, name)); if (rv->u == XFAIL) tvec_xfail(tv); g->f |= GRPF_SETOUTC; } else assert(!"unknown var"); @@ -1113,7 +1113,7 @@ int tvec_read(struct tvec_state *tv, const char *infile, FILE *fp) vd = env->findvar(tv, d.buf, &varctx, g.ctx); if (vd) goto found_var; } - tvec_unkreg(tv, d.buf); goto flush_line; + tvec_unkregerr(tv, d.buf); goto flush_line; found_var: rd = &vd->def; } else { @@ -1130,7 +1130,7 @@ int tvec_read(struct tvec_state *tv, const char *infile, FILE *fp) /* Complain if the register is already set. */ r = TVEC_REG(tv, in, rd->i); if (r->f&TVRF_SEEN) - { tvec_dupreg(tv, rd->name); goto flush_line; } + { tvec_dupregerr(tv, rd->name); goto flush_line; } } /* If there's no test, then report an error. 
Set the muffle flag, diff --git a/test/tvec-env.3.in b/test/tvec-env.3.in index eb71889..8508bd6 100644 --- a/test/tvec-env.3.in +++ b/test/tvec-env.3.in @@ -49,8 +49,8 @@ .\" @tvec_report_v .\" @tvec_error .\" @tvec_notice -.\" @tvec_unkreg -.\" @tvec_dupreg +.\" @tvec_unkregerr +.\" @tvec_dupregerr . .\" @tvec_serialize .\" @tvec_deserialize @@ -163,8 +163,8 @@ tvec-env \- test vector framework environments .BI " const char *" msg ", va_list *" ap ); .BI "int tvec_error(struct tvec_state *" tv ", const char *" msg ", ...);" .BI "void tvec_notice(struct tvec_state *" tv ", const char *" msg ", ...);" -.BI "int tvec_unkreg(struct tvec_state *" tv ", const char *" name ); -.BI "int tvec_dupreg(struct tvec_state *" tv ", const char *" name ); +.BI "int tvec_unkregerr(struct tvec_state *" tv ", const char *" name ); +.BI "int tvec_dupregerr(struct tvec_state *" tv ", const char *" name ); .PP .ta \w'\fBint tvec_serialize('u .BI "int tvec_serialize(const struct tvec_reg *" rv ", buf *" b , diff --git a/test/tvec-remote.c b/test/tvec-remote.c index 3c25282..3087d1b 100644 --- a/test/tvec-remote.c +++ b/test/tvec-remote.c @@ -700,7 +700,7 @@ int tvec_remoteserver(int infd, int outfd, const struct tvec_config *config) vd = env->findvar(&srvtv, d.buf, &varctx, ctx); if (vd) goto found_var; } - rc = tvec_unkreg(&srvtv, d.buf); goto setvar_end; + rc = tvec_unkregerr(&srvtv, d.buf); goto setvar_end; found_var: /* Set up the register. 
*/ @@ -1929,15 +1929,15 @@ static int setvar_local(struct tvec_state *tv, const char *var, struct tvec_remotectx *r = ctx; if (STRCMP(var, ==, "@exit")) { - if (r->rc.f&TVRF_SETEXIT) return (tvec_dupreg(tv, var)); + if (r->rc.f&TVRF_SETEXIT) return (tvec_dupregerr(tv, var)); r->exwant = rv->u; r->rc.f |= TVRF_SETEXIT; return (0); } else if (STRCMP(var, ==, "@progress")) { - if (r->rc.f&TVRF_SETPRG) return (tvec_dupreg(tv, var)); + if (r->rc.f&TVRF_SETPRG) return (tvec_dupregerr(tv, var)); DRESET(&r->prgwant); DPUTM(&r->prgwant, rv->text.p, rv->text.sz); DPUTZ(&r->prgwant); r->rc.f |= TVRF_SETPRG; return (0); } else if (STRCMP(var, ==, "@reconnect")) { - if (r->rc.f&TVRF_SETRCN) return (tvec_dupreg(tv, var)); + if (r->rc.f&TVRF_SETRCN) return (tvec_dupregerr(tv, var)); r->rc.f = (r->rc.f&~TVRF_RCNMASK) | (rv->u&TVRF_RCNMASK) | TVRF_SETRCN; return (0); } else assert(!"unknown var"); diff --git a/test/tvec-timeout.c b/test/tvec-timeout.c index f2c9208..f1b0b0c 100644 --- a/test/tvec-timeout.c +++ b/test/tvec-timeout.c @@ -107,10 +107,10 @@ static int setvar(struct tvec_state *tv, const char *var, struct tvec_timeoutctx *tc = ctx; if (STRCMP(var, ==, "@timeout")) { - if (tc->f&TVTF_SETTMO) return (tvec_dupreg(tv, var)); + if (tc->f&TVTF_SETTMO) return (tvec_dupregerr(tv, var)); tc->t = rv->f; tc->f |= TVTF_SETTMO; } else if (STRCMP(var, ==, "@timer")) { - if (tc->f&TVTF_SETTMR) return (tvec_dupreg(tv, var)); + if (tc->f&TVTF_SETTMR) return (tvec_dupregerr(tv, var)); tc->timer = rv->i; tc->f |= TVTF_SETTMR; } else assert(!"unknown var"); return (0); diff --git a/test/tvec.3.in b/test/tvec.3.in index bc254d2..90e7781 100644 --- a/test/tvec.3.in +++ b/test/tvec.3.in @@ -301,3 +301,15 @@ the corresponding .I value is stored in the named register. .PP +A test environment fits in between +the framework and the test function. +It can establish hook functions which are called +at various stages during the test group. 
+.hP \*o +The +.I setup +hook is called once at the start of the test group. +.hP \*o +The +.I teardown +hook is called once at the end of the test group. diff --git a/test/tvec.h b/test/tvec.h index 3a18fc5..f7512bd 100644 --- a/test/tvec.h +++ b/test/tvec.h @@ -1204,8 +1204,8 @@ extern tvec_envteardownfn tvec_benchteardown; * Returns: --- * * Use: Formats a report about the benchmark performance. This - * function is intended to be called on by an output - * @ebench@ function. + * function is intended to be called on by an output @ebench@ + * function. */ extern void tvec_benchreport @@ -1493,7 +1493,7 @@ extern PRINTF_LIKE(2, 3) extern PRINTF_LIKE(2, 3) void tvec_notice(struct tvec_state */*tv*/, const char */*msg*/, ...); -/* --- @tvec_unkreg@ --- * +/* --- @tvec_unkregerr@ --- * * * Arguments: @struct tvec_state *tv@ = test-vector state * @const char *name@ = register or pseudoregister name @@ -1504,9 +1504,9 @@ extern PRINTF_LIKE(2, 3) * unrecognized. */ -extern int tvec_unkreg(struct tvec_state */*tv*/, const char */*name*/); +extern int tvec_unkregerr(struct tvec_state */*tv*/, const char */*name*/); -/* --- @tvec_dupreg@ --- * +/* --- @tvec_dupregerr@ --- * * * Arguments: @struct tvec_state *tv@ = test-vector state * @const char *name@ = register or pseudoregister name @@ -1517,7 +1517,7 @@ extern int tvec_unkreg(struct tvec_state */*tv*/, const char */*name*/); * assigned already in the current test. */ -extern int tvec_dupreg(struct tvec_state */*tv*/, const char */*name*/); +extern int tvec_dupregerr(struct tvec_state */*tv*/, const char */*name*/); /* --- @tvec_humanoutput@ --- * * @@ -1545,16 +1545,17 @@ extern struct tvec_output *tvec_humanoutput(FILE */*fp*/); * (`Test Anything Protocol') format. * * TAP comes from the Perl community, but has spread rather - * further. This driver produces TAP version 14, but pretends - * to be version 13. 
The driver produces a TAP `test point' -- - * i.e., a result reported as `ok' or `not ok' -- for each input - * test group. Failure reports and register dumps are produced - * as diagnostic messages before the final group result. (TAP - * permits structuerd YAML data after the test-point result, - * which could be used to report details, but (a) postponing the - * details until after the report is inconvenient, and (b) there - * is no standardization for the YAML anyway, so in practice - * it's no more useful than the unstructured diagnostics. + * further. This driver currently produces TAP version 14, but + * pretends to be version 13. The driver produces a TAP `test + * point' -- i.e., a result reported as `ok' or `not ok' -- for + * each input test group. Failure reports and register dumps + * are produced as diagnostic messages before the final group + * result. (TAP permits structured YAML data after the + * test-point result, which could be used to report details, but + * (a) postponing the details until after the report is + * inconvenient, and (b) there is no standardization for the + * YAML anyway, so in practice it's no more useful than the + * unstructured diagnostics.) */ extern struct tvec_output *tvec_tapoutput(FILE */*fp*/); diff --git a/utils/macros.3.in b/utils/macros.3.in index 6bb18d4..91052c2 100644 --- a/utils/macros.3.in +++ b/utils/macros.3.in @@ -33,6 +33,8 @@ .\" @STR .\" @GLUE .\" @STATIC_ASSERT +.\" @COMMA +. .\" @ISALNUM .\" @ISALPHA .\" @ISASCII @@ -49,18 +51,24 @@ .\" @TOASCII .\" @TOLOWER .\" @TOUPPER +. .\" @MEMCMP .\" @STRCMP .\" @STRNCMP +. .\" @DISCARD .\" @IGNORE +.\" @LAUNDER +.\" @RELAX +. .\" @DEPRECATED -.\" @EXECL_LIKE .\" @IGNORABLE .\" @MUST_CHECK .\" @NORETURN .\" @PRINTF_LIKE .\" @SCANF_LIKE +.\" @EXECL_LIKE +. .\" @MUFFLE_WARNINGS_DECL .\" @MUFFLE_WARNINGS_EXPR .\" @MUFFLE_WARNINGS_STMT @@ -147,7 +155,7 @@ preprocessing token. 
.PP The .B STATIC_ASSERT -causes compilation to fail if the integer constant expression +macro causes compilation to fail if the integer constant expression .I cond evaluates to zero. This macro uses the C11 .B static_assert @@ -158,6 +166,13 @@ falls back to a somewhat ugly hack which currently ignores the .IR msg . .PP The +.B COMMA +macro expands to a comma +.BR ` , ', +which is useful for smuggling commas into macro arguments +if they can't be protected by parentheses. +.PP +The .BR IS ...\& and .BR TO ...\& @@ -201,6 +216,17 @@ The .B IGNORE macro ignores its argument, which may be an expression of any type. This can be useful in muffling warnings about unused variables. +.PP +The +.B LAUNDER +macro tries to confuse a compiler so that it `forgets' what it knows +about a particular value. This is most useful in benchmarking or +similar applications. +.PP +The +.B RELAX +macro tries to do nothing, but in a way that a compiler won't optimize +away. . .SS Annotations The following annotations can be attached to function declarations and diff --git a/utils/macros.h b/utils/macros.h index 95d0ddb..dba22a7 100644 --- a/utils/macros.h +++ b/utils/macros.h @@ -42,14 +42,47 @@ /*----- Miscellaneous utility macros --------------------------------------*/ +/* --- @N@ --- * + * + * Arguments: @type v[]@ = an actual array, not a pointer + * + * Returns: The number of elements in @v@. + */ + #define N(v) (sizeof(v)/sizeof(*(v))) +/* --- @STR@ --- * + * + * Arguments: @x@ = some tokens + * + * Returns: A string literal containing the macro-expanded text of @x@. + */ + #define MLIB__STR(x) #x #define STR(x) MLIB__STR(x) +/* --- @GLUE@ --- * + * + * Arguments: @x, y@ = two sequences of tokens + * + * Returns: A single token formed by gluing together the macro-expansions + * of @x@ and @y@. 
+ */ + #define MLIB__GLUE(x, y) x##y #define GLUE(x, y) MLIB__GLUE(x, y) +/* --- @STATIC_ASSERT@ --- * + * + * Arguments: @int cond@ = a condition + * @msg@ = a string literal message + * + * Returns: --- + * + * Use: Fail at compile time unless @cond@ is nonzero. The failure + * might report @msg@. + */ + #ifdef static_assert # define STATIC_ASSERT(cond, msg) static_assert(!!(cond), msg) #else @@ -57,10 +90,31 @@ IGNORABLE extern char static_assert_failed[2*!!(cond) - 1] #endif +/* --- @COMMA@ --- * + * + * Arguments: --- + * + * Returns: A `%|,|%' token, which can be usefully passed to macros to + * avoid argument splitting. + */ + #define COMMA , /*----- String and character hacks ----------------------------------------*/ +/* --- @IS...@ --- * + * + * Arguments: @int ch@ = a character code, but not @EOF@ + * + * Returns: Nonzero if @ch@ is in the relevant @<ctype.h>@ category. + * + * Use: Classifies characters, but safely even if characters are + * signed. + * + * There is a macro for each of the @<ctype.h>@ @is...@ + * functions. + */ + #define CTYPE_HACK(func, ch) (func((unsigned char)(ch))) #define ISALNUM(ch) CTYPE_HACK(isalnum, ch) @@ -77,17 +131,47 @@ #define ISUPPER(ch) CTYPE_HACK(isupper, ch) #define ISXDIGIT(ch) CTYPE_HACK(isxdigit, ch) +/* --- @TO...@ --- * + * + * Arguments: @int ch@ = a character code, but not @EOF@ + * + * Returns: The converted character code. + * + * Use: Converts characters, but safely even if characters are + * signed. + * + * There is a macro for each of the @<ctype.h>@ @to...@ + * functions. + */ + #define TOASCII(ch) CTYPE_HACK(toascii, ch) #define TOLOWER(ch) CTYPE_HACK(tolower, ch) #define TOUPPER(ch) CTYPE_HACK(toupper, ch) +/* --- @MEMCMP@, @STRCMP@, @STRNCMP@ --- * + * + * Arguments: @const type *x, *y@ = pointers to strings + * @op@ = a relational operator symbol + * @size_t n@ = length of the strings + * + * Returns: Nonzero if the relationship between the strings satisfies the + * operator @op@, otherwise zero. 
+ * + * Use: These macros mitigate the author's frequent error of failing + * to compare the result of the underlying standard functions + * against zero, effectively reversing the sense of an intended + * test for equality. + */ + #define MEMCMP(x, op, y, n) (memcmp((x), (y), (n)) op 0) #define STRCMP(x, op, y) (strcmp((x), (y)) op 0) #define STRNCMP(x, op, y, n) (strncmp((x), (y), (n)) op 0) -/*----- Compiler diagnostics ----------------------------------------------*/ +/*----- Compiler-specific definitions -------------------------------------*/ -/* --- Compiler-specific definitions --- */ +/* The descriptions of these are given below, with the fallback + * definitions. + */ #if GCC_VERSION_P(2, 5) || CLANG_VERSION_P(3, 3) # define NORETURN __attribute__((__noreturn__)) @@ -193,40 +277,84 @@ /* --- Fallback definitions, mostly trivial --- */ -#ifndef DEPRECATED -# define DEPRECATED(msg) -#endif - -#ifndef EXECL_LIKE -# define EXECL_LIKE(ntrail) -#endif +/* --- @DISCARD@ --- * + * + * Arguments: @x@ = a function call + * + * Returns: --- + * + * Use: Explicitly discard the result of @x@. This counteracts a + * @MUST_CHECK@ attribute on the called function. + */ #ifndef DISCARD # define DISCARD(x) do if (x); while (0) #endif +/* --- @IGNORE@ --- * + * + * Arguments: @x@ = any expression + * + * Returns: --- + * + * Use: Ignore the value of @x@, overriding compiler warnings. + */ + #ifndef IGNORE # define IGNORE(x) ((void)(x)) #endif -#ifndef MUFFLE_WARNINGS_DECL -# define MUFFLE_WARNINGS_DECL(warns, body) body -#endif +/* --- @LAUNDER@ --- * + * + * Arguments: @x@ = some integer expression + * + * Returns: @x@. + * + * Use: Causes a compiler to know nothing about the value of @x@, + * even if it looks obvious, e.g., it's a constant. 
+ */ -#ifndef MUFFLE_WARNINGS_EXPR -# define MUFFLE_WARNINGS_EXPR(warns, body) (body) +#ifndef LAUNDER +# define LAUNDER(x) (x) #endif -#ifndef MUFFLE_WARNINGS_STMT -# define MUFFLE_WARNINGS_STMT(warns, body) do { body } while (0) -#endif +/* --- @RELAX@ --- * + * + * Arguments: --- + * + * Returns: --- + * + * Use: Does nothing, but the compiler doesn't know that. + */ -#ifndef PRINTF_LIKE -# define PRINF_LIKE(fmtix, argix) +#ifndef RELAX +# define RELAX #endif -#ifndef SCANF_LIKE -# define SCANF_LIKE(fmtix, argix) +/* --- @DEPRECATED@, @NORETURN@, @IGNORABLE@, @MUST_CHECK@ --- * + * + * Use: These are (mostly) function attributes; write them among the + * declaration specifiers for a function definition or + * declaration. These may not do anything, but the intended + * behaviour is as follows. + * + * * @DEPRECATED(msg)@ -- report a warning, quoting the string + * literal @msg@, if the function is called. + * + * * @NORETURN@ -- promise that the function doesn't return to + * its caller: either it kills the process, or it performs + * some nonlocal transfer. + * + * * @IGNORABLE@ -- the item (which might be data rather than + * a function) might not be referred to, but that's OK: + * don't warn about it. + * + * * @MUST_CHECK@ -- warn if the return value of a function is + * ignored. Use @DISCARD@ if you really don't care. + */ + +#ifndef DEPRECATED +# define DEPRECATED(msg) #endif #ifndef NORETURN @@ -241,18 +369,94 @@ # define MUST_CHECK #endif -#ifndef LAUNDER -# define LAUNDER +/* --- @PRINTF_LIKE@, @SCANF_LIKE@, @EXECL_LIKE@ --- * + * + * Arguments: @int fmtix@ = format string argument index (starting from 1) + * @int argix@ = variable format argument tail index (starting + * from 1) + * @int ntrail@ = number of arguments following terminator + * + * Use: These are function attributes. Again, they might not do + * anything at all. 
By intention, they give the compiler + * information about a variadic function's arguments, so that it + * can warn about misuse. + * + * * @PRINTF_LIKE@ -- the function takes a @printf@-style + * format string as argument @fmtix@ and an argument tail + * (which may be empty) beginning with argument @argix@. + * + * * @SCANF_LIKE@ -- the function takes a @scanf@-style + * format string as argument @fmtix@ and an argument tail + * (which may be empty) beginning with argument @argix@. + * + * * @EXECL_LIKE@ -- the function takes a sequence of pointer + * arguments terminated by a null pointer, followed by + * @ntrail@ further arguments. + */ + +#ifndef PRINTF_LIKE +# define PRINTF_LIKE(fmtix, argix) #endif -#ifndef RELAX -# define RELAX +#ifndef SCANF_LIKE +# define SCANF_LIKE(fmtix, argix) #endif +#ifndef EXECL_LIKE +# define EXECL_LIKE(ntrail) +#endif + +/* --- @MUFFLE_WARNINGS_...@ --- * + * + * Arguments: @warns@ = a sequence of @..._WARNING@ calls (see below) + * @body@ = some program text + * + * Use: Muffle specific warnings within the program text. + * + * For @MUFFLE_WARNINGS_DECL@, the program text is a + * declaration; for @MUFFLE_WARNINGS_EXPR@, it is an expression, + * and for @MUFFLE_WARNINGS_STMT@, it is a statement. + * + * The warnings to be muffled are given as a list of + * @..._WARNING@ macros, with no separators. The list can + * list warnings from multiple different compilers: entries for + * irrelevant compilers will be ignored. + */ + +#ifndef MUFFLE_WARNINGS_DECL +# define MUFFLE_WARNINGS_DECL(warns, body) body +#endif + +#ifndef MUFFLE_WARNINGS_EXPR +# define MUFFLE_WARNINGS_EXPR(warns, body) (body) +#endif + +#ifndef MUFFLE_WARNINGS_STMT +# define MUFFLE_WARNINGS_STMT(warns, body) do { body } while (0) +#endif + +/* --- @GCC_WARNING@ --- * + * + * Arguments: @warn@ = a string literal naming a warning, with `%|-W...|%' + * prefix + * + * Use: Names a GCC warning: use within @MUFFLE_WARNINGS_...@. 
+ * + * Note that GCC's warning suppression is very buggy. + */ + #ifndef GCC_WARNING # define GCC_WARNING(warn) #endif +/* --- @CLANG_WARNING@ --- * + * + * Arguments: @warn@ = a string literal naming a warning, with `%|-W...|%' + * prefix + * + * Use: Names a Clang warning: use within @MUFFLE_WARNINGS_...@. + */ + #ifndef CLANG_WARNING # define CLANG_WARNING(warn) #endif -- 2.11.0