#include <sys/time.h>
#include <unistd.h>
+#ifdef HAVE_LINUX_PERF_EVENT_H
+# include <linux/perf_event.h>
+# include <asm/unistd.h>
+#endif
+
#include <mLib/alloc.h>
+#include <mLib/bits.h>
#include <mLib/dstr.h>
#include <mLib/mdwopt.h>
#include <mLib/quis.h>
typedef struct opts {
const char *name; /* Pre-configured named thing */
+ const char *opwhat; /* What to call operations */
unsigned fbits; /* Field size bits */
unsigned gbits; /* Group size bits */
unsigned n; /* Number of factors */
unsigned i; /* Number of intervals (or zero) */
unsigned k; /* Main loop batch size */
+ unsigned long sc; /* Scale factor */
double t; /* Time for each interval (secs) */
mp *e; /* Public exponent */
unsigned f; /* Flags */
xfree(k);
c->sz = o->gbits ? o->gbits : 65536;
c->n = o->n ? o->n : 16;
+ o->opwhat = "byte"; o->sc = c->n*c->sz;
c->m = xmalloc(c->sz);
return (c);
}
die(1, "hash function `%s' not known", o->name);
c->sz = o->gbits ? o->gbits : 65536;
c->n = o->n ? o->n : 16;
+ o->opwhat = "byte"; o->sc = c->n*c->sz;
c->m = xmalloc(c->sz);
return (c);
}
rand_get(RAND_GLOBAL, c->s, sizeof(c->s));
c->sz = o->gbits ? o->gbits : 65536;
c->n = o->n ? o->n : 16;
+ o->opwhat = "byte"; o->sc = c->n*c->sz;
c->m = xmalloc(c->sz);
return (c);
}
{ 0, 0, 0 }
};
+/*----- Cycle counting ----------------------------------------------------*/
+
+typedef kludge64 cycles;             /* A reading from the cycle counter */
+static int cyclecount_active_p = 0;  /* Nonzero if cycle counts are usable */
+
+#if defined(__GNUC__) && (CPUFAM_X86 || CPUFAM_AMD64)
+
+/* Prepare the cycle counter for use.  This variant has nothing to set up:
+ * it simply marks cycle counting as active.
+ */
+static void init_cyclecount(void)
+{
+  cyclecount_active_p = 1;
+}
+
+/* Read the processor's time-stamp counter and return it as a 64-bit cycle
+ * count.  The `rdtsc' instruction delivers the low half of the counter in
+ * EAX and the high half in EDX; the two halves are combined with SET64.
+ *
+ * The asm statement must be volatile.  It has outputs but no inputs, so
+ * without the qualifier the compiler is entitled to assume that every
+ * execution produces the same value, and may merge or hoist the two
+ * readings which bracket a measurement interval -- yielding a difference
+ * of zero.
+ */
+static cycles cyclecount(void)
+{
+  uint32 lo, hi;
+  kludge64 cy;
+
+  __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
+  SET64(cy, hi, lo);
+  return (cy);
+}
+
+#elif defined(HAVE_LINUX_PERF_EVENT_H) && defined(HAVE_UINT64)
+
+static int perf_fd = -1;  /* Perf-event descriptor, or -1 if none is open */
+
+/* Open a hardware CPU-cycles perf event and remember its descriptor in
+ * `perf_fd'.  On success, cycle counting is marked active; on failure a
+ * warning is issued and cycle counting is left inactive.
+ */
+static void init_cyclecount(void)
+{
+  struct perf_event_attr attr = { 0 };
+  int fd;
+
+  /* Describe the event: hardware cycle counter, running from the outset,
+   * with kernel and hypervisor time excluded.
+   */
+  attr.size = sizeof(attr);
+  attr.type = PERF_TYPE_HARDWARE;
+  attr.config = PERF_COUNT_HW_CPU_CYCLES;
+  attr.exclude_hv = 1;
+  attr.exclude_kernel = 1;
+  attr.disabled = 0;
+
+  /* Arguments after the attributes are pid 0, cpu -1, group_fd -1 and no
+   * flags.
+   */
+  fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+  perf_fd = fd;
+  if (fd < 0) {
+    moan("failed to open perf event: %s", strerror(errno));
+    return;
+  }
+  cyclecount_active_p = 1;
+}
+
+/* Return the current value of the perf-event cycle counter.
+ *
+ * If cycle counting is inactive, the result is zero.  If the counter can't
+ * be read in full, a warning is issued, the descriptor is closed, cycle
+ * counting is switched off, and the result is zero.
+ */
+static cycles cyclecount(void)
+{
+  kludge64 cy;
+  ssize_t got;
+
+  if (cyclecount_active_p) {
+    got = read(perf_fd, &cy.i, sizeof(cy.i));
+    if (got == sizeof(cy.i))
+      return (cy);
+
+    /* The read failed or came up short: report it and give up on the
+     * counter.
+     */
+    if (got < 0)
+      moan("error reading perf event: %s", strerror(errno));
+    else
+      moan("unexpected short read from perf event");
+    cyclecount_active_p = 0;
+    close(perf_fd);
+    perf_fd = -1;
+  }
+
+  /* No usable reading: hand back zero. */
+  SET64(cy, 0, 0);
+  return (cy);
+}
+
+#else
+
+/* Fallback when no cycle counter is supported: mark cycle counting as
+ * inactive.
+ */
+static void init_cyclecount(void)
+{
+  cyclecount_active_p = 0;
+}
+/* Fallback when no cycle counter is supported: the reading is always zero. */
+static cycles cyclecount(void)
+{
+  kludge64 zero;
+
+  SET64(zero, 0, 0);
+  return (zero);
+}
+
+#endif
+
/*----- Main code ---------------------------------------------------------*/
void version(FILE *fp)
opts o = { 0 };
const jobops *j;
struct timeval tv_next, tv_now;
- double t, ttot;
+ double t, ttot, cy, cytot;
unsigned n, k;
unsigned long ii;
clock_t c0, c1;
+ kludge64 cy0, cy1, cydiff;
double itot;
void *p;
ego(argv[0]);
- o.t = 1;
+ o.t = 1; o.k = 1; o.sc = 1; o.opwhat = "op";
for (;;) {
static const struct option opts[] = {
{ "help", 0, 0, 'h' },
p = j->init(&o);
n = 0;
- ttot = itot = 0;
+ ttot = itot = 0; cytot = 0; init_cyclecount();
gettimeofday(&tv_now, 0);
do {
tv_addl(&tv_next, &tv_now, o.t, fmod(o.t * MILLION, MILLION));
ii = 0;
- c0 = clock();
+ c0 = clock(); cy0 = cyclecount();
do {
for (k = 0; k < o.k; k++) { j->run(p); }
ii += k;
gettimeofday(&tv_now, 0);
} while (TV_CMP(&tv_now, <, &tv_next));
- c1 = clock();
- printf("%5u: did = %5lu; /sec = %5f; avg /sec = %5f\n",
+ cy1 = cyclecount(); c1 = clock();
t = (double)(c1 - c0)/CLOCKS_PER_SEC;
itot += ii; ttot += t;
+ printf("%5u: did = %5lu; /sec = %5f; avg /sec = %5f",
n, ii, ii/t, itot/ttot);
+ if (cyclecount_active_p) {
+ SUB64(cydiff, cy1, cy0); cy = LO64(cydiff) + ldexp(HI64(cydiff), 32);
+ cytot += cy;
+ printf(" (cy/%s = %3f; avg cy/%s = %3f)",
+ o.opwhat, cy/ii/o.sc, o.opwhat, cytot/itot/o.sc);
+ }
+ putchar('\n');
fflush(stdout);
n++;
} while (!o.i || n < o.i);