From 08e2be29746248898204da1ff23e0629ec29b644 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Mon, 18 May 2015 23:21:02 +0100 Subject: [PATCH] configure.ac, base/dispatch.[ch]: CPU-specific implementations. We now have the capability for a function to have multiple CPU-specific implementations, and to choose the most appropriate one at runtime. The new `cpu_feature_p' function doesn't understand much in the way of features yet, but is ready to grow later. --- base/Makefile.am | 4 ++ base/dispatch.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ base/dispatch.h | 162 +++++++++++++++++++++++++++++++++++++++++++++++++ configure.ac | 21 +++++++ 4 files changed, 367 insertions(+) create mode 100644 base/dispatch.c create mode 100644 base/dispatch.h diff --git a/base/Makefile.am b/base/Makefile.am index 35c86ff5..c8608ed4 100644 --- a/base/Makefile.am +++ b/base/Makefile.am @@ -40,6 +40,10 @@ libbase_la_SOURCES += arena.c pkginclude_HEADERS += ct.h libbase_la_SOURCES += ct.c +## CPU-specific dispatch. +pkginclude_HEADERS += dispatch.h +libbase_la_SOURCES += dispatch.c + ## Acceptable key-size descriptions. pkginclude_HEADERS += keysz.h libbase_la_SOURCES += keysz.c keysz-conv.c diff --git a/base/dispatch.c b/base/dispatch.c new file mode 100644 index 00000000..08c189cd --- /dev/null +++ b/base/dispatch.c @@ -0,0 +1,180 @@ +/* -*-c-*- + * + * CPU-specific dispatch + * + * (c) 2015 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include "config.h" + +#include +#include +#include + +#include + +#include "dispatch.h" + +/*----- Main code ---------------------------------------------------------*/ + +#ifdef CPUFAM_X86 + +#define EFLAGS_ID (1u << 21) +#define CPUID1D_SSE2 (1u << 26) + +struct cpuid { unsigned a, b, c, d; }; + +/* --- @cpuid@ --- * + * + * Arguments: @struct cpuid *cc@ = where to write the result + * @unsigned a, c@ = EAX and ECX registers to set + * + * Returns: --- + * + * Use: Minimal C wrapper around the x86 `CPUID' instruction. Checks + * that the instruction is actually available before invoking + * it; fills the output structure with zero if it's not going to + * work. + */ + +#ifdef __GNUC__ +static __inline__ unsigned getflags(void) + { unsigned f; __asm__ ("pushf; popl %0" : "=g" (f)); return (f); } +static __inline__ unsigned setflags(unsigned f) +{ + unsigned ff; + __asm__ ("pushf; pushl %1; popf; pushf; popl %0; popf" + : "=g" (ff) + : "g" (f)); + return (ff); +} +#endif + +static void cpuid(struct cpuid *cc, unsigned a, unsigned c) +{ +#ifdef __GNUC__ + unsigned f; +#endif + + cc->a = cc->b = cc->c = cc->d = 0; + +#ifdef __GNUC__ + /* Stupid dance to detect whether the CPUID instruction is available. */ + f = getflags(); + if (!(setflags(f | EFLAGS_ID) & EFLAGS_ID)) return; + if ( setflags(f & ~EFLAGS_ID) & EFLAGS_ID ) return; + setflags(f); + + /* Alas, EBX is magical in PIC code, so abuse ESI instead. 
This isn't + * pretty, but it works. + */ + __asm__ ("pushl %%ebx; cpuid; movl %%ebx, %%esi; popl %%ebx" + : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d) + : "a" (a) , "c" (c)); +#endif +} + +static unsigned cpuid_maxleaf(void) + { struct cpuid c; cpuid(&c, 0, 0); return (c.a); } + +static int cpuid_features_p(unsigned dbits, unsigned cbits) +{ + struct cpuid c; + if (cpuid_maxleaf() < 1) return (0); + cpuid(&c, 1, 0); + return ((c.d & dbits) == dbits && (c.c & cbits) == cbits); +} + +#endif + +/* --- @check_env@ --- * + * + * Arguments: @const char *ftok@ = feature token + * + * Returns: Zero if the feature is forced off; positive if it's forced + * on; negative if the user hasn't decided. + * + * Use: Checks the environment variable `CATACOMB_CPUFEAT' for the + * feature token @ftok@. The variable, if it exists, should be + * a space-separated sequence of `+tok' and `-tok' items. These + * tokens may end in `*', which matches any suffix. + */ + +static int IGNORABLE check_env(const char *ftok) +{ + const char *p, *q, *pp; + int d; + + p = getenv("CATACOMB_CPUFEAT"); + if (!p) return (-1); + + for (;;) { + while (isspace((unsigned char)*p)) p++; + if (!*p) return (-1); + switch (*p) { + case '+': d = +1; p++; break; + case '-': d = 0; p++; break; + default: d = -1; break; + } + for (q = p; *q && !isspace((unsigned char)*q); q++); + if (d >= 0) { + for (pp = ftok; p < q && *pp && *p == *pp; p++, pp++); + if ((p == q && !*pp) || (*p == '*' && p + 1 == q)) return (d); + } + p = q; + } + return (-1); +} + +/* --- @cpu_feature_p@ --- * + * + * Arguments: @int feat@ = a @CPUFEAT_...@ code + * + * Returns: Nonzero if the feature is available. 
+ */ + +#include + +int cpu_feature_p(int feat) +{ + int IGNORABLE f; + IGNORE(f); +#define CHECK_ENV(ftok) \ + do { if ((f = check_env(ftok)) >= 0) return (f); } while (0) + + switch (feat) { +#ifdef CPUFAM_X86 + case CPUFEAT_X86_SSE2: { + CHECK_ENV("x86:sse2"); + return (cpuid_features_p(CPUID1D_SSE2, 0)); + } +#endif + default: + return (0); + } +#undef CHECK_ENV +} + +/*----- That's all, folks -------------------------------------------------*/ diff --git a/base/dispatch.h b/base/dispatch.h new file mode 100644 index 00000000..bcf9a13d --- /dev/null +++ b/base/dispatch.h @@ -0,0 +1,162 @@ +/* -*-c-*- + * + * CPU-specific dispatch + * + * (c) 2015 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +#ifndef CATACOMB_DISPATCH_H +#define CATACOMB_DISPATCH_H + +#ifdef __cplusplus + extern "C" { +#endif + +/*----- Header files ------------------------------------------------------*/ + +#include + +/*----- Macros ------------------------------------------------------------*/ + +/* --- Atomic data access machinery --- * + * + * If they're available, use GCC's `__atomic_*' intrinsics. 
If that doesn't + * work and we're using one of a small number of processors I'm sure won't + * mind, then just stick with simple memory access. Otherwise turn + * dispatching off, because it probably isn't thread-safe. + */ + +#if GCC_VERSION_P(4, 7) +# define CPU_DISPATCH_P 1 +# define DISPATCH_LOAD(g, v) \ + ((v) = __atomic_load_n(&(g), __ATOMIC_RELAXED)) +# define DISPATCH_STORE(g, v) \ + (__atomic_store_n(&(g), (v), __ATOMIC_RELAXED)) +#elif defined(__i386__) || defined(__amd64__) || \ + defined(__arm__) || defined(__aarch64__) || \ + defined(__mips__) +# define CPU_DISPATCH_P 1 +# define DISPATCH_LOAD(g, v) ((v) = (g)) +# define DISPATCH_STORE(g, v) ((g) = (v)) +#endif + +/* --- A simple hack --- */ + +#ifndef EMPTY +# define EMPTY +#endif + +/* --- @CPU_DISPATCH@ --- * + * + * Arguments: @stcls@ = storage class for the main @ext@ function + * (typically either @static@ or @EMPTY@) + * @rtn@ = prefix for tail-calling a function of the appropriate + * type (either @(void)@ or @return@) + * @ret@ = return type for the function + * @ext@ = name for the main function (other names are derived + * from this) + * @argdecls@ = parenthesis-enclosed list of argument types + * @args@ = parenthesis-enclosed list of argument names only + * @pick@ = function to select appropriate implementation + * @dflt@ = fallback implementation + * + * Use: Main machinery for CPU-specific dispatching. + * + * The macro defines a function + * + * @stcls ret ext argdecls@ + * + * The first time @ext@ is called, it will invoke @pick@ to + * select and return a pointer to an appropriate + * implementation for the runtime environment. Subsequent calls + * to @ext@ will (usually) call this preferred implementation + * directly. + * + * Some target platforms may not be able to establish the + * necessary function pointer in a threadsafe way. On such + * platforms, the dispatch machinery is disabled and @ext@ will + * simply call @dflt@. + * + * Some additional declarations are made. 
As a convenience, + * @ext__functype@ is the function type of @ext@. Declarations + * are made for @pick@ and @dflt@, as @static@ functions. + */ + +#ifdef CPU_DISPATCH_P + +#define CPU_DISPATCH(stcls, rtn, ret, ext, argdecls, args, pick, dflt) \ + \ +typedef ret ext##__functype argdecls; \ +static ret dflt argdecls; \ +static ret ext##__dispatch argdecls; \ +static ext##__functype *pick(void); \ +static ext##__functype *ext##__ptr = ext##__dispatch; \ + \ +static ret ext##__dispatch argdecls \ +{ \ + ext##__functype *f = pick(); \ + DISPATCH_STORE(ext##__ptr, f); \ + rtn f args; \ +} \ + \ +stcls ret ext argdecls \ +{ \ + ext##__functype *f; \ + DISPATCH_LOAD(ext##__ptr, f); \ + rtn f args; \ +} + +#else + +#define CPU_DISPATCH(stcls, rtn, ret, ext, argdecls, args, pick, dflt) \ + \ +typedef ret ext##__functype argdecls; \ +static ret dflt argdecls; \ +static ext##__functype *pick(void) IGNORABLE; \ + \ +stcls ret ext argdecls { rtn dflt args; } + +#endif + +/*----- Functions provided ------------------------------------------------*/ + +/* --- @cpu_feature_p@ --- * + * + * Arguments: @int feat@ = a @CPUFEAT_...@ code + * + * Returns: Nonzero if the feature is available. + */ + +enum { + CPUFEAT_X86_SSE2 /* Streaming SIMD Extensions 2 */ +}; + +extern int cpu_feature_p(int /*feat*/); + +/*----- That's all, folks -------------------------------------------------*/ + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/configure.ac b/configure.ac index 55678b31..516e3a05 100644 --- a/configure.ac +++ b/configure.ac @@ -32,6 +32,7 @@ AC_INIT([catacomb], AUTO_VERSION, [mdw@distorted.org.uk]) AC_CONFIG_SRCDIR([catacomb.pc.in]) AC_CONFIG_AUX_DIR([config]) AM_INIT_AUTOMAKE([foreign parallel-tests]) +AC_CANONICAL_HOST mdw_SILENT_RULES AC_PROG_CC @@ -46,6 +47,26 @@ AC_PROG_YACC AC_SUBST(AM_CFLAGS) dnl-------------------------------------------------------------------------- +dnl Host-specific configuration. 
+ +AC_DEFUN([catacomb_CPU_FAMILIES], + [$1([i[[3-6]]86], [X86])]) + +case $host_cpu in + m4_define([catacomb_CPU_CASE], + [$1) + AC_DEFINE([CPUFAM_$2], [1], [Define if host CPU family is $2.]) + cpufam=$2 + ;; +]) + catacomb_CPU_FAMILIES([catacomb_CPU_CASE]) + *) cpufam=nil ;; +esac +m4_define([catacomb_CPU_DEFS], + [AM_CONDITIONAL([CPUFAM_$2], [test x$cpufam = x$2])]) +catacomb_CPU_FAMILIES([catacomb_CPU_DEFS]) + +dnl-------------------------------------------------------------------------- dnl C programming environment. dnl Find out if we're cross-compiling. -- 2.11.0