configure.ac, base/dispatch.[ch]: CPU-specific implementations.
author Mark Wooding <mdw@distorted.org.uk>
Mon, 18 May 2015 22:21:02 +0000 (23:21 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Mon, 20 Jul 2015 12:54:21 +0000 (13:54 +0100)
We now have the capability for a function to have multiple CPU-specific
implementations, and to choose the most appropriate one at runtime.

The new `cpu_feature_p' function doesn't understand much in the way of
features yet, but is ready to grow later.

base/Makefile.am
base/dispatch.c [new file with mode: 0644]
base/dispatch.h [new file with mode: 0644]
configure.ac

index 35c86ff..c8608ed 100644 (file)
@@ -40,6 +40,10 @@ libbase_la_SOURCES   += arena.c
 pkginclude_HEADERS     += ct.h
 libbase_la_SOURCES     += ct.c
 
+## CPU-specific dispatch.
+pkginclude_HEADERS     += dispatch.h
+libbase_la_SOURCES     += dispatch.c
+
 ## Acceptable key-size descriptions.
 pkginclude_HEADERS     += keysz.h
 libbase_la_SOURCES     += keysz.c keysz-conv.c
diff --git a/base/dispatch.c b/base/dispatch.c
new file mode 100644 (file)
index 0000000..08c189c
--- /dev/null
@@ -0,0 +1,180 @@
+/* -*-c-*-
+ *
+ * CPU-specific dispatch
+ *
+ * (c) 2015 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of Catacomb.
+ *
+ * Catacomb is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * Catacomb is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with Catacomb; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include "config.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <mLib/macros.h>
+
+#include "dispatch.h"
+
+/*----- Main code ---------------------------------------------------------*/
+
+#ifdef CPUFAM_X86
+
+#define EFLAGS_ID (1u << 21)
+#define CPUID1D_SSE2 (1u << 26)
+
+struct cpuid { unsigned a, b, c, d; };
+
+/* --- @cpuid@ --- *
+ *
+ * Arguments:  @struct cpuid *cc@ = where to write the result
+ *             @unsigned a, c@ = EAX and ECX registers to set
+ *
+ * Returns:    ---
+ *
+ * Use:                Minimal C wrapper around the x86 `CPUID' instruction.  Checks
+ *             that the instruction is actually available before invoking
+ *             it; fills the output structure with zero if it's not going to
+ *             work.
+ */
+
+#ifdef __GNUC__
+static __inline__ unsigned getflags(void) /* Return the current EFLAGS. */
+  { unsigned f; __asm__ ("pushf; popl %0" : "=g" (f)); return (f); }
+static __inline__ unsigned setflags(unsigned f) /* Try loading EFLAGS with f. */
+{ /* Returns the flags read back after the load; the old flags are restored. */
+  unsigned ff;
+  __asm__ ("pushf; pushl %1; popf; pushf; popl %0; popf"
+          : "=r" (ff) /* Registers only: the asm moves the stack pointer, */
+          : "r" (f)); /* so a stack-relative memory operand would be off. */
+  return (ff);
+}
+#endif
+
+static void cpuid(struct cpuid *cc, unsigned a, unsigned c)
+{
+#ifdef __GNUC__
+  unsigned f;
+#endif
+
+  cc->a = cc->b = cc->c = cc->d = 0; /* result if we return early */
+
+#ifdef __GNUC__
+  /* Stupid dance to detect whether the CPUID instruction is available. */
+  f = getflags();
+  if (!(setflags(f |  EFLAGS_ID) & EFLAGS_ID)) return; /* ID bit won't set */
+  if (  setflags(f & ~EFLAGS_ID) & EFLAGS_ID ) return; /* ID bit won't clear */
+  setflags(f);
+
+  /* Alas, EBX is magical in PIC code, so abuse ESI instead.  This isn't
+   * pretty, but it works.
+   */
+  __asm__ ("pushl %%ebx; cpuid; movl %%ebx, %%esi; popl %%ebx"
+          : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d) /* b via ESI */
+          : "a" (a) , "c" (c)); /* EAX = a and ECX = c on entry */
+#endif /* without __GNUC__ the result simply stays all zero */
+}
+
+static unsigned cpuid_maxleaf(void) /* EAX of CPUID leaf 0; 0 if no CPUID */
+  { struct cpuid c; cpuid(&c, 0, 0); return (c.a); }
+
+static int cpuid_features_p(unsigned dbits, unsigned cbits)
+{ /* Nonzero iff CPUID leaf 1 has all of @dbits@ in EDX and @cbits@ in ECX. */
+  struct cpuid c;
+  if (cpuid_maxleaf() < 1) return (0); /* leaf 1 is not on offer */
+  cpuid(&c, 1, 0);
+  return ((c.d & dbits) == dbits && (c.c & cbits) == cbits); /* every bit set? */
+}
+
+#endif
+
+/* --- @check_env@ --- *
+ *
+ * Arguments:  @const char *ftok@ = feature token
+ *
+ * Returns:    Zero if the feature is forced off; positive if it's forced
+ *             on; negative if the user hasn't decided.
+ *
+ * Use:                Checks the environment variable `CATACOMB_CPUFEAT' for the
+ *             feature token @ftok@.  The variable, if it exists, should be
+ *             a space-separated sequence of `+tok' and `-tok' items.  These
+ *             tokens may end in `*', which matches any suffix.
+ */
+
+static int IGNORABLE check_env(const char *ftok)
+{
+  const char *p, *q, *pp; /* p: scan position; q: end of item; pp: in @ftok@ */
+  int d; /* verdict carried by the current item's sign */
+
+  p = getenv("CATACOMB_CPUFEAT");
+  if (!p) return (-1); /* variable not set: undecided */
+
+  for (;;) {
+    while (isspace((unsigned char)*p)) p++; /* skip whitespace between items */
+    if (!*p) return (-1); /* no item mentioned @ftok@: undecided */
+    switch (*p) {
+      case '+': d = +1; p++; break; /* item forces the feature on */
+      case '-': d =  0; p++; break; /* item forces the feature off */
+      default:  d = -1;      break; /* no sign: the item is skipped */
+    }
+    for (q = p; *q && !isspace((unsigned char)*q); q++); /* find end of item */
+    if (d >= 0) {
+      for (pp = ftok; p < q && *pp && *p == *pp; p++, pp++); /* common prefix */
+      if ((p == q && !*pp) || (*p == '*' && p + 1 == q)) return (d); /* exact match or final star; first hit wins */
+    }
+    p = q; /* move on to the next item */
+  }
+  return (-1); /* not reached: the loop above only leaves by returning */
+}
+
+/* --- @cpu_feature_p@ --- *
+ *
+ * Arguments:  @int feat@ = a @CPUFEAT_...@ code
+ *
+ * Returns:    Nonzero if the feature is available.
+ */
+
+#include <stdio.h> /* NOTE(review): nothing in this file visibly uses stdio */
+
+int cpu_feature_p(int feat)
+{
+  int IGNORABLE f;
+  IGNORE(f); /* f is only touched where a CHECK_ENV is compiled in */
+#define CHECK_ENV(ftok)                                                        \
+  do { if ((f = check_env(ftok)) >= 0) return (f); } while (0)
+
+  switch (feat) {
+#ifdef CPUFAM_X86
+    case CPUFEAT_X86_SSE2: {
+      CHECK_ENV("x86:sse2"); /* a CATACOMB_CPUFEAT setting takes precedence */
+      return (cpuid_features_p(CPUID1D_SSE2, 0)); /* else ask the processor */
+    }
+#endif
+    default: /* unknown code, or the feature's case was not compiled in */
+      return (0);
+  }
+#undef CHECK_ENV
+}
+
+/*----- That's all, folks -------------------------------------------------*/
diff --git a/base/dispatch.h b/base/dispatch.h
new file mode 100644 (file)
index 0000000..bcf9a13
--- /dev/null
@@ -0,0 +1,162 @@
+/* -*-c-*-
+ *
+ * CPU-specific dispatch
+ *
+ * (c) 2015 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of Catacomb.
+ *
+ * Catacomb is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * Catacomb is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with Catacomb; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+#ifndef CATACOMB_DISPATCH_H
+#define CATACOMB_DISPATCH_H
+
+#ifdef __cplusplus
+  extern "C" {
+#endif
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <mLib/macros.h>
+
+/*----- Macros ------------------------------------------------------------*/
+
+/* --- Atomic data access machinery --- *
+ *
+ * If they're available, use GCC's `__atomic_*' intrinsics.  If that doesn't
+ * work and we're using one of a small number of processors I'm sure won't
+ * mind, then just stick with simple memory access.  Otherwise turn
+ * dispatching off, because it probably isn't thread-safe.
+ */
+
+#if GCC_VERSION_P(4, 7)
+#  define CPU_DISPATCH_P 1 /* dispatch on: relaxed `__atomic_*' access */
+#  define DISPATCH_LOAD(g, v)                                          \
+       ((v) = __atomic_load_n(&(g), __ATOMIC_RELAXED))
+#  define DISPATCH_STORE(g, v)                                         \
+       (__atomic_store_n(&(g), (v), __ATOMIC_RELAXED))
+#elif defined(__i386__) || defined(__amd64__) ||                       \
+      defined(__arm__) || defined(__aarch64__) ||                      \
+      defined(__mips__)
+#  define CPU_DISPATCH_P 1 /* dispatch on: plain loads and stores */
+#  define DISPATCH_LOAD(g, v) ((v) = (g))
+#  define DISPATCH_STORE(g, v) ((g) = (v))
+#endif /* otherwise @CPU_DISPATCH_P@ is left undefined here */
+
+/* --- A simple hack --- */
+
+#ifndef EMPTY
+#  define EMPTY
+#endif
+
+/* --- @CPU_DISPATCH@ --- *
+ *
+ * Arguments:  @stcls@ = storage class for the main @ext@ function
+ *                     (typically either @static@ or @EMPTY@)
+ *             @rtn@ = prefix for tail-calling a function of the appropriate
+ *                     type (either @(void)@ or @return@)
+ *             @ret@ = return type for the function
+ *             @ext@ = name for the main function (other names are derived
+ *                     from this)
+ *             @argdecls@ = parenthesis-enclosed list of argument types
+ *             @args@ = parenthesis-enclosed list of argument names only
+ *             @pick@ = function to select appropriate implementation
+ *             @dflt@ = fallback implementation
+ *
+ * Use:                Main machinery for CPU-specific dispatching.
+ *
+ *             The macro defines a function
+ *
+ *                     @stcls ret ext argdecls@
+ *
+ *             The first time @ext@ is called, it will invoke @pick@ to
+ *             select and return a pointer to an appropriate
+ *             implementation for the runtime environment.  Subsequent calls
+ *             to @ext@ will (usually) call this preferred implementation
+ *             directly.
+ *
+ *             Some target platforms may not be able to establish the
+ *             necessary function pointer in a threadsafe way.  On such
+ *             platforms, the dispatch machinery is disabled and @ext@ will
+ *             simply call @dflt@.
+ *
+ *             Some additional declarations are made.  As a convenience,
+ *             @ext__functype@ is the function type of @ext@.  Declarations
+ *             are made for @pick@ and @dflt@, as @static@ functions.
+ */
+
+#ifdef CPU_DISPATCH_P /* dispatch through @ext##__ptr@, set on first call */
+
+#define CPU_DISPATCH(stcls, rtn, ret, ext, argdecls, args, pick, dflt) \
+                                                                       \
+typedef ret ext##__functype argdecls;                                  \
+static ret dflt argdecls;                                              \
+static ret ext##__dispatch argdecls;                                   \
+static ext##__functype *pick(void);                                    \
+static ext##__functype *ext##__ptr = ext##__dispatch;                  \
+                                                                       \
+static ret ext##__dispatch argdecls                                    \
+{                                                                      \
+  ext##__functype *f = pick();                                         \
+  DISPATCH_STORE(ext##__ptr, f);                                       \
+  rtn f args;                                                          \
+}                                                                      \
+                                                                       \
+stcls ret ext argdecls                                                 \
+{                                                                      \
+  ext##__functype *f;                                                  \
+  DISPATCH_LOAD(ext##__ptr, f);                                                \
+  rtn f args;                                                          \
+}
+
+#else /* no dispatch: @ext@ calls @dflt@ directly and never calls @pick@ */
+
+#define CPU_DISPATCH(stcls, rtn, ret, ext, argdecls, args, pick, dflt) \
+                                                                       \
+typedef ret ext##__functype argdecls;                                  \
+static ret dflt argdecls;                                              \
+static ext##__functype *pick(void) IGNORABLE;                          \
+                                                                       \
+stcls ret ext argdecls { rtn dflt args; }
+
+#endif /* CPU_DISPATCH_P */
+
+/*----- Functions provided ------------------------------------------------*/
+
+/* --- @cpu_feature_p@ --- *
+ *
+ * Arguments:  @int feat@ = a @CPUFEAT_...@ code
+ *
+ * Returns:    Nonzero if the feature is available.
+ */
+
+enum { /* feature codes to pass as @feat@ */
+  CPUFEAT_X86_SSE2                     /* Streaming SIMD Extensions 2 */
+};
+
+extern int cpu_feature_p(int /*feat*/);
+
+/*----- That's all, folks -------------------------------------------------*/
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif
index 55678b3..516e3a0 100644 (file)
@@ -32,6 +32,7 @@ AC_INIT([catacomb], AUTO_VERSION, [mdw@distorted.org.uk])
 AC_CONFIG_SRCDIR([catacomb.pc.in])
 AC_CONFIG_AUX_DIR([config])
 AM_INIT_AUTOMAKE([foreign parallel-tests])
+AC_CANONICAL_HOST
 mdw_SILENT_RULES
 
 AC_PROG_CC
@@ -46,6 +47,26 @@ AC_PROG_YACC
 AC_SUBST(AM_CFLAGS)
 
 dnl--------------------------------------------------------------------------
+dnl Host-specific configuration.
+
+AC_DEFUN([catacomb_CPU_FAMILIES],
+  [$1([i[[3-6]]86], [X86])])
+
+case $host_cpu in
+  m4_define([catacomb_CPU_CASE],
+    [$1)
+      AC_DEFINE([CPUFAM_$2], [1], [Define if host CPU family is $2.])
+      cpufam=$2
+      ;;
+])
+  catacomb_CPU_FAMILIES([catacomb_CPU_CASE])
+  *) cpufam=nil ;;
+esac
+m4_define([catacomb_CPU_DEFS],
+  [AM_CONDITIONAL([CPUFAM_$2], [test x$cpufam = x$2])])
+catacomb_CPU_FAMILIES([catacomb_CPU_DEFS])
+
+dnl--------------------------------------------------------------------------
 dnl C programming environment.
 
 dnl Find out if we're cross-compiling.