Add a pile of debug output around the CPU dispatching machinery.
author Mark Wooding <mdw@distorted.org.uk>
Wed, 18 May 2016 09:29:03 +0000 (10:29 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Wed, 18 May 2016 19:12:23 +0000 (20:12 +0100)
Report on finding things in the environment, progress on runtime probes,
and the decisions about which implementations we pick.  Decision-making
isn't time-critical, so this is left in permanently.

base/dispatch.c
base/dispatch.h
symm/chacha.c
symm/rijndael-base.c
symm/rijndael.c
symm/salsa20.c

index 4b5e17a..eedf017 100644 (file)
@@ -30,6 +30,8 @@
 #include "config.h"
 
 #include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -85,8 +87,11 @@ static void cpuid(struct cpuid *cc, unsigned a, unsigned c)
 #ifdef __GNUC__
   /* Stupid dance to detect whether the CPUID instruction is available. */
   f = getflags();
-  if (!(setflags(f |  EFLAGS_ID) & EFLAGS_ID)) return;
-  if (  setflags(f & ~EFLAGS_ID) & EFLAGS_ID ) return;
+  if (!(setflags(f |  EFLAGS_ID) & EFLAGS_ID) ||
+       setflags(f & ~EFLAGS_ID) & EFLAGS_ID) {
+    dispatch_debug("CPUID instruction not available");
+    return;
+  }
   setflags(f);
 
   /* Alas, EBX is magical in PIC code, so abuse ESI instead.  This isn't
@@ -95,6 +100,8 @@ static void cpuid(struct cpuid *cc, unsigned a, unsigned c)
   __asm__ ("pushl %%ebx; cpuid; movl %%ebx, %%esi; popl %%ebx"
           : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d)
           : "a" (a) , "c" (c));
+#else
+  dispatch_debug("GNU inline assembler not available; can't CPUID");
 #endif
 }
 
@@ -146,6 +153,7 @@ static int xmm_registers_available_p(void)
           : "%ecx", "%edx");
   return (f);
 #else
+  dispatch_debug("GNU inline assembler not available; can't check for XMM");
   return (0);
 #endif
 }
@@ -154,6 +162,31 @@ static int xmm_registers_available_p(void)
 
 /*----- External interface ------------------------------------------------*/
 
+/* --- @dispatch_debug@ --- *
+ *
+ * Arguments:  @const char *fmt@ = a format string
+ *             @...@ = additional arguments
+ *
+ * Returns:    ---
+ *
+ * Use:                Writes a formatted message to standard error if dispatch
+ *             debugging is enabled.
+ */
+
+void dispatch_debug(const char *fmt, ...)
+{
+  va_list ap;
+  const char *e = getenv("CATACOMB_CPUDISPATCH_DEBUG");
+
+  if (e && *e != 'n' && *e != '0') {
+    va_start(ap, fmt);
+    fputs("Catacomb CPUDISPATCH: ", stderr);
+    vfprintf(stderr, fmt, ap);
+    fputc('\n', stderr);
+    va_end(ap);
+  }
+}
+
 /* --- @check_env@ --- *
  *
  * Arguments:  @const char *ftok@ = feature token
@@ -202,30 +235,41 @@ static int IGNORABLE check_env(const char *ftok)
 
 #include <stdio.h>
 
+static int IGNORABLE
+  feat_debug(const char *ftok, const char *check, int verdict)
+{
+  if (verdict >= 0) {
+    dispatch_debug("feature `%s': %s -> %s", ftok, check,
+                  verdict ? "available" : "absent");
+  }
+  return (verdict);
+}
+
 int cpu_feature_p(int feat)
 {
   int IGNORABLE f;
   IGNORE(f);
-#define CHECK_ENV(ftok)                                                        \
-  do { if ((f = check_env(ftok)) >= 0) return (f); } while (0)
+#define CASE_CPUFEAT(feat, ftok, cond) case CPUFEAT_##feat:            \
+  if ((f = feat_debug(ftok, "environment override",                    \
+                     check_env(ftok))) >= 0)                           \
+    return (f);                                                                \
+  else                                                                 \
+    return (feat_debug(ftok, "runtime probe", cond));
 
   switch (feat) {
 #ifdef CPUFAM_X86
-    case CPUFEAT_X86_SSE2: {
-      CHECK_ENV("x86:sse2");
-      return (xmm_registers_available_p() &&
-             cpuid_features_p(CPUID1D_SSE2, 0));
-    }
-    case CPUFEAT_X86_AESNI: {
-      check_env("x86:aesni");
-      return (xmm_registers_available_p() &&
-             cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI));
-    }
+    CASE_CPUFEAT(X86_SSE2, "x86:sse2",
+                xmm_registers_available_p() &&
+                cpuid_features_p(CPUID1D_SSE2, 0));
+    CASE_CPUFEAT(X86_AESNI, "x86:aesni",
+                xmm_registers_available_p() &&
+                cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI));
 #endif
     default:
+      dispatch_debug("denying unknown feature %d", feat);
       return (0);
   }
-#undef CHECK_ENV
+#undef CASE_CPUFEAT
 }
 
 /*----- That's all, folks -------------------------------------------------*/
index 612cfcd..bbb81f3 100644 (file)
@@ -138,8 +138,34 @@ stcls ret ext argdecls { rtn dflt args; }
 
 #endif
 
+/* --- Some macros for producing useful debugging --- */
+
+#define DISPATCH_PICK_COND(what, func, cond) do {                      \
+  if (cond) {                                                          \
+    dispatch_debug("picked `%s' for `%s'", #func, #what);              \
+    return (func);                                                     \
+  }                                                                    \
+} while (0)
+#define DISPATCH_PICK_FALLBACK(what, func) do {                                \
+  dispatch_debug("using default `%s'", #what);                         \
+  return (func);                                                       \
+} while (0)
+
 /*----- Functions provided ------------------------------------------------*/
 
+/* --- @dispatch_debug@ --- *
+ *
+ * Arguments:  @const char *fmt@ = a format string
+ *             @...@ = additional arguments
+ *
+ * Returns:    ---
+ *
+ * Use:                Writes a formatted message to standard error if dispatch
+ *             debugging is enabled.
+ */
+
+extern void dispatch_debug(const char */*fmt*/, ...);
+
 /* --- @cpu_feature_p@ --- *
  *
  * Arguments:  @unsigned feat@ = a @CPUFEAT_...@ code
index 8fe50e1..5683c8e 100644 (file)
@@ -79,9 +79,10 @@ extern core__functype chacha_core_x86_sse2;
 static core__functype *pick_core(void)
 {
 #ifdef CPUFAM_X86
-  if (cpu_feature_p(CPUFEAT_X86_SSE2)) return chacha_core_x86_sse2;
+  DISPATCH_PICK_COND(chacha_core, chacha_core_x86_sse2,
+                    cpu_feature_p(CPUFEAT_X86_SSE2));
 #endif
-  return simple_core;
+  DISPATCH_PICK_FALLBACK(chacha_core, simple_core);
 }
 
 /* --- @populate@ --- *
index 6e59130..3d2bb8e 100644 (file)
@@ -123,9 +123,10 @@ extern setup__functype rijndael_setup_x86_aesni;
 static setup__functype *pick_setup(void)
 {
 #ifdef CPUFAM_X86
-  if (cpu_feature_p(CPUFEAT_X86_AESNI)) return rijndael_setup_x86_aesni;
+  DISPATCH_PICK_COND(rijndael_setup, rijndael_setup_x86_aesni,
+                    cpu_feature_p(CPUFEAT_X86_AESNI));
 #endif
-  return simple_setup;
+  DISPATCH_PICK_FALLBACK(rijndael_setup, simple_setup);
 }
 
 void rijndael_setup(rijndael_ctx *k, unsigned nb, const void *buf, size_t sz)
index 9ee8aa2..dcb35e6 100644 (file)
@@ -90,17 +90,19 @@ extern rijndael_dblk__functype rijndael_dblk_x86_aesni;
 static rijndael_eblk__functype *pick_eblk(void)
 {
 #ifdef CPUFAM_X86
-  if (cpu_feature_p(CPUFEAT_X86_AESNI)) return rijndael_eblk_x86_aesni;
+  DISPATCH_PICK_COND(rijndael_eblk, rijndael_eblk_x86_aesni,
+                    cpu_feature_p(CPUFEAT_X86_AESNI));
 #endif
-  return simple_eblk;
+  DISPATCH_PICK_FALLBACK(rijndael_eblk, simple_eblk);
 }
 
 static rijndael_dblk__functype *pick_dblk(void)
 {
 #ifdef CPUFAM_X86
-  if (cpu_feature_p(CPUFEAT_X86_AESNI)) return rijndael_dblk_x86_aesni;
+  DISPATCH_PICK_COND(rijndael_dblk, rijndael_dblk_x86_aesni,
+                    cpu_feature_p(CPUFEAT_X86_AESNI));
 #endif
-  return simple_dblk;
+  DISPATCH_PICK_FALLBACK(rijndael_dblk, simple_dblk);
 }
 
 #define DO(what, t, aa, bb, cc, dd, a, b, c, d, w) do {                        \
index d3fb69a..15e4d50 100644 (file)
@@ -59,9 +59,10 @@ extern core__functype salsa20_core_x86_sse2;
 static core__functype *pick_core(void)
 {
 #ifdef CPUFAM_X86
-  if (cpu_feature_p(CPUFEAT_X86_SSE2)) return salsa20_core_x86_sse2;
+  DISPATCH_PICK_COND(salsa20_core, salsa20_core_x86_sse2,
+                    cpu_feature_p(CPUFEAT_X86_SSE2));
 #endif
-  return simple_core;
+  DISPATCH_PICK_FALLBACK(salsa20_core, simple_core);
 }
 
 /* --- @populate@ --- *