Merge branch '2.4.x' into 2.5.x
[catacomb] / base / dispatch.c
index 908a4e3..abd019f 100644 (file)
 #  define EFLAGS_ID (1u << 21)
 #  define CPUID1D_SSE2 (1u << 26)
 #  define CPUID1D_FXSR (1u << 24)
+#  define CPUID1C_PCLMUL (1u << 1)
+#  define CPUID1C_SSSE3 (1u << 9)
 #  define CPUID1C_AESNI (1u << 25)
+#  define CPUID1C_AVX (1u << 28)
 #  define CPUID1C_RDRAND (1u << 30)
 
 struct cpuid { unsigned a, b, c, d; };
@@ -198,6 +201,59 @@ static int xmm_registers_available_p(void)
 #endif
 }
 
+/* --- @rdrand_works_p@ --- *
+ *
+ *
+ * Arguments:  ---
+ *
+ * Returns:    Nonzero if the `rdrand' instruction actually works.  Assumes
+ *             that it's already been verified to be safe to issue.
+ */
+
+#ifdef __GNUC__
+/* --- @rdrand@ --- *
+ *
+ * Arguments:  @unsigned *x@ = where to store the generated word
+ *
+ * Returns:    Zero if a word was written to @*x@; %$-1$% if all 16
+ *             attempts left the carry flag clear.
+ *
+ * Use:        Issues the `rdrand' instruction, retrying while it reports
+ *             failure.
+ */
+
+static int rdrand(unsigned *x)
+{
+  int i, rc;
+  unsigned _t;
+
+  /* Number of attempts before giving up. */
+  i = 16;
+
+  /* An empty statement which claims to write @_t@, so that the compiler
+   * treats it as initialized when it's passed as a read-write operand
+   * below.
+   */
+  __asm__ ("" : "=g" (_t));
+
+  /* This must be `volatile': otherwise the compiler is entitled to merge
+   * or hoist repeated executions, because the operands look the same each
+   * time.  It also needs the `memory' clobber, because it stores through
+   * @x@ without otherwise telling the compiler.
+   */
+  __asm__ __volatile__ ("0: rdrand %2; jc 1f; decl %1; jnz 0b\n"
+                        "mov $-1, %0; jmp 9f\n"
+                        "1: movl %2, (%3); xorl %0, %0\n"
+                        "9:"
+                        : "=r" (rc), "+r" (i), "+r" (_t)
+                        : "r" (x)
+                        : "cc", "memory");
+  return (rc);
+}
+#endif
+
+static int rdrand_works_p(void)
+{
+  enum { STUCK, PLAUSIBLE, BROKEN } verdict = STUCK;
+  unsigned first, sample;
+  int tries;
+
+  /* Take a reference word, and then up to four more, stopping as soon as
+   * one differs from the reference.  A genuinely random generator matches
+   * the reference four times running with probability %$2^{-128}$%, so
+   * treating that outcome as a stuck generator seems fair enough.
+   */
+  if (rdrand(&first))
+    verdict = BROKEN;
+  else {
+    for (tries = 4; tries; tries--) {
+      if (rdrand(&sample)) { verdict = BROKEN; break; }
+      if (sample != first) { verdict = PLAUSIBLE; break; }
+    }
+  }
+
+  /* Report what we found. */
+  switch (verdict) {
+    case PLAUSIBLE:
+      dispatch_debug("RDRAND instruction looks plausible");
+      return (1);
+    case BROKEN:
+      dispatch_debug("RDRAND instruction fails too often");
+      return (0);
+    default:
+      dispatch_debug("RDRAND always returns 0x%08x!", first);
+      return (0);
+  }
+}
+
 #endif
 
 /*----- General feature probing using auxiliary vectors -------------------*/
@@ -281,13 +337,15 @@ static unsigned hwcaps = 0;
        _(ARM_NEON, "arm:neon")                                         \
        _(ARM_V4, "arm:v4")                                             \
        _(ARM_D32, "arm:d32")                                           \
-       _(ARM_AES, "arm:aes")
+       _(ARM_AES, "arm:aes")                                           \
+       _(ARM_PMULL, "arm:pmull")
 #endif
 #if CPUFAM_ARM64
 #  define WANTAUX(_)                                                   \
        WANT_AT_HWCAP(_)
 #  define CAPMAP(_)                                                    \
-       _(ARM_AES, "arm:aes")
+       _(ARM_AES, "arm:aes")                                           \
+       _(ARM_PMULL, "arm:pmull")
 #endif
 
 /* Build the bitmask for `hwcaps' from the `CAPMAP' list. */
@@ -401,9 +459,13 @@ static void probe_hwcaps(void)
 #  ifdef HWCAP2_AES
   if (probed.hwcap2 & HWCAP2_AES) hw |= HF_ARM_AES;
 #  endif
+#  ifdef HWCAP2_PMULL
+  if (probed.hwcap2 & HWCAP2_PMULL) hw |= HF_ARM_PMULL;
+#  endif
 #endif
 #if CPUFAM_ARM64
   if (probed.hwcap & HWCAP_AES) hw |= HF_ARM_AES;
+  if (probed.hwcap & HWCAP_PMULL) hw |= HF_ARM_PMULL;
 #endif
 
   /* Store the bitmask of features we probed for everyone to see. */
@@ -538,13 +600,22 @@ int cpu_feature_p(int feat)
   switch (feat) {
 #if CPUFAM_X86 || CPUFAM_AMD64
     CASE_CPUFEAT(X86_SSE2, "x86:sse2",
-                xmm_registers_available_p() &&
-                cpuid_features_p(CPUID1D_SSE2, 0));
+                cpuid_features_p(CPUID1D_SSE2, 0) &&
+                xmm_registers_available_p());
     CASE_CPUFEAT(X86_AESNI, "x86:aesni",
-                xmm_registers_available_p() &&
-                cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI));
+                cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI) &&
+                xmm_registers_available_p());
     CASE_CPUFEAT(X86_RDRAND, "x86:rdrand",
-                cpuid_features_p(0, CPUID1C_RDRAND));
+                cpuid_features_p(0, CPUID1C_RDRAND) && rdrand_works_p());
+    CASE_CPUFEAT(X86_AVX, "x86:avx",
+                cpuid_features_p(0, CPUID1C_AVX) &&
+                xmm_registers_available_p());
+    CASE_CPUFEAT(X86_SSSE3, "x86:ssse3",
+                cpuid_features_p(0, CPUID1C_SSSE3) &&
+                xmm_registers_available_p());
+    CASE_CPUFEAT(X86_PCLMUL, "x86:pclmul",
+                cpuid_features_p(0, CPUID1C_PCLMUL) &&
+                xmm_registers_available_p());
 #endif
 #ifdef CAPMAP
 #  define FEATP__CASE(feat, tok)                                       \