# define EFLAGS_ID (1u << 21)
# define CPUID1D_SSE2 (1u << 26)
# define CPUID1D_FXSR (1u << 24)
+# define CPUID1C_PCLMUL (1u << 1)
+# define CPUID1C_SSSE3 (1u << 9)
# define CPUID1C_AESNI (1u << 25)
# define CPUID1C_AVX (1u << 28)
# define CPUID1C_RDRAND (1u << 30)
#endif
}
+/* --- @rdrand_works_p@ --- *
+ *
+ * Arguments: ---
+ *
+ * Returns: Nonzero if the `rdrand' instruction actually works. Assumes
+ * that it's already been verified to be safe to issue.
+ */
+
+#ifdef __GNUC__
+/* --- @rdrand@ --- *
+ *
+ * Arguments: @unsigned *x@ = where to store the random word
+ *
+ * Returns: Zero on success, or %$-1$% if `rdrand' persistently failed
+ * (carry clear) after several retries.
+ */
+
+static int rdrand(unsigned *x)
+{
+  int i, rc;
+  unsigned _t;
+
+  /* Retry budget: `rdrand' may transiently fail (CF = 0); give up after
+   * sixteen attempts, as Intel's guidance suggests a bounded retry loop.
+   */
+  i = 16;
+
+  /* Dummy asm to persuade the compiler that @_t@ is initialized without
+   * actually emitting any code.
+   */
+  __asm__ ("" : "=g" (_t));
+
+  /* Loop issuing `rdrand' until carry is set or the budget is exhausted.
+   * On success, store the word through @x@ and return zero; otherwise
+   * return %$-1$%.  The `memory' clobber is required because the asm
+   * writes through @x@ (`movl %2, (%3)'): without it the compiler may
+   * assume @*x@ is unmodified and reuse a stale cached value.
+   */
+  __asm__ ("0: rdrand %2; jc 1f; decl %1; jnz 0b\n"
+	   "mov $-1, %0; jmp 9f\n"
+	   "1: movl %2, (%3); xorl %0, %0\n"
+	   "9:"
+	   : "=r" (rc), "+r" (i), "+r" (_t)
+	   : "r" (x)
+	   : "cc", "memory");
+  return (rc);
+}
+#endif
+
+/* Sanity-check that `rdrand' returns varying values rather than being
+ * stuck; returns nonzero if it looks plausible, zero if it fails to
+ * deliver data or always returns the same word.  (A nonzero return from
+ * @rdrand@ indicates a hardware-level failure to produce a word.)
+ * NOTE(review): @rdrand@ above is only defined under @__GNUC__@ --
+ * presumably this function is likewise compiled only in that case;
+ * confirm against the surrounding conditionals.
+ */
+static int rdrand_works_p(void)
+{
+  unsigned ref, x, i;
+
+  /* Check that it doesn't always give the same answer. Try four times: this
+   * will fail with probability %$2^{-128}$% with a truly random generator,
+   * which seems fair enough.
+   */
+  if (rdrand(&ref)) goto fail;
+  for (i = 0; i < 4; i++) {
+    if (rdrand(&x)) goto fail;
+    /* Any word differing from the reference proves it isn't stuck. */
+    if (x != ref) goto not_stuck;
+  }
+  dispatch_debug("RDRAND always returns 0x%08x!", ref);
+  return (0);
+
+not_stuck:
+  dispatch_debug("RDRAND instruction looks plausible");
+  return (1);
+
+fail:
+  dispatch_debug("RDRAND instruction fails too often");
+  return (0);
+}
+
#endif
/*----- General feature probing using auxiliary vectors -------------------*/
_(ARM_NEON, "arm:neon") \
_(ARM_V4, "arm:v4") \
_(ARM_D32, "arm:d32") \
- _(ARM_AES, "arm:aes")
+ _(ARM_AES, "arm:aes") \
+ _(ARM_PMULL, "arm:pmull")
#endif
#if CPUFAM_ARM64
# define WANTAUX(_) \
WANT_AT_HWCAP(_)
# define CAPMAP(_) \
- _(ARM_AES, "arm:aes")
+ _(ARM_NEON, "arm:neon") \
+ _(ARM_AES, "arm:aes") \
+ _(ARM_PMULL, "arm:pmull")
#endif
/* Build the bitmask for `hwcaps' from the `CAPMAP' list. */
# ifdef HWCAP2_AES
if (probed.hwcap2 & HWCAP2_AES) hw |= HF_ARM_AES;
# endif
+# ifdef HWCAP2_PMULL
+ if (probed.hwcap2 & HWCAP2_PMULL) hw |= HF_ARM_PMULL;
+# endif
#endif
#if CPUFAM_ARM64
+ if (probed.hwcap & HWCAP_ASIMD) hw |= HF_ARM_NEON;
if (probed.hwcap & HWCAP_AES) hw |= HF_ARM_AES;
+ if (probed.hwcap & HWCAP_PMULL) hw |= HF_ARM_PMULL;
#endif
/* Store the bitmask of features we probed for everyone to see. */
switch (feat) {
#if CPUFAM_X86 || CPUFAM_AMD64
CASE_CPUFEAT(X86_SSE2, "x86:sse2",
- xmm_registers_available_p() &&
- cpuid_features_p(CPUID1D_SSE2, 0));
+ cpuid_features_p(CPUID1D_SSE2, 0) &&
+ xmm_registers_available_p());
CASE_CPUFEAT(X86_AESNI, "x86:aesni",
- xmm_registers_available_p() &&
- cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI));
+ cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI) &&
+ xmm_registers_available_p());
CASE_CPUFEAT(X86_RDRAND, "x86:rdrand",
- cpuid_features_p(0, CPUID1C_RDRAND));
+ cpuid_features_p(0, CPUID1C_RDRAND) && rdrand_works_p());
CASE_CPUFEAT(X86_AVX, "x86:avx",
- xmm_registers_available_p() &&
- cpuid_features_p(0, CPUID1C_AVX));
+ cpuid_features_p(0, CPUID1C_AVX) &&
+ xmm_registers_available_p());
+ CASE_CPUFEAT(X86_SSSE3, "x86:ssse3",
+ cpuid_features_p(0, CPUID1C_SSSE3) &&
+ xmm_registers_available_p());
+ CASE_CPUFEAT(X86_PCLMUL, "x86:pclmul",
+ cpuid_features_p(0, CPUID1C_PCLMUL) &&
+ xmm_registers_available_p());
#endif
#ifdef CAPMAP
# define FEATP__CASE(feat, tok) \