X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/e492db887af6561dd33aa18e3887efaeb219fd16..89717a56084f7cac56330c8527fbaff99b15709b:/base/dispatch.c

diff --git a/base/dispatch.c b/base/dispatch.c
index 908a4e31..abd019f6 100644
--- a/base/dispatch.c
+++ b/base/dispatch.c
@@ -46,7 +46,10 @@
 # define EFLAGS_ID (1u << 21)
 # define CPUID1D_SSE2 (1u << 26)
 # define CPUID1D_FXSR (1u << 24)
+# define CPUID1C_PCLMUL (1u << 1)
+# define CPUID1C_SSSE3 (1u << 9)
 # define CPUID1C_AESNI (1u << 25)
+# define CPUID1C_AVX (1u << 28)
 # define CPUID1C_RDRAND (1u << 30)
 
 struct cpuid { unsigned a, b, c, d; };
@@ -198,6 +201,84 @@ static int xmm_registers_available_p(void)
 #endif
 }
 
+/* --- @rdrand@ --- *
+ *
+ * Arguments:    @unsigned *x@ = where to store the random word
+ *
+ * Returns:      Zero on success, or %$-1$% if no value could be obtained.
+ *
+ * Use:          Issues the `rdrand' instruction, retrying up to 16 times
+ *               until it sets the carry flag.  On success, the 32-bit
+ *               result is stored in @*x@; on failure, @*x@ is left alone.
+ *               Assumes that the instruction is safe to issue.
+ */
+
+static int rdrand(unsigned *x)
+{
+#ifdef __GNUC__
+  int i, rc;
+  unsigned _t;
+
+  i = 16;
+
+  /* Mark @_t@ as written without emitting any code: it's a read-write
+   * operand below, and this presumably quietens uninitialized-variable
+   * warnings.
+   */
+  __asm__ ("" : "=g" (_t));
+
+  /* The statement must be `volatile', or the compiler may merge repeated
+   * calls or hoist them out of a loop; and it stores through @x@, so it
+   * needs the `memory' clobber.
+   */
+  __asm__ __volatile__ ("0: rdrand %2; jc 1f; decl %1; jnz 0b\n"
+                        "mov $-1, %0; jmp 9f\n"
+                        "1: movl %2, (%3); xorl %0, %0\n"
+                        "9:"
+                        : "=r" (rc), "+r" (i), "+r" (_t)
+                        : "r" (x)
+                        : "cc", "memory");
+  return (rc);
+#else
+  /* There's no way to issue the instruction with this compiler. */
+  (void)x;
+  return (-1);
+#endif
+}
+
+/* --- @rdrand_works_p@ --- *
+ *
+ * Arguments:    ---
+ *
+ * Returns:      Nonzero if the `rdrand' instruction actually works.  Assumes
+ *               that it's already been verified to be safe to issue.
+ */
+
+static int rdrand_works_p(void)
+{
+  unsigned ref, x, i;
+
+  /* Check that it doesn't always give the same answer.  Try four times: this
+   * will fail with probability %$2^{-128}$% with a truly random generator,
+   * which seems fair enough.
+   */
+  if (rdrand(&ref)) goto fail;
+  for (i = 0; i < 4; i++) {
+    if (rdrand(&x)) goto fail;
+    if (x != ref) goto not_stuck;
+  }
+  dispatch_debug("RDRAND always returns 0x%08x!", ref);
+  return (0);
+
+not_stuck:
+  dispatch_debug("RDRAND instruction looks plausible");
+  return (1);
+
+fail:
+  dispatch_debug("RDRAND instruction fails too often");
+  return (0);
+}
+
 #endif
 
 /*----- General feature probing using auxiliary vectors -------------------*/
@@ -281,13 +362,15 @@ static unsigned hwcaps = 0;
   _(ARM_NEON, "arm:neon") \
   _(ARM_V4, "arm:v4") \
   _(ARM_D32, "arm:d32") \
-  _(ARM_AES, "arm:aes")
+  _(ARM_AES, "arm:aes") \
+  _(ARM_PMULL, "arm:pmull")
 #endif
 #if CPUFAM_ARM64
 # define WANTAUX(_) \
   WANT_AT_HWCAP(_)
 # define CAPMAP(_) \
-  _(ARM_AES, "arm:aes")
+  _(ARM_AES, "arm:aes") \
+  _(ARM_PMULL, "arm:pmull")
 #endif
 
 /* Build the bitmask for `hwcaps' from the `CAPMAP' list. */
@@ -401,9 +484,13 @@ static void probe_hwcaps(void)
 # ifdef HWCAP2_AES
   if (probed.hwcap2 & HWCAP2_AES) hw |= HF_ARM_AES;
 # endif
+# ifdef HWCAP2_PMULL
+  if (probed.hwcap2 & HWCAP2_PMULL) hw |= HF_ARM_PMULL;
+# endif
 #endif
 #if CPUFAM_ARM64
   if (probed.hwcap & HWCAP_AES) hw |= HF_ARM_AES;
+  if (probed.hwcap & HWCAP_PMULL) hw |= HF_ARM_PMULL;
 #endif
 
   /* Store the bitmask of features we probed for everyone to see. */
@@ -538,13 +625,22 @@ int cpu_feature_p(int feat)
   switch (feat) {
 #if CPUFAM_X86 || CPUFAM_AMD64
     CASE_CPUFEAT(X86_SSE2, "x86:sse2",
-                 xmm_registers_available_p() &&
-                 cpuid_features_p(CPUID1D_SSE2, 0));
+                 cpuid_features_p(CPUID1D_SSE2, 0) &&
+                 xmm_registers_available_p());
     CASE_CPUFEAT(X86_AESNI, "x86:aesni",
-                 xmm_registers_available_p() &&
-                 cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI));
+                 cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI) &&
+                 xmm_registers_available_p());
     CASE_CPUFEAT(X86_RDRAND, "x86:rdrand",
-                 cpuid_features_p(0, CPUID1C_RDRAND));
+                 cpuid_features_p(0, CPUID1C_RDRAND) && rdrand_works_p());
+    CASE_CPUFEAT(X86_AVX, "x86:avx",
+                 cpuid_features_p(0, CPUID1C_AVX) &&
+                 xmm_registers_available_p());
+    CASE_CPUFEAT(X86_SSSE3, "x86:ssse3",
+                 cpuid_features_p(0, CPUID1C_SSSE3) &&
+                 xmm_registers_available_p());
+    CASE_CPUFEAT(X86_PCLMUL, "x86:pclmul",
+                 cpuid_features_p(0, CPUID1C_PCLMUL) &&
+                 xmm_registers_available_p());
 #endif
 #ifdef CAPMAP
 # define FEATP__CASE(feat, tok) \