X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/61bd904b61ef893246791746517ef7a38ed732db..89717a56084f7cac56330c8527fbaff99b15709b:/base/dispatch.c diff --git a/base/dispatch.c b/base/dispatch.c index 1b0ab2b1..abd019f6 100644 --- a/base/dispatch.c +++ b/base/dispatch.c @@ -46,7 +46,11 @@ # define EFLAGS_ID (1u << 21) # define CPUID1D_SSE2 (1u << 26) # define CPUID1D_FXSR (1u << 24) +# define CPUID1C_PCLMUL (1u << 1) +# define CPUID1C_SSSE3 (1u << 9) # define CPUID1C_AESNI (1u << 25) +# define CPUID1C_AVX (1u << 28) +# define CPUID1C_RDRAND (1u << 30) struct cpuid { unsigned a, b, c, d; }; @@ -71,8 +75,8 @@ static __inline__ unsigned setflags(unsigned f) { unsigned ff; __asm__ ("pushf; pushl %1; popf; pushf; popl %0; popf" - : "=g" (ff) - : "g" (f)); + : "=r" (ff) + : "r" (f)); return (ff); } # else @@ -82,8 +86,8 @@ static __inline__ unsigned long long setflags(unsigned long f) { unsigned long ff; __asm__ ("pushf; pushq %1; popf; pushf; popq %0; popf" - : "=g" (ff) - : "g" (f)); + : "=r" (ff) + : "r" (f)); return (ff); } # endif @@ -197,6 +201,59 @@ static int xmm_registers_available_p(void) #endif } +/* --- @rdrand_works_p@ --- * + * + * + * Arguments: --- + * + * Returns: Nonzero if the `rdrand' instruction actually works. Assumes + * that it's already been verified to be safe to issue. + */ + +#ifdef __GNUC__ +static int rdrand(unsigned *x) +{ + int i, rc; + unsigned _t; + + i = 16; + __asm__ ("" : "=g" (_t)); + __asm__ ("0: rdrand %2; jc 1f; decl %1; jnz 0b\n" + "mov $-1, %0; jmp 9f\n" + "1: movl %2, (%3); xorl %0, %0\n" + "9:" + : "=r" (rc), "+r" (i), "+r" (_t) + : "r" (x) + : "cc"); + return (rc); +} +#endif + +static int rdrand_works_p(void) +{ + unsigned ref, x, i; + + /* Check that it doesn't always give the same answer. Try four times: this + * will fail with probability %$2^{-128}$% with a truly random generator, + * which seems fair enough. + */ + if (rdrand(&ref)) goto fail; + for (i = 0; i < 4; i++) { + if (rdrand(&x)) goto fail; + if (x != ref) goto not_stuck; + } + dispatch_debug("RDRAND always returns 0x%08x!", ref); + return (0); + +not_stuck: + dispatch_debug("RDRAND instruction looks plausible"); + return (1); + +fail: + dispatch_debug("RDRAND instruction fails too often"); + return (0); +} + #endif /*----- General feature probing using auxiliary vectors -------------------*/ @@ -204,13 +261,12 @@ static int xmm_registers_available_p(void) /* Try to find the system's definitions for auxiliary vector entries. */ #ifdef HAVE_SYS_AUXV_H # include -#else -# ifdef HAVE_LINUX_AUXVEC_H -# include -# endif -# ifdef HAVE_ASM_HWCAP_H -# include -# endif +#endif +#ifdef HAVE_LINUX_AUXVEC_H +# include +#endif +#ifdef HAVE_ASM_HWCAP_H +# include #endif /* The type of entries in the auxiliary vector. I'm assuming that `unsigned @@ -229,6 +285,16 @@ struct auxentry { unsigned long type; union auxval value; }; # define WANT_AT_HWCAP(_) _(AT_HWCAP, u, hwcap) #endif +#if defined(AT_HWCAP) && CPUFAM_ARM64 +# define WANT_ANY 1 +# define WANT_AT_HWCAP(_) _(AT_HWCAP, u, hwcap) +#endif + +#if defined(AT_HWCAP2) && CPUFAM_ARMEL +# define WANT_ANY 1 +# define WANT_AT_HWCAP2(_) _(AT_HWCAP2, u, hwcap2) +#endif + /* If we couldn't find any interesting entries then we can switch all of this * machinery off. Also do that if we have no means for atomic updates. */ @@ -246,6 +312,9 @@ static unsigned hwcaps = 0; #ifndef WANT_AT_HWCAP # define WANT_AT_HWCAP(_) #endif +#ifndef WANT_AT_HWCAP2 +# define WANT_AT_HWCAP2(_) +#endif /* For each CPU family, define two lists. * @@ -261,12 +330,22 @@ static unsigned hwcaps = 0; */ #if CPUFAM_ARMEL # define WANTAUX(_) \ - WANT_AT_HWCAP(_) + WANT_AT_HWCAP(_) \ + WANT_AT_HWCAP2(_) # define CAPMAP(_) \ _(ARM_VFP, "arm:vfp") \ _(ARM_NEON, "arm:neon") \ _(ARM_V4, "arm:v4") \ - _(ARM_D32, "arm:d32") + _(ARM_D32, "arm:d32") \ + _(ARM_AES, "arm:aes") \ + _(ARM_PMULL, "arm:pmull") +#endif +#if CPUFAM_ARM64 +# define WANTAUX(_) \ + WANT_AT_HWCAP(_) +# define CAPMAP(_) \ + _(ARM_AES, "arm:aes") \ + _(ARM_PMULL, "arm:pmull") #endif /* Build the bitmask for `hwcaps' from the `CAPMAP' list. */ @@ -319,8 +398,8 @@ static void probe_hwcaps(void) /* Shiny new libc lets us request individual entry types. This is almost * too easy. */ -# define CAP__GET(type, slot, ubranch) \ - probed.slot.ubranch = (AUXUTYPE_##ubranch)getauxval(type); +# define CAP__GET(type, ubranch, slot) \ + probed.slot = (AUXUTYPE_##ubranch)getauxval(type); WANTAUX(CAP__GET) #else /* Otherwise we're a bit stuck, really. Modern Linux kernels make a copy @@ -359,6 +438,7 @@ static void probe_hwcaps(void) #define CAP__SWITCH(type, ubranch, slot) \ case type: probed.slot = a->value.ubranch; break; WANTAUX(CAP__SWITCH) + case AT_NULL: goto clean; } } @@ -376,6 +456,16 @@ static void probe_hwcaps(void) if (probed.hwcap & HWCAP_NEON) hw |= HF_ARM_NEON; if (probed.hwcap & HWCAP_VFPD32) hw |= HF_ARM_D32; if (probed.hwcap & HWCAP_VFPv4) hw |= HF_ARM_V4; +# ifdef HWCAP2_AES + if (probed.hwcap2 & HWCAP2_AES) hw |= HF_ARM_AES; +# endif +# ifdef HWCAP2_PMULL + if (probed.hwcap2 & HWCAP2_PMULL) hw |= HF_ARM_PMULL; +# endif +#endif +#if CPUFAM_ARM64 + if (probed.hwcap & HWCAP_AES) hw |= HF_ARM_AES; + if (probed.hwcap & HWCAP_PMULL) hw |= HF_ARM_PMULL; #endif /* Store the bitmask of features we probed for everyone to see. */ @@ -510,15 +600,26 @@ int cpu_feature_p(int feat) switch (feat) { #if CPUFAM_X86 || CPUFAM_AMD64 CASE_CPUFEAT(X86_SSE2, "x86:sse2", - xmm_registers_available_p() && - cpuid_features_p(CPUID1D_SSE2, 0)); + cpuid_features_p(CPUID1D_SSE2, 0) && + xmm_registers_available_p()); CASE_CPUFEAT(X86_AESNI, "x86:aesni", - xmm_registers_available_p() && - cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI)); + cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI) && + xmm_registers_available_p()); + CASE_CPUFEAT(X86_RDRAND, "x86:rdrand", + cpuid_features_p(0, CPUID1C_RDRAND) && rdrand_works_p()); + CASE_CPUFEAT(X86_AVX, "x86:avx", + cpuid_features_p(0, CPUID1C_AVX) && + xmm_registers_available_p()); + CASE_CPUFEAT(X86_SSSE3, "x86:ssse3", + cpuid_features_p(0, CPUID1C_SSSE3) && + xmm_registers_available_p()); + CASE_CPUFEAT(X86_PCLMUL, "x86:pclmul", + cpuid_features_p(0, CPUID1C_PCLMUL) && + xmm_registers_available_p()); #endif #ifdef CAPMAP # define FEATP__CASE(feat, tok) \ - CASE_CPUFEAT(feat, tok, get_hwcaps & HF_##feat) + CASE_CPUFEAT(feat, tok, get_hwcaps() & HF_##feat) CAPMAP(FEATP__CASE) #undef FEATP__CASE #endif