X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/a0e9bb8a70bf2ac9ddb7d1f638ce6af2c80d13e7..4bf3072e34661918a8e6c5e95c3e2ab05c76089b:/base/dispatch.c diff --git a/base/dispatch.c b/base/dispatch.c index 63649748..9ba6a7cd 100644 --- a/base/dispatch.c +++ b/base/dispatch.c @@ -47,6 +47,7 @@ # define CPUID1D_SSE2 (1u << 26) # define CPUID1D_FXSR (1u << 24) # define CPUID1C_AESNI (1u << 25) +# define CPUID1C_AVX (1u << 28) # define CPUID1C_RDRAND (1u << 30) struct cpuid { unsigned a, b, c, d; }; @@ -72,8 +73,8 @@ static __inline__ unsigned setflags(unsigned f) { unsigned ff; __asm__ ("pushf; pushl %1; popf; pushf; popl %0; popf" - : "=g" (ff) - : "g" (f)); + : "=r" (ff) + : "r" (f)); return (ff); } # else @@ -83,8 +84,8 @@ static __inline__ unsigned long long setflags(unsigned long f) { unsigned long ff; __asm__ ("pushf; pushq %1; popf; pushf; popq %0; popf" - : "=g" (ff) - : "g" (f)); + : "=r" (ff) + : "r" (f)); return (ff); } # endif @@ -229,6 +230,16 @@ struct auxentry { unsigned long type; union auxval value; }; # define WANT_AT_HWCAP(_) _(AT_HWCAP, u, hwcap) #endif +#if defined(AT_HWCAP) && CPUFAM_ARM64 +# define WANT_ANY 1 +# define WANT_AT_HWCAP(_) _(AT_HWCAP, u, hwcap) +#endif + +#if defined(AT_HWCAP2) && CPUFAM_ARMEL +# define WANT_ANY 1 +# define WANT_AT_HWCAP2(_) _(AT_HWCAP2, u, hwcap2) +#endif + /* If we couldn't find any interesting entries then we can switch all of this * machinery off. Also do that if we have no means for atomic updates. */ @@ -246,6 +257,9 @@ static unsigned hwcaps = 0; #ifndef WANT_AT_HWCAP # define WANT_AT_HWCAP(_) #endif +#ifndef WANT_AT_HWCAP2 +# define WANT_AT_HWCAP2(_) +#endif /* For each CPU family, define two lists. * @@ -261,12 +275,20 @@ static unsigned hwcaps = 0; */ #if CPUFAM_ARMEL # define WANTAUX(_) \ - WANT_AT_HWCAP(_) + WANT_AT_HWCAP(_) \ + WANT_AT_HWCAP2(_) # define CAPMAP(_) \ _(ARM_VFP, "arm:vfp") \ _(ARM_NEON, "arm:neon") \ _(ARM_V4, "arm:v4") \ - _(ARM_D32, "arm:d32") + _(ARM_D32, "arm:d32") \ + _(ARM_AES, "arm:aes") +#endif +#if CPUFAM_ARM64 +# define WANTAUX(_) \ + WANT_AT_HWCAP(_) +# define CAPMAP(_) \ + _(ARM_AES, "arm:aes") #endif /* Build the bitmask for `hwcaps' from the `CAPMAP' list. */ @@ -319,8 +341,8 @@ static void probe_hwcaps(void) /* Shiny new libc lets us request individual entry types. This is almost * too easy. */ -# define CAP__GET(type, slot, ubranch) \ - probed.slot.ubranch = (AUXUTYPE_##ubranch)getauxval(type); +# define CAP__GET(type, ubranch, slot) \ + probed.slot = (AUXUTYPE_##ubranch)getauxval(type); WANTAUX(CAP__GET) #else /* Otherwise we're a bit stuck, really. Modern Linux kernels make a copy @@ -377,6 +399,12 @@ static void probe_hwcaps(void) if (probed.hwcap & HWCAP_NEON) hw |= HF_ARM_NEON; if (probed.hwcap & HWCAP_VFPD32) hw |= HF_ARM_D32; if (probed.hwcap & HWCAP_VFPv4) hw |= HF_ARM_V4; +# ifdef HWCAP2_AES + if (probed.hwcap2 & HWCAP2_AES) hw |= HF_ARM_AES; +# endif +#endif +#if CPUFAM_ARM64 + if (probed.hwcap & HWCAP_AES) hw |= HF_ARM_AES; #endif /* Store the bitmask of features we probed for everyone to see. */ @@ -518,6 +546,9 @@ int cpu_feature_p(int feat) cpuid_features_p(CPUID1D_SSE2, CPUID1C_AESNI)); CASE_CPUFEAT(X86_RDRAND, "x86:rdrand", cpuid_features_p(0, CPUID1C_RDRAND)); + CASE_CPUFEAT(X86_AVX, "x86:avx", + xmm_registers_available_p() && + cpuid_features_p(0, CPUID1C_AVX)); #endif #ifdef CAPMAP # define FEATP__CASE(feat, tok) \