X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/81ceb2c35de440e701d2f4e6960001395d2b7e97..HEAD:/base/dispatch.c

diff --git a/base/dispatch.c b/base/dispatch.c
index 4ce60159..f1859c2b 100644
--- a/base/dispatch.c
+++ b/base/dispatch.c
@@ -53,16 +53,20 @@ enum {
 # define CPUID1C_PCLMUL (1u << 1)
 # define CPUID1C_SSSE3 (1u << 9)
 # define CPUID1C_AESNI (1u << 25)
+# define CPUID1C_OSXSAVE (1u << 27)
 # define CPUID1C_AVX (1u << 28)
 # define CPUID1C_RDRAND (1u << 30)
 
   CPUID_7_0_B, /* eax = 7, ecx = 0 => ebx&?? */
+# define CPUID70B_AVX2 (1u << 5)
 # define CPUID70B_RDSEED (1u << 18)
 };
 
 struct cpuid { unsigned a, b, c, d; };
+struct xcr { unsigned lo, hi; };
 extern int dispatch_x86ish_cpuid(struct cpuid *, unsigned a, unsigned c);
 extern int dispatch_x86ish_xmmregisters_p(void);
+extern int dispatch_x86ish_xgetbv(struct xcr *z_out, unsigned c);
 extern int dispatch_x86ish_rdrand(unsigned op, unsigned *);
 
 static void cpuid(struct cpuid *cc, unsigned a, unsigned c)
@@ -111,12 +115,12 @@ static int cpuid_feature_p(unsigned leaf, unsigned bits)
   return ((r&bits) == bits);
 }
 
-/* --- @xmm_registers_available_p@ --- *
+/* --- @{x,y}mm_registers_available_p@ --- *
  *
  * Arguments: ---
  *
- * Returns: Nonzero if the operating system has made the XMM registers
- * available for use.
+ * Returns: Nonzero if the operating system has made the XMM or YMM
+ * registers available for use.
  */
 
 static int xmm_registers_available_p(void)
@@ -127,6 +131,22 @@ static int xmm_registers_available_p(void)
   return (f);
 }
 
+static int ymm_registers_available_p(void)
+{
+  struct xcr xcr0;
+  int f;
+
+  f = cpuid_feature_p(CPUID_1_C, CPUID1C_OSXSAVE);
+  dispatch_debug("XGETBV %savailable", f ? "" : "not ");
+  if (!f) return (0);
+
+  dispatch_x86ish_xgetbv(&xcr0, 0); f = (xcr0.lo&0x06) == 0x06;
+  dispatch_debug("YMM state %senabled", f ? "" : "not ");
+  if (!f) return (0);
+
+  return (1);
+}
+
 /* --- @rdrand_works_p@ --- *
  *
  *
@@ -527,7 +547,11 @@ int cpu_feature_p(int feat)
                  rdrand_works_p(OP_RDRAND));
     CASE_CPUFEAT(X86_AVX, "x86:avx",
                  cpuid_feature_p(CPUID_1_C, CPUID1C_AVX) &&
-                 xmm_registers_available_p());
+                 ymm_registers_available_p());
+    CASE_CPUFEAT(X86_AVX2, "x86:avx2",
+                 cpuid_feature_p(CPUID_1_C, CPUID1C_AVX) &&
+                 cpuid_feature_p(CPUID_7_0_B, CPUID70B_AVX2) &&
+                 ymm_registers_available_p());
     CASE_CPUFEAT(X86_SSSE3, "x86:ssse3",
                  cpuid_feature_p(CPUID_1_C, CPUID1C_SSSE3) &&
                  xmm_registers_available_p());