From 00b52725bf15e844360d07248de1d8cbbc1e165e Mon Sep 17 00:00:00 2001
From: Mark Wooding
Date: Sat, 2 Mar 2024 12:01:58 +0000
Subject: [PATCH] base/dispatch.c, base/dispatch.h: Add proper detection for AVX2.

No plans to use this for anything yet.
---
Review note: hand-edited after generation.  The 32-bit x86 path of
`dispatch_x86ish_xgetbv' no longer overwrites the callee-saved register
edi, and explanatory comments were added.  Hunk sizes and the diffstat
have been recomputed; the post-image blob IDs on the `index' lines are
therefore stale.

 base/dispatch-x86ish.S | 40 ++++++++++++++++++++++++++++++++++++++++
 base/dispatch.c        | 38 ++++++++++++++++++++++++++++++++++----
 base/dispatch.h        |  3 ++-
 3 files changed, 76 insertions(+), 5 deletions(-)

diff --git a/base/dispatch-x86ish.S b/base/dispatch-x86ish.S
index 57d8d32b..9510f969 100644
--- a/base/dispatch-x86ish.S
+++ b/base/dispatch-x86ish.S
@@ -155,6 +155,46 @@ FUNC(dispatch_x86ish_xmmregisters_p)
 ENDFUNC
 
 ///--------------------------------------------------------------------------
+/// Checking extended control registers.
+
+FUNC(dispatch_x86ish_xgetbv)
+	// Call with two arguments: a pointer Z_OUT to 8 bytes of output space, and
+	// a 32-bit integer C.  Read the 64-bit value of XCR(C), and store it
+	// at Z_OUT.
+	//
+	// On 32-bit x86, every caller-saved register (eax, ecx, edx) is
+	// claimed by `xgetbv' itself, so the output pointer is only fetched
+	// from the stack afterwards, once ecx is free again.  This avoids
+	// clobbering a callee-saved register such as edi.
+
+#if CPUFAM_X86
+#  define Z_OUT ecx
+	mov	ecx, [esp + 8]
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+#  define Z_OUT rdi
+	mov	ecx, esi
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+#  define Z_OUT r8
+	mov	r8, rcx
+	mov	ecx, edx
+#endif
+	endprologue
+
+	xgetbv
+#if CPUFAM_X86
+	mov	ecx, [esp + 4]
+#endif
+	mov	[Z_OUT + 0], eax
+	mov	[Z_OUT + 4], edx
+
+	ret
+
+#undef Z_OUT
+ENDFUNC
+
+///--------------------------------------------------------------------------
 /// Checking `rdrand'.
 
 FUNC(dispatch_x86ish_rdrand)
diff --git a/base/dispatch.c b/base/dispatch.c
index 4ce60159..f1859c2b 100644
--- a/base/dispatch.c
+++ b/base/dispatch.c
@@ -53,16 +53,20 @@ enum {
 #  define CPUID1C_PCLMUL (1u << 1)
 #  define CPUID1C_SSSE3 (1u << 9)
 #  define CPUID1C_AESNI (1u << 25)
+#  define CPUID1C_OSXSAVE (1u << 27)
 #  define CPUID1C_AVX (1u << 28)
 #  define CPUID1C_RDRAND (1u << 30)
 
   CPUID_7_0_B,				/* eax = 7, ecx = 0 => ebx&?? */
+#  define CPUID70B_AVX2 (1u << 5)
 #  define CPUID70B_RDSEED (1u << 18)
 };
 
 struct cpuid { unsigned a, b, c, d; };
+struct xcr { unsigned lo, hi; };
 extern int dispatch_x86ish_cpuid(struct cpuid *, unsigned a, unsigned c);
 extern int dispatch_x86ish_xmmregisters_p(void);
+extern int dispatch_x86ish_xgetbv(struct xcr *z_out, unsigned c);
 extern int dispatch_x86ish_rdrand(unsigned op, unsigned *);
 
 static void cpuid(struct cpuid *cc, unsigned a, unsigned c)
@@ -111,12 +115,12 @@ static int cpuid_feature_p(unsigned leaf, unsigned bits)
   return ((r&bits) == bits);
 }
 
-/* --- @xmm_registers_available_p@ --- *
+/* --- @{x,y}mm_registers_available_p@ --- *
  *
  * Arguments:	---
  *
- * Returns:	Nonzero if the operating system has made the XMM registers
- *		available for use.
+ * Returns:	Nonzero if the operating system has made the XMM or YMM
+ *		registers available for use.
  */
 
 static int xmm_registers_available_p(void)
@@ -127,6 +131,28 @@ static int xmm_registers_available_p(void)
   return (f);
 }
 
+static int ymm_registers_available_p(void)
+{
+  struct xcr xcr0;
+  int f;
+
+  /* `xgetbv' is only usable once the operating system has set
+   * CR4.OSXSAVE, which it advertises through this CPUID bit.
+   */
+  f = cpuid_feature_p(CPUID_1_C, CPUID1C_OSXSAVE);
+  dispatch_debug("XGETBV %savailable", f ? "" : "not ");
+  if (!f) return (0);
+
+  /* XCR0 bits 1 and 2 cover the SSE (XMM) and AVX (upper YMM) state
+   * respectively; the operating system must have enabled both.
+   */
+  dispatch_x86ish_xgetbv(&xcr0, 0); f = (xcr0.lo&0x06) == 0x06;
+  dispatch_debug("YMM state %senabled", f ? "" : "not ");
+  if (!f) return (0);
+
+  return (1);
+}
+
 /* --- @rdrand_works_p@ --- *
  *
  *
@@ -527,7 +553,11 @@ int cpu_feature_p(int feat)
 		 rdrand_works_p(OP_RDRAND));
     CASE_CPUFEAT(X86_AVX, "x86:avx",
 		 cpuid_feature_p(CPUID_1_C, CPUID1C_AVX) &&
-		 xmm_registers_available_p());
+		 ymm_registers_available_p());
+    CASE_CPUFEAT(X86_AVX2, "x86:avx2",
+		 cpuid_feature_p(CPUID_1_C, CPUID1C_AVX) &&
+		 cpuid_feature_p(CPUID_7_0_B, CPUID70B_AVX2) &&
+		 ymm_registers_available_p());
     CASE_CPUFEAT(X86_SSSE3, "x86:ssse3",
 		 cpuid_feature_p(CPUID_1_C, CPUID1C_SSSE3) &&
 		 xmm_registers_available_p());
diff --git a/base/dispatch.h b/base/dispatch.h
index 2c78d92a..9f335345 100644
--- a/base/dispatch.h
+++ b/base/dispatch.h
@@ -186,7 +186,8 @@ enum {
   CPUFEAT_X86_SSSE3,			/* Supplementary SSE 3 */
   CPUFEAT_X86_PCLMUL,			/* Carry-less multiplication */
   CPUFEAT_ARM_PMULL,			/* Polynomial multiplication */
-  CPUFEAT_X86_RDSEED			/* Built-in raw entropy source */
+  CPUFEAT_X86_RDSEED,			/* Built-in raw entropy source */
+  CPUFEAT_X86_AVX2			/* AVX 2 (256-bit integer ops) */
 };
 
 extern int cpu_feature_p(int /*feat*/);
-- 
2.11.0