From: Mark Wooding Date: Thu, 26 Sep 2019 11:11:50 +0000 (+0100) Subject: Mostly abolish inline assembler code in favour of dedicated files. X-Git-Tag: 2.6.0~65 X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/a3ad4421ce9e122bc9079ffc6e60f45b1b06c1d7 Mostly abolish inline assembler code in favour of dedicated files. Move the fancy feature probing from `dispatch.c'. This makes it easier to understand because it's not covered in `%' sigils and backwards, and also simplifies things because we have better machinery for papering over the differences between 32- and 64-bit instruction sets. Also move the `rdrand' code from `rand.c'. This makes things significantly more complicated because it calls back into C, but it does improve availability of a security feature, so that's good. That leaves only a use of `rdtsc' in `perftest.c', which is hardly critical, and the `rbit' in the ARM64 `gcm.c' code, which has a slightly slower portable alternative. --- diff --git a/base/Makefile.am b/base/Makefile.am index 145f9c35..8b7c0fcd 100644 --- a/base/Makefile.am +++ b/base/Makefile.am @@ -45,6 +45,12 @@ libbase_la_SOURCES += ct.c ct-test.c ## CPU-specific dispatch. pkginclude_HEADERS += dispatch.h libbase_la_SOURCES += dispatch.c +if CPUFAM_X86 +libbase_la_SOURCES += dispatch-x86ish.S +endif +if CPUFAM_AMD64 +libbase_la_SOURCES += dispatch-x86ish.S +endif ## Acceptable key-size descriptions. pkginclude_HEADERS += keysz.h diff --git a/base/dispatch-x86ish.S b/base/dispatch-x86ish.S new file mode 100644 index 00000000..2c267b62 --- /dev/null +++ b/base/dispatch-x86ish.S @@ -0,0 +1,156 @@ +/// -*- mode: asm; asm-comment-char: ?/ -*- +/// +/// CPU dispatch support for x86 +/// +/// (c) 2019 Straylight/Edgeware +/// + +///----- Licensing notice --------------------------------------------------- +/// +/// This file is part of Catacomb. 
+/// +/// Catacomb is free software: you can redistribute it and/or modify it +/// under the terms of the GNU Library General Public License as published +/// by the Free Software Foundation; either version 2 of the License, or +/// (at your option) any later version. +/// +/// Catacomb is distributed in the hope that it will be useful, but +/// WITHOUT ANY WARRANTY; without even the implied warranty of +/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +/// Library General Public License for more details. +/// +/// You should have received a copy of the GNU Library General Public +/// License along with Catacomb. If not, write to the Free Software +/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +/// USA. + +///-------------------------------------------------------------------------- +/// Preliminaries. + +#include "config.h" +#include "asm-common.h" + + EFLAGS_ID = 1 << 21 + + .text + +///-------------------------------------------------------------------------- +/// Probing for CPUID. + +FUNC(dispatch_x86ish_cpuid) + // Enter with a pointer to 16 bytes of storage for the output A, B, + // C, D values in the first argument, and input A and C values in the + // second and third. Fill the output buffer with `cpuid' results and + // return zero if we can; otherwise fill with zero and return -1. + +#if CPUFAM_X86 + pushreg ebx + pushreg edi + mov edi, [esp + 12] + mov eax, [esp + 16] + mov ecx, [esp + 20] +# define OUT edi +#endif +#if CPUFAM_AMD64 && ABI_SYSV + pushreg rbx + mov eax, esi + mov ecx, edx +# define OUT rdi +#endif +#if CPUFAM_AMD64 && ABI_WIN + pushreg rbx + mov r9, rcx + mov eax, edx + mov ecx, r8d +# define OUT r9 +#endif + endprologue + + // First, check that this is even a thing, using the complicated + // dance with the flags register. 
+ pushf + pop R_d(r) // current flags in d + + or R_d(r), EFLAGS_ID // force the id bit on and check it + push R_d(r) + popf + pushf + pop R_d(r) + test edx, EFLAGS_ID + jz 8f + + and R_d(r), ~EFLAGS_ID // force the id bit off and check it + push R_d(r) + popf + pushf + pop R_d(r) + test edx, EFLAGS_ID + jnz 8f + + // OK, that seemed to work. + cpuid + + mov [OUT + 0], eax + mov [OUT + 4], ebx + mov [OUT + 8], ecx + mov [OUT + 12], edx + xor eax, eax + + // We're done. +9: +#if CPUFAM_X86 + popreg edi + popreg ebx +#endif +#if CPUFAM_AMD64 + popreg rbx +#endif + ret + + // Failed. +8: xor eax, eax + mov [OUT + 0], eax + mov [OUT + 4], eax + mov [OUT + 8], eax + mov [OUT + 12], eax + mov eax, -1 + jmp 9b +ENDFUNC + +///-------------------------------------------------------------------------- +/// Probing for XMM register availability. + +FUNC(dispatch_x86ish_xmmregisters_p) + // Enter with no arguments. Return nonzero if the XMM registers are + // usable. + + pushreg R_bp(r) + setfp + stalloc 512 + and R_sp(r), ~15 + endprologue + + // Save the floating point and SIMD registers, and try to clobber + // xmm0. + fxsave [R_sp(r)] + mov eax, [R_sp(r) + 160] + xor dword ptr [R_sp(r) + 160], 0xaaaa5555 + fxrstor [R_sp(r)] + + // Save them again, and read back the low word of xmm0. Undo the + // clobbering and restore. + fxsave [R_sp(r)] + mov ecx, [R_sp(r) + 160] + mov [R_sp(r) + 160], eax + fxrstor [R_sp(r)] + + // The registers are live if we read different things. + xor eax, ecx + + // Done. 
+ dropfp + popreg R_bp(r) + ret +ENDFUNC + +///----- That's all, folks -------------------------------------------------- diff --git a/base/dispatch.c b/base/dispatch.c index bea97b40..c8bbc0b9 100644 --- a/base/dispatch.c +++ b/base/dispatch.c @@ -43,7 +43,6 @@ #if CPUFAM_X86 || CPUFAM_AMD64 -# define EFLAGS_ID (1u << 21) # define CPUID1D_SSE2 (1u << 26) # define CPUID1D_FXSR (1u << 24) # define CPUID1C_PCLMUL (1u << 1) @@ -53,83 +52,17 @@ # define CPUID1C_RDRAND (1u << 30) struct cpuid { unsigned a, b, c, d; }; - -/* --- @cpuid@ --- * - * - * Arguments: @struct cpuid *cc@ = where to write the result - * @unsigned a, c@ = EAX and ECX registers to set - * - * Returns: --- - * - * Use: Minimal C wrapper around the x86 `CPUID' instruction. Checks - * that the instruction is actually available before invoking - * it; fills the output structure with zero if it's not going to - * work. - */ - -#ifdef __GNUC__ -# if CPUFAM_X86 -static __inline__ unsigned getflags(void) - { unsigned f; __asm__ ("pushf; popl %0" : "=g" (f)); return (f); } -static __inline__ unsigned setflags(unsigned f) -{ - unsigned ff; - __asm__ ("pushf; pushl %1; popf; pushf; popl %0; popf" - : "=r" (ff) - : "r" (f)); - return (ff); -} -# else -static __inline__ unsigned long getflags(void) - { unsigned long f; __asm__ ("pushf; popq %0" : "=g" (f)); return (f); } -static __inline__ unsigned long long setflags(unsigned long f) -{ - unsigned long ff; - __asm__ ("pushf; pushq %1; popf; pushf; popq %0; popf" - : "=r" (ff) - : "r" (f)); - return (ff); -} -# endif -#endif +extern int dispatch_x86ish_cpuid(struct cpuid *, unsigned a, unsigned c); +extern int dispatch_x86ish_xmmregisters_p(void); static void cpuid(struct cpuid *cc, unsigned a, unsigned c) { -#ifdef __GNUC__ - unsigned f; -#endif - - cc->a = cc->b = cc->c = cc->d = 0; - -#ifdef __GNUC__ - /* Stupid dance to detect whether the CPUID instruction is available. 
*/ - f = getflags(); - if (!(setflags(f | EFLAGS_ID) & EFLAGS_ID) || - setflags(f & ~EFLAGS_ID) & EFLAGS_ID) { + int rc = dispatch_x86ish_cpuid(cc, a, c); + if (rc) dispatch_debug("CPUID instruction not available"); - return; - } - setflags(f); - - /* Alas, EBX is magical in PIC code, so abuse ESI instead. This isn't - * pretty, but it works. - */ -# if CPUFAM_X86 - __asm__ ("pushl %%ebx; cpuid; movl %%ebx, %%esi; popl %%ebx" - : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d) - : "a" (a) , "c" (c)); -# elif CPUFAM_AMD64 - __asm__ ("pushq %%rbx; cpuid; movl %%ebx, %%esi; popq %%rbx" - : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d) - : "a" (a) , "c" (c)); -# else -# error "I'm confused." -# endif - dispatch_debug("CPUID(%08x, %08x) -> %08x, %08x, %08x, %08x", - a, c, cc->a, cc->b, cc->c, cc->d); -#else - dispatch_debug("GNU inline assembler not available; can't CPUID"); -#endif + else + dispatch_debug("CPUID(%08x, %08x) -> %08x, %08x, %08x, %08x", + a, c, cc->a, cc->b, cc->c, cc->d); } static unsigned cpuid_maxleaf(void) @@ -162,43 +95,10 @@ static int cpuid_features_p(unsigned dbits, unsigned cbits) static int xmm_registers_available_p(void) { -#ifdef __GNUC__ - unsigned f; - /* This hack is by Agner Fog. Use FXSAVE/FXRSTOR to figure out whether the - * XMM registers are actually alive. 
- */ - if (!cpuid_features_p(CPUID1D_FXSR, 0)) return (0); -# if CPUFAM_X86 - __asm__ ("movl %%esp, %%edx; subl $512, %%esp; andl $~15, %%esp\n" - "fxsave (%%esp)\n" - "movl 160(%%esp), %%eax; xorl $0xaaaa5555, 160(%%esp)\n" - "fxrstor (%%esp); fxsave (%%esp)\n" - "movl 160(%%esp), %%ecx; movl %%eax, 160(%%esp)\n" - "fxrstor (%%esp); movl %%edx, %%esp\n" - "xorl %%ecx, %%eax" - : "=a" (f) - : /* no inputs */ - : "%ecx", "%edx"); -# elif CPUFAM_AMD64 - __asm__ ("movq %%rsp, %%rdx; subq $512, %%rsp; andq $~15, %%rsp\n" - "fxsave (%%rsp)\n" - "movl 160(%%rsp), %%eax; xorl $0xaaaa5555, 160(%%rsp)\n" - "fxrstor (%%rsp); fxsave (%%rsp)\n" - "movl 160(%%rsp), %%ecx; movl %%eax, 160(%%rsp)\n" - "fxrstor (%%rsp); movq %%rdx, %%rsp\n" - "xorl %%ecx, %%eax" - : "=a" (f) - : /* no inputs */ - : "%ecx", "%rdx"); -# else -# error "I'm confused." -# endif + int f = dispatch_x86ish_xmmregisters_p(); + dispatch_debug("XMM registers %savailable", f ? "" : "not "); return (f); -#else - dispatch_debug("GNU inline assembler not available; can't check for XMM"); - return (0); -#endif } #endif diff --git a/rand/Makefile.am b/rand/Makefile.am index d97749e9..e02ccc59 100644 --- a/rand/Makefile.am +++ b/rand/Makefile.am @@ -67,6 +67,12 @@ librand_la_SOURCES += noise.c ## Cryptographic laundering for true random data generation. pkginclude_HEADERS += rand.h librand_la_SOURCES += rand.c +if CPUFAM_X86 +librand_la_SOURCES += rand-x86ish.S +endif +if CPUFAM_AMD64 +librand_la_SOURCES += rand-x86ish.S +endif librand_la_SOURCES += randgen.c ## The SSL v3 pseudorandom function. diff --git a/rand/rand-x86ish.S b/rand/rand-x86ish.S new file mode 100644 index 00000000..829bc2cd --- /dev/null +++ b/rand/rand-x86ish.S @@ -0,0 +1,109 @@ +/// -*- mode: asm; asm-comment-char: ?/ -*- +/// +/// Random-number support for x86 +/// +/// (c) 2019 Straylight/Edgeware +/// + +///----- Licensing notice --------------------------------------------------- +/// +/// This file is part of Catacomb. 
+/// +/// Catacomb is free software: you can redistribute it and/or modify it +/// under the terms of the GNU Library General Public License as published +/// by the Free Software Foundation; either version 2 of the License, or +/// (at your option) any later version. +/// +/// Catacomb is distributed in the hope that it will be useful, but +/// WITHOUT ANY WARRANTY; without even the implied warranty of +/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +/// Library General Public License for more details. +/// +/// You should have received a copy of the GNU Library General Public +/// License along with Catacomb. If not, write to the Free Software +/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +/// USA. + +///-------------------------------------------------------------------------- +/// Preliminaries. + +#include "config.h" +#include "asm-common.h" + + .extern F(rand_add) + + .text + +///-------------------------------------------------------------------------- +/// Quick random generation. + +FUNC(rand_quick_x86ish_rdrand) + // Enter with a pointer to the random context in the first argument. + // Return zero on success, or -1 on error. + +#if CPUFAM_X86 + mov edx, [esp + 4] + stalloc 28 +# define COUNT ecx +#endif +#if CPUFAM_AMD64 && ABI_SYSV + stalloc 8 +# define COUNT ecx +#endif +#if CPUFAM_AMD64 && ABI_WIN + stalloc 40 +# define COUNT r8d +#endif + endprologue + + // Try to fetch a random number. + mov COUNT, 16 +0: rdrand R_a(r) + jc 1f + dec COUNT + jnz 0b + + // Failed. + mov eax, -1 + jmp 9f + + // Success. 
+1: +#if CPUFAM_X86 + mov [esp + 16], eax + lea ecx, [esp + 16] + mov dword ptr [esp + 12], 32 + mov dword ptr [esp + 8], 4 + mov [esp + 4], ecx + mov [esp + 0], edx +#endif +#if CPUFAM_AMD64 && ABI_SYSV + mov [rsp + 0], rax + mov rsi, rsp + mov edx, 8 + mov ecx, 64 +#endif +#if CPUFAM_AMD64 && ABI_WIN + mov [rsp + 32], rax + lea rdx, [rsp + 32] + mov r8d, 8 + mov r9d, 64 +#endif + callext F(rand_add) + xor eax, eax + + // Done. +9: +#if CPUFAM_X86 + stfree 28 +#endif +#if CPUFAM_AMD64 && ABI_SYSV + stfree 8 +#endif +#if CPUFAM_AMD64 && ABI_WIN + stfree 40 +#endif + ret +ENDFUNC + +///----- That's all, folks -------------------------------------------------- diff --git a/rand/rand.c b/rand/rand.c index f9f16d5d..c2540f7e 100644 --- a/rand/rand.c +++ b/rand/rand.c @@ -161,24 +161,14 @@ CPU_DISPATCH(static, return, int, quick, (rand_pool *r), (r), static int trivial_quick(rand_pool *r) { return (-1); } -#if __GNUC__ && (CPUFAM_X86 || CPUFAM_AMD64) -static int rdrand_quick(rand_pool *r) -{ - unsigned long rr; - int i = 16; - - __asm__ ("0: rdrand %0; jc 9f; dec %1; jnz 0b; 9:" - : "=r" (rr), "=r" (i) : "1" (i) : "cc"); - if (!i) return (-1); - rand_add(r, &rr, sizeof(rr), 8*sizeof(rr)); - return (0); -} +#if CPUFAM_X86 || CPUFAM_AMD64 +extern int rand_quick_x86ish_rdrand(rand_pool */*r*/); #endif static quick__functype *pick_quick(void) { -#if __GNUC__ && (CPUFAM_X86 || CPUFAM_AMD64) - DISPATCH_PICK_COND(rand_quick, rdrand_quick, +#if CPUFAM_X86 || CPUFAM_AMD64 + DISPATCH_PICK_COND(rand_quick, rand_quick_x86ish_rdrand, cpu_feature_p(CPUFEAT_X86_RDRAND)); #endif DISPATCH_PICK_FALLBACK(rand_quick, trivial_quick);