## CPU-specific dispatch.
pkginclude_HEADERS += dispatch.h
libbase_la_SOURCES += dispatch.c
+if CPUFAM_X86
+libbase_la_SOURCES += dispatch-x86ish.S
+endif
+if CPUFAM_AMD64
+libbase_la_SOURCES += dispatch-x86ish.S
+endif
## Acceptable key-size descriptions.
pkginclude_HEADERS += keysz.h
--- /dev/null
+/// -*- mode: asm; asm-comment-char: ?/ -*-
+///
+/// CPU dispatch support for x86
+///
+/// (c) 2019 Straylight/Edgeware
+///
+
+///----- Licensing notice ---------------------------------------------------
+///
+/// This file is part of Catacomb.
+///
+/// Catacomb is free software: you can redistribute it and/or modify it
+/// under the terms of the GNU Library General Public License as published
+/// by the Free Software Foundation; either version 2 of the License, or
+/// (at your option) any later version.
+///
+/// Catacomb is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+///
+/// You should have received a copy of the GNU Library General Public
+/// License along with Catacomb. If not, write to the Free Software
+/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+/// USA.
+
+///--------------------------------------------------------------------------
+/// Preliminaries.
+
+#include "config.h"
+#include "asm-common.h"
+
+ EFLAGS_ID = 1 << 21
+
+ .text
+
+///--------------------------------------------------------------------------
+/// Probing for CPUID.
+
+FUNC(dispatch_x86ish_cpuid)
+ // Enter with a pointer to 16 bytes of storage for the output A, B,
+ // C, D values in the first argument, and input A and C values in the
+ // second and third. Fill the output buffer with `cpuid' results and
+ // return zero if we can; otherwise fill with zero and return -1.
+ //
+ // The prologue normalizes the three supported calling conventions:
+ // afterwards the input A value is in eax, the input C value is in
+ // ecx, and OUT names the register holding the output pointer. The
+ // b register is saved in every variant because `cpuid' overwrites
+ // it.
+
+#if CPUFAM_X86
+ pushreg ebx
+ pushreg edi // edi serves as OUT below, so save it too
+ mov edi, [esp + 12] // stack arguments start at esp + 12: two
+ mov eax, [esp + 16] // saved registers plus the return address
+ mov ecx, [esp + 20]
+# define OUT edi
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+ pushreg rbx
+ mov eax, esi // second argument: input A
+ mov ecx, edx // third argument: input C
+# define OUT rdi
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+ pushreg rbx
+ mov r9, rcx // first argument; ecx is about to be reused
+ mov eax, edx // second argument: input A
+ mov ecx, r8d // third argument: input C
+# define OUT r9
+#endif
+ endprologue
+
+ // First, check that this is even a thing, using the complicated
+ // dance with the flags register. The `cpuid' instruction exists
+ // only if software is able to change the ID bit in the flags, so
+ // try setting it and then clearing it, reading the flags back each
+ // time. R_d(r) is presumably the native-width d register (the
+ // tests below look at its low half, edx); the inputs have already
+ // been moved out of it.
+ pushf
+ pop R_d(r) // current flags in d
+
+ or R_d(r), EFLAGS_ID // force the id bit on and check it
+ push R_d(r)
+ popf
+ pushf
+ pop R_d(r)
+ test edx, EFLAGS_ID
+ jz 8f // bit wouldn't set: no `cpuid'
+
+ and R_d(r), ~EFLAGS_ID // force the id bit off and check it
+ push R_d(r)
+ popf
+ pushf
+ pop R_d(r)
+ test edx, EFLAGS_ID
+ jnz 8f // bit wouldn't clear: no `cpuid'
+
+ // NOTE(review): the flags read at the start are not put back, so the
+ // ID bit is left clear from here on. Presumably harmless; confirm
+ // that nothing depends on its previous state.
+
+ // OK, that seemed to work.
+ cpuid
+
+ mov [OUT + 0], eax
+ mov [OUT + 4], ebx
+ mov [OUT + 8], ecx
+ mov [OUT + 12], edx
+ xor eax, eax // return 0: success
+
+ // We're done. Restore the registers saved in the prologue, in
+ // reverse order. Both exits come through here.
+9:
+#if CPUFAM_X86
+ popreg edi
+ popreg ebx
+#endif
+#if CPUFAM_AMD64
+ popreg rbx
+#endif
+ ret
+
+ // Failed. Zero all four output words and return -1 through the
+ // common epilogue.
+8: xor eax, eax
+ mov [OUT + 0], eax
+ mov [OUT + 4], eax
+ mov [OUT + 8], eax
+ mov [OUT + 12], eax
+ mov eax, -1
+ jmp 9b
+ENDFUNC
+
+///--------------------------------------------------------------------------
+/// Probing for XMM register availability.
+
+FUNC(dispatch_x86ish_xmmregisters_p)
+ // Enter with no arguments. Return nonzero if the XMM registers are
+ // usable.
+ //
+ // The test flips bits in the saved image of xmm0, reloads the image,
+ // saves it again, and checks whether the change survived the round
+ // trip. The original contents are put back before returning.
+ //
+ // NOTE(review): `fxsave' and `fxrstor' are executed unconditionally
+ // here, so presumably the caller must already know that they are
+ // supported -- TODO confirm.
+
+ pushreg R_bp(r)
+ setfp
+ stalloc 512 // room for one 512-byte save area...
+ and R_sp(r), ~15 // ...aligned to a 16-byte boundary
+ endprologue
+
+ // Save the floating point and SIMD registers, and try to clobber
+ // xmm0.
+ fxsave [R_sp(r)]
+ mov eax, [R_sp(r) + 160] // remember the original low word
+ xor dword ptr [R_sp(r) + 160], 0xaaaa5555
+ fxrstor [R_sp(r)]
+
+ // Save them again, and read back the low word of xmm0. Undo the
+ // clobbering and restore.
+ fxsave [R_sp(r)]
+ mov ecx, [R_sp(r) + 160] // what the register now holds
+ mov [R_sp(r) + 160], eax // put the original word back
+ fxrstor [R_sp(r)]
+
+ // The registers are live if we read different things: eax holds the
+ // original word and ecx the value read back after the modification,
+ // so the result is nonzero exactly when the modification stuck.
+ xor eax, ecx
+
+ // Done. (dropfp presumably restores the stack pointer recorded by
+ // setfp, undoing both the allocation and the alignment.)
+ dropfp
+ popreg R_bp(r)
+ ret
+ENDFUNC
+
+///----- That's all, folks --------------------------------------------------
#if CPUFAM_X86 || CPUFAM_AMD64
-# define EFLAGS_ID (1u << 21)
# define CPUID1D_SSE2 (1u << 26)
# define CPUID1D_FXSR (1u << 24)
# define CPUID1C_PCLMUL (1u << 1)
# define CPUID1C_RDRAND (1u << 30)
struct cpuid { unsigned a, b, c, d; };
-
-/* --- @cpuid@ --- *
- *
- * Arguments: @struct cpuid *cc@ = where to write the result
- * @unsigned a, c@ = EAX and ECX registers to set
- *
- * Returns: ---
- *
- * Use: Minimal C wrapper around the x86 `CPUID' instruction. Checks
- * that the instruction is actually available before invoking
- * it; fills the output structure with zero if it's not going to
- * work.
- */
-
-#ifdef __GNUC__
-# if CPUFAM_X86
-static __inline__ unsigned getflags(void)
- { unsigned f; __asm__ ("pushf; popl %0" : "=g" (f)); return (f); }
-static __inline__ unsigned setflags(unsigned f)
-{
- unsigned ff;
- __asm__ ("pushf; pushl %1; popf; pushf; popl %0; popf"
- : "=r" (ff)
- : "r" (f));
- return (ff);
-}
-# else
-static __inline__ unsigned long getflags(void)
- { unsigned long f; __asm__ ("pushf; popq %0" : "=g" (f)); return (f); }
-static __inline__ unsigned long long setflags(unsigned long f)
-{
- unsigned long ff;
- __asm__ ("pushf; pushq %1; popf; pushf; popq %0; popf"
- : "=r" (ff)
- : "r" (f));
- return (ff);
-}
-# endif
-#endif
+/* Assembler helpers from `dispatch-x86ish.S'. @dispatch_x86ish_cpuid@
+ * returns zero on success, or %$-1$% (with the output zeroed) if the
+ * `cpuid' instruction isn't available; @dispatch_x86ish_xmmregisters_p@
+ * returns nonzero if the XMM registers are usable.
+ */
+extern int dispatch_x86ish_cpuid(struct cpuid *, unsigned a, unsigned c);
+extern int dispatch_x86ish_xmmregisters_p(void);
static void cpuid(struct cpuid *cc, unsigned a, unsigned c)
{
-#ifdef __GNUC__
- unsigned f;
-#endif
-
- cc->a = cc->b = cc->c = cc->d = 0;
-
-#ifdef __GNUC__
- /* Stupid dance to detect whether the CPUID instruction is available. */
- f = getflags();
- if (!(setflags(f | EFLAGS_ID) & EFLAGS_ID) ||
- setflags(f & ~EFLAGS_ID) & EFLAGS_ID) {
+ /* Both the availability check and the `cpuid' instruction itself are
+ * done by the assembler helper. It returns nonzero, having filled
+ * `*cc' with zeros, if the instruction can't be used; all that's left
+ * to do here is report the outcome.
+ */
+ int rc = dispatch_x86ish_cpuid(cc, a, c);
+ if (rc)
dispatch_debug("CPUID instruction not available");
- return;
- }
- setflags(f);
-
- /* Alas, EBX is magical in PIC code, so abuse ESI instead. This isn't
- * pretty, but it works.
- */
-# if CPUFAM_X86
- __asm__ ("pushl %%ebx; cpuid; movl %%ebx, %%esi; popl %%ebx"
- : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d)
- : "a" (a) , "c" (c));
-# elif CPUFAM_AMD64
- __asm__ ("pushq %%rbx; cpuid; movl %%ebx, %%esi; popq %%rbx"
- : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d)
- : "a" (a) , "c" (c));
-# else
-# error "I'm confused."
-# endif
- dispatch_debug("CPUID(%08x, %08x) -> %08x, %08x, %08x, %08x",
- a, c, cc->a, cc->b, cc->c, cc->d);
-#else
- dispatch_debug("GNU inline assembler not available; can't CPUID");
-#endif
+ else
+ dispatch_debug("CPUID(%08x, %08x) -> %08x, %08x, %08x, %08x",
+ a, c, cc->a, cc->b, cc->c, cc->d);
}
static unsigned cpuid_maxleaf(void)
static int xmm_registers_available_p(void)
{
-#ifdef __GNUC__
- unsigned f;
- /* This hack is by Agner Fog. Use FXSAVE/FXRSTOR to figure out whether the
- * XMM registers are actually alive.
- */
- if (!cpuid_features_p(CPUID1D_FXSR, 0)) return (0);
-# if CPUFAM_X86
- __asm__ ("movl %%esp, %%edx; subl $512, %%esp; andl $~15, %%esp\n"
- "fxsave (%%esp)\n"
- "movl 160(%%esp), %%eax; xorl $0xaaaa5555, 160(%%esp)\n"
- "fxrstor (%%esp); fxsave (%%esp)\n"
- "movl 160(%%esp), %%ecx; movl %%eax, 160(%%esp)\n"
- "fxrstor (%%esp); movl %%edx, %%esp\n"
- "xorl %%ecx, %%eax"
- : "=a" (f)
- : /* no inputs */
- : "%ecx", "%edx");
-# elif CPUFAM_AMD64
- __asm__ ("movq %%rsp, %%rdx; subq $512, %%rsp; andq $~15, %%rsp\n"
- "fxsave (%%rsp)\n"
- "movl 160(%%rsp), %%eax; xorl $0xaaaa5555, 160(%%rsp)\n"
- "fxrstor (%%rsp); fxsave (%%rsp)\n"
- "movl 160(%%rsp), %%ecx; movl %%eax, 160(%%rsp)\n"
- "fxrstor (%%rsp); movq %%rdx, %%rsp\n"
- "xorl %%ecx, %%eax"
- : "=a" (f)
- : /* no inputs */
- : "%ecx", "%rdx");
-# else
-# error "I'm confused."
-# endif
+ /* The FXSAVE/FXRSTOR probe for live XMM registers is done by the
+ * assembler helper; this wrapper only logs and returns its answer.
+ *
+ * NOTE(review): the previous inline-assembler version returned zero
+ * early when CPUID did not report FXSR. Nothing here makes that check
+ * before the helper executes `fxsave' -- confirm that every caller
+ * tests for FXSR (or a feature which implies it) first.
+ */
+ int f = dispatch_x86ish_xmmregisters_p();
+
dispatch_debug("XMM registers %savailable", f ? "" : "not ");
return (f);
-#else
- dispatch_debug("GNU inline assembler not available; can't check for XMM");
- return (0);
-#endif
}
#endif
## Cryptographic laundering for true random data generation.
pkginclude_HEADERS += rand.h
librand_la_SOURCES += rand.c
+if CPUFAM_X86
+librand_la_SOURCES += rand-x86ish.S
+endif
+if CPUFAM_AMD64
+librand_la_SOURCES += rand-x86ish.S
+endif
librand_la_SOURCES += randgen.c
## The SSL v3 pseudorandom function.
--- /dev/null
+/// -*- mode: asm; asm-comment-char: ?/ -*-
+///
+/// Random-number support for x86
+///
+/// (c) 2019 Straylight/Edgeware
+///
+
+///----- Licensing notice ---------------------------------------------------
+///
+/// This file is part of Catacomb.
+///
+/// Catacomb is free software: you can redistribute it and/or modify it
+/// under the terms of the GNU Library General Public License as published
+/// by the Free Software Foundation; either version 2 of the License, or
+/// (at your option) any later version.
+///
+/// Catacomb is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+///
+/// You should have received a copy of the GNU Library General Public
+/// License along with Catacomb. If not, write to the Free Software
+/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+/// USA.
+
+///--------------------------------------------------------------------------
+/// Preliminaries.
+
+#include "config.h"
+#include "asm-common.h"
+
+ .extern F(rand_add)
+
+ .text
+
+///--------------------------------------------------------------------------
+/// Quick random generation.
+
+FUNC(rand_quick_x86ish_rdrand)
+ // Enter with a pointer to the random context in the first argument.
+ // Return zero on success, or -1 on error.
+ //
+ // Try `rdrand' up to 16 times. If it delivers a value, store it in
+ // a stack buffer and pass it to `rand_add' together with the
+ // context, the buffer's size in bytes and the same size in bits.
+ //
+ // The prologue reserves stack for the buffer (and, on x86, for the
+ // outgoing arguments) and picks a counter register which doesn't
+ // collide with the register holding the context pointer.
+
+#if CPUFAM_X86
+ mov edx, [esp + 4] // context pointer, kept in edx throughout
+ stalloc 28 // four argument words + 4-byte buffer,
+ // and 8 bytes more (presumably padding)
+# define COUNT ecx
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+ stalloc 8 // 8-byte buffer; context stays in rdi
+# define COUNT ecx
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+ stalloc 40 // 8-byte buffer at rsp + 32; the 32 bytes
+ // below it are presumably shadow space
+# define COUNT r8d
+#endif
+ endprologue
+
+ // Try to fetch a random number. The instruction sets the carry
+ // flag when it has delivered a value; otherwise count down and try
+ // again. R_a(r) is presumably the native-width a register (the
+ // stores below use eax on x86 and rax on amd64).
+ mov COUNT, 16
+0: rdrand R_a(r)
+ jc 1f
+ dec COUNT
+ jnz 0b
+
+ // Failed. All 16 attempts came back empty.
+ mov eax, -1
+ jmp 9f
+
+ // Success. Park the value in the stack buffer and set up the call
+ // rand_add(context, buffer, size in bytes, size in bits).
+1:
+#if CPUFAM_X86
+ mov [esp + 16], eax
+ lea ecx, [esp + 16]
+ mov dword ptr [esp + 12], 32
+ mov dword ptr [esp + 8], 4
+ mov [esp + 4], ecx
+ mov [esp + 0], edx
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+ mov [rsp + 0], rax
+ mov rsi, rsp
+ mov edx, 8
+ mov ecx, 64
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+ mov [rsp + 32], rax
+ lea rdx, [rsp + 32]
+ mov r8d, 8
+ mov r9d, 64
+#endif
+ callext F(rand_add)
+ xor eax, eax // return 0: success
+
+ // Done. Release the stack reserved in the prologue; both exits come
+ // through here with the return value already in eax.
+9:
+#if CPUFAM_X86
+ stfree 28
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+ stfree 8
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+ stfree 40
+#endif
+ ret
+ENDFUNC
+
+///----- That's all, folks --------------------------------------------------
+/* Quick generator for when nothing better is available: always fails. */
static int trivial_quick(rand_pool *r) { return (-1); }
-#if __GNUC__ && (CPUFAM_X86 || CPUFAM_AMD64)
-static int rdrand_quick(rand_pool *r)
-{
- unsigned long rr;
- int i = 16;
-
- __asm__ ("0: rdrand %0; jc 9f; dec %1; jnz 0b; 9:"
- : "=r" (rr), "=r" (i) : "1" (i) : "cc");
- if (!i) return (-1);
- rand_add(r, &rr, sizeof(rr), 8*sizeof(rr));
- return (0);
-}
+#if CPUFAM_X86 || CPUFAM_AMD64
+/* Assembler implementation, in `rand-x86ish.S': tries the `rdrand'
+ * instruction and feeds the result to @rand_add@. Returns zero on success
+ * or %$-1$% on failure.
+ */
+extern int rand_quick_x86ish_rdrand(rand_pool */*r*/);
#endif
static quick__functype *pick_quick(void)
{
-#if __GNUC__ && (CPUFAM_X86 || CPUFAM_AMD64)
- DISPATCH_PICK_COND(rand_quick, rdrand_quick,
+#if CPUFAM_X86 || CPUFAM_AMD64
+ DISPATCH_PICK_COND(rand_quick, rand_quick_x86ish_rdrand,
cpu_feature_p(CPUFEAT_X86_RDRAND));
#endif
DISPATCH_PICK_FALLBACK(rand_quick, trivial_quick);