## CPU-specific dispatch.
pkginclude_HEADERS += dispatch.h
libbase_la_SOURCES += dispatch.c
+if CPUFAM_X86
+libbase_la_SOURCES += dispatch-x86ish.S
+endif
+if CPUFAM_AMD64
+libbase_la_SOURCES += dispatch-x86ish.S
+endif
## Acceptable key-size descriptions.
pkginclude_HEADERS += keysz.h
--- /dev/null
+/// -*- mode: asm; asm-comment-char: ?/ -*-
+///
+/// CPU dispatch support for x86
+///
+/// (c) 2019 Straylight/Edgeware
+///
+
+///----- Licensing notice ---------------------------------------------------
+///
+/// This file is part of Catacomb.
+///
+/// Catacomb is free software: you can redistribute it and/or modify it
+/// under the terms of the GNU Library General Public License as published
+/// by the Free Software Foundation; either version 2 of the License, or
+/// (at your option) any later version.
+///
+/// Catacomb is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+///
+/// You should have received a copy of the GNU Library General Public
+/// License along with Catacomb. If not, write to the Free Software
+/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+/// USA.
+
+///--------------------------------------------------------------------------
+/// Preliminaries.
+
+#include "config.h"
+#include "asm-common.h"
+
+ EFLAGS_ID = 1 << 21
+
+ .text
+
+///--------------------------------------------------------------------------
+/// Probing for CPUID.
+
+FUNC(dispatch_x86ish_cpuid)
+ // Enter with a pointer to 16 bytes of storage for the output A, B,
+ // C, D values in the first argument, and input A and C values in the
+ // second and third. Fill the output buffer with `cpuid' results and
+ // return zero if we can; otherwise fill with zero and return -1.
+ //
+ // Each ABI-specific prologue below leaves the input A and C values
+ // in eax and ecx, which is where `cpuid' reads them, and defines OUT
+ // as a register holding the output buffer address which `cpuid'
+ // doesn't overwrite.
+
+#if CPUFAM_X86
+ pushreg ebx // `cpuid' writes ebx; keep the caller's
+ pushreg edi
+ mov edi, [esp + 12] // arguments lie above the two saved
+ mov eax, [esp + 16] // registers and the return address
+ mov ecx, [esp + 20]
+# define OUT edi
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+ pushreg rbx // `cpuid' writes ebx; keep the caller's
+ mov eax, esi
+ mov ecx, edx
+# define OUT rdi
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+ pushreg rbx // `cpuid' writes ebx; keep the caller's
+ mov r9, rcx // rcx is about to be reused for input C
+ mov eax, edx
+ mov ecx, r8d
+# define OUT r9
+#endif
+ endprologue
+
+ // First, check that this is even a thing, using the complicated
+ // dance with the flags register: `cpuid' is taken to be available
+ // only if the ID flag can be both set and cleared.
+ pushf
+ pop R_d(r) // current flags in d
+
+ or R_d(r), EFLAGS_ID // force the id bit on and check it
+ push R_d(r)
+ popf
+ pushf
+ pop R_d(r)
+ test edx, EFLAGS_ID
+ jz 8f // bit wouldn't stay set
+
+ and R_d(r), ~EFLAGS_ID // force the id bit off and check it
+ push R_d(r)
+ popf
+ pushf
+ pop R_d(r)
+ test edx, EFLAGS_ID
+ jnz 8f // bit wouldn't stay clear
+
+ // OK, that seemed to work.
+ cpuid
+
+ mov [OUT + 0], eax
+ mov [OUT + 4], ebx
+ mov [OUT + 8], ecx
+ mov [OUT + 12], edx
+ xor eax, eax // return zero for success
+
+ // We're done. Both the success and failure paths leave through
+ // here so that the saved registers are restored in one place.
+9:
+#if CPUFAM_X86
+ popreg edi
+ popreg ebx
+#endif
+#if CPUFAM_AMD64
+ popreg rbx
+#endif
+ ret
+
+ // Failed. Clear the output buffer and return -1.
+8: xor eax, eax
+ mov [OUT + 0], eax
+ mov [OUT + 4], eax
+ mov [OUT + 8], eax
+ mov [OUT + 12], eax
+ mov eax, -1
+ jmp 9b
+
+ // Don't let the function-local register alias escape into the rest
+ // of the file.
+#undef OUT
+
+ENDFUNC
+
+///--------------------------------------------------------------------------
+/// Probing for XMM register availability.
+
+FUNC(dispatch_x86ish_xmmregisters_p)
+ // Enter with no arguments. Return nonzero if the XMM registers are
+ // usable, i.e., if a change made to xmm0 is seen to stick.
+
+ pushreg R_bp(r)
+ setfp
+ stalloc 512 // scratch space for the fxsave image
+ and R_sp(r), ~15 // round sp down to a 16-byte boundary
+ endprologue
+
+ // Save the floating point and SIMD registers, and try to clobber
+ // xmm0 by editing its slot in the saved image and reloading it.
+ fxsave [R_sp(r)] // NOTE(review): issued unconditionally; presumably the caller has checked FXSR -- confirm
+ mov eax, [R_sp(r) + 160] // eax = original low 32 bits of xmm0
+ xor dword ptr [R_sp(r) + 160], 0xaaaa5555 // flip some of its bits
+ fxrstor [R_sp(r)]
+
+ // Save them again, and read back the low 32 bits of xmm0. Undo the
+ // clobbering and restore, so the caller's registers are unchanged.
+ fxsave [R_sp(r)]
+ mov ecx, [R_sp(r) + 160] // ecx = what xmm0 holds after the reload
+ mov [R_sp(r) + 160], eax // put the original value back
+ fxrstor [R_sp(r)]
+
+ // The registers are live if we read different things.
+ xor eax, ecx // nonzero exactly when the two readings differ
+
+ // Done.
+ dropfp
+ popreg R_bp(r)
+ ret
+ENDFUNC
+
+///--------------------------------------------------------------------------
+/// Checking `rdrand'.
+
+FUNC(dispatch_x86ish_rdrand)
+ // Takes a single argument: the address DEST of a 32-bit word. Make
+ // up to 16 attempts to obtain a random word from `rdrand'. If one
+ // succeeds, store the word at *DEST and return zero; if they all
+ // fail, return -1 without writing to *DEST.
+
+#if CPUFAM_X86
+# define DEST edx
+# define TRIES ecx
+ mov DEST, [esp + 4]
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+# define DEST rdi
+# define TRIES ecx
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+# define DEST rcx
+# define TRIES edx
+#endif
+ endprologue
+
+ mov TRIES, 16 // attempts remaining
+
+ // Ask for a word. The carry flag reports whether we got one.
+0: rdrand eax
+ jnc 1f
+
+ // Got one: hand it back to the caller.
+ mov [DEST], eax
+ xor eax, eax
+ ret
+
+ // No luck this time. Go round again unless we've used up all of
+ // our attempts, in which case report failure.
+1: dec TRIES
+ jnz 0b
+ mov eax, -1
+ ret
+
+#undef DEST
+#undef TRIES
+
+ENDFUNC
+
+///----- That's all, folks --------------------------------------------------
#if CPUFAM_X86 || CPUFAM_AMD64
-# define EFLAGS_ID (1u << 21)
# define CPUID1D_SSE2 (1u << 26)
# define CPUID1D_FXSR (1u << 24)
# define CPUID1C_PCLMUL (1u << 1)
# define CPUID1C_RDRAND (1u << 30)
struct cpuid { unsigned a, b, c, d; };
-
-/* --- @cpuid@ --- *
- *
- * Arguments: @struct cpuid *cc@ = where to write the result
- * @unsigned a, c@ = EAX and ECX registers to set
- *
- * Returns: ---
- *
- * Use: Minimal C wrapper around the x86 `CPUID' instruction. Checks
- * that the instruction is actually available before invoking
- * it; fills the output structure with zero if it's not going to
- * work.
- */
-
-#ifdef __GNUC__
-# if CPUFAM_X86
-static __inline__ unsigned getflags(void)
- { unsigned f; __asm__ ("pushf; popl %0" : "=g" (f)); return (f); }
-static __inline__ unsigned setflags(unsigned f)
-{
- unsigned ff;
- __asm__ ("pushf; pushl %1; popf; pushf; popl %0; popf"
- : "=r" (ff)
- : "r" (f));
- return (ff);
-}
-# else
-static __inline__ unsigned long getflags(void)
- { unsigned long f; __asm__ ("pushf; popq %0" : "=g" (f)); return (f); }
-static __inline__ unsigned long long setflags(unsigned long f)
-{
- unsigned long ff;
- __asm__ ("pushf; pushq %1; popf; pushf; popq %0; popf"
- : "=r" (ff)
- : "r" (f));
- return (ff);
-}
-# endif
-#endif
+extern int dispatch_x86ish_cpuid(struct cpuid *, unsigned a, unsigned c);
+extern int dispatch_x86ish_xmmregisters_p(void);
+extern int dispatch_x86ish_rdrand(unsigned *);
static void cpuid(struct cpuid *cc, unsigned a, unsigned c)
{
-#ifdef __GNUC__
- unsigned f;
-#endif
-
- cc->a = cc->b = cc->c = cc->d = 0;
-
-#ifdef __GNUC__
- /* Stupid dance to detect whether the CPUID instruction is available. */
- f = getflags();
- if (!(setflags(f | EFLAGS_ID) & EFLAGS_ID) ||
- setflags(f & ~EFLAGS_ID) & EFLAGS_ID) {
+ int rc = dispatch_x86ish_cpuid(cc, a, c); /* assembler helper fills in *cc */
+ if (rc) /* nonzero: CPUID isn't available */
 dispatch_debug("CPUID instruction not available");
- return;
- }
- setflags(f);
-
- /* Alas, EBX is magical in PIC code, so abuse ESI instead. This isn't
- * pretty, but it works.
- */
-# if CPUFAM_X86
- __asm__ ("pushl %%ebx; cpuid; movl %%ebx, %%esi; popl %%ebx"
- : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d)
- : "a" (a) , "c" (c));
-# elif CPUFAM_AMD64
- __asm__ ("pushq %%rbx; cpuid; movl %%ebx, %%esi; popq %%rbx"
- : "=a" (cc->a), "=S" (cc->b), "=c" (cc->c), "=d" (cc->d)
- : "a" (a) , "c" (c));
-# else
-# error "I'm confused."
-# endif
- dispatch_debug("CPUID(%08x, %08x) -> %08x, %08x, %08x, %08x",
- a, c, cc->a, cc->b, cc->c, cc->d);
-#else
- dispatch_debug("GNU inline assembler not available; can't CPUID");
-#endif
+ else /* zero: report the register values */
+ dispatch_debug("CPUID(%08x, %08x) -> %08x, %08x, %08x, %08x",
+ a, c, cc->a, cc->b, cc->c, cc->d);
}
static unsigned cpuid_maxleaf(void)
static int xmm_registers_available_p(void)
{
-#ifdef __GNUC__
- unsigned f;
- /* This hack is by Agner Fog. Use FXSAVE/FXRSTOR to figure out whether the
- * XMM registers are actually alive.
- */
- if (!cpuid_features_p(CPUID1D_FXSR, 0)) return (0);
-# if CPUFAM_X86
- __asm__ ("movl %%esp, %%edx; subl $512, %%esp; andl $~15, %%esp\n"
- "fxsave (%%esp)\n"
- "movl 160(%%esp), %%eax; xorl $0xaaaa5555, 160(%%esp)\n"
- "fxrstor (%%esp); fxsave (%%esp)\n"
- "movl 160(%%esp), %%ecx; movl %%eax, 160(%%esp)\n"
- "fxrstor (%%esp); movl %%edx, %%esp\n"
- "xorl %%ecx, %%eax"
- : "=a" (f)
- : /* no inputs */
- : "%ecx", "%edx");
-# elif CPUFAM_AMD64
- __asm__ ("movq %%rsp, %%rdx; subq $512, %%rsp; andq $~15, %%rsp\n"
- "fxsave (%%rsp)\n"
- "movl 160(%%rsp), %%eax; xorl $0xaaaa5555, 160(%%rsp)\n"
- "fxrstor (%%rsp); fxsave (%%rsp)\n"
- "movl 160(%%rsp), %%ecx; movl %%eax, 160(%%rsp)\n"
- "fxrstor (%%rsp); movq %%rdx, %%rsp\n"
- "xorl %%ecx, %%eax"
- : "=a" (f)
- : /* no inputs */
- : "%ecx", "%rdx");
-# else
-# error "I'm confused."
-# endif
+ int f;
+
+ /* This hack is by Agner Fog. Use FXSAVE/FXRSTOR to figure out whether the
+ * XMM registers are actually alive. The assembler helper issues those
+ * instructions unconditionally, and they fault on a processor which
+ * doesn't advertise FXSR, so check for that first -- just as the old
+ * inline-assembler version did.
+ */
+ if (!cpuid_features_p(CPUID1D_FXSR, 0)) return (0);
+ f = dispatch_x86ish_xmmregisters_p();
+
 dispatch_debug("XMM registers %savailable", f ? "" : "not ");
 return (f);
-#else
- dispatch_debug("GNU inline assembler not available; can't check for XMM");
- return (0);
-#endif
}
/* --- @rdrand_works_p@ --- *
* that it's already been verified to be safe to issue.
*/
-#ifdef __GNUC__
-static int rdrand(unsigned *x)
-{
- int i, rc;
- unsigned _t;
-
- i = 16;
- __asm__ ("" : "=g" (_t));
- __asm__ ("0: rdrand %2; jc 1f; decl %1; jnz 0b\n"
- "mov $-1, %0; jmp 9f\n"
- "1: movl %2, (%3); xorl %0, %0\n"
- "9:"
- : "=r" (rc), "+r" (i), "+r" (_t)
- : "r" (x)
- : "cc");
- return (rc);
-}
-#endif
-
static int rdrand_works_p(void)
{
unsigned ref, x, i;
* will fail with probability %$2^{-128}$% with a truly random generator,
* which seems fair enough.
*/
- if (rdrand(&ref)) goto fail;
+ if (dispatch_x86ish_rdrand(&ref)) goto fail;
for (i = 0; i < 4; i++) {
- if (rdrand(&x)) goto fail;
+ if (dispatch_x86ish_rdrand(&x)) goto fail;
if (x != ref) goto not_stuck;
}
dispatch_debug("RDRAND always returns 0x%08x!", ref);
enum {
KSZ_ANY, /* Allows any key at all */
KSZ_RANGE, /* Allows keys within a range */
- KSZ_SET, /* Allows specific sizes of keys */
+ KSZ_SET /* Allows specific sizes of keys */
};
#define KSZ_16BIT 0x20 /* Arguments are 16 bits long */
AC_SUBST([CATACOMB_LIBS])
dnl Necessary support libraries.
-PKG_CHECK_MODULES([mLib], [mLib >= 2.3.0])
+PKG_CHECK_MODULES([mLib], [mLib >= 2.4.1])
AM_CFLAGS="$AM_CFLAGS $mLib_CFLAGS"
dnl--------------------------------------------------------------------------
*.debhelper
catacomb
catacomb-bin
+catacomb-data
catacomb-dev
catacomb2
cpu_feature_p@Base 2.2.3
dispatch_debug@Base 2.2.3
(optional|arch=i386 amd64)dispatch_x86ish_cpuid@Base 2.5.0
+ (optional|arch=i386 amd64)dispatch_x86ish_rdrand@Base 2.5.99~
(optional|arch=i386 amd64)dispatch_x86ish_xmmregisters_p@Base 2.5.0
## regdump (available with `--enable-asm-debug')
+catacomb (2.5.99~) experimental; urgency=medium
+
+ * (placeholder for next minor release)
+
+ -- Mark Wooding <mdw@distorted.org.uk> Mon, 30 Sep 2019 02:15:20 +0100
+
catacomb (2.5.2) experimental; urgency=medium
* Merge changes from 2.4.5.
Section: libs
Priority: extra
Build-Depends: debhelper (>= 10), python, valgrind [!armel], pkg-config,
- mlib-dev (>= 2.3.0)
+ mlib-dev (>= 2.4.1)
Maintainer: Mark Wooding <mdw@distorted.org.uk>
Standards-Version: 3.1.1
* and lower bounds are achievable.
*
* All of the x_i at this point are positive, so we don't need to do
- * anything wierd when masking them.
+ * anything weird when masking them.
*/
b = x9&B24; c = 19&((b >> 19) - (b >> 24)); x9 -= b << 1;
b = x8&B25; x9 += b >> 25; x8 -= b << 1;
void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); }
-/* --- @mpx_uaddnlsl@ --- *
+/* --- @mpx_usubnlsl@ --- *
*
* Arguments: @mpw *dv, *dvl@ = destination and first argument vector
* @mpw a@ = second argument
## Cryptographic laundering for true random data generation.
pkginclude_HEADERS += rand.h
librand_la_SOURCES += rand.c
+if CPUFAM_X86
+librand_la_SOURCES += rand-x86ish.S
+endif
+if CPUFAM_AMD64
+librand_la_SOURCES += rand-x86ish.S
+endif
librand_la_SOURCES += randgen.c
## The SSL v3 pseudorandom function.
#include <string.h>
#include <mLib/bits.h>
+#include <mLib/macros.h>
#include <mLib/sub.h>
#include "grand.h"
/* --- Now reduce mod p --- *
*
- * I'm using shifts and adds to do the multiply step here. This needs to
- * be changed if @D@ ever becomes something other than 5.
+ * I'm using shifts and adds to do the multiply step here.
*/
-#if D != 5
-# error "Change shift sequence!"
-#endif
-
{
+ STATIC_ASSERT(D == 5, "Shift sequence doesn't match prime");
uint32 q;
q = yy[1];
--- /dev/null
+/// -*- mode: asm; asm-comment-char: ?/ -*-
+///
+/// Random-number support for x86
+///
+/// (c) 2019 Straylight/Edgeware
+///
+
+///----- Licensing notice ---------------------------------------------------
+///
+/// This file is part of Catacomb.
+///
+/// Catacomb is free software: you can redistribute it and/or modify it
+/// under the terms of the GNU Library General Public License as published
+/// by the Free Software Foundation; either version 2 of the License, or
+/// (at your option) any later version.
+///
+/// Catacomb is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+///
+/// You should have received a copy of the GNU Library General Public
+/// License along with Catacomb. If not, write to the Free Software
+/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+/// USA.
+
+///--------------------------------------------------------------------------
+/// Preliminaries.
+
+#include "config.h"
+#include "asm-common.h"
+
+ .extern F(rand_add)
+
+ .text
+
+///--------------------------------------------------------------------------
+/// Quick random generation.
+
+FUNC(rand_quick_x86ish_rdrand)
+ // Enter with a pointer to the random context in the first argument.
+ // Return zero on success, or -1 on error.
+ //
+ // Make up to 16 attempts to get a machine word from `rdrand'. If
+ // one succeeds, park the word on the stack and pass it to
+ // `rand_add' as (context, buffer address, size in bytes, bit
+ // count). COUNT is whichever scratch register holds the number of
+ // attempts left; it is chosen so as not to disturb the context
+ // pointer.
+
+#if CPUFAM_X86
+ mov edx, [esp + 4] // context pointer
+ stalloc 28 // four argument words, then the buffer
+# define COUNT ecx
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+ stalloc 8 // just the buffer; context stays in rdi
+# define COUNT ecx
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+ stalloc 40 // buffer at offset 32; context stays in rcx
+# define COUNT r8d
+#endif
+ endprologue
+
+ // Try to fetch a random number. The carry flag is set on success.
+ mov COUNT, 16
+0: rdrand R_a(r)
+ jc 1f
+ dec COUNT
+ jnz 0b
+
+ // Failed.
+ mov eax, -1
+ jmp 9f
+
+ // Success. Store the word and build the `rand_add' argument list.
+1:
+#if CPUFAM_X86
+ mov [esp + 16], eax // the random word
+ lea ecx, [esp + 16]
+ mov dword ptr [esp + 12], 32 // bit count
+ mov dword ptr [esp + 8], 4 // size in bytes
+ mov [esp + 4], ecx // buffer address
+ mov [esp + 0], edx // context pointer
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+ mov [rsp + 0], rax // the random word
+ mov rsi, rsp // buffer address
+ mov edx, 8 // size in bytes
+ mov ecx, 64 // bit count
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+ mov [rsp + 32], rax // the random word
+ lea rdx, [rsp + 32] // buffer address
+ mov r8d, 8 // size in bytes
+ mov r9d, 64 // bit count
+#endif
+ callext F(rand_add)
+ xor eax, eax // return zero for success
+
+ // Done. Release the stack space claimed in the prologue.
+9:
+#if CPUFAM_X86
+ stfree 28
+#endif
+#if CPUFAM_AMD64 && ABI_SYSV
+ stfree 8
+#endif
+#if CPUFAM_AMD64 && ABI_WIN
+ stfree 40
+#endif
+ ret
+
+ // Don't let the function-local register alias escape into the rest
+ // of the file.
+#undef COUNT
+
+ENDFUNC
+
+///----- That's all, folks --------------------------------------------------
static int trivial_quick(rand_pool *r) { return (-1); }
-#if __GNUC__ && (CPUFAM_X86 || CPUFAM_AMD64)
-static int rdrand_quick(rand_pool *r)
-{
- unsigned long rr;
- int i = 16;
-
- __asm__ ("0: rdrand %0; jc 9f; dec %1; jnz 0b; 9:"
- : "=r" (rr), "=r" (i) : "1" (i) : "cc");
- if (!i) return (-1);
- rand_add(r, &rr, sizeof(rr), 8*sizeof(rr));
- return (0);
-}
+#if CPUFAM_X86 || CPUFAM_AMD64
+extern int rand_quick_x86ish_rdrand(rand_pool */*r*/);
#endif
static quick__functype *pick_quick(void)
{
-#if __GNUC__ && (CPUFAM_X86 || CPUFAM_AMD64)
- DISPATCH_PICK_COND(rand_quick, rdrand_quick,
+#if CPUFAM_X86 || CPUFAM_AMD64
+ DISPATCH_PICK_COND(rand_quick, rand_quick_x86ish_rdrand,
cpu_feature_p(CPUFEAT_X86_RDRAND));
#endif
DISPATCH_PICK_FALLBACK(rand_quick, trivial_quick);
const octet *c = p;
int i, rot;
-#if RAND_POOLSZ != 128
-# error Polynomial in rand_add is out of date. Fix it.
-#endif
+ STATIC_ASSERT(RAND_POOLSZ == 128, "Polynomial doesn't match pool size");
RAND_RESOLVE(r);
* `keccak1600_round' below for the details.
*/
+#define COMPL_MASK 0x00121106u
+
#define STATE_INIT(z) do { \
lane cmpl = LANE_CMPL; \
(z)->S[I(1, 0)] = cmpl; (z)->S[I(2, 0)] = cmpl; \
#else
/* A target with fused and/not (`bic', `andc2'). Everything is simple. */
+#define COMPL_MASK 0u
+
#define STATE_INIT(z) do ; while (0)
#define STATE_OUT(z) do ; while (0)
void keccak1600_extract(const keccak1600_state *s, kludge64 *p, size_t n)
{
+ uint32 m = COMPL_MASK; /* low bit is tested for the current lane */
 unsigned i;
- keccak1600_state t;
+ lane t;
- t = *s; STATE_OUT(&t);
- for (i = 0; i < n; i++) p[i] = FROM_LANE(t.S[i]);
+ for (i = 0; i < n; i++) {
+ t = s->S[i]; if (m&1) NOT_LANE(t, t); /* presumably undoes a lane held complemented -- confirm */
+ *p++ = FROM_LANE(t); m >>= 1; /* emit lane; move on to next mask bit */
+ }
}
/*----- Test rig ----------------------------------------------------------*/