AC_INIT([catacomb], AUTO_VERSION, [mdw@distorted.org.uk])
AC_CONFIG_SRCDIR([catacomb.pc.in])
AC_CONFIG_AUX_DIR([config])
-AM_INIT_AUTOMAKE([foreign parallel-tests])
+AM_INIT_AUTOMAKE([foreign parallel-tests color-tests subdir-objects])
+ AC_CANONICAL_HOST
mdw_SILENT_RULES
AC_PROG_CC
AM_PROG_LIBTOOL
mdw_LIBTOOL_VERSION_INFO
+ AM_PROG_AS
+
AC_PROG_YACC
AC_SUBST(AM_CFLAGS)
dnl--------------------------------------------------------------------------
+ dnl Host-specific configuration.
+
+ AC_MSG_CHECKING([CPU family and ABI])
+
+ dnl The table of CPU families and ABIs which we might support. Support is
+ dnl not uniform: each dispatched function might or might not have an
+ dnl implementation for any particular CPU/ABI combination.
+ AC_DEFUN([catacomb_CPU_FAMILIES],
+ [$1([i[[3-6]]86,cygwin], [x86], [win])
+ $1([i[[3-6]]86,*], [x86], [sysv])
+ $1([x86_64,cygwin], [amd64], [win])
+ $1([x86_64,*], [amd64], [sysv])
+ $1([armv*,*-gnueabi | armv*,*-gnueabihf], [armel], [gnueabi])])
+
+ dnl A utility to clear the `seen' flags for a table entry's CPU family ($2)
+ dnl and ABI ($3), so that each later pass handles every CPU or ABI only once.
+ m4_define([catacomb_CLEAR_FLAGS],
+ [m4_ifdef([catacomb_seen_cpu/$2],
+ [m4_undefine([catacomb_seen_cpu/$2])])dnl
+ m4_ifdef([catacomb_seen_abi/$3],
+ [m4_undefine([catacomb_seen_abi/$3])])])
+
+ dnl Map the host's CPU and OS onto a CPU family (CPUFAM) and an ABI.
+ case $host_cpu,$host_os in
+ m4_define([catacomb_CPU_CASE],
+ [$1) CPUFAM=$2 ABI=$3 ;;
+ ])
+ catacomb_CPU_FAMILIES([catacomb_CPU_CASE])
+ *) CPUFAM=nil ABI=nil ;;
+ esac
+
+ dnl Define a `CPUFAM_...' preprocessor symbol for the host's CPU family.
+ catacomb_CPU_FAMILIES([catacomb_CLEAR_FLAGS])
+ case $CPUFAM in
+ m4_define([catacomb_DEFINE_CPU],
+ [m4_ifdef([catacomb_seen_cpu/$2], [],
+ [$2)
+ AC_DEFINE([CPUFAM_]m4_translit([$2], [a-z], [A-Z]), [1],
+ [Define if host CPU family is \`$2\'.])
+ ;;m4_define([catacomb_seen_cpu/$2], [t])])])
+ catacomb_CPU_FAMILIES([catacomb_DEFINE_CPU])
+ nil) ;;
+ *) AC_MSG_ERROR([BUG: unexpected cpufam \`$CPUFAM']) ;;
+ esac
+ AC_SUBST([CPUFAM])
+
+ dnl Define an `ABI_...' preprocessor symbol for the host's ABI variant.
+ catacomb_CPU_FAMILIES([catacomb_CLEAR_FLAGS])
+ case $ABI in
+ m4_define([catacomb_DEFINE_ABI],
+ [m4_ifdef([catacomb_seen_abi/$3], [],
+ [$3)
+ AC_DEFINE([ABI_]m4_translit([$3], [a-z], [A-Z]), [1],
+ [Define if host ABI variant is \`$3\'.])
+ ;;m4_define([catacomb_seen_abi/$3], [t])])])
+ catacomb_CPU_FAMILIES([catacomb_DEFINE_ABI])
+ nil) ;;
+ *) AC_MSG_ERROR([BUG: unexpected ABI \`$ABI']) ;;
+ esac
+ AC_SUBST([ABI])
+
+ dnl Establish Automake conditionals for each CPU/ABI pair, CPU family, and ABI.
+ catacomb_CPU_FAMILIES([catacomb_CLEAR_FLAGS])
+ m4_define([catacomb_COND_CPU],
+ [m4_define([_CPU], m4_translit([$2], [a-z], [A-Z]))
+ m4_define([_ABI], m4_translit([$3], [a-z], [A-Z]))
+ AM_CONDITIONAL([CPUABI_]_CPU[_]_ABI, [test x$CPUFAM/$ABI = x$2/$3])
+ m4_ifdef([catacomb_seen_cpu/$2], [],
+ [AM_CONDITIONAL([CPUFAM_]_CPU, [test x$CPUFAM = x$2])dnl
+ m4_define([catacomb_seen_cpu/$2], [t])])
+ m4_ifdef([catacomb_seen_abi/$3], [],
+ [AM_CONDITIONAL([ABI_]_ABI, [test x$ABI = x$3])dnl
+ m4_define([catacomb_seen_abi/$3], [t])])])
+ catacomb_CPU_FAMILIES([catacomb_COND_CPU])
+ AM_CONDITIONAL([KNOWN_CPUFAM], [test x$CPUFAM != xnil])
+
+ dnl Report on what we found.
+ case $CPUFAM in
+ nil) AC_MSG_RESULT([not supported]) ;;
+ *) AC_MSG_RESULT([$CPUFAM/$ABI]) ;;
+ esac
+
+ dnl Some equipment wanted for checking CPU features at runtime.
+ AC_CHECK_HEADERS([asm/hwcap.h])
+ AC_CHECK_HEADERS([sys/auxv.h])
+ AC_CHECK_HEADERS([linux/auxvec.h])
+ AC_CHECK_FUNCS([getauxval])
+
+ dnl--------------------------------------------------------------------------
dnl C programming environment.
dnl Find out if we're cross-compiling.
AC_CHECK_FUNCS([mlock])
dnl Necessary support libraries.
-PKG_CHECK_MODULES([mLib], [mLib >= 2.2.1])
+PKG_CHECK_MODULES([mLib], [mLib >= 2.2.2.1])
AM_CFLAGS="$AM_CFLAGS $mLib_CFLAGS"
dnl--------------------------------------------------------------------------
## Schneier's `Blowfish' block cipher.
BLKCS += blowfish
-libsymm_la_SOURCES += $(precomp)/blowfish-tab.c
-PRECOMPS += $(precomp)/blowfish-tab.c
+nodist_libsymm_la_SOURCES += ../precomp/symm/blowfish-tab.c
+PRECOMPS += $(precomp)/symm/blowfish-tab.c
PRECOMP_PROGS += blowfish-mktab
blowfish_mktab_CPPFLAGS = $(AM_CPPFLAGS) -DQUIET
if !CROSS_COMPILING
-$(precomp)/blowfish-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) blowfish-mktab$e
- $(AM_V_GEN)./blowfish-mktab >$(precomp)/blowfish-tab.c.new && \
- mv $(precomp)/blowfish-tab.c.new $(precomp)/blowfish-tab.c
+$(precomp)/symm/blowfish-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) blowfish-mktab$(EXEEXT)
+ $(AM_V_GEN)./blowfish-mktab >$(precomp)/symm/blowfish-tab.c.new && \
+ mv $(precomp)/symm/blowfish-tab.c.new \
+ $(precomp)/symm/blowfish-tab.c
endif
## Adams and Tavares' `CAST' block ciphers.
BLKCS += cast128 cast256
libsymm_la_SOURCES += cast-s.c cast-sk.c cast-base.h
-cast256.$t: t/cast256
+cast256.t$(EXEEXT): t/cast256
EXTRA_DIST += t/cast256.aes
MAINTAINERCLEANFILES += $(srcdir)/t/cast256
t/cast256: t/cast256.aes
## IBM's `DES' block cipher, by Feistel, Coppersmith, and others.
BLKCS += des des3
-libsymm_la_SOURCES += des-base.h $(precomp)/des-tab.c
-PRECOMPS += $(precomp)/des-tab.c
+libsymm_la_SOURCES += des-base.h
+nodist_libsymm_la_SOURCES += ../precomp/symm/des-tab.c
+PRECOMPS += $(precomp)/symm/des-tab.c
PRECOMP_PROGS += des-mktab
if !CROSS_COMPILING
-$(precomp)/des-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) des-mktab$e
- $(AM_V_GEN)./des-mktab >$(precomp)/des-tab.c.new && \
- mv $(precomp)/des-tab.c.new $(precomp)/des-tab.c
+$(precomp)/symm/des-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) des-mktab$(EXEEXT)
+ $(AM_V_GEN)./des-mktab >$(precomp)/symm/des-tab.c.new && \
+ mv $(precomp)/symm/des-tab.c.new $(precomp)/symm/des-tab.c
endif
## Rivest's `DESX' variant, with pre- and post-whitening.
## IBM's `MARS' block cipher.
BLKCS += mars
-libsymm_la_SOURCES += $(precomp)/mars-tab.c
-PRECOMPS += $(precomp)/mars-tab.c
+nodist_libsymm_la_SOURCES += ../precomp/symm/mars-tab.c
+PRECOMPS += $(precomp)/symm/mars-tab.c
PRECOMP_PROGS += mars-mktab
if !CROSS_COMPILING
-$(precomp)/mars-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) mars-mktab$e
- $(AM_V_GEN)./mars-mktab >$(precomp)/mars-tab.c.new && \
- mv $(precomp)/mars-tab.c.new $(precomp)/mars-tab.c
+$(precomp)/symm/mars-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) mars-mktab$(EXEEXT)
+ $(AM_V_GEN)./mars-mktab >$(precomp)/symm/mars-tab.c.new && \
+ mv $(precomp)/symm/mars-tab.c.new $(precomp)/symm/mars-tab.c
endif
-mars.$t: t/mars
+mars.t$(EXEEXT): t/mars
EXTRA_DIST += t/mars.aes
MAINTAINERCLEANFILES += $(srcdir)/t/mars
t/mars: t/mars.aes
## Daemen and Rijmen's `Rijndael' block cipher, selected as AES.
BLKCS += rijndael rijndael192 rijndael256
libsymm_la_SOURCES += rijndael-base.h rijndael-base.c
-libsymm_la_SOURCES += $(precomp)/rijndael-tab.c
-PRECOMPS += $(precomp)/rijndael-tab.c
+ if CPUFAM_X86
+ libsymm_la_SOURCES += rijndael-x86ish-aesni.S
+ endif
+ if CPUFAM_AMD64
+ libsymm_la_SOURCES += rijndael-x86ish-aesni.S
+ endif
+nodist_libsymm_la_SOURCES += ../precomp/symm/rijndael-tab.c
+PRECOMPS += $(precomp)/symm/rijndael-tab.c
PRECOMP_PROGS += rijndael-mktab
if !CROSS_COMPILING
-$(precomp)/rijndael-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) rijndael-mktab$e
- $(AM_V_GEN)./rijndael-mktab >$(precomp)/rijndael-tab.c.new && \
- mv $(precomp)/rijndael-tab.c.new $(precomp)/rijndael-tab.c
+$(precomp)/symm/rijndael-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) rijndael-mktab$(EXEEXT)
+ $(AM_V_GEN)./rijndael-mktab >$(precomp)/symm/rijndael-tab.c.new && \
+ mv $(precomp)/symm/rijndael-tab.c.new \
+ $(precomp)/symm/rijndael-tab.c
endif
-rijndael.$t: t/rijndael
+rijndael.t$(EXEEXT): t/rijndael
EXTRA_DIST += t/rijndael.aes
MAINTAINERCLEANFILES += $(srcdir)/t/rijndael
t/rijndael: t/rijndael.aes
## Massey's `SAFER' block ciphers.
BLKCS += safer safersk
-libsymm_la_SOURCES += $(precomp)/safer-tab.c
-PRECOMPS += $(precomp)/safer-tab.c
+nodist_libsymm_la_SOURCES += ../precomp/symm/safer-tab.c
+PRECOMPS += $(precomp)/symm/safer-tab.c
PRECOMP_PROGS += safer-mktab
STUBS_HDR += SAFER-SK,safersk,safer
if !CROSS_COMPILING
-$(precomp)/safer-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) safer-mktab$e
- $(AM_V_GEN)./safer-mktab >$(precomp)/safer-tab.c.new && \
- mv $(precomp)/safer-tab.c.new $(precomp)/safer-tab.c
+$(precomp)/symm/safer-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) safer-mktab$(EXEEXT)
+ $(AM_V_GEN)./safer-mktab >$(precomp)/symm/safer-tab.c.new && \
+ mv $(precomp)/symm/safer-tab.c.new \
+ $(precomp)/symm/safer-tab.c
endif
## Anderson, Biham and Knudsen's `Serpent' block cipher.
libsymm_la_SOURCES += serpent-sbox.h
check_PROGRAMS += serpent-check
TESTS += serpent-check
-serpent.$t: t/serpent
+serpent.t$(EXEEXT): t/serpent
EXTRA_DIST += t/serpent.aes
MAINTAINERCLEANFILES += $(srcdir)/t/serpent
t/serpent: t/serpent.aes
## Daemen and Rijmen's `Square' block cipher.
BLKCS += square
-libsymm_la_SOURCES += $(precomp)/square-tab.c
-PRECOMPS += $(precomp)/square-tab.c
+nodist_libsymm_la_SOURCES += ../precomp/symm/square-tab.c
+PRECOMPS += $(precomp)/symm/square-tab.c
PRECOMP_PROGS += square-mktab
if !CROSS_COMPILING
-$(precomp)/square-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) square-mktab$e
- $(AM_V_GEN)./square-mktab >$(precomp)/square-tab.c.new && \
- mv $(precomp)/square-tab.c.new $(precomp)/square-tab.c
+$(precomp)/symm/square-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) square-mktab$(EXEEXT)
+ $(AM_V_GEN)./square-mktab >$(precomp)/symm/square-tab.c.new && \
+ mv $(precomp)/symm/square-tab.c.new \
+ $(precomp)/symm/square-tab.c
endif
## Wheeler and Needham's `TEA' and `XTEA' block ciphers.
## Schneier, Kelsey, Whiting, Wagner, Hall and Ferguson's `Twofish' block
## cipher.
BLKCS += twofish
-libsymm_la_SOURCES += $(precomp)/twofish-tab.c
-PRECOMPS += $(precomp)/twofish-tab.c
+nodist_libsymm_la_SOURCES += ../precomp/symm/twofish-tab.c
+PRECOMPS += $(precomp)/symm/twofish-tab.c
PRECOMP_PROGS += twofish-mktab
if !CROSS_COMPILING
-$(precomp)/twofish-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) twofish-mktab$e
- $(AM_V_GEN)./twofish-mktab >$(precomp)/twofish-tab.c.new && \
- mv $(precomp)/twofish-tab.c.new $(precomp)/twofish-tab.c
+$(precomp)/symm/twofish-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) twofish-mktab$(EXEEXT)
+ $(AM_V_GEN)./twofish-mktab >$(precomp)/symm/twofish-tab.c.new && \
+ mv $(precomp)/symm/twofish-tab.c.new \
+ $(precomp)/symm/twofish-tab.c
endif
-twofish.$t: t/twofish
+twofish.t$(EXEEXT): t/twofish
EXTRA_DIST += t/twofish.aes
MAINTAINERCLEANFILES += $(srcdir)/t/twofish
t/twofish: t/twofish.aes
## Anderson and Biham's `Tiger' hash function.
HASHES += tiger
-libsymm_la_SOURCES += tiger-base.h $(precomp)/tiger-tab.c
-PRECOMPS += $(precomp)/tiger-tab.c
+libsymm_la_SOURCES += tiger-base.h
+nodist_libsymm_la_SOURCES += ../precomp/symm/tiger-tab.c
+PRECOMPS += $(precomp)/symm/tiger-tab.c
PRECOMP_PROGS += tiger-mktab
if !CROSS_COMPILING
-$(precomp)/tiger-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) tiger-mktab$e
- $(AM_V_GEN)./tiger-mktab >$(precomp)/tiger-tab.c.new && \
- mv $(precomp)/tiger-tab.c.new $(precomp)/tiger-tab.c
+$(precomp)/symm/tiger-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) tiger-mktab$(EXEEXT)
+ $(AM_V_GEN)./tiger-mktab >$(precomp)/symm/tiger-tab.c.new && \
+ mv $(precomp)/symm/tiger-tab.c.new \
+ $(precomp)/symm/tiger-tab.c
endif
## Barreto and Rijmen's `Whirlpool' hash function.
HASHES += whirlpool whirlpool256
-libsymm_la_SOURCES += $(precomp)/whirlpool-tab.c
-PRECOMPS += $(precomp)/whirlpool-tab.c
+nodist_libsymm_la_SOURCES += ../precomp/symm/whirlpool-tab.c
+PRECOMPS += $(precomp)/symm/whirlpool-tab.c
PRECOMP_PROGS += whirlpool-mktab
STUBS_HDR += Whirlpool-256,whirlpool256,whirlpool
if !CROSS_COMPILING
-$(precomp)/whirlpool-tab.c:
- $(AM_V_at)$(MKDIR_P) $(precomp)
- $(AM_V_at)$(MAKE) whirlpool-mktab$e
- $(AM_V_GEN)./whirlpool-mktab >$(precomp)/whirlpool-tab.c.new && \
- mv $(precomp)/whirlpool-tab.c.new $(precomp)/whirlpool-tab.c
+$(precomp)/symm/whirlpool-tab.c:
+ $(AM_V_at)$(MKDIR_P) $(precomp)/symm
+ $(AM_V_at)$(MAKE) whirlpool-mktab$(EXEEXT)
+ $(AM_V_GEN)./whirlpool-mktab \
+ >$(precomp)/symm/whirlpool-tab.c.new && \
+ mv $(precomp)/symm/whirlpool-tab.c.new \
+ $(precomp)/symm/whirlpool-tab.c
endif
## Bellare, Canetti and Krawczyk's `HMAC' mode for message authentication.
## Rivest's `RC4' stream cipher.
pkginclude_HEADERS += rc4.h
libsymm_la_SOURCES += rc4.c
-TESTS += rc4.$t
+TESTS += rc4.t$(EXEEXT)
EXTRA_DIST += t/rc4
ALL_CIPHERS += rc4
## Coppersmith and Rogaway's `SEAL' pseudorandom function.
pkginclude_HEADERS += seal.h
libsymm_la_SOURCES += seal.c
-TESTS += seal.$t
+TESTS += seal.t$(EXEEXT)
EXTRA_DIST += t/seal
ALL_CIPHERS += seal
EXTRA_DIST += salsa20-tvconv
pkginclude_HEADERS += salsa20.h salsa20-core.h
libsymm_la_SOURCES += salsa20.c
-TESTS += salsa20.$t
+ if CPUFAM_X86
+ libsymm_la_SOURCES += salsa20-x86ish-sse2.S
+ endif
+ if CPUFAM_AMD64
+ libsymm_la_SOURCES += salsa20-x86ish-sse2.S
+ endif
+TESTS += salsa20.t$(EXEEXT)
ALL_CIPHERS += salsa20 salsa2012 salsa208
ALL_CIPHERS += xsalsa20 xsalsa2012 xsalsa208
STUBS_HDR += Salsa20/12,salsa2012,salsa20
STUBS_HDR += XSalsa20/8,xsalsa208,salsa20
EXTRA_DIST += t/salsa20
MAINTAINERCLEANFILES += t/salsa20
-salsa20.$t: t/salsa20
+salsa20.t$(EXEEXT): t/salsa20
SALSA20_ESTREAM_TV = t/salsa20.estream
SALSA20_ESTREAM_TV += t/salsa2012.estream
SALSA20_ESTREAM_TV += t/salsa208.estream
## Bernstein's `ChaCha' stream cipher.
pkginclude_HEADERS += chacha.h chacha-core.h
libsymm_la_SOURCES += chacha.c
-TESTS += chacha.$t
+ if CPUFAM_X86
+ libsymm_la_SOURCES += chacha-x86ish-sse2.S
+ endif
+ if CPUFAM_AMD64
+ libsymm_la_SOURCES += chacha-x86ish-sse2.S
+ endif
+TESTS += chacha.t$(EXEEXT)
EXTRA_DIST += t/chacha
ALL_CIPHERS += chacha20 chacha12 chacha8
ALL_CIPHERS += xchacha20 xchacha12 xchacha8
EXTRA_DIST += daftstory.h
## Clean the debris from the `modes' subdirectory.
-CLEANFILES += modes/*.to modes/*.$t
+CLEANFILES += modes/*.to modes/*.t$(EXEEXT)
###----- That's all, folks --------------------------------------------------
/*----- Header files ------------------------------------------------------*/
+ #include "config.h"
+
#include <stdarg.h>
#include <mLib/bits.h>
#include "arena.h"
#include "chacha.h"
#include "chacha-core.h"
+ #include "dispatch.h"
#include "gcipher.h"
#include "grand.h"
#include "keysz.h"
* the feedforward step.
*/
- static void core(unsigned r, const chacha_matrix src, chacha_matrix dest)
+ CPU_DISPATCH(static, (void), /* declares `core'; exact expansion is in dispatch.h -- confirm there */
+ void, core, (unsigned r, const chacha_matrix src, /* return type, name, parameter list */
+ chacha_matrix dest),
+ (r, src, dest), /* presumably the arguments handed on to the implementation */
+ pick_core, simple_core); /* picker function, and the portable C implementation */
+
+ static void simple_core(unsigned r, const chacha_matrix src, /* portable C implementation of `core': */
+ chacha_matrix dest) /* CHACHA_nR on src into dest with round count r, then the CHACHA_FFWD step */
{ CHACHA_nR(dest, src, r); CHACHA_FFWD(dest, src); }
+ #if CPUFAM_X86 || CPUFAM_AMD64
+ extern core__functype chacha_core_x86ish_sse2; /* defined outside this file; presumably chacha-x86ish-sse2.S -- confirm */
+ #endif /* CPUFAM_X86 || CPUFAM_AMD64 */
+
+ static core__functype *pick_core(void) /* choose which `core' implementation to use */
+ {
+ #if CPUFAM_X86 || CPUFAM_AMD64 /* the SSE2 candidate is only declared for these CPU families */
+ DISPATCH_PICK_COND(chacha_core, chacha_core_x86ish_sse2,
+ cpu_feature_p(CPUFEAT_X86_SSE2)); /* presumably selected when this test passes -- confirm in dispatch.h */
+ #endif
+ DISPATCH_PICK_FALLBACK(chacha_core, simple_core); /* fallback: the portable C implementation */
+ }
+
/* --- @populate@ --- *
*
* Arguments: @chacha_matrix a@ = a matrix to fill in
static const grand_ops grops_rand_##rr = { \
"chacha" #rr, GRAND_CRYPTO, 0, \
grmisc, grdestroy, grword, \
- grbyte, grword, grand_range, grfill \
+ grbyte, grword, grand_defaultrange, grfill \
}; \
\
grand *chacha##rr##_rand(const void *k, size_t ksz, const void *n) \
static const grand_ops grxops_rand_##rr = { \
"xchacha" #rr, GRAND_CRYPTO, 0, \
grmisc, grxdestroy_##rr, grword, \
- grbyte, grword, grand_range, grfill \
+ grbyte, grword, grand_defaultrange, grfill \
}; \
\
grand *xchacha##rr##_rand(const void *k, size_t ksz, const void *n) \
/*----- Header files ------------------------------------------------------*/
+ #include "config.h"
+
#include <stdarg.h>
#include <mLib/bits.h>
#include "arena.h"
+ #include "dispatch.h"
#include "gcipher.h"
#include "grand.h"
#include "keysz.h"
* the feedforward step.
*/
- static void core(unsigned r, const salsa20_matrix src, salsa20_matrix dest)
+ CPU_DISPATCH(static, (void), /* declares `core'; exact expansion is in dispatch.h -- confirm there */
+ void, core, (unsigned r, const salsa20_matrix src, /* return type, name, parameter list */
+ salsa20_matrix dest),
+ (r, src, dest), /* presumably the arguments handed on to the implementation */
+ pick_core, simple_core); /* picker function, and the portable C implementation */
+
+ static void simple_core(unsigned r, const salsa20_matrix src, /* portable C implementation of `core': */
+ salsa20_matrix dest) /* SALSA20_nR on src into dest with round count r, then the SALSA20_FFWD step */
{ SALSA20_nR(dest, src, r); SALSA20_FFWD(dest, src); }
+ #if CPUFAM_X86 || CPUFAM_AMD64
+ extern core__functype salsa20_core_x86ish_sse2; /* defined outside this file; presumably salsa20-x86ish-sse2.S -- confirm */
+ #endif /* CPUFAM_X86 || CPUFAM_AMD64 */
+
+ static core__functype *pick_core(void) /* choose which `core' implementation to use */
+ {
+ #if CPUFAM_X86 || CPUFAM_AMD64 /* the SSE2 candidate is only declared for these CPU families */
+ DISPATCH_PICK_COND(salsa20_core, salsa20_core_x86ish_sse2,
+ cpu_feature_p(CPUFEAT_X86_SSE2)); /* presumably selected when this test passes -- confirm in dispatch.h */
+ #endif
+ DISPATCH_PICK_FALLBACK(salsa20_core, simple_core); /* fallback: the portable C implementation */
+ }
+
/* --- @populate@ --- *
*
* Arguments: @salsa20_matrix a@ = a matrix to fill in
KSZ_ASSERT(salsa20, ksz);
- a[ 1] = LOAD32_L(k + 0);
- a[ 2] = LOAD32_L(k + 4);
+ /* Here's the pattern of key, constant, nonce, and counter pieces in the
+ * matrix, before and after our permutation.
+ *
+ * [ C0 K0 K1 K2 ] [ C0 C1 C2 C3 ]
+ * [ K3 C1 N0 N1 ] --> [ K3 T1 K7 K2 ]
+ * [ T0 T1 C2 K4 ] [ T0 K6 K1 N1 ]
+ * [ K5 K6 K7 C3 ] [ K5 K0 N0 K4 ]
+ */
+
+ a[13] = LOAD32_L(k + 0);
+ a[10] = LOAD32_L(k + 4);
if (ksz == 10) {
- a[ 3] = LOAD16_L(k + 8);
+ a[ 7] = LOAD16_L(k + 8);
a[ 4] = 0;
} else {
- a[ 3] = LOAD32_L(k + 8);
+ a[ 7] = LOAD32_L(k + 8);
a[ 4] = LOAD32_L(k + 12);
}
if (ksz <= 16) {
- a[11] = a[ 1];
- a[12] = a[ 2];
- a[13] = a[ 3];
- a[14] = a[ 4];
+ a[15] = a[13];
+ a[12] = a[10];
+ a[ 9] = a[ 7];
+ a[ 6] = a[ 4];
a[ 0] = SALSA20_A128;
- a[ 5] = SALSA20_B128;
- a[10] = ksz == 10 ? SALSA20_C80 : SALSA20_C128;
- a[15] = SALSA20_D128;
+ a[ 1] = SALSA20_B128;
+ a[ 2] = ksz == 10 ? SALSA20_C80 : SALSA20_C128;
+ a[ 3] = SALSA20_D128;
} else {
- a[11] = LOAD32_L(k + 16);
+ a[15] = LOAD32_L(k + 16);
a[12] = LOAD32_L(k + 20);
- a[13] = LOAD32_L(k + 24);
- a[14] = LOAD32_L(k + 28);
+ a[ 9] = LOAD32_L(k + 24);
+ a[ 6] = LOAD32_L(k + 28);
a[ 0] = SALSA20_A256;
- a[ 5] = SALSA20_B256;
- a[10] = SALSA20_C256;
- a[15] = SALSA20_D256;
+ a[ 1] = SALSA20_B256;
+ a[ 2] = SALSA20_C256;
+ a[ 3] = SALSA20_D256;
}
}
{
const octet *n = nonce;
- ctx->a[6] = LOAD32_L(n + 0);
- ctx->a[7] = LOAD32_L(n + 4);
+ ctx->a[14] = LOAD32_L(n + 0);
+ ctx->a[11] = LOAD32_L(n + 4);
salsa20_seek(ctx, 0);
}
void salsa20_seeku64(salsa20_ctx *ctx, kludge64 i)
{
- ctx->a[8] = LO64(i); ctx->a[9] = HI64(i);
+ ctx->a[8] = LO64(i); ctx->a[5] = HI64(i); /* counter words: LO64 part in a[8], HI64 part now in a[5] (was a[9]) */
ctx->bufi = SALSA20_OUTSZ;
}
{ kludge64 i = salsa20_tellu64(ctx); return (GET64(unsigned long, i)); }
kludge64 salsa20_tellu64(salsa20_ctx *ctx)
- { kludge64 i; SET64(i, ctx->a[9], ctx->a[8]); return (i); }
+ { kludge64 i; SET64(i, ctx->a[5], ctx->a[8]); return (i); } /* rebuild the counter from a[5] and a[8], the slots salsa20_seeku64 writes */
/* --- @salsa20{,12,8}_encrypt@ --- *
*
* speed critical, so we do it the harder way. \
*/ \
\
- for (i = 0; i < 4; i++) k[i + 6] = src[i]; \
+ for (i = 0; i < 4; i++) k[14 - 3*i] = src[i]; \
core(r, k, a); \
- for (i = 0; i < 4; i++) dest[i] = a[5*i] - k[5*i]; \
- for (i = 4; i < 8; i++) dest[i] = a[i + 2] - k[i + 2]; \
+ for (i = 0; i < 4; i++) dest[i] = a[5*i] - k[i]; \
+ for (i = 4; i < 8; i++) dest[i] = a[i + 2] - k[26 - 3*i]; \
} \
\
void HSALSA20_PRF(r, salsa20_ctx *ctx, const void *src, void *dest) \
\
populate(ctx->k, key, ksz); \
ctx->s.a[ 0] = SALSA20_A256; \
- ctx->s.a[ 5] = SALSA20_B256; \
- ctx->s.a[10] = SALSA20_C256; \
- ctx->s.a[15] = SALSA20_D256; \
+ ctx->s.a[ 1] = SALSA20_B256; \
+ ctx->s.a[ 2] = SALSA20_C256; \
+ ctx->s.a[ 3] = SALSA20_D256; \
XSALSA20_SETNONCE(r, ctx, nonce ? nonce : zerononce); \
}
SALSA20_VARS(DEFXINIT)
\
for (i = 0; i < 4; i++) in[i] = LOAD32_L(n + 4*i); \
HSALSA20_RAW(r, ctx->k, in, out); \
- for (i = 0; i < 4; i++) ctx->s.a[i + 1] = out[i]; \
- for (i = 4; i < 8; i++) ctx->s.a[i + 7] = out[i]; \
+ for (i = 0; i < 4; i++) ctx->s.a[13 - 3*i] = out[i]; \
+ for (i = 4; i < 8; i++) ctx->s.a[27 - 3*i] = out[i]; \
salsa20_setnonce(&ctx->s, n + 16); \
}
SALSA20_VARS(DEFXNONCE)
static const grand_ops grops_rand_##rr = { \
SALSA20_NAME_##rr, GRAND_CRYPTO, 0, \
grmisc, grdestroy, grword, \
- grbyte, grword, grand_range, grfill \
+ grbyte, grword, grand_defaultrange, grfill \
}; \
\
grand *SALSA20_DECOR(salsa20, rr, _rand) \
static const grand_ops grxops_rand_##rr = { \
"x" SALSA20_NAME_##rr, GRAND_CRYPTO, 0, \
grmisc, grxdestroy_##rr, grword, \
- grbyte, grword, grand_range, grfill \
+ grbyte, grword, grand_defaultrange, grfill \
}; \
\
grand *SALSA20_DECOR(xsalsa20, rr, _rand) \
#include <mLib/quis.h>
#include <mLib/testrig.h>
+ static const int perm[] = { /* perm[j] = index in the matrix passed to core() that receives input word j */
+ 0, 13, 10, 7,
+ 4, 1, 14, 11,
+ 8, 5, 2, 15,
+ 12, 9, 6, 3
+ }; /* a permutation of 0..15: each index appears exactly once */
+
#define DEFVCORE(r) \
static int v_core_##r(dstr *v) \
{ \
salsa20_matrix a, b; \
dstr d = DSTR_INIT; \
- int i, n; \
+ int i, j, n; \
int ok = 1; \
\
DENSURE(&d, SALSA20_OUTSZ); d.len = SALSA20_OUTSZ; \
n = *(int *)v[0].buf; \
for (i = 0; i < SALSA20_OUTSZ/4; i++) \
- a[i] = LOAD32_L(v[1].buf + 4*i); \
+ b[i] = LOAD32_L(v[1].buf + 4*i); \
for (i = 0; i < n; i++) { \
+ for (j = 0; j < 16; j++) a[perm[j]] = b[j]; \
core(r, a, b); \
memcpy(a, b, sizeof(a)); \
} \
- for (i = 0; i < SALSA20_OUTSZ/4; i++) STORE32_L(d.buf + 4*i, a[i]); \
+ for (i = 0; i < SALSA20_OUTSZ/4; i++) STORE32_L(d.buf + 4*i, b[i]); \
\
if (d.len != v[2].len || memcmp(d.buf, v[2].buf, v[2].len) != 0) { \
ok = 0; \