X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/2b3f6527d522b647a6c8b5039228ebe569686c38..HEAD:/symm/salsa20.c diff --git a/symm/salsa20.c b/symm/salsa20.c index f424b746..f0fe3d7c 100644 --- a/symm/salsa20.c +++ b/symm/salsa20.c @@ -39,6 +39,7 @@ #include "grand.h" #include "keysz.h" #include "paranoia.h" +#include "rsvr.h" #include "salsa20.h" #include "salsa20-core.h" @@ -72,6 +73,7 @@ static void simple_core(unsigned r, const salsa20_matrix src, #if CPUFAM_X86 || CPUFAM_AMD64 extern core__functype salsa20_core_x86ish_sse2; +extern core__functype salsa20_core_x86ish_avx; #endif #if CPUFAM_ARMEL @@ -85,6 +87,8 @@ extern core__functype salsa20_core_arm64; static core__functype *pick_core(void) { #if CPUFAM_X86 || CPUFAM_AMD64 + DISPATCH_PICK_COND(salsa20_core, salsa20_core_x86ish_avx, + cpu_feature_p(CPUFEAT_X86_AVX)); DISPATCH_PICK_COND(salsa20_core, salsa20_core_x86ish_sse2, cpu_feature_p(CPUFEAT_X86_SSE2)); #endif @@ -159,6 +163,8 @@ static void populate(salsa20_matrix a, const void *key, size_t ksz) /*----- Salsa20 implementation --------------------------------------------*/ +static const octet zerononce[XSALSA20_NONCESZ]; + /* --- @salsa20_init@ --- * * * Arguments: @salsa20_ctx *ctx@ = context to fill in @@ -174,8 +180,6 @@ static void populate(salsa20_matrix a, const void *key, size_t ksz) void salsa20_init(salsa20_ctx *ctx, const void *key, size_t ksz, const void *nonce) { - static const octet zerononce[SALSA20_NONCESZ]; - populate(ctx->a, key, ksz); salsa20_setnonce(ctx, nonce ? nonce : zerononce); } @@ -231,7 +235,7 @@ void salsa20_seek(salsa20_ctx *ctx, unsigned long i) void salsa20_seeku64(salsa20_ctx *ctx, kludge64 i) { ctx->a[8] = LO64(i); ctx->a[5] = HI64(i); - ctx->bufi = SALSA20_OUTSZ; + ctx->off = 0; } void salsa20_seek_ietf(salsa20_ctx *ctx, uint32 i) @@ -271,6 +275,8 @@ uint32 salsa20_tell_ietf(salsa20_ctx *ctx) * to @dest@. */ +static const rsvr_policy policy = { 0, SALSA20_OUTSZ, SALSA20_OUTSZ }; + #define SALSA20_ENCRYPT(r, ctx, src, dest, sz) \ SALSA20_DECOR(salsa20, r, _encrypt)(ctx, src, dest, sz) #define DEFENCRYPT(r) \ @@ -280,41 +286,40 @@ uint32 salsa20_tell_ietf(salsa20_ctx *ctx) salsa20_matrix b; \ const octet *s = src; \ octet *d = dest; \ - size_t n; \ + rsvr_plan plan; \ kludge64 pos, delta; \ \ - SALSA20_OUTBUF(ctx, d, s, sz); \ - if (!sz) return; \ - \ - if (!dest) { \ - n = sz/SALSA20_OUTSZ; \ - pos = salsa20_tellu64(ctx); \ - ASSIGN64(delta, n); \ - ADD64(pos, pos, delta); \ - salsa20_seeku64(ctx, pos); \ - sz = sz%SALSA20_OUTSZ; \ - } else if (!src) { \ - while (sz >= SALSA20_OUTSZ) { \ - core(r, ctx->a, b); \ - SALSA20_STEP(ctx->a); \ - SALSA20_GENFULL(b, d); \ - sz -= SALSA20_OUTSZ; \ + rsvr_mkplan(&plan, &policy, ctx->off, sz); \ + \ + if (plan.head) { \ + if (!ctx->off) { \ + core(r, ctx->a, b); SALSA20_STEP(ctx->a); \ + SALSA20_PREPBUF(ctx, b); \ } \ - } else { \ - while (sz >= SALSA20_OUTSZ) { \ - core(r, ctx->a, b); \ - SALSA20_STEP(ctx->a); \ - SALSA20_MIXFULL(b, d, s); \ - sz -= SALSA20_OUTSZ; \ + SALSA20_OUTBUF(ctx, d, s, plan.head); \ + } \ + \ + ctx->off -= plan.from_rsvr; \ + \ + if (!d) { \ + if (plan.from_input) { \ + pos = salsa20_tellu64(ctx); \ + ASSIGN64(delta, plan.from_input/SALSA20_OUTSZ); \ + ADD64(pos, pos, delta); \ + salsa20_seeku64(ctx, pos); \ } \ + } else if (!s) while (plan.from_input) { \ + core(r, ctx->a, b); SALSA20_STEP(ctx->a); \ + SALSA20_GENFULL(b, d); plan.from_input -= SALSA20_OUTSZ; \ + } else while (plan.from_input) { \ + core(r, ctx->a, b); SALSA20_STEP(ctx->a); \ + SALSA20_MIXFULL(b, d, s); plan.from_input -= SALSA20_OUTSZ; \ } \ \ - if (sz) { \ - core(r, ctx->a, b); \ - SALSA20_STEP(ctx->a); \ + if (plan.tail) { \ + core(r, ctx->a, b); SALSA20_STEP(ctx->a); \ SALSA20_PREPBUF(ctx, b); \ - SALSA20_OUTBUF(ctx, d, s, sz); \ - assert(!sz); \ + SALSA20_OUTBUF(ctx, d, s, plan.tail); \ } \ } SALSA20_VARS(DEFENCRYPT) @@ -419,8 +424,6 @@ SALSA20_VARS(DEFHSALSA20) void XSALSA20_INIT(r, XSALSA20_CTX(r) *ctx, \ const void *key, size_t ksz, const void *nonce) \ { \ - static const octet zerononce[XSALSA20_NONCESZ]; \ - \ populate(ctx->k, key, ksz); \ ctx->s.a[ 0] = SALSA20_A256; \ ctx->s.a[ 1] = SALSA20_B256; \ @@ -868,9 +871,14 @@ SALSA20_VARS(DEFXGRAND) #include #include +#include #include #include +#ifdef ENABLE_ASM_DEBUG +# include "regdump.h" +#endif + static const int perm[] = { 0, 13, 10, 7, 4, 1, 14, 11, @@ -897,7 +905,7 @@ static const int perm[] = { } \ for (i = 0; i < SALSA20_OUTSZ/4; i++) STORE32_L(d.buf + 4*i, b[i]); \ \ - if (d.len != v[2].len || memcmp(d.buf, v[2].buf, v[2].len) != 0) { \ + if (d.len != v[2].len || MEMCMP(d.buf, !=, v[2].buf, v[2].len)) { \ ok = 0; \ printf("\nfail core:" \ "\n\titerations = %d" \ @@ -967,7 +975,7 @@ SALSA20_VARS(DEFVCORE) } \ if (sz) BASE##_ENCRYPT(r, &ctx, p, q, sz); \ \ - if (d.len != v[5].len || memcmp(d.buf, v[5].buf, v[5].len) != 0) { \ + if (d.len != v[5].len || MEMCMP(d.buf, !=, v[5].buf, v[5].len)) { \ ok = 0; \ printf("\nfail encrypt:" \ "\n\tstep = %lu" \ @@ -1013,6 +1021,9 @@ SALSA20_VARS(DEFXTAB) int main(int argc, char *argv[]) { +#ifdef ENABLE_ASM_DEBUG + regdump_init(); +#endif test_run(argc, argv, defs, SRCDIR"/t/salsa20"); return (0); }