X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/a905c0d66f0cb9d58a897a27233be1881c246a0d..10f61ef8a2deafb3b801786ad37338c410ed21d7:/symm/keccak1600.c diff --git a/symm/keccak1600.c b/symm/keccak1600.c index e57aaa81..c2d82926 100644 --- a/symm/keccak1600.c +++ b/symm/keccak1600.c @@ -221,6 +221,8 @@ static const lane rcon[24] = { * `keccak1600_round' below for the details. */ +#define COMPL_MASK 0x00121106u + #define STATE_INIT(z) do { \ lane cmpl = LANE_CMPL; \ (z)->S[I(1, 0)] = cmpl; (z)->S[I(2, 0)] = cmpl; \ @@ -240,6 +242,8 @@ static const lane rcon[24] = { #else /* A target with fused and/not (`bic', `andc2'). Everything is simple. */ +#define COMPL_MASK 0u + #define STATE_INIT(z) do ; while (0) #define STATE_OUT(z) do ; while (0) @@ -338,25 +342,25 @@ static void keccak1600_round(keccak1600_state *z, * result to Z. */ - lane b[5], c[5], d[5], t; + lane c[5], d[5], t; /* Theta, first step: calculate the column parities. */ #define COLPARITY(j) do { \ - c[j] = x->S[I(j, 0)]; \ - XOR_LANE(c[j], c[j], x->S[I(j, 1)]); \ - XOR_LANE(c[j], c[j], x->S[I(j, 2)]); \ - XOR_LANE(c[j], c[j], x->S[I(j, 3)]); \ - XOR_LANE(c[j], c[j], x->S[I(j, 4)]); \ + d[j] = x->S[I(j, 0)]; \ + XOR_LANE(d[j], d[j], x->S[I(j, 1)]); \ + XOR_LANE(d[j], d[j], x->S[I(j, 2)]); \ + XOR_LANE(d[j], d[j], x->S[I(j, 3)]); \ + XOR_LANE(d[j], d[j], x->S[I(j, 4)]); \ } while (0) COLPARITY(0); COLPARITY(1); COLPARITY(2); COLPARITY(3); COLPARITY(4); #undef COLPARITY /* Theta, second step: calculate the combined effect. */ - ROTL_LANE(d[0], c[1], 1); XOR_LANE(d[0], d[0], c[4]); - ROTL_LANE(d[1], c[2], 1); XOR_LANE(d[1], d[1], c[0]); - ROTL_LANE(d[2], c[3], 1); XOR_LANE(d[2], d[2], c[1]); - ROTL_LANE(d[3], c[4], 1); XOR_LANE(d[3], d[3], c[2]); - ROTL_LANE(d[4], c[0], 1); XOR_LANE(d[4], d[4], c[3]); + ROTL_LANE(c[0], d[1], 1); XOR_LANE(c[0], c[0], d[4]); + ROTL_LANE(c[1], d[2], 1); XOR_LANE(c[1], c[1], d[0]); + ROTL_LANE(c[2], d[3], 1); XOR_LANE(c[2], c[2], d[1]); + ROTL_LANE(c[3], d[4], 1); XOR_LANE(c[3], c[3], d[2]); + ROTL_LANE(c[4], d[0], 1); XOR_LANE(c[4], c[4], d[3]); /* Now we work plane by plane through the output. To do this, we must undo * the pi transposition. Pi maps (x', y') = (y, 2 x + 3 y), so y = x', and @@ -365,18 +369,18 @@ static void keccak1600_round(keccak1600_state *z, #define THETA_RHO(i0, i1, i2, i3, i4) do { \ \ /* First, theta. */ \ - XOR_LANE(b[0], x->S[I(i0, 0)], d[i0]); \ - XOR_LANE(b[1], x->S[I(i1, 1)], d[i1]); \ - XOR_LANE(b[2], x->S[I(i2, 2)], d[i2]); \ - XOR_LANE(b[3], x->S[I(i3, 3)], d[i3]); \ - XOR_LANE(b[4], x->S[I(i4, 4)], d[i4]); \ + XOR_LANE(d[0], x->S[I(i0, 0)], c[i0]); \ + XOR_LANE(d[1], x->S[I(i1, 1)], c[i1]); \ + XOR_LANE(d[2], x->S[I(i2, 2)], c[i2]); \ + XOR_LANE(d[3], x->S[I(i3, 3)], c[i3]); \ + XOR_LANE(d[4], x->S[I(i4, 4)], c[i4]); \ \ /* Then rho. */ \ - ROTL_LANE(b[0], b[0], ROT_##i0##_0); \ - ROTL_LANE(b[1], b[1], ROT_##i1##_1); \ - ROTL_LANE(b[2], b[2], ROT_##i2##_2); \ - ROTL_LANE(b[3], b[3], ROT_##i3##_3); \ - ROTL_LANE(b[4], b[4], ROT_##i4##_4); \ + ROTL_LANE(d[0], d[0], ROT_##i0##_0); \ + ROTL_LANE(d[1], d[1], ROT_##i1##_1); \ + ROTL_LANE(d[2], d[2], ROT_##i2##_2); \ + ROTL_LANE(d[3], d[3], ROT_##i3##_3); \ + ROTL_LANE(d[4], d[4], ROT_##i4##_4); \ } while (0) /* The basic chi operation is: z = w ^ (~a&b), but this involves an @@ -435,55 +439,55 @@ static void keccak1600_round(keccak1600_state *z, * This is hairy because we must worry about complementation. */ THETA_RHO(0, 1, 2, 3, 4); - CHI_COMPL(t, b[2]); /* [.] */ - CHI_101_0(z->S[I(0, 0)], b[0], b[1], b[2]); /* * . * -> . */ - CHI_001_1(z->S[I(1, 0)], b[1], t, b[3]); /* . [.] * -> * */ - CHI_110_1(z->S[I(2, 0)], b[2], b[3], b[4]); /* * * . -> * */ - CHI_101_0(z->S[I(3, 0)], b[3], b[4], b[0]); /* * * . -> . */ - CHI_010_0(z->S[I(4, 0)], b[4], b[0], b[1]); /* * . . -> . */ + CHI_COMPL(t, d[2]); /* [.] */ + CHI_101_0(z->S[I(0, 0)], d[0], d[1], d[2]); /* * . * -> . */ + CHI_001_1(z->S[I(1, 0)], d[1], t, d[3]); /* . [.] * -> * */ + CHI_110_1(z->S[I(2, 0)], d[2], d[3], d[4]); /* * * . -> * */ + CHI_101_0(z->S[I(3, 0)], d[3], d[4], d[0]); /* * * . -> . */ + CHI_010_0(z->S[I(4, 0)], d[4], d[0], d[1]); /* * . . -> . */ /* We'd better do iota before we forget. */ XOR_LANE(z->S[I(0, 0)], z->S[I(0, 0)], rcon[i]); /* That was fun. Maybe y' = 1 will be as good. */ THETA_RHO(3, 4, 0, 1, 2); - CHI_COMPL(t, b[4]); /* [*] */ - CHI_101_0(z->S[I(0, 1)], b[0], b[1], b[2]); /* * . * -> . */ - CHI_010_0(z->S[I(1, 1)], b[1], b[2], b[3]); /* . * . -> . */ - CHI_101_0(z->S[I(2, 1)], b[2], b[3], t); /* * . [*] -> . */ - CHI_001_1(z->S[I(3, 1)], b[3], b[4], b[0]); /* * . . -> * */ - CHI_010_0(z->S[I(4, 1)], b[4], b[0], b[1]); /* * . . -> . */ + CHI_COMPL(t, d[4]); /* [*] */ + CHI_101_0(z->S[I(0, 1)], d[0], d[1], d[2]); /* * . * -> . */ + CHI_010_0(z->S[I(1, 1)], d[1], d[2], d[3]); /* . * . -> . */ + CHI_101_0(z->S[I(2, 1)], d[2], d[3], t); /* * . [*] -> . */ + CHI_001_1(z->S[I(3, 1)], d[3], d[4], d[0]); /* * . . -> * */ + CHI_010_0(z->S[I(4, 1)], d[4], d[0], d[1]); /* * . . -> . */ /* We're getting the hang of this. The y' = 2 plane shouldn't be any * trouble. */ THETA_RHO(1, 2, 3, 4, 0); - CHI_COMPL(t, b[3]); /* [*] */ - CHI_101_0(z->S[I(0, 2)], b[0], b[1], b[2]); /* * . * -> . */ - CHI_010_0(z->S[I(1, 2)], b[1], b[2], b[3]); /* . * . -> . */ - CHI_110_1(z->S[I(2, 2)], b[2], t, b[4]); /* * [*] . -> * */ - CHI_101_0(z->S[I(3, 2)], t, b[4], b[0]); /* * [*] . -> . */ - CHI_010_0(z->S[I(4, 2)], b[4], b[0], b[1]); /* * . . -> . */ + CHI_COMPL(t, d[3]); /* [*] */ + CHI_101_0(z->S[I(0, 2)], d[0], d[1], d[2]); /* * . * -> . */ + CHI_010_0(z->S[I(1, 2)], d[1], d[2], d[3]); /* . * . -> . */ + CHI_110_1(z->S[I(2, 2)], d[2], t, d[4]); /* * [*] . -> * */ + CHI_101_0(z->S[I(3, 2)], t, d[4], d[0]); /* * [*] . -> . */ + CHI_010_0(z->S[I(4, 2)], d[4], d[0], d[1]); /* * . . -> . */ /* This isn't as interesting any more. Let's do y' = 3 before boredom sets * in. */ THETA_RHO(4, 0, 1, 2, 3); - CHI_COMPL(t, b[3]); /* [.] */ - CHI_010_0(z->S[I(0, 3)], b[0], b[1], b[2]); /* . * . -> . */ - CHI_101_0(z->S[I(1, 3)], b[1], b[2], b[3]); /* * . * -> . */ - CHI_001_1(z->S[I(2, 3)], b[2], t, b[4]); /* . [.] * -> * */ - CHI_010_0(z->S[I(3, 3)], t, b[4], b[0]); /* . [.] * -> . */ - CHI_101_0(z->S[I(4, 3)], b[4], b[0], b[1]); /* . * * -> . */ + CHI_COMPL(t, d[3]); /* [.] */ + CHI_010_0(z->S[I(0, 3)], d[0], d[1], d[2]); /* . * . -> . */ + CHI_101_0(z->S[I(1, 3)], d[1], d[2], d[3]); /* * . * -> . */ + CHI_001_1(z->S[I(2, 3)], d[2], t, d[4]); /* . [.] * -> * */ + CHI_010_0(z->S[I(3, 3)], t, d[4], d[0]); /* . [.] * -> . */ + CHI_101_0(z->S[I(4, 3)], d[4], d[0], d[1]); /* . * * -> . */ /* Last plane. Just y' = 4 to go. */ THETA_RHO(2, 3, 4, 0, 1); - CHI_COMPL(t, b[1]); /* [*] */ - CHI_110_1(z->S[I(0, 4)], b[0], t, b[2]); /* * [*] . -> * */ - CHI_101_0(z->S[I(1, 4)], t, b[2], b[3]); /* [*] . * -> . */ - CHI_010_0(z->S[I(2, 4)], b[2], b[3], b[4]); /* . * . -> . */ - CHI_101_0(z->S[I(3, 4)], b[3], b[4], b[0]); /* * * . -> . */ - CHI_010_0(z->S[I(4, 4)], b[4], b[0], b[1]); /* * . . -> . */ + CHI_COMPL(t, d[1]); /* [*] */ + CHI_110_1(z->S[I(0, 4)], d[0], t, d[2]); /* * [*] . -> * */ + CHI_101_0(z->S[I(1, 4)], t, d[2], d[3]); /* [*] . * -> . */ + CHI_010_0(z->S[I(2, 4)], d[2], d[3], d[4]); /* . * . -> . */ + CHI_101_0(z->S[I(3, 4)], d[3], d[4], d[0]); /* * * . -> . */ + CHI_010_0(z->S[I(4, 4)], d[4], d[0], d[1]); /* * . . -> . */ /* And we're done. */ #undef THETA_RHO @@ -534,7 +538,7 @@ void keccak1600_p(keccak1600_state *z, const keccak1600_state *x, unsigned n) keccak1600_round(&u, &v, i++); case 3: keccak1600_round(&v, &u, i++); keccak1600_round(&u, &v, i++); - case 1: keccak1600_round(z, &u, i++); + case 1: keccak1600_round( z, &u, i++); break; case 8: keccak1600_round(&v, &u, i++); keccak1600_round(&u, &v, i++); @@ -543,7 +547,7 @@ void keccak1600_p(keccak1600_state *z, const keccak1600_state *x, unsigned n) case 4: keccak1600_round(&v, &u, i++); keccak1600_round(&u, &v, i++); case 2: keccak1600_round(&v, &u, i++); - keccak1600_round(z, &v, i++); + keccak1600_round( z, &v, i++); break; } #ifdef KECCAK_DEBUG @@ -585,6 +589,35 @@ void keccak1600_mix(keccak1600_state *s, const kludge64 *p, size_t n) { a = TO_LANE(p[i]); XOR_LANE(s->S[i], s->S[i], a); } } +/* --- @keccak1600_set@ --- * + * + * Arguments: @keccak1600_state *s@ = a state to update + * @const kludge64 *p@ = pointer to 64-bit words to mix in + * @size_t n@ = size of the input, in 64-bit words + * + * Returns: --- + * + * Use: Stores data into a %$\Keccak[r, 1600 - r]$% state. Note that + * it's the caller's responsibility to pass in no more than + * %$r$% bits of data. + * + * This is not the operation you wanted for ordinary hashing. + * It's provided for the use of higher-level protocols which use + * duplexing and other fancy sponge features. + */ + +void keccak1600_set(keccak1600_state *s, const kludge64 *p, size_t n) +{ + uint32 m = COMPL_MASK; + unsigned i; + lane a; + + for (i = 0; i < n; i++) { + a = TO_LANE(p[i]); if (m&1) NOT_LANE(a, a); + s->S[i] = a; m >>= 1; + } +} + /* --- @keccak1600_extract@ --- * * * Arguments: @const keccak1600_state *s@ = a state to extract output from @@ -600,11 +633,14 @@ void keccak1600_mix(keccak1600_state *s, const kludge64 *p, size_t n) void keccak1600_extract(const keccak1600_state *s, kludge64 *p, size_t n) { + uint32 m = COMPL_MASK; unsigned i; - keccak1600_state t; + lane t; - t = *s; STATE_OUT(&t); - for (i = 0; i < n; i++) p[i] = FROM_LANE(t.S[i]); + for (i = 0; i < n; i++) { + t = s->S[i]; if (m&1) NOT_LANE(t, t); + *p++ = FROM_LANE(t); m >>= 1; + } } /*----- Test rig ----------------------------------------------------------*/