progs/perftest.c: Use from Glibc syscall numbers.
[catacomb] / symm / keccak1600.c
CommitLineData
a905c0d6
MW
1/* -*-c-*-
2 *
3 * The Keccak-p[1600, n] permutation
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include <limits.h>
31#include <string.h>
32
33#include <mLib/bits.h>
34
35#include "keccak1600.h"
d3f33b9a 36#include "permute.h"
a905c0d6
MW
37
38/* #define KECCAK_DEBUG */
39
40/*----- Miscellaneous utilities -------------------------------------------*/
41
42#define I(x, y) ((x) + 5*(y)) /* Column-major indexing */
43
44/*----- Interlacing or not ------------------------------------------------*/
45
46/* We should prefer the interlaced representation if the target is really
47 * 32-bit and only providing synthetic 64-bit integers. Alas, the Windows
48 * 64-bit ABI specifies that `long' is only 32-bits (i.e., it is IL32/LLP64),
49 * so detect x86 specifically.
50 */
51#if (ULONG_MAX >> 31) <= 0xffffffff && \
52 !defined(__amd64__) && !defined(_M_AMD64)
53# define KECCAK_I32
54#endif
55
56#ifdef KECCAK_I32
57/* A 32-bit target with at best weak support for 64-bit shifts. Maintain a
58 * lane as two 32-bit pieces representing the even and odd bits of the lane.
59 * There are slightly fiddly transformations to apply on the way in and out
60 * of the main permutation.
61 */
62
63typedef keccak1600_lane_i32 lane;
64#define S si32
65
66static lane interlace(kludge64 x)
67{
68 /* Given a 64-bit string X, return a lane Z containing the even- and
69 * odd-numbered bits of X.
a905c0d6
MW
70 */
71
d3f33b9a
MW
72typedef uint32 regty;
73#define REGWD 32
74
75 uint32 x0 = LO64(x), x1 = HI64(x);
a905c0d6 76 lane z;
d3f33b9a
MW
77 /* 5, 4, 3, 2, 1, 0 */
78 TWIZZLE_EXCH(x1, x0, 4); /* 4, 5, 3, 2, 1, 0 */
79 TWIZZLE_EXCH(x1, x0, 3); /* 3, 5, 4, 2, 1, 0 */
80 TWIZZLE_EXCH(x1, x0, 2); /* 2, 5, 4, 3, 1, 0 */
81 TWIZZLE_EXCH(x1, x0, 1); /* 1, 5, 4, 3, 2, 0 */
82 TWIZZLE_EXCH(x1, x0, 0); /* 0, 5, 4, 3, 2, 1 */
a905c0d6 83 z.even = x0; z.odd = x1; return (z);
d3f33b9a
MW
84
85#undef REGWD
a905c0d6
MW
86}
87
88static kludge64 deinterlace(lane x)
89{
90 /* Given a lane X, return the combined 64-bit value. This is the inverse
91 * to `interlace' above, and the principle is the same
92 */
93
d3f33b9a
MW
94typedef uint32 regty;
95#define REGWD 32
96
97 uint32 x0 = x.even, x1 = x.odd;
a905c0d6 98 kludge64 z;
d3f33b9a
MW
99 /* 0, 5, 4, 3, 2, 1 */
100 TWIZZLE_EXCH(x1, x0, 0); /* 1, 5, 4, 3, 2, 0 */
101 TWIZZLE_EXCH(x1, x0, 1); /* 2, 5, 4, 3, 1, 0 */
102 TWIZZLE_EXCH(x1, x0, 2); /* 3, 5, 4, 2, 1, 0 */
103 TWIZZLE_EXCH(x1, x0, 3); /* 4, 5, 3, 2, 1, 0 */
104 TWIZZLE_EXCH(x1, x0, 4); /* 5, 4, 3, 2, 1, 0 */
a905c0d6 105 SET64(z, x1, x0); return (z);
d3f33b9a
MW
106
107#undef REGWD
a905c0d6
MW
108}
109
110#define TO_LANE(x) (interlace(x))
111#define FROM_LANE(x) (deinterlace(x))
112
113#define PRINTFMT_LANE "%08lx:%08lx"
114#define PRINTARGS_LANE(x) (unsigned long)(x).even, (unsigned long)(x).odd
115
116#define BINOP_LANE(z, op, x, y) \
117 ((z).even = (x).even op (y).even, (z).odd = (x).odd op (y).odd)
118#define XOR_LANE(z, x, y) BINOP_LANE(z, ^, x, y)
119#define AND_LANE(z, x, y) BINOP_LANE(z, &, x, y)
120#define OR_LANE(z, x, y) BINOP_LANE(z, |, x, y)
121#define NOT_LANE(z, x) ((z).even = ~(x).even, (z).odd = ~(x).odd)
122
123#define ROTL_LANE(z, x, n) do { \
124 lane _t = (x); \
125 (z).even = (n)%2 ? ROL32(_t.odd, ((n) + 1)/2) \
126 : ROL32(_t.even, (n)/2); \
127 (z).odd = (n)%2 ? ROL32(_t.even, ((n) - 1)/2) \
128 : ROL32(_t.odd, (n)/2); \
129} while (0)
130
131#define LANE_ZERO { 0, 0 }
132#define LANE_CMPL { 0xffffffff, 0xffffffff }
133
134static const lane rcon[24] = {
135 { 0x00000001, 0x00000000 }, { 0x00000000, 0x00000089 },
136 { 0x00000000, 0x8000008b }, { 0x00000000, 0x80008080 },
137 { 0x00000001, 0x0000008b }, { 0x00000001, 0x00008000 },
138 { 0x00000001, 0x80008088 }, { 0x00000001, 0x80000082 },
139 { 0x00000000, 0x0000000b }, { 0x00000000, 0x0000000a },
140 { 0x00000001, 0x00008082 }, { 0x00000000, 0x00008003 },
141 { 0x00000001, 0x0000808b }, { 0x00000001, 0x8000000b },
142 { 0x00000001, 0x8000008a }, { 0x00000001, 0x80000081 },
143 { 0x00000000, 0x80000081 }, { 0x00000000, 0x80000008 },
144 { 0x00000000, 0x00000083 }, { 0x00000000, 0x80008003 },
145 { 0x00000001, 0x80008088 }, { 0x00000000, 0x80000088 },
146 { 0x00000001, 0x00008000 }, { 0x00000000, 0x80008082 }
147};
148
149#else
150/* A target with good support for 64-bit shifts. We store lanes as 64-bit
151 * quantities and deal with them in the obvious, natural way.
152 */
153
154typedef keccak1600_lane_64 lane;
155#define S s64
156
157#define TO_LANE(x) (x)
158#define FROM_LANE(x) (x)
159
160#define PRINTFMT_LANE "%08lx%08lx"
161#define PRINTARGS_LANE(x) (unsigned long)HI64(x), (unsigned long)LO64(x)
162
163#define XOR_LANE(z, x, y) XOR64((z), (x), (y))
164#define AND_LANE(z, x, y) AND64((z), (x), (y))
165#define OR_LANE(z, x, y) OR64((z), (x), (y))
166#define NOT_LANE(z, x) CPL64((z), (x))
167#define ROTL_LANE(z, x, n) ROL64_((z), (x), (n))
168
169#define LANE_ZERO X64( 0, 0)
170#define LANE_CMPL X64(ffffffff, ffffffff)
171
172static const lane rcon[24] = {
173 X64(00000000, 00000001), X64(00000000, 00008082),
174 X64(80000000, 0000808a), X64(80000000, 80008000),
175 X64(00000000, 0000808b), X64(00000000, 80000001),
176 X64(80000000, 80008081), X64(80000000, 00008009),
177 X64(00000000, 0000008a), X64(00000000, 00000088),
178 X64(00000000, 80008009), X64(00000000, 8000000a),
179 X64(00000000, 8000808b), X64(80000000, 0000008b),
180 X64(80000000, 00008089), X64(80000000, 00008003),
181 X64(80000000, 00008002), X64(80000000, 00000080),
182 X64(00000000, 0000800a), X64(80000000, 8000000a),
183 X64(80000000, 80008081), X64(80000000, 00008080),
184 X64(00000000, 80000001), X64(80000000, 80008008)
185};
186
187#endif
188
189/*----- Complementing or not ----------------------------------------------*/
190
191/* We should use the complemented representation if the target doesn't have a
192 * fused and-not operation. There doesn't appear to be a principled way to
193 * do this, so we'll just have to make do with a big list. Worse, in my
194 * brief survey of the architecture reference manuals I have lying about,
195 * they've split close to 50/50 on this question, so I don't have an
196 * especially good way to pick a default. The `no-fused-op' architectures
197 * seem generally a bit more modern than the `fused-op' architectures, so I
198 * guess I'll make the complemented representation the default.
199 *
200 * and-not No and-not
201 * ------- ----------
202 * ARM (`bic') x86/amd64
203 * Sparc (`andn') z/Architecture
204 * MMIX (`andn') MIPS
205 * IA64 (`andcm') 68k
206 * VAX (`bic') RISC-V
207 * PDP-10 (`andc')
208 */
209#if !(defined(__arm__) || defined(__thumb__) || defined(__aarch64__) || \
210 defined(_M_ARM) || defined(_M_THUMB)) && \
211 !(defined(__ia64__) || defined(__ia64) || defined(__itanium__) || \
212 defined(_M_IA64)) && \
213 !defined(__mmix__) && \
214 !(defined(__sparc__) || defined(__sparc)) && \
215 !defined(__vax__) && \
216 !defined(__pdp10__)
217# define KECCAK_COMPL
218#endif
219
220#ifdef KECCAK_COMPL
221/* A target without fused and/not (`bic', `andc2'). We complement some of
222 * the lanes in the initial state and undo this on output. (Absorbing XORs
223 * input into the state, so this is unaffected.) See the handling of chi in
224 * `keccak1600_round' below for the details.
225 */
226
1ccb258a
MW
227#define COMPL_MASK 0x00121106u
228
a905c0d6
MW
229#define STATE_INIT(z) do { \
230 lane cmpl = LANE_CMPL; \
231 (z)->S[I(1, 0)] = cmpl; (z)->S[I(2, 0)] = cmpl; \
232 (z)->S[I(3, 1)] = cmpl; (z)->S[I(2, 2)] = cmpl; \
233 (z)->S[I(2, 3)] = cmpl; (z)->S[I(0, 4)] = cmpl; \
234} while (0)
235
236#define STATE_OUT(z) do { \
237 NOT_LANE((z)->S[I(1, 0)], (z)->S[I(1, 0)]); \
238 NOT_LANE((z)->S[I(2, 0)], (z)->S[I(2, 0)]); \
239 NOT_LANE((z)->S[I(3, 1)], (z)->S[I(3, 1)]); \
240 NOT_LANE((z)->S[I(2, 2)], (z)->S[I(2, 2)]); \
241 NOT_LANE((z)->S[I(2, 3)], (z)->S[I(2, 3)]); \
242 NOT_LANE((z)->S[I(0, 4)], (z)->S[I(0, 4)]); \
243} while (0)
244
245#else
246/* A target with fused and/not (`bic', `andc2'). Everything is simple. */
247
1ccb258a
MW
248#define COMPL_MASK 0u
249
a905c0d6
MW
250#define STATE_INIT(z) do ; while (0)
251#define STATE_OUT(z) do ; while (0)
252
253#endif
254
255/*----- Other magic constants ---------------------------------------------*/
256
257/* The rotation constants. These are systematically named -- see `THETA_RHO'
258 * below.
259 */
260#define ROT_0_0 0
261#define ROT_1_0 1
262#define ROT_2_0 62
263#define ROT_3_0 28
264#define ROT_4_0 27
265
266#define ROT_0_1 36
267#define ROT_1_1 44
268#define ROT_2_1 6
269#define ROT_3_1 55
270#define ROT_4_1 20
271
272#define ROT_0_2 3
273#define ROT_1_2 10
274#define ROT_2_2 43
275#define ROT_3_2 25
276#define ROT_4_2 39
277
278#define ROT_0_3 41
279#define ROT_1_3 45
280#define ROT_2_3 15
281#define ROT_3_3 21
282#define ROT_4_3 8
283
284#define ROT_0_4 18
285#define ROT_1_4 2
286#define ROT_2_4 61
287#define ROT_3_4 56
288#define ROT_4_4 14
289
290/*----- Debugging ---------------------------------------------------------*/
291
292#ifdef KECCAK_DEBUG
293
294#include <stdio.h>
295
296static void dump_state(const char *what, unsigned ir,
297 const keccak1600_state *x)
298{
299 unsigned i, j;
300 keccak1600_state y;
301 kludge64 a;
302 int sep;
303
304 printf(";; %s [round %u]\n", what, ir);
305 printf(";; raw state...\n");
306 for (j = 0; j < 5; j++) {
307 printf(";;");
308 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ')
309 printf("%c" PRINTFMT_LANE, sep, PRINTARGS_LANE(x->S[I(i, j)]));
310 fputc('\n', stdout);
311 }
312 y = *x; STATE_OUT(&y);
313#ifdef KECCAK_COMPL
314 printf(";; uncomplemented state...\n");
315 for (j = 0; j < 5; j++) {
316 printf(";;");
317 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ')
318 printf("%c" PRINTFMT_LANE, sep, PRINTARGS_LANE(y.S[I(i, j)]));
319 fputc('\n', stdout);
320 }
321#endif
322#ifdef KECCAK_I32
323 printf(";; deinterlaced state...\n");
324 for (j = 0; j < 5; j++) {
325 printf(";;");
326 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ') {
327 a = FROM_LANE(y.S[I(i, j)]);
328 printf("%c%08lx%08lx", sep,
329 (unsigned long)HI64(a), (unsigned long)LO64(a));
330 }
331 fputc('\n', stdout);
332 }
333#endif
334 fputc('\n', stdout);
335}
336
337#endif
338
339/*----- The Keccak-p[1600, n] permutation ---------------------------------*/
340
341static void keccak1600_round(keccak1600_state *z,
342 const keccak1600_state *x, unsigned i)
343{
344 /* Perform a round of Keccak-p[1600, n]. Process the state X and write the
345 * result to Z.
346 */
347
608e360f 348 lane c[5], d[5], t;
a905c0d6
MW
349
350 /* Theta, first step: calculate the column parities. */
351#define COLPARITY(j) do { \
608e360f
MW
352 d[j] = x->S[I(j, 0)]; \
353 XOR_LANE(d[j], d[j], x->S[I(j, 1)]); \
354 XOR_LANE(d[j], d[j], x->S[I(j, 2)]); \
355 XOR_LANE(d[j], d[j], x->S[I(j, 3)]); \
356 XOR_LANE(d[j], d[j], x->S[I(j, 4)]); \
a905c0d6
MW
357} while (0)
358 COLPARITY(0); COLPARITY(1); COLPARITY(2); COLPARITY(3); COLPARITY(4);
359#undef COLPARITY
360
361 /* Theta, second step: calculate the combined effect. */
608e360f
MW
362 ROTL_LANE(c[0], d[1], 1); XOR_LANE(c[0], c[0], d[4]);
363 ROTL_LANE(c[1], d[2], 1); XOR_LANE(c[1], c[1], d[0]);
364 ROTL_LANE(c[2], d[3], 1); XOR_LANE(c[2], c[2], d[1]);
365 ROTL_LANE(c[3], d[4], 1); XOR_LANE(c[3], c[3], d[2]);
366 ROTL_LANE(c[4], d[0], 1); XOR_LANE(c[4], c[4], d[3]);
a905c0d6
MW
367
368 /* Now we work plane by plane through the output. To do this, we must undo
369 * the pi transposition. Pi maps (x', y') = (y, 2 x + 3 y), so y = x', and
370 * x = (y' - 3 y)/2 = 3 (y' - 3 x') = x' + 3 y'.
371 */
372#define THETA_RHO(i0, i1, i2, i3, i4) do { \
373 \
374 /* First, theta. */ \
608e360f
MW
375 XOR_LANE(d[0], x->S[I(i0, 0)], c[i0]); \
376 XOR_LANE(d[1], x->S[I(i1, 1)], c[i1]); \
377 XOR_LANE(d[2], x->S[I(i2, 2)], c[i2]); \
378 XOR_LANE(d[3], x->S[I(i3, 3)], c[i3]); \
379 XOR_LANE(d[4], x->S[I(i4, 4)], c[i4]); \
a905c0d6
MW
380 \
381 /* Then rho. */ \
608e360f
MW
382 ROTL_LANE(d[0], d[0], ROT_##i0##_0); \
383 ROTL_LANE(d[1], d[1], ROT_##i1##_1); \
384 ROTL_LANE(d[2], d[2], ROT_##i2##_2); \
385 ROTL_LANE(d[3], d[3], ROT_##i3##_3); \
386 ROTL_LANE(d[4], d[4], ROT_##i4##_4); \
a905c0d6
MW
387} while (0)
388
389 /* The basic chi operation is: z = w ^ (~a&b), but this involves an
390 * inversion which we can mostly avoid by being clever: observe that
391 *
392 * w ^ (~a&~~b) = w ^ ~(a | ~b) = ~w ^ (a | ~b)
393 *
394 * by De Morgan's law. Furthermore, complementing w or z is basically
395 * equivalent. Bertoni, Daemen, Peeters, Van Assche, and Van Keer, `Keccak
396 * implementation overview', describe a pattern of lane complementation
397 * which propagates through theta and pi in exactly the right way to be
398 * restored easily by chi, here, with exactly one inversion per plane.
399 *
400 * Here's the pattern.
401 *
402 * [ * . * * . ] [ . * * . . ]
403 * [ * . * . . ] [ . . . * . ]
404 * [ * . * . . ] -> [ . . * . . ]
405 * [ . * . * * ] [ . . * . . ]
406 * [ * . . * . ] [ * . . . . ]
407 *
408 * where a `.' means that the lane is unchanged, and a `*' means that it
409 * has been complemented.
410 *
411 * The macros `CHI_wxy_z' calculate z in terms of w, x, y assuming that the
412 * inputs w, x, y marked with a `1' are complemented on input, and arrange
413 * for z to be complemented on output if z is so marked.
414 *
415 * The diagrams to the right show the fragment of the complementation
416 * pattern being handled by the corresponding line of code. A symbol in
417 * brackets indicates a deviation from the input pattern forced by explicit
418 * complementation: there will be exactly one of these for each plane.
419 */
420#ifdef KECCAK_COMPL
421# define CHI_COMPL(z, x) NOT_LANE((z), (x))
422# define CHI_001_1(z, w, x, y) \
423 (OR_LANE((z), (x), (y)), XOR_LANE((z), (z), (w)))
424# define CHI_010_0(z, w, x, y) \
425 (AND_LANE((z), (x), (y)), XOR_LANE((z), (z), (w)))
426# define CHI_101_0 CHI_001_1
427# define CHI_110_1 CHI_010_0
428#else
429# define CHI(z, w, x, y) \
430 (NOT_LANE((z), (x)), \
431 AND_LANE((z), (z), (y)), \
432 XOR_LANE((z), (z), (w)))
433# define CHI_COMPL(z, x) ((z) = (x))
434# define CHI_001_1 CHI
435# define CHI_010_0 CHI
436# define CHI_101_0 CHI
437# define CHI_110_1 CHI
438#endif
439
440 /* Let's do the y' = 0 plane first. Theta and rho are easy with our macro,
441 * and we've done pi with the coordinate hacking. That leaves chi next.
442 * This is hairy because we must worry about complementation.
443 */
444 THETA_RHO(0, 1, 2, 3, 4);
608e360f
MW
445 CHI_COMPL(t, d[2]); /* [.] */
446 CHI_101_0(z->S[I(0, 0)], d[0], d[1], d[2]); /* * . * -> . */
447 CHI_001_1(z->S[I(1, 0)], d[1], t, d[3]); /* . [.] * -> * */
448 CHI_110_1(z->S[I(2, 0)], d[2], d[3], d[4]); /* * * . -> * */
449 CHI_101_0(z->S[I(3, 0)], d[3], d[4], d[0]); /* * * . -> . */
450 CHI_010_0(z->S[I(4, 0)], d[4], d[0], d[1]); /* * . . -> . */
a905c0d6
MW
451
452 /* We'd better do iota before we forget. */
453 XOR_LANE(z->S[I(0, 0)], z->S[I(0, 0)], rcon[i]);
454
455 /* That was fun. Maybe y' = 1 will be as good. */
456 THETA_RHO(3, 4, 0, 1, 2);
608e360f
MW
457 CHI_COMPL(t, d[4]); /* [*] */
458 CHI_101_0(z->S[I(0, 1)], d[0], d[1], d[2]); /* * . * -> . */
459 CHI_010_0(z->S[I(1, 1)], d[1], d[2], d[3]); /* . * . -> . */
460 CHI_101_0(z->S[I(2, 1)], d[2], d[3], t); /* * . [*] -> . */
461 CHI_001_1(z->S[I(3, 1)], d[3], d[4], d[0]); /* * . . -> * */
462 CHI_010_0(z->S[I(4, 1)], d[4], d[0], d[1]); /* * . . -> . */
a905c0d6
MW
463
464 /* We're getting the hang of this. The y' = 2 plane shouldn't be any
465 * trouble.
466 */
467 THETA_RHO(1, 2, 3, 4, 0);
608e360f
MW
468 CHI_COMPL(t, d[3]); /* [*] */
469 CHI_101_0(z->S[I(0, 2)], d[0], d[1], d[2]); /* * . * -> . */
470 CHI_010_0(z->S[I(1, 2)], d[1], d[2], d[3]); /* . * . -> . */
471 CHI_110_1(z->S[I(2, 2)], d[2], t, d[4]); /* * [*] . -> * */
472 CHI_101_0(z->S[I(3, 2)], t, d[4], d[0]); /* * [*] . -> . */
473 CHI_010_0(z->S[I(4, 2)], d[4], d[0], d[1]); /* * . . -> . */
a905c0d6
MW
474
475 /* This isn't as interesting any more. Let's do y' = 3 before boredom sets
476 * in.
477 */
478 THETA_RHO(4, 0, 1, 2, 3);
608e360f
MW
479 CHI_COMPL(t, d[3]); /* [.] */
480 CHI_010_0(z->S[I(0, 3)], d[0], d[1], d[2]); /* . * . -> . */
481 CHI_101_0(z->S[I(1, 3)], d[1], d[2], d[3]); /* * . * -> . */
482 CHI_001_1(z->S[I(2, 3)], d[2], t, d[4]); /* . [.] * -> * */
483 CHI_010_0(z->S[I(3, 3)], t, d[4], d[0]); /* . [.] * -> . */
484 CHI_101_0(z->S[I(4, 3)], d[4], d[0], d[1]); /* . * * -> . */
a905c0d6
MW
485
486 /* Last plane. Just y' = 4 to go. */
487 THETA_RHO(2, 3, 4, 0, 1);
608e360f
MW
488 CHI_COMPL(t, d[1]); /* [*] */
489 CHI_110_1(z->S[I(0, 4)], d[0], t, d[2]); /* * [*] . -> * */
490 CHI_101_0(z->S[I(1, 4)], t, d[2], d[3]); /* [*] . * -> . */
491 CHI_010_0(z->S[I(2, 4)], d[2], d[3], d[4]); /* . * . -> . */
492 CHI_101_0(z->S[I(3, 4)], d[3], d[4], d[0]); /* * * . -> . */
493 CHI_010_0(z->S[I(4, 4)], d[4], d[0], d[1]); /* * . . -> . */
a905c0d6
MW
494
495 /* And we're done. */
496#undef THETA_RHO
497#undef CHI_COMPL
498#undef CHI_001_1
499#undef CHI_010_0
500#undef CHI_101_0
501#undef CHI_110_1
502#undef CHI
503}
504
505/* --- @keccak1600_p@ --- *
506 *
507 * Arguments: @keccak1600_state *z@ = where to write the output state
508 * @conts keccak1600_state *x@ = input state
509 * @unsigned n@ = number of rounds to perform
510 *
511 * Returns: ---
512 *
513 * Use: Implements the %$\Keccak[1600, n]$% permutation at the core
514 * of Keccak and the SHA-3 standard.
515 */
516
517void keccak1600_p(keccak1600_state *z, const keccak1600_state *x, unsigned n)
518{
519 keccak1600_state u, v;
520 unsigned i = 0;
521
522#ifdef KECCAK_DEBUG
523 dump_state("init", 0, x);
524#endif
525 keccak1600_round(&u, x, i++); n--;
526 while (n > 8) {
527 keccak1600_round(&v, &u, i++);
528 keccak1600_round(&u, &v, i++);
529 keccak1600_round(&v, &u, i++);
530 keccak1600_round(&u, &v, i++);
531 keccak1600_round(&v, &u, i++);
532 keccak1600_round(&u, &v, i++);
533 keccak1600_round(&v, &u, i++);
534 keccak1600_round(&u, &v, i++);
535 n -= 8;
536 }
537 switch (n) {
538 case 7: keccak1600_round(&v, &u, i++);
539 keccak1600_round(&u, &v, i++);
540 case 5: keccak1600_round(&v, &u, i++);
541 keccak1600_round(&u, &v, i++);
542 case 3: keccak1600_round(&v, &u, i++);
543 keccak1600_round(&u, &v, i++);
718a6891 544 case 1: keccak1600_round( z, &u, i++);
a905c0d6
MW
545 break;
546 case 8: keccak1600_round(&v, &u, i++);
547 keccak1600_round(&u, &v, i++);
548 case 6: keccak1600_round(&v, &u, i++);
549 keccak1600_round(&u, &v, i++);
550 case 4: keccak1600_round(&v, &u, i++);
551 keccak1600_round(&u, &v, i++);
552 case 2: keccak1600_round(&v, &u, i++);
718a6891 553 keccak1600_round( z, &v, i++);
a905c0d6
MW
554 break;
555 }
556#ifdef KECCAK_DEBUG
557 dump_state("final", 0, z);
558#endif
559}
560
561/* --- @keccack1600_init@ --- *
562 *
563 * Arguments: @keccak1600_state *s@ = a state to initialize
564 *
565 * Returns: ---
566 *
567 * Use: Initialize @s@ to the root state.
568 */
569
570void keccak1600_init(keccak1600_state *s)
571 { memset(s->S, 0, sizeof(s->S)); STATE_INIT(s); }
572
573/* --- @keccak1600_mix@ --- *
574 *
575 * Arguments: @keccak1600_state *s@ = a state to update
576 * @const kludge64 *p@ = pointer to 64-bit words to mix in
577 * @size_t n@ = size of the input, in 64-bit words
578 *
579 * Returns: ---
580 *
581 * Use: Mixes data into a %$\Keccak[r, 1600 - r]$% state. Note that
582 * it's the caller's responsibility to pass in no more than
583 * %$r$% bits of data.
584 */
585
586void keccak1600_mix(keccak1600_state *s, const kludge64 *p, size_t n)
587{
588 unsigned i;
589 lane a;
590
591 for (i = 0; i < n; i++)
592 { a = TO_LANE(p[i]); XOR_LANE(s->S[i], s->S[i], a); }
593}
594
10f61ef8
MW
595/* --- @keccak1600_set@ --- *
596 *
597 * Arguments: @keccak1600_state *s@ = a state to update
598 * @const kludge64 *p@ = pointer to 64-bit words to mix in
599 * @size_t n@ = size of the input, in 64-bit words
600 *
601 * Returns: ---
602 *
603 * Use: Stores data into a %$\Keccak[r, 1600 - r]$% state. Note that
604 * it's the caller's responsibility to pass in no more than
605 * %$r$% bits of data.
606 *
607 * This is not the operation you wanted for ordinary hashing.
608 * It's provided for the use of higher-level protocols which use
609 * duplexing and other fancy sponge features.
610 */
611
612void keccak1600_set(keccak1600_state *s, const kludge64 *p, size_t n)
613{
614 uint32 m = COMPL_MASK;
615 unsigned i;
616 lane a;
617
618 for (i = 0; i < n; i++) {
619 a = TO_LANE(p[i]); if (m&1) NOT_LANE(a, a);
620 s->S[i] = a; m >>= 1;
621 }
622}
623
a905c0d6
MW
624/* --- @keccak1600_extract@ --- *
625 *
626 * Arguments: @const keccak1600_state *s@ = a state to extract output from
627 * @kludge64 *p@ = pointer to 64-bit words to write
628 * @size_t n@ = size of the output, in 64-bit words
629 *
630 * Returns: ---
631 *
632 * Use: Reads output from a %$\Keccak[r, 1600 - r]$% state. Note
633 * that it's the caller's responsibility to extract no more than
634 * %$r$% bits of data.
635 */
636
637void keccak1600_extract(const keccak1600_state *s, kludge64 *p, size_t n)
638{
1ccb258a 639 uint32 m = COMPL_MASK;
a905c0d6 640 unsigned i;
1ccb258a 641 lane t;
a905c0d6 642
1ccb258a
MW
643 for (i = 0; i < n; i++) {
644 t = s->S[i]; if (m&1) NOT_LANE(t, t);
645 *p++ = FROM_LANE(t); m >>= 1;
646 }
a905c0d6
MW
647}
648
649/*----- Test rig ----------------------------------------------------------*/
650
651#ifdef TEST_RIG
652
653#include <stdio.h>
654
141c1284 655#include <mLib/macros.h>
a905c0d6
MW
656#include <mLib/quis.h>
657#include <mLib/report.h>
658#include <mLib/testrig.h>
659
660static int vrf_p(dstr v[])
661{
662 keccak1600_state u;
663 kludge64 t[25];
664 dstr d = DSTR_INIT;
665 int n;
666 unsigned i;
667 int ok = 1;
668
669 if (v[0].len != 200) die(1, "bad input size");
670 if (v[2].len != 200) die(1, "bad output size");
671 n = *(int *)v[1].buf;
672 dstr_ensure(&d, 200); d.len = 200;
673
674 keccak1600_init(&u);
675 for (i = 0; i < 25; i++) LOAD64_L_(t[i], v[0].buf + 8*i);
676 keccak1600_mix(&u, t, 25);
677 keccak1600_p(&u, &u, n);
678 keccak1600_extract(&u, t, 25);
679 for (i = 0; i < 25; i++) STORE64_L_(d.buf + 8*i, t[i]);
141c1284 680 if (MEMCMP(d.buf, !=, v[2].buf, 200)) {
a905c0d6
MW
681 ok = 0;
682 fprintf(stderr, "failed!");
683 fprintf(stderr, "\n\t input = "); type_hex.dump(&v[0], stderr);
684 fprintf(stderr, "\n\t rounds = %d", n);
685 fprintf(stderr, "\n\t expected = "); type_hex.dump(&v[2], stderr);
686 fprintf(stderr, "\n\t calclated = "); type_hex.dump(&d, stderr);
687 }
688
689 dstr_destroy(&d);
690 return (ok);
691}
692
693static test_chunk defs[] = {
694 { "p", vrf_p, { &type_hex, &type_int, &type_hex } },
695 { 0, 0, { 0 } }
696};
697
698int main(int argc, char *argv[])
699{
700 test_run(argc, argv, defs, SRCDIR"/t/keccak1600");
701 return (0);
702}
703
704#endif
705
706/*----- That's all, folks -------------------------------------------------*/