symm/keccak1600.c: Add new function to overwrite the state.
[catacomb] / symm / keccak1600.c
1 /* -*-c-*-
2 *
3 * The Keccak-p[1600, n] permutation
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28 /*----- Header files ------------------------------------------------------*/
29
30 #include <limits.h>
31 #include <string.h>
32
33 #include <mLib/bits.h>
34
35 #include "keccak1600.h"
36
37 /* #define KECCAK_DEBUG */
38
39 /*----- Miscellaneous utilities -------------------------------------------*/
40
41 #define I(x, y) ((x) + 5*(y)) /* Column-major indexing */
42
43 /*----- Interlacing or not ------------------------------------------------*/
44
45 /* We should prefer the interlaced representation if the target is really
46 * 32-bit and only providing synthetic 64-bit integers. Alas, the Windows
47 * 64-bit ABI specifies that `long' is only 32-bits (i.e., it is IL32/LLP64),
48 * so detect x86 specifically.
49 */
50 #if (ULONG_MAX >> 31) <= 0xffffffff && \
51 !defined(__amd64__) && !defined(_M_AMD64)
52 # define KECCAK_I32
53 #endif
54
55 #ifdef KECCAK_I32
56 /* A 32-bit target with at best weak support for 64-bit shifts. Maintain a
57 * lane as two 32-bit pieces representing the even and odd bits of the lane.
58 * There are slightly fiddly transformations to apply on the way in and out
59 * of the main permutation.
60 */
61
62 typedef keccak1600_lane_i32 lane;
63 #define S si32
64
65 static lane interlace(kludge64 x)
66 {
67 /* Given a 64-bit string X, return a lane Z containing the even- and
68 * odd-numbered bits of X.
69 *
70 * This becomes more manageable if we look at what happens to the bit
71 * indices: bit i of X becomes bit ROR_6(i, 1) of Z. We can effectively
72 * swap two bits of the indices by swapping the object bits where those
73 * index bits differ. Fortunately, this is fairly easy.
74 *
75 * We arrange to swap bits between the two halves of X, rather than within
76 * a half.
77 */
78
79 uint32 x0 = LO64(x), x1 = HI64(x), t;
80 lane z;
81 /* 543210 */
82 t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 453210 */
83 t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 354210 */
84 t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 254310 */
85 t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 154320 */
86 t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 054321 */
87 z.even = x0; z.odd = x1; return (z);
88 }
89
90 static kludge64 deinterlace(lane x)
91 {
92 /* Given a lane X, return the combined 64-bit value. This is the inverse
93 * to `interlace' above, and the principle is the same
94 */
95
96 uint32 x0 = x.even, x1 = x.odd, t;
97 kludge64 z;
98 /* 054321 */
99 t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 154320 */
100 t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 254310 */
101 t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 354210 */
102 t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 453210 */
103 t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 543210 */
104 SET64(z, x1, x0); return (z);
105 }
106
107 #define TO_LANE(x) (interlace(x))
108 #define FROM_LANE(x) (deinterlace(x))
109
110 #define PRINTFMT_LANE "%08lx:%08lx"
111 #define PRINTARGS_LANE(x) (unsigned long)(x).even, (unsigned long)(x).odd
112
113 #define BINOP_LANE(z, op, x, y) \
114 ((z).even = (x).even op (y).even, (z).odd = (x).odd op (y).odd)
115 #define XOR_LANE(z, x, y) BINOP_LANE(z, ^, x, y)
116 #define AND_LANE(z, x, y) BINOP_LANE(z, &, x, y)
117 #define OR_LANE(z, x, y) BINOP_LANE(z, |, x, y)
118 #define NOT_LANE(z, x) ((z).even = ~(x).even, (z).odd = ~(x).odd)
119
120 #define ROTL_LANE(z, x, n) do { \
121 lane _t = (x); \
122 (z).even = (n)%2 ? ROL32(_t.odd, ((n) + 1)/2) \
123 : ROL32(_t.even, (n)/2); \
124 (z).odd = (n)%2 ? ROL32(_t.even, ((n) - 1)/2) \
125 : ROL32(_t.odd, (n)/2); \
126 } while (0)
127
128 #define LANE_ZERO { 0, 0 }
129 #define LANE_CMPL { 0xffffffff, 0xffffffff }
130
131 static const lane rcon[24] = {
132 { 0x00000001, 0x00000000 }, { 0x00000000, 0x00000089 },
133 { 0x00000000, 0x8000008b }, { 0x00000000, 0x80008080 },
134 { 0x00000001, 0x0000008b }, { 0x00000001, 0x00008000 },
135 { 0x00000001, 0x80008088 }, { 0x00000001, 0x80000082 },
136 { 0x00000000, 0x0000000b }, { 0x00000000, 0x0000000a },
137 { 0x00000001, 0x00008082 }, { 0x00000000, 0x00008003 },
138 { 0x00000001, 0x0000808b }, { 0x00000001, 0x8000000b },
139 { 0x00000001, 0x8000008a }, { 0x00000001, 0x80000081 },
140 { 0x00000000, 0x80000081 }, { 0x00000000, 0x80000008 },
141 { 0x00000000, 0x00000083 }, { 0x00000000, 0x80008003 },
142 { 0x00000001, 0x80008088 }, { 0x00000000, 0x80000088 },
143 { 0x00000001, 0x00008000 }, { 0x00000000, 0x80008082 }
144 };
145
146 #else
147 /* A target with good support for 64-bit shifts. We store lanes as 64-bit
148 * quantities and deal with them in the obvious, natural way.
149 */
150
151 typedef keccak1600_lane_64 lane;
152 #define S s64
153
154 #define TO_LANE(x) (x)
155 #define FROM_LANE(x) (x)
156
157 #define PRINTFMT_LANE "%08lx%08lx"
158 #define PRINTARGS_LANE(x) (unsigned long)HI64(x), (unsigned long)LO64(x)
159
160 #define XOR_LANE(z, x, y) XOR64((z), (x), (y))
161 #define AND_LANE(z, x, y) AND64((z), (x), (y))
162 #define OR_LANE(z, x, y) OR64((z), (x), (y))
163 #define NOT_LANE(z, x) CPL64((z), (x))
164 #define ROTL_LANE(z, x, n) ROL64_((z), (x), (n))
165
166 #define LANE_ZERO X64( 0, 0)
167 #define LANE_CMPL X64(ffffffff, ffffffff)
168
169 static const lane rcon[24] = {
170 X64(00000000, 00000001), X64(00000000, 00008082),
171 X64(80000000, 0000808a), X64(80000000, 80008000),
172 X64(00000000, 0000808b), X64(00000000, 80000001),
173 X64(80000000, 80008081), X64(80000000, 00008009),
174 X64(00000000, 0000008a), X64(00000000, 00000088),
175 X64(00000000, 80008009), X64(00000000, 8000000a),
176 X64(00000000, 8000808b), X64(80000000, 0000008b),
177 X64(80000000, 00008089), X64(80000000, 00008003),
178 X64(80000000, 00008002), X64(80000000, 00000080),
179 X64(00000000, 0000800a), X64(80000000, 8000000a),
180 X64(80000000, 80008081), X64(80000000, 00008080),
181 X64(00000000, 80000001), X64(80000000, 80008008)
182 };
183
184 #endif
185
186 /*----- Complementing or not ----------------------------------------------*/
187
188 /* We should use the complemented representation if the target doesn't have a
189 * fused and-not operation. There doesn't appear to be a principled way to
190 * do this, so we'll just have to make do with a big list. Worse, in my
191 * brief survey of the architecture reference manuals I have lying about,
192 * they've split close to 50/50 on this question, so I don't have an
193 * especially good way to pick a default. The `no-fused-op' architectures
194 * seem generally a bit more modern than the `fused-op' architectures, so I
195 * guess I'll make the complemented representation the default.
196 *
197 * and-not No and-not
198 * ------- ----------
199 * ARM (`bic') x86/amd64
200 * Sparc (`andn') z/Architecture
201 * MMIX (`andn') MIPS
202 * IA64 (`andcm') 68k
203 * VAX (`bic') RISC-V
204 * PDP-10 (`andc')
205 */
206 #if !(defined(__arm__) || defined(__thumb__) || defined(__aarch64__) || \
207 defined(_M_ARM) || defined(_M_THUMB)) && \
208 !(defined(__ia64__) || defined(__ia64) || defined(__itanium__) || \
209 defined(_M_IA64)) && \
210 !defined(__mmix__) && \
211 !(defined(__sparc__) || defined(__sparc)) && \
212 !defined(__vax__) && \
213 !defined(__pdp10__)
214 # define KECCAK_COMPL
215 #endif
216
217 #ifdef KECCAK_COMPL
218 /* A target without fused and/not (`bic', `andc2'). We complement some of
219 * the lanes in the initial state and undo this on output. (Absorbing XORs
220 * input into the state, so this is unaffected.) See the handling of chi in
221 * `keccak1600_round' below for the details.
222 */
223
224 #define COMPL_MASK 0x00121106u
225
226 #define STATE_INIT(z) do { \
227 lane cmpl = LANE_CMPL; \
228 (z)->S[I(1, 0)] = cmpl; (z)->S[I(2, 0)] = cmpl; \
229 (z)->S[I(3, 1)] = cmpl; (z)->S[I(2, 2)] = cmpl; \
230 (z)->S[I(2, 3)] = cmpl; (z)->S[I(0, 4)] = cmpl; \
231 } while (0)
232
233 #define STATE_OUT(z) do { \
234 NOT_LANE((z)->S[I(1, 0)], (z)->S[I(1, 0)]); \
235 NOT_LANE((z)->S[I(2, 0)], (z)->S[I(2, 0)]); \
236 NOT_LANE((z)->S[I(3, 1)], (z)->S[I(3, 1)]); \
237 NOT_LANE((z)->S[I(2, 2)], (z)->S[I(2, 2)]); \
238 NOT_LANE((z)->S[I(2, 3)], (z)->S[I(2, 3)]); \
239 NOT_LANE((z)->S[I(0, 4)], (z)->S[I(0, 4)]); \
240 } while (0)
241
242 #else
243 /* A target with fused and/not (`bic', `andc2'). Everything is simple. */
244
245 #define COMPL_MASK 0u
246
247 #define STATE_INIT(z) do ; while (0)
248 #define STATE_OUT(z) do ; while (0)
249
250 #endif
251
252 /*----- Other magic constants ---------------------------------------------*/
253
254 /* The rotation constants. These are systematically named -- see `THETA_RHO'
255 * below.
256 */
257 #define ROT_0_0 0
258 #define ROT_1_0 1
259 #define ROT_2_0 62
260 #define ROT_3_0 28
261 #define ROT_4_0 27
262
263 #define ROT_0_1 36
264 #define ROT_1_1 44
265 #define ROT_2_1 6
266 #define ROT_3_1 55
267 #define ROT_4_1 20
268
269 #define ROT_0_2 3
270 #define ROT_1_2 10
271 #define ROT_2_2 43
272 #define ROT_3_2 25
273 #define ROT_4_2 39
274
275 #define ROT_0_3 41
276 #define ROT_1_3 45
277 #define ROT_2_3 15
278 #define ROT_3_3 21
279 #define ROT_4_3 8
280
281 #define ROT_0_4 18
282 #define ROT_1_4 2
283 #define ROT_2_4 61
284 #define ROT_3_4 56
285 #define ROT_4_4 14
286
287 /*----- Debugging ---------------------------------------------------------*/
288
289 #ifdef KECCAK_DEBUG
290
291 #include <stdio.h>
292
293 static void dump_state(const char *what, unsigned ir,
294 const keccak1600_state *x)
295 {
296 unsigned i, j;
297 keccak1600_state y;
298 kludge64 a;
299 int sep;
300
301 printf(";; %s [round %u]\n", what, ir);
302 printf(";; raw state...\n");
303 for (j = 0; j < 5; j++) {
304 printf(";;");
305 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ')
306 printf("%c" PRINTFMT_LANE, sep, PRINTARGS_LANE(x->S[I(i, j)]));
307 fputc('\n', stdout);
308 }
309 y = *x; STATE_OUT(&y);
310 #ifdef KECCAK_COMPL
311 printf(";; uncomplemented state...\n");
312 for (j = 0; j < 5; j++) {
313 printf(";;");
314 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ')
315 printf("%c" PRINTFMT_LANE, sep, PRINTARGS_LANE(y.S[I(i, j)]));
316 fputc('\n', stdout);
317 }
318 #endif
319 #ifdef KECCAK_I32
320 printf(";; deinterlaced state...\n");
321 for (j = 0; j < 5; j++) {
322 printf(";;");
323 for (i = 0, sep = '\t'; i < 5; i++, sep = ' ') {
324 a = FROM_LANE(y.S[I(i, j)]);
325 printf("%c%08lx%08lx", sep,
326 (unsigned long)HI64(a), (unsigned long)LO64(a));
327 }
328 fputc('\n', stdout);
329 }
330 #endif
331 fputc('\n', stdout);
332 }
333
334 #endif
335
336 /*----- The Keccak-p[1600, n] permutation ---------------------------------*/
337
338 static void keccak1600_round(keccak1600_state *z,
339 const keccak1600_state *x, unsigned i)
340 {
341 /* Perform a round of Keccak-p[1600, n]. Process the state X and write the
342 * result to Z.
343 */
344
345 lane c[5], d[5], t;
346
347 /* Theta, first step: calculate the column parities. */
348 #define COLPARITY(j) do { \
349 d[j] = x->S[I(j, 0)]; \
350 XOR_LANE(d[j], d[j], x->S[I(j, 1)]); \
351 XOR_LANE(d[j], d[j], x->S[I(j, 2)]); \
352 XOR_LANE(d[j], d[j], x->S[I(j, 3)]); \
353 XOR_LANE(d[j], d[j], x->S[I(j, 4)]); \
354 } while (0)
355 COLPARITY(0); COLPARITY(1); COLPARITY(2); COLPARITY(3); COLPARITY(4);
356 #undef COLPARITY
357
358 /* Theta, second step: calculate the combined effect. */
359 ROTL_LANE(c[0], d[1], 1); XOR_LANE(c[0], c[0], d[4]);
360 ROTL_LANE(c[1], d[2], 1); XOR_LANE(c[1], c[1], d[0]);
361 ROTL_LANE(c[2], d[3], 1); XOR_LANE(c[2], c[2], d[1]);
362 ROTL_LANE(c[3], d[4], 1); XOR_LANE(c[3], c[3], d[2]);
363 ROTL_LANE(c[4], d[0], 1); XOR_LANE(c[4], c[4], d[3]);
364
365 /* Now we work plane by plane through the output. To do this, we must undo
366 * the pi transposition. Pi maps (x', y') = (y, 2 x + 3 y), so y = x', and
367 * x = (y' - 3 y)/2 = 3 (y' - 3 x') = x' + 3 y'.
368 */
369 #define THETA_RHO(i0, i1, i2, i3, i4) do { \
370 \
371 /* First, theta. */ \
372 XOR_LANE(d[0], x->S[I(i0, 0)], c[i0]); \
373 XOR_LANE(d[1], x->S[I(i1, 1)], c[i1]); \
374 XOR_LANE(d[2], x->S[I(i2, 2)], c[i2]); \
375 XOR_LANE(d[3], x->S[I(i3, 3)], c[i3]); \
376 XOR_LANE(d[4], x->S[I(i4, 4)], c[i4]); \
377 \
378 /* Then rho. */ \
379 ROTL_LANE(d[0], d[0], ROT_##i0##_0); \
380 ROTL_LANE(d[1], d[1], ROT_##i1##_1); \
381 ROTL_LANE(d[2], d[2], ROT_##i2##_2); \
382 ROTL_LANE(d[3], d[3], ROT_##i3##_3); \
383 ROTL_LANE(d[4], d[4], ROT_##i4##_4); \
384 } while (0)
385
386 /* The basic chi operation is: z = w ^ (~a&b), but this involves an
387 * inversion which we can mostly avoid by being clever: observe that
388 *
389 * w ^ (~a&~~b) = w ^ ~(a | ~b) = ~w ^ (a | ~b)
390 *
391 * by De Morgan's law. Furthermore, complementing w or z is basically
392 * equivalent. Bertoni, Daemen, Peeters, Van Assche, and Van Keer, `Keccak
393 * implementation overview', describe a pattern of lane complementation
394 * which propagates through theta and pi in exactly the right way to be
395 * restored easily by chi, here, with exactly one inversion per plane.
396 *
397 * Here's the pattern.
398 *
399 * [ * . * * . ] [ . * * . . ]
400 * [ * . * . . ] [ . . . * . ]
401 * [ * . * . . ] -> [ . . * . . ]
402 * [ . * . * * ] [ . . * . . ]
403 * [ * . . * . ] [ * . . . . ]
404 *
405 * where a `.' means that the lane is unchanged, and a `*' means that it
406 * has been complemented.
407 *
408 * The macros `CHI_wxy_z' calculate z in terms of w, x, y assuming that the
409 * inputs w, x, y marked with a `1' are complemented on input, and arrange
410 * for z to be complemented on output if z is so marked.
411 *
412 * The diagrams to the right show the fragment of the complementation
413 * pattern being handled by the corresponding line of code. A symbol in
414 * brackets indicates a deviation from the input pattern forced by explicit
415 * complementation: there will be exactly one of these for each plane.
416 */
417 #ifdef KECCAK_COMPL
418 # define CHI_COMPL(z, x) NOT_LANE((z), (x))
419 # define CHI_001_1(z, w, x, y) \
420 (OR_LANE((z), (x), (y)), XOR_LANE((z), (z), (w)))
421 # define CHI_010_0(z, w, x, y) \
422 (AND_LANE((z), (x), (y)), XOR_LANE((z), (z), (w)))
423 # define CHI_101_0 CHI_001_1
424 # define CHI_110_1 CHI_010_0
425 #else
426 # define CHI(z, w, x, y) \
427 (NOT_LANE((z), (x)), \
428 AND_LANE((z), (z), (y)), \
429 XOR_LANE((z), (z), (w)))
430 # define CHI_COMPL(z, x) ((z) = (x))
431 # define CHI_001_1 CHI
432 # define CHI_010_0 CHI
433 # define CHI_101_0 CHI
434 # define CHI_110_1 CHI
435 #endif
436
437 /* Let's do the y' = 0 plane first. Theta and rho are easy with our macro,
438 * and we've done pi with the coordinate hacking. That leaves chi next.
439 * This is hairy because we must worry about complementation.
440 */
441 THETA_RHO(0, 1, 2, 3, 4);
442 CHI_COMPL(t, d[2]); /* [.] */
443 CHI_101_0(z->S[I(0, 0)], d[0], d[1], d[2]); /* * . * -> . */
444 CHI_001_1(z->S[I(1, 0)], d[1], t, d[3]); /* . [.] * -> * */
445 CHI_110_1(z->S[I(2, 0)], d[2], d[3], d[4]); /* * * . -> * */
446 CHI_101_0(z->S[I(3, 0)], d[3], d[4], d[0]); /* * * . -> . */
447 CHI_010_0(z->S[I(4, 0)], d[4], d[0], d[1]); /* * . . -> . */
448
449 /* We'd better do iota before we forget. */
450 XOR_LANE(z->S[I(0, 0)], z->S[I(0, 0)], rcon[i]);
451
452 /* That was fun. Maybe y' = 1 will be as good. */
453 THETA_RHO(3, 4, 0, 1, 2);
454 CHI_COMPL(t, d[4]); /* [*] */
455 CHI_101_0(z->S[I(0, 1)], d[0], d[1], d[2]); /* * . * -> . */
456 CHI_010_0(z->S[I(1, 1)], d[1], d[2], d[3]); /* . * . -> . */
457 CHI_101_0(z->S[I(2, 1)], d[2], d[3], t); /* * . [*] -> . */
458 CHI_001_1(z->S[I(3, 1)], d[3], d[4], d[0]); /* * . . -> * */
459 CHI_010_0(z->S[I(4, 1)], d[4], d[0], d[1]); /* * . . -> . */
460
461 /* We're getting the hang of this. The y' = 2 plane shouldn't be any
462 * trouble.
463 */
464 THETA_RHO(1, 2, 3, 4, 0);
465 CHI_COMPL(t, d[3]); /* [*] */
466 CHI_101_0(z->S[I(0, 2)], d[0], d[1], d[2]); /* * . * -> . */
467 CHI_010_0(z->S[I(1, 2)], d[1], d[2], d[3]); /* . * . -> . */
468 CHI_110_1(z->S[I(2, 2)], d[2], t, d[4]); /* * [*] . -> * */
469 CHI_101_0(z->S[I(3, 2)], t, d[4], d[0]); /* * [*] . -> . */
470 CHI_010_0(z->S[I(4, 2)], d[4], d[0], d[1]); /* * . . -> . */
471
472 /* This isn't as interesting any more. Let's do y' = 3 before boredom sets
473 * in.
474 */
475 THETA_RHO(4, 0, 1, 2, 3);
476 CHI_COMPL(t, d[3]); /* [.] */
477 CHI_010_0(z->S[I(0, 3)], d[0], d[1], d[2]); /* . * . -> . */
478 CHI_101_0(z->S[I(1, 3)], d[1], d[2], d[3]); /* * . * -> . */
479 CHI_001_1(z->S[I(2, 3)], d[2], t, d[4]); /* . [.] * -> * */
480 CHI_010_0(z->S[I(3, 3)], t, d[4], d[0]); /* . [.] * -> . */
481 CHI_101_0(z->S[I(4, 3)], d[4], d[0], d[1]); /* . * * -> . */
482
483 /* Last plane. Just y' = 4 to go. */
484 THETA_RHO(2, 3, 4, 0, 1);
485 CHI_COMPL(t, d[1]); /* [*] */
486 CHI_110_1(z->S[I(0, 4)], d[0], t, d[2]); /* * [*] . -> * */
487 CHI_101_0(z->S[I(1, 4)], t, d[2], d[3]); /* [*] . * -> . */
488 CHI_010_0(z->S[I(2, 4)], d[2], d[3], d[4]); /* . * . -> . */
489 CHI_101_0(z->S[I(3, 4)], d[3], d[4], d[0]); /* * * . -> . */
490 CHI_010_0(z->S[I(4, 4)], d[4], d[0], d[1]); /* * . . -> . */
491
492 /* And we're done. */
493 #undef THETA_RHO
494 #undef CHI_COMPL
495 #undef CHI_001_1
496 #undef CHI_010_0
497 #undef CHI_101_0
498 #undef CHI_110_1
499 #undef CHI
500 }
501
502 /* --- @keccak1600_p@ --- *
503 *
504 * Arguments: @keccak1600_state *z@ = where to write the output state
505 * @conts keccak1600_state *x@ = input state
506 * @unsigned n@ = number of rounds to perform
507 *
508 * Returns: ---
509 *
510 * Use: Implements the %$\Keccak[1600, n]$% permutation at the core
511 * of Keccak and the SHA-3 standard.
512 */
513
514 void keccak1600_p(keccak1600_state *z, const keccak1600_state *x, unsigned n)
515 {
516 keccak1600_state u, v;
517 unsigned i = 0;
518
519 #ifdef KECCAK_DEBUG
520 dump_state("init", 0, x);
521 #endif
522 keccak1600_round(&u, x, i++); n--;
523 while (n > 8) {
524 keccak1600_round(&v, &u, i++);
525 keccak1600_round(&u, &v, i++);
526 keccak1600_round(&v, &u, i++);
527 keccak1600_round(&u, &v, i++);
528 keccak1600_round(&v, &u, i++);
529 keccak1600_round(&u, &v, i++);
530 keccak1600_round(&v, &u, i++);
531 keccak1600_round(&u, &v, i++);
532 n -= 8;
533 }
534 switch (n) {
535 case 7: keccak1600_round(&v, &u, i++);
536 keccak1600_round(&u, &v, i++);
537 case 5: keccak1600_round(&v, &u, i++);
538 keccak1600_round(&u, &v, i++);
539 case 3: keccak1600_round(&v, &u, i++);
540 keccak1600_round(&u, &v, i++);
541 case 1: keccak1600_round( z, &u, i++);
542 break;
543 case 8: keccak1600_round(&v, &u, i++);
544 keccak1600_round(&u, &v, i++);
545 case 6: keccak1600_round(&v, &u, i++);
546 keccak1600_round(&u, &v, i++);
547 case 4: keccak1600_round(&v, &u, i++);
548 keccak1600_round(&u, &v, i++);
549 case 2: keccak1600_round(&v, &u, i++);
550 keccak1600_round( z, &v, i++);
551 break;
552 }
553 #ifdef KECCAK_DEBUG
554 dump_state("final", 0, z);
555 #endif
556 }
557
558 /* --- @keccack1600_init@ --- *
559 *
560 * Arguments: @keccak1600_state *s@ = a state to initialize
561 *
562 * Returns: ---
563 *
564 * Use: Initialize @s@ to the root state.
565 */
566
567 void keccak1600_init(keccak1600_state *s)
568 { memset(s->S, 0, sizeof(s->S)); STATE_INIT(s); }
569
570 /* --- @keccak1600_mix@ --- *
571 *
572 * Arguments: @keccak1600_state *s@ = a state to update
573 * @const kludge64 *p@ = pointer to 64-bit words to mix in
574 * @size_t n@ = size of the input, in 64-bit words
575 *
576 * Returns: ---
577 *
578 * Use: Mixes data into a %$\Keccak[r, 1600 - r]$% state. Note that
579 * it's the caller's responsibility to pass in no more than
580 * %$r$% bits of data.
581 */
582
583 void keccak1600_mix(keccak1600_state *s, const kludge64 *p, size_t n)
584 {
585 unsigned i;
586 lane a;
587
588 for (i = 0; i < n; i++)
589 { a = TO_LANE(p[i]); XOR_LANE(s->S[i], s->S[i], a); }
590 }
591
592 /* --- @keccak1600_set@ --- *
593 *
594 * Arguments: @keccak1600_state *s@ = a state to update
595 * @const kludge64 *p@ = pointer to 64-bit words to mix in
596 * @size_t n@ = size of the input, in 64-bit words
597 *
598 * Returns: ---
599 *
600 * Use: Stores data into a %$\Keccak[r, 1600 - r]$% state. Note that
601 * it's the caller's responsibility to pass in no more than
602 * %$r$% bits of data.
603 *
604 * This is not the operation you wanted for ordinary hashing.
605 * It's provided for the use of higher-level protocols which use
606 * duplexing and other fancy sponge features.
607 */
608
609 void keccak1600_set(keccak1600_state *s, const kludge64 *p, size_t n)
610 {
611 uint32 m = COMPL_MASK;
612 unsigned i;
613 lane a;
614
615 for (i = 0; i < n; i++) {
616 a = TO_LANE(p[i]); if (m&1) NOT_LANE(a, a);
617 s->S[i] = a; m >>= 1;
618 }
619 }
620
621 /* --- @keccak1600_extract@ --- *
622 *
623 * Arguments: @const keccak1600_state *s@ = a state to extract output from
624 * @kludge64 *p@ = pointer to 64-bit words to write
625 * @size_t n@ = size of the output, in 64-bit words
626 *
627 * Returns: ---
628 *
629 * Use: Reads output from a %$\Keccak[r, 1600 - r]$% state. Note
630 * that it's the caller's responsibility to extract no more than
631 * %$r$% bits of data.
632 */
633
634 void keccak1600_extract(const keccak1600_state *s, kludge64 *p, size_t n)
635 {
636 uint32 m = COMPL_MASK;
637 unsigned i;
638 lane t;
639
640 for (i = 0; i < n; i++) {
641 t = s->S[i]; if (m&1) NOT_LANE(t, t);
642 *p++ = FROM_LANE(t); m >>= 1;
643 }
644 }
645
646 /*----- Test rig ----------------------------------------------------------*/
647
648 #ifdef TEST_RIG
649
650 #include <stdio.h>
651
652 #include <mLib/quis.h>
653 #include <mLib/report.h>
654 #include <mLib/testrig.h>
655
656 static int vrf_p(dstr v[])
657 {
658 keccak1600_state u;
659 kludge64 t[25];
660 dstr d = DSTR_INIT;
661 int n;
662 unsigned i;
663 int ok = 1;
664
665 if (v[0].len != 200) die(1, "bad input size");
666 if (v[2].len != 200) die(1, "bad output size");
667 n = *(int *)v[1].buf;
668 dstr_ensure(&d, 200); d.len = 200;
669
670 keccak1600_init(&u);
671 for (i = 0; i < 25; i++) LOAD64_L_(t[i], v[0].buf + 8*i);
672 keccak1600_mix(&u, t, 25);
673 keccak1600_p(&u, &u, n);
674 keccak1600_extract(&u, t, 25);
675 for (i = 0; i < 25; i++) STORE64_L_(d.buf + 8*i, t[i]);
676 if (memcmp(d.buf, v[2].buf, 200) != 0) {
677 ok = 0;
678 fprintf(stderr, "failed!");
679 fprintf(stderr, "\n\t input = "); type_hex.dump(&v[0], stderr);
680 fprintf(stderr, "\n\t rounds = %d", n);
681 fprintf(stderr, "\n\t expected = "); type_hex.dump(&v[2], stderr);
682 fprintf(stderr, "\n\t calclated = "); type_hex.dump(&d, stderr);
683 }
684
685 dstr_destroy(&d);
686 return (ok);
687 }
688
689 static test_chunk defs[] = {
690 { "p", vrf_p, { &type_hex, &type_int, &type_hex } },
691 { 0, 0, { 0 } }
692 };
693
694 int main(int argc, char *argv[])
695 {
696 test_run(argc, argv, defs, SRCDIR"/t/keccak1600");
697 return (0);
698 }
699
700 #endif
701
702 /*----- That's all, folks -------------------------------------------------*/