ec-field-test.c: Make the field-element type use internal format.
[secnet] / keccak1600.c
CommitLineData
a1a6042e
MW
1/* -*-c-*-
2 *
3 * The Keccak-p[1600, n] permutation
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of secnet.
11 * See README for full list of copyright holders.
12 *
13 * secnet is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * secnet is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * version 3 along with secnet; if not, see
25 * https://www.gnu.org/licenses/gpl.html.
26 *
27 * This file was originally part of Catacomb, but has been automatically
28 * modified for incorporation into secnet: see `import-catacomb-crypto'
29 * for details.
30 *
31 * Catacomb is free software; you can redistribute it and/or modify
32 * it under the terms of the GNU Library General Public License as
33 * published by the Free Software Foundation; either version 2 of the
34 * License, or (at your option) any later version.
35 *
36 * Catacomb is distributed in the hope that it will be useful,
37 * but WITHOUT ANY WARRANTY; without even the implied warranty of
38 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39 * GNU Library General Public License for more details.
40 *
41 * You should have received a copy of the GNU Library General Public
42 * License along with Catacomb; if not, write to the Free
43 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
44 * MA 02111-1307, USA.
45 */
46
47/*----- Header files ------------------------------------------------------*/
48
49#include <limits.h>
50#include <string.h>
51
52#include "fake-mLib-bits.h"
53
54#include "keccak1600.h"
55
56/* #define KECCAK_DEBUG */
57
58/*----- Miscellaneous utilities -------------------------------------------*/
59
/* Flat index of the lane in column X of row Y: the five lanes of a row sit
 * next to each other, and successive rows are five entries apart.
 */
#define I(x, y) (5*(y) + (x))
61
62/*----- Interlacing or not ------------------------------------------------*/
63
/* Choose the interlaced (pair of 32-bit words) lane representation when the
 * target looks like a genuine 32-bit machine, i.e., `unsigned long' has
 * fewer than 64 bits.  That test alone would misfire on 64-bit Windows,
 * whose ABI keeps `long' at 32 bits (IL32/LLP64), so amd64 targets are
 * excluded by name first.
 */
#if !defined(__amd64__) && !defined(_M_AMD64)
#  if (ULONG_MAX >> 31) <= 0xffffffff
#    define KECCAK_I32
#  endif
#endif
73
74#ifdef KECCAK_I32
75/* A 32-bit target with at best weak support for 64-bit shifts. Maintain a
76 * lane as two 32-bit pieces representing the even and odd bits of the lane.
77 * There are slightly fiddly transformations to apply on the way in and out
78 * of the main permutation.
79 */
80
81typedef keccak1600_lane_i32 lane;
82#define S si32
83
84static lane interlace(kludge64 x)
85{
86 /* Given a 64-bit string X, return a lane Z containing the even- and
87 * odd-numbered bits of X.
88 *
89 * This becomes more manageable if we look at what happens to the bit
90 * indices: bit i of X becomes bit ROR_6(i, 1) of Z. We can effectively
91 * swap two bits of the indices by swapping the object bits where those
92 * index bits differ. Fortunately, this is fairly easy.
93 *
94 * We arrange to swap bits between the two halves of X, rather than within
95 * a half.
96 */
97
98 uint32 x0 = LO64(x), x1 = HI64(x), t;
99 lane z;
100 /* 543210 */
101 t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 453210 */
102 t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 354210 */
103 t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 254310 */
104 t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 154320 */
105 t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 054321 */
106 z.even = x0; z.odd = x1; return (z);
107}
108
109static kludge64 deinterlace(lane x)
110{
111 /* Given a lane X, return the combined 64-bit value. This is the inverse
112 * to `interlace' above, and the principle is the same
113 */
114
115 uint32 x0 = x.even, x1 = x.odd, t;
116 kludge64 z;
117 /* 054321 */
118 t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 154320 */
119 t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 254310 */
120 t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 354210 */
121 t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 453210 */
122 t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 543210 */
123 SET64(z, x1, x0); return (z);
124}
125
/* Conversion between external 64-bit values and interlaced lanes. */
#define TO_LANE(x) (interlace(x))
#define FROM_LANE(x) (deinterlace(x))

/* Initializers for an all-zero and an all-ones lane. */
#define LANE_ZERO { 0, 0 }
#define LANE_CMPL { 0xffffffff, 0xffffffff }

/* Format string and matching arguments for printing a lane: the even half
 * and then the odd half, separated by a colon.
 */
#define PRINTFMT_LANE "%08lx:%08lx"
#define PRINTARGS_LANE(x) (unsigned long)(x).even, (unsigned long)(x).odd

/* Bitwise operations act on the two halves independently. */
#define BINOP_LANE(z, op, x, y) \
  ((z).even = (x).even op (y).even, (z).odd = (x).odd op (y).odd)
#define XOR_LANE(z, x, y) BINOP_LANE(z, ^, x, y)
#define AND_LANE(z, x, y) BINOP_LANE(z, &, x, y)
#define OR_LANE(z, x, y) BINOP_LANE(z, |, x, y)
#define NOT_LANE(z, x) ((z).even = ~(x).even, (z).odd = ~(x).odd)

/* Rotate the lane X left by N places and store the result in Z.  An even
 * rotation turns each half by N/2.  An odd rotation additionally exchanges
 * the halves: the new even half is the old odd half turned by (N + 1)/2,
 * and the new odd half is the old even half turned by (N - 1)/2.  X is
 * copied first, so Z and X may name the same lane.
 */
#define ROTL_LANE(z, x, n) do { \
  lane _rot = (x); \
  if ((n)%2) { \
    (z).even = ROL32(_rot.odd, ((n) + 1)/2); \
    (z).odd = ROL32(_rot.even, ((n) - 1)/2); \
  } else { \
    (z).even = ROL32(_rot.even, (n)/2); \
    (z).odd = ROL32(_rot.odd, (n)/2); \
  } \
} while (0)
149
150static const lane rcon[24] = {
151 { 0x00000001, 0x00000000 }, { 0x00000000, 0x00000089 },
152 { 0x00000000, 0x8000008b }, { 0x00000000, 0x80008080 },
153 { 0x00000001, 0x0000008b }, { 0x00000001, 0x00008000 },
154 { 0x00000001, 0x80008088 }, { 0x00000001, 0x80000082 },
155 { 0x00000000, 0x0000000b }, { 0x00000000, 0x0000000a },
156 { 0x00000001, 0x00008082 }, { 0x00000000, 0x00008003 },
157 { 0x00000001, 0x0000808b }, { 0x00000001, 0x8000000b },
158 { 0x00000001, 0x8000008a }, { 0x00000001, 0x80000081 },
159 { 0x00000000, 0x80000081 }, { 0x00000000, 0x80000008 },
160 { 0x00000000, 0x00000083 }, { 0x00000000, 0x80008003 },
161 { 0x00000001, 0x80008088 }, { 0x00000000, 0x80000088 },
162 { 0x00000001, 0x00008000 }, { 0x00000000, 0x80008082 }
163};
164
165#else
166/* A target with good support for 64-bit shifts. We store lanes as 64-bit
167 * quantities and deal with them in the obvious, natural way.
168 */
169
170typedef keccak1600_lane_64 lane;
171#define S s64
172
173#define TO_LANE(x) (x)
174#define FROM_LANE(x) (x)
175
176#define PRINTFMT_LANE "%08lx%08lx"
177#define PRINTARGS_LANE(x) (unsigned long)HI64(x), (unsigned long)LO64(x)
178
179#define XOR_LANE(z, x, y) XOR64((z), (x), (y))
180#define AND_LANE(z, x, y) AND64((z), (x), (y))
181#define OR_LANE(z, x, y) OR64((z), (x), (y))
182#define NOT_LANE(z, x) CPL64((z), (x))
183#define ROTL_LANE(z, x, n) ROL64_((z), (x), (n))
184
185#define LANE_ZERO X64( 0, 0)
186#define LANE_CMPL X64(ffffffff, ffffffff)
187
188static const lane rcon[24] = {
189 X64(00000000, 00000001), X64(00000000, 00008082),
190 X64(80000000, 0000808a), X64(80000000, 80008000),
191 X64(00000000, 0000808b), X64(00000000, 80000001),
192 X64(80000000, 80008081), X64(80000000, 00008009),
193 X64(00000000, 0000008a), X64(00000000, 00000088),
194 X64(00000000, 80008009), X64(00000000, 8000000a),
195 X64(00000000, 8000808b), X64(80000000, 0000008b),
196 X64(80000000, 00008089), X64(80000000, 00008003),
197 X64(80000000, 00008002), X64(80000000, 00000080),
198 X64(00000000, 0000800a), X64(80000000, 8000000a),
199 X64(80000000, 80008081), X64(80000000, 00008080),
200 X64(00000000, 80000001), X64(80000000, 80008008)
201};
202
203#endif
204
205/*----- Complementing or not ----------------------------------------------*/
206
/* The complemented representation pays off on targets which lack a fused
 * and-not instruction.  There is no principled way to detect that, so the
 * architectures known to have one are listed explicitly and everything
 * else gets the complemented representation.  A brief survey of reference
 * manuals split roughly evenly; the architectures without the fused
 * operation looked the more modern, which is why `complemented' is the
 * default.
 *
 *      and-not                 No and-not
 *      -------                 ----------
 *      ARM (`bic')             x86/amd64
 *      Sparc (`andn')          z/Architecture
 *      MMIX (`andn')           MIPS
 *      IA64 (`andcm')          68k
 *      VAX (`bic')             RISC-V
 *      PDP-10 (`andc')
 */
#if defined(__arm__) || defined(__thumb__) || defined(__aarch64__) || \
    defined(_M_ARM) || defined(_M_THUMB) || \
    defined(__ia64__) || defined(__ia64) || defined(__itanium__) || \
    defined(_M_IA64) || \
    defined(__mmix__) || \
    defined(__sparc__) || defined(__sparc) || \
    defined(__vax__) || \
    defined(__pdp10__)
   /* Listed as having fused and-not: use the plain representation. */
#else
#  define KECCAK_COMPL
#endif
235
#ifdef KECCAK_COMPL
/* A target without fused and/not (`bic', `andc2').  Six lanes of the state
 * are held complemented: they are set to all-ones in the initial state and
 * inverted again on output.  (Absorbing XORs input into the state, so it is
 * unaffected.)  See the handling of chi in `keccak1600_round' below for the
 * details.
 */

/* Set the six complemented lanes of the state *Z to all-ones.  The other
 * lanes are not touched, so the caller must already have cleared them.
 */
#define STATE_INIT(z) do { \
  lane cmpl = LANE_CMPL; \
  (z)->S[I(1, 0)] = cmpl; (z)->S[I(2, 0)] = cmpl; \
  (z)->S[I(3, 1)] = cmpl; (z)->S[I(2, 2)] = cmpl; \
  (z)->S[I(2, 3)] = cmpl; (z)->S[I(0, 4)] = cmpl; \
} while (0)

/* Invert the same six lanes of *Z in place, giving the true state. */
#define STATE_OUT(z) do { \
  NOT_LANE((z)->S[I(1, 0)], (z)->S[I(1, 0)]); \
  NOT_LANE((z)->S[I(2, 0)], (z)->S[I(2, 0)]); \
  NOT_LANE((z)->S[I(3, 1)], (z)->S[I(3, 1)]); \
  NOT_LANE((z)->S[I(2, 2)], (z)->S[I(2, 2)]); \
  NOT_LANE((z)->S[I(2, 3)], (z)->S[I(2, 3)]); \
  NOT_LANE((z)->S[I(0, 4)], (z)->S[I(0, 4)]); \
} while (0)

#else
/* A target with fused and/not (`bic', `andc2').  Everything is simple:
 * no lanes are complemented, so both macros do nothing.
 */

#define STATE_INIT(z) do {} while (0)
#define STATE_OUT(z) do {} while (0)

#endif
266
267/*----- Other magic constants ---------------------------------------------*/
268
/* The rotation constants for rho.  `ROT_x_y' is the distance by which the
 * lane in column x of row y is rotated left; the names are systematic so
 * that `THETA_RHO' below can build them by token pasting.
 */

/* Row 0. */
#define ROT_0_0 0
#define ROT_1_0 1
#define ROT_2_0 62
#define ROT_3_0 28
#define ROT_4_0 27

/* Row 1. */
#define ROT_0_1 36
#define ROT_1_1 44
#define ROT_2_1 6
#define ROT_3_1 55
#define ROT_4_1 20

/* Row 2. */
#define ROT_0_2 3
#define ROT_1_2 10
#define ROT_2_2 43
#define ROT_3_2 25
#define ROT_4_2 39

/* Row 3. */
#define ROT_0_3 41
#define ROT_1_3 45
#define ROT_2_3 15
#define ROT_3_3 21
#define ROT_4_3 8

/* Row 4. */
#define ROT_0_4 18
#define ROT_1_4 2
#define ROT_2_4 61
#define ROT_3_4 56
#define ROT_4_4 14
301
302/*----- Debugging ---------------------------------------------------------*/
303
#ifdef KECCAK_DEBUG

#include <stdio.h>

static void dump_lanes(const keccak1600_state *st)
{
  /* Print the lanes of ST in their stored form as a five-by-five grid, one
   * row of the state per output line.  Each line starts with `;;'; the
   * first lane is preceded by a tab and the rest by a space.
   */

  unsigned col, row;

  for (row = 0; row < 5; row++) {
    printf(";;");
    for (col = 0; col < 5; col++) {
      printf("%c" PRINTFMT_LANE, col ? ' ' : '\t',
             PRINTARGS_LANE(st->S[I(col, row)]));
    }
    fputc('\n', stdout);
  }
}

static void dump_state(const char *what, unsigned ir,
                       const keccak1600_state *x)
{
  /* Write a description of the state X to standard output, headed by the
   * label WHAT and the round number IR.  The state is shown as stored;
   * then, if lanes are held complemented, with the complementation undone;
   * and then, if lanes are interlaced, as ordinary 64-bit values.
   */

  keccak1600_state y;
#ifdef KECCAK_I32
  unsigned col, row;
#endif

  printf(";; %s [round %u]\n", what, ir);

  printf(";; raw state...\n");
  dump_lanes(x);

  /* Work on a copy so that the caller's state is left alone. */
  y = *x; STATE_OUT(&y);

#ifdef KECCAK_COMPL
  printf(";; uncomplemented state...\n");
  dump_lanes(&y);
#endif

#ifdef KECCAK_I32
  printf(";; deinterlaced state...\n");
  for (row = 0; row < 5; row++) {
    printf(";;");
    for (col = 0; col < 5; col++) {
      kludge64 a = FROM_LANE(y.S[I(col, row)]);
      printf("%c%08lx%08lx", col ? ' ' : '\t',
             (unsigned long)HI64(a), (unsigned long)LO64(a));
    }
    fputc('\n', stdout);
  }
#endif

  fputc('\n', stdout);
}

#endif
350
351/*----- The Keccak-p[1600, n] permutation ---------------------------------*/
352
353static void keccak1600_round(keccak1600_state *z,
354 const keccak1600_state *x, unsigned i)
355{
356 /* Perform a round of Keccak-p[1600, n]. Process the state X and write the
357 * result to Z.
358 */
359
360 lane c[5], d[5], t;
361
362 /* Theta, first step: calculate the column parities. */
363#define COLPARITY(j) do { \
364 d[j] = x->S[I(j, 0)]; \
365 XOR_LANE(d[j], d[j], x->S[I(j, 1)]); \
366 XOR_LANE(d[j], d[j], x->S[I(j, 2)]); \
367 XOR_LANE(d[j], d[j], x->S[I(j, 3)]); \
368 XOR_LANE(d[j], d[j], x->S[I(j, 4)]); \
369} while (0)
370 COLPARITY(0); COLPARITY(1); COLPARITY(2); COLPARITY(3); COLPARITY(4);
371#undef COLPARITY
372
373 /* Theta, second step: calculate the combined effect. */
374 ROTL_LANE(c[0], d[1], 1); XOR_LANE(c[0], c[0], d[4]);
375 ROTL_LANE(c[1], d[2], 1); XOR_LANE(c[1], c[1], d[0]);
376 ROTL_LANE(c[2], d[3], 1); XOR_LANE(c[2], c[2], d[1]);
377 ROTL_LANE(c[3], d[4], 1); XOR_LANE(c[3], c[3], d[2]);
378 ROTL_LANE(c[4], d[0], 1); XOR_LANE(c[4], c[4], d[3]);
379
380 /* Now we work plane by plane through the output. To do this, we must undo
381 * the pi transposition. Pi maps (x', y') = (y, 2 x + 3 y), so y = x', and
382 * x = (y' - 3 y)/2 = 3 (y' - 3 x') = x' + 3 y'.
383 */
384#define THETA_RHO(i0, i1, i2, i3, i4) do { \
385 \
386 /* First, theta. */ \
387 XOR_LANE(d[0], x->S[I(i0, 0)], c[i0]); \
388 XOR_LANE(d[1], x->S[I(i1, 1)], c[i1]); \
389 XOR_LANE(d[2], x->S[I(i2, 2)], c[i2]); \
390 XOR_LANE(d[3], x->S[I(i3, 3)], c[i3]); \
391 XOR_LANE(d[4], x->S[I(i4, 4)], c[i4]); \
392 \
393 /* Then rho. */ \
394 ROTL_LANE(d[0], d[0], ROT_##i0##_0); \
395 ROTL_LANE(d[1], d[1], ROT_##i1##_1); \
396 ROTL_LANE(d[2], d[2], ROT_##i2##_2); \
397 ROTL_LANE(d[3], d[3], ROT_##i3##_3); \
398 ROTL_LANE(d[4], d[4], ROT_##i4##_4); \
399} while (0)
400
401 /* The basic chi operation is: z = w ^ (~a&b), but this involves an
402 * inversion which we can mostly avoid by being clever: observe that
403 *
404 * w ^ (~a&~~b) = w ^ ~(a | ~b) = ~w ^ (a | ~b)
405 *
406 * by De Morgan's law. Furthermore, complementing w or z is basically
407 * equivalent. Bertoni, Daemen, Peeters, Van Assche, and Van Keer, `Keccak
408 * implementation overview', describe a pattern of lane complementation
409 * which propagates through theta and pi in exactly the right way to be
410 * restored easily by chi, here, with exactly one inversion per plane.
411 *
412 * Here's the pattern.
413 *
414 * [ * . * * . ] [ . * * . . ]
415 * [ * . * . . ] [ . . . * . ]
416 * [ * . * . . ] -> [ . . * . . ]
417 * [ . * . * * ] [ . . * . . ]
418 * [ * . . * . ] [ * . . . . ]
419 *
420 * where a `.' means that the lane is unchanged, and a `*' means that it
421 * has been complemented.
422 *
423 * The macros `CHI_wxy_z' calculate z in terms of w, x, y assuming that the
424 * inputs w, x, y marked with a `1' are complemented on input, and arrange
425 * for z to be complemented on output if z is so marked.
426 *
427 * The diagrams to the right show the fragment of the complementation
428 * pattern being handled by the corresponding line of code. A symbol in
429 * brackets indicates a deviation from the input pattern forced by explicit
430 * complementation: there will be exactly one of these for each plane.
431 */
432#ifdef KECCAK_COMPL
433# define CHI_COMPL(z, x) NOT_LANE((z), (x))
434# define CHI_001_1(z, w, x, y) \
435 (OR_LANE((z), (x), (y)), XOR_LANE((z), (z), (w)))
436# define CHI_010_0(z, w, x, y) \
437 (AND_LANE((z), (x), (y)), XOR_LANE((z), (z), (w)))
438# define CHI_101_0 CHI_001_1
439# define CHI_110_1 CHI_010_0
440#else
441# define CHI(z, w, x, y) \
442 (NOT_LANE((z), (x)), \
443 AND_LANE((z), (z), (y)), \
444 XOR_LANE((z), (z), (w)))
445# define CHI_COMPL(z, x) ((z) = (x))
446# define CHI_001_1 CHI
447# define CHI_010_0 CHI
448# define CHI_101_0 CHI
449# define CHI_110_1 CHI
450#endif
451
452 /* Let's do the y' = 0 plane first. Theta and rho are easy with our macro,
453 * and we've done pi with the coordinate hacking. That leaves chi next.
454 * This is hairy because we must worry about complementation.
455 */
456 THETA_RHO(0, 1, 2, 3, 4);
457 CHI_COMPL(t, d[2]); /* [.] */
458 CHI_101_0(z->S[I(0, 0)], d[0], d[1], d[2]); /* * . * -> . */
459 CHI_001_1(z->S[I(1, 0)], d[1], t, d[3]); /* . [.] * -> * */
460 CHI_110_1(z->S[I(2, 0)], d[2], d[3], d[4]); /* * * . -> * */
461 CHI_101_0(z->S[I(3, 0)], d[3], d[4], d[0]); /* * * . -> . */
462 CHI_010_0(z->S[I(4, 0)], d[4], d[0], d[1]); /* * . . -> . */
463
464 /* We'd better do iota before we forget. */
465 XOR_LANE(z->S[I(0, 0)], z->S[I(0, 0)], rcon[i]);
466
467 /* That was fun. Maybe y' = 1 will be as good. */
468 THETA_RHO(3, 4, 0, 1, 2);
469 CHI_COMPL(t, d[4]); /* [*] */
470 CHI_101_0(z->S[I(0, 1)], d[0], d[1], d[2]); /* * . * -> . */
471 CHI_010_0(z->S[I(1, 1)], d[1], d[2], d[3]); /* . * . -> . */
472 CHI_101_0(z->S[I(2, 1)], d[2], d[3], t); /* * . [*] -> . */
473 CHI_001_1(z->S[I(3, 1)], d[3], d[4], d[0]); /* * . . -> * */
474 CHI_010_0(z->S[I(4, 1)], d[4], d[0], d[1]); /* * . . -> . */
475
476 /* We're getting the hang of this. The y' = 2 plane shouldn't be any
477 * trouble.
478 */
479 THETA_RHO(1, 2, 3, 4, 0);
480 CHI_COMPL(t, d[3]); /* [*] */
481 CHI_101_0(z->S[I(0, 2)], d[0], d[1], d[2]); /* * . * -> . */
482 CHI_010_0(z->S[I(1, 2)], d[1], d[2], d[3]); /* . * . -> . */
483 CHI_110_1(z->S[I(2, 2)], d[2], t, d[4]); /* * [*] . -> * */
484 CHI_101_0(z->S[I(3, 2)], t, d[4], d[0]); /* * [*] . -> . */
485 CHI_010_0(z->S[I(4, 2)], d[4], d[0], d[1]); /* * . . -> . */
486
487 /* This isn't as interesting any more. Let's do y' = 3 before boredom sets
488 * in.
489 */
490 THETA_RHO(4, 0, 1, 2, 3);
491 CHI_COMPL(t, d[3]); /* [.] */
492 CHI_010_0(z->S[I(0, 3)], d[0], d[1], d[2]); /* . * . -> . */
493 CHI_101_0(z->S[I(1, 3)], d[1], d[2], d[3]); /* * . * -> . */
494 CHI_001_1(z->S[I(2, 3)], d[2], t, d[4]); /* . [.] * -> * */
495 CHI_010_0(z->S[I(3, 3)], t, d[4], d[0]); /* . [.] * -> . */
496 CHI_101_0(z->S[I(4, 3)], d[4], d[0], d[1]); /* . * * -> . */
497
498 /* Last plane. Just y' = 4 to go. */
499 THETA_RHO(2, 3, 4, 0, 1);
500 CHI_COMPL(t, d[1]); /* [*] */
501 CHI_110_1(z->S[I(0, 4)], d[0], t, d[2]); /* * [*] . -> * */
502 CHI_101_0(z->S[I(1, 4)], t, d[2], d[3]); /* [*] . * -> . */
503 CHI_010_0(z->S[I(2, 4)], d[2], d[3], d[4]); /* . * . -> . */
504 CHI_101_0(z->S[I(3, 4)], d[3], d[4], d[0]); /* * * . -> . */
505 CHI_010_0(z->S[I(4, 4)], d[4], d[0], d[1]); /* * . . -> . */
506
507 /* And we're done. */
508#undef THETA_RHO
509#undef CHI_COMPL
510#undef CHI_001_1
511#undef CHI_010_0
512#undef CHI_101_0
513#undef CHI_110_1
514#undef CHI
515}
516
517/* --- @keccak1600_p@ --- *
518 *
519 * Arguments: @keccak1600_state *z@ = where to write the output state
520 * @conts keccak1600_state *x@ = input state
521 * @unsigned n@ = number of rounds to perform
522 *
523 * Returns: ---
524 *
525 * Use: Implements the %$\Keccak[1600, n]$% permutation at the core
526 * of Keccak and the SHA-3 standard.
527 */
528
529void keccak1600_p(keccak1600_state *z, const keccak1600_state *x, unsigned n)
530{
531 keccak1600_state u, v;
532 unsigned i = 0;
533
534#ifdef KECCAK_DEBUG
535 dump_state("init", 0, x);
536#endif
537 keccak1600_round(&u, x, i++); n--;
538 while (n > 8) {
539 keccak1600_round(&v, &u, i++);
540 keccak1600_round(&u, &v, i++);
541 keccak1600_round(&v, &u, i++);
542 keccak1600_round(&u, &v, i++);
543 keccak1600_round(&v, &u, i++);
544 keccak1600_round(&u, &v, i++);
545 keccak1600_round(&v, &u, i++);
546 keccak1600_round(&u, &v, i++);
547 n -= 8;
548 }
549 switch (n) {
550 case 7: keccak1600_round(&v, &u, i++);
551 keccak1600_round(&u, &v, i++);
552 case 5: keccak1600_round(&v, &u, i++);
553 keccak1600_round(&u, &v, i++);
554 case 3: keccak1600_round(&v, &u, i++);
555 keccak1600_round(&u, &v, i++);
556 case 1: keccak1600_round( z, &u, i++);
557 break;
558 case 8: keccak1600_round(&v, &u, i++);
559 keccak1600_round(&u, &v, i++);
560 case 6: keccak1600_round(&v, &u, i++);
561 keccak1600_round(&u, &v, i++);
562 case 4: keccak1600_round(&v, &u, i++);
563 keccak1600_round(&u, &v, i++);
564 case 2: keccak1600_round(&v, &u, i++);
565 keccak1600_round( z, &v, i++);
566 break;
567 }
568#ifdef KECCAK_DEBUG
569 dump_state("final", 0, z);
570#endif
571}
572
573/* --- @keccack1600_init@ --- *
574 *
575 * Arguments: @keccak1600_state *s@ = a state to initialize
576 *
577 * Returns: ---
578 *
579 * Use: Initialize @s@ to the root state.
580 */
581
582void keccak1600_init(keccak1600_state *s)
583 { memset(s->S, 0, sizeof(s->S)); STATE_INIT(s); }
584
585/* --- @keccak1600_mix@ --- *
586 *
587 * Arguments: @keccak1600_state *s@ = a state to update
588 * @const kludge64 *p@ = pointer to 64-bit words to mix in
589 * @size_t n@ = size of the input, in 64-bit words
590 *
591 * Returns: ---
592 *
593 * Use: Mixes data into a %$\Keccak[r, 1600 - r]$% state. Note that
594 * it's the caller's responsibility to pass in no more than
595 * %$r$% bits of data.
596 */
597
598void keccak1600_mix(keccak1600_state *s, const kludge64 *p, size_t n)
599{
600 unsigned i;
601 lane a;
602
603 for (i = 0; i < n; i++)
604 { a = TO_LANE(p[i]); XOR_LANE(s->S[i], s->S[i], a); }
605}
606
607/* --- @keccak1600_extract@ --- *
608 *
609 * Arguments: @const keccak1600_state *s@ = a state to extract output from
610 * @kludge64 *p@ = pointer to 64-bit words to write
611 * @size_t n@ = size of the output, in 64-bit words
612 *
613 * Returns: ---
614 *
615 * Use: Reads output from a %$\Keccak[r, 1600 - r]$% state. Note
616 * that it's the caller's responsibility to extract no more than
617 * %$r$% bits of data.
618 */
619
620void keccak1600_extract(const keccak1600_state *s, kludge64 *p, size_t n)
621{
622 unsigned i;
623 keccak1600_state t;
624
625 t = *s; STATE_OUT(&t);
626 for (i = 0; i < n; i++) p[i] = FROM_LANE(t.S[i]);
627}
628
629/*----- That's all, folks -------------------------------------------------*/