symm/gcm.c: Make `gcm_mktable' and `gcm_mulk_...' be CPU-dependent.
[catacomb] / symm / gcm.c
1 /* -*-c-*-
2 *
3 * The GCM authenticated encryption mode
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software: you can redistribute it and/or modify it
13 * under the terms of the GNU Library General Public License as published
14 * by the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb. If not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
25 * USA.
26 */
27
28 /*----- Header files ------------------------------------------------------*/
29
30 #include "config.h"
31
32 #include <stdio.h>
33
34 #include <mLib/bits.h>
35
36 #include "dispatch.h"
37 #include "gcm.h"
38 #include "gcm-def.h"
39
40 /*----- Overall strategy --------------------------------------------------*
41 *
42 * GCM is pretty awful to implement in software. (This presentation is going
43 * to be somewhat different to that in the specification, but I think it
44 * makes more sense like this.)
45 *
46 * We're given a %$w$%-bit blockcipher %$E$% with a key %$K$%.
47 *
48 * The main part is arithmetic in the finite field %$k = \gf{2^w}$%, which we
49 * represent as the quotient ring %$\gf{2}[t]/(p_w(t))$% for some irreducible
50 * degree-%$w$% polynomial %$p(t)$%, whose precise value isn't very important
51 * right now. We choose a secret point %$x = E_K(0^w)$%.
52 *
53 * We choose a length size %$z$% as follows: if %$w < 96%$ then %$z = w$%;
54 * otherwise %$z = w/2$%. Format a message pair as follows:
55 *
56 * %$F(a, b) = P_w(a) \cat P_w(b) \cat [\ell(a)]_z \cat [\ell(b)]_z$%
57 *
58 * where %$P_w(x) = x \cat 0^n$% where $%0 \le n < w$% such that
59 * %$\ell(x) + n \equiv 0 \pmod{w}$%.
60 *
61 * Hash a (block-aligned) message %$u$% as follows. First, split %$u$% into
62 * %$w$%-bit blocks %$u_0$%, %$u_1$%, %%\ldots%%, %$u_{n-1}$%. Interpret
63 * these as elements of %$k$%. Then
64 *
65 * %$G_x(u) = u_0 t^n + u_1 t^{n-1} + \cdots + u_{n-1} t$%
66 *
67 * converted back to a %$w$%-bit string.
68 *
69 * We're ready to go now. Suppose we're to encrypt a message %$M$% with
70 * header %$H$% and nonce %$N$%. If %$\ell(N) + 32 = w$% then let
71 * %$N' = N$% and let %$i_0 = 1$%; otherwise, let %$U = G_t(F(\epsilon, N))$%
72 * and split this into %$N' = U[0 \bitsto w - 32]$% and
73 * %$[i_0]_{32} = U[w - 32 \bitsto w]$%.
74 *
75 * Let %$n = \lceil \ell(M)/w \rceil$%. Compute
76 *
77 * %$y_j = E_K(N' \cat [i_0 + j]_{32})$%
78 *
79 * for %$0 \le j \le n$%. Let
80 *
81 * %$s = (y_1 \cat y_2 \cat \cdots \cat y_n)[0 \bitsto \ell(M)$%
82 *
83 * Let %$C = M \xor s$% and let %$T = G_x(F(H, C)) \xor y_0$%. These are the
84 * ciphertext and tag respectively.
85 *
86 * So why is this awful?
87 *
88 * For one thing, the bits are in a completely terrible order. The bytes are
89 * arranged in little-endian order, so the unit coefficient is in the first
90 * byte, and the degree-127 coefficient is in the last byte. But within each
91 * byte, the lowest-degree coefficient is in the most significant bit. It's
92 * therefore better to think of GCM as using a big-endian byte-ordering
93 * convention, but with the bits backwards.
94 *
95 * But messing about with byte ordering is expensive, so let's not do that in
96 * the inner loop. But multiplication in %$k$% is not easy either. Some
97 * kind of precomputed table would be nice, but that will leak secrets
98 * through the cache.
99 *
100 * I choose a particularly simple table: given %$x$%, let %$X[i'] = x t^i$%.
101 * Then $%$x y = \sum_{0\le i<w} y_i X[i']$% which is just a bunch of
102 * bitmasking. But the natural order for examining bits of %$y$% is not
103 * necessarily the obvious one. We'll have already loaded %$y$% into
104 * internal form, as 32-bit words. The good order to process these is left
105 * to right, from high to low bits. But now the order of degrees depends on
106 * the endianness of our conversion of bytes to words. Oh, well.
107 *
108 * If we've adopted a big-endian convention, then we'll see the degrees in
109 * order, 0, 1, ..., all the way up to %$w - 1$% and everything is fine. If
110 * we've adopted a little-endian convention, though, we'll see an ordering
111 * like this:
112 *
113 * 24, 25, ..., 31, 16, 17, ..., 23, 8, 9, ..., 15, 0, 1, ..., 7,
114 * 56, 57, ..., 63, 48, 49, ..., 55, 40, 41, ..., 47, 32, 33, ..., 39,
115 * etc.
116 *
117 * which is the ordinary order with 0x18 = 24 XORed into the index. That is,
118 * %$i' = i$% if we've adopted a big-endian convention, and
119 * %$i' = i \xor 24$% if we've adopted a little-endian convention.
120 */
121
122 /*----- Low-level utilities -----------------------------------------------*/
123
124 /* --- @mult@ --- *
125 *
126 * Arguments: @const gcm_params *p@ = pointer to the parameters
127 * @uint32 *z@ = where to write the result
128 * @const uint32 *x@ = input field element
129 *
130 * Returns: ---
131 *
132 * Use: Multiply the input field element by %$t$%, and write the
133 * product to @z@. It's safe for @x@ and @z@ to be equal, but
134 * they should not otherwise overlap. Both input and output are
135 * in big-endian form, i.e., with the lowest-degree coefficients
136 * in the most significant bits.
137 */
138
139 static void mult(const gcm_params *p, uint32 *z, const uint32 *x)
140 {
141 uint32 m, c, t;
142 unsigned i;
143
144 t = x[p->n - 1]; m = -(t&1u); c = m&p->poly;
145 for (i = 0; i < p->n; i++) { t = x[i]; z[i] = (t >> 1) ^ c; c = t << 31; }
146 }
147
148 /* --- @mul@ --- *
149 *
150 * Arguments: @const gcm_params *p@ = pointer to the parameters
151 * @uint32 *z@ = where to write the result
152 * @const uint32 *x, *y@ = input field elements
153 *
154 * Returns: ---
155 *
156 * Use: Multiply the input field elements together, and write the
157 * product to @z@. It's safe for the operands to overlap. Both
158 * inputs and the output are in big-endian form, i.e., with the
159 * lowest-degree coefficients in the most significant bits.
160 */
161
162 static void mul(const gcm_params *p, uint32 *z,
163 const uint32 *x, const uint32 *y)
164 {
165 uint32 m, t, u[GCM_NMAX], v[GCM_NMAX];
166 unsigned i, j, k;
167
168 /* We can't do this in-place at all, so use temporary space. Make a copy
169 * of @x@ in @u@, where we can clobber it, and build the product in @v@.
170 */
171 for (i = 0; i < p->n; i++) { u[i] = x[i]; v[i] = 0; }
172
173 /* Repeatedly multiply @x@ (in @u@) by %$t$%, and add together those
174 * %$x t^i$% selected by the bits of @y@. This is basically what you get
175 * by streaming the result of @gcm_mktable@ into @gcm_mulk_...@.
176 */
177 for (i = 0; i < p->n; i++) {
178 t = y[i];
179 for (j = 0; j < 32; j++) {
180 m = -((t >> 31)&1u);
181 for (k = 0; k < p->n; k++) v[k] ^= u[k]&m;
182 mult(p, u, u); t <<= 1;
183 }
184 }
185
186 /* Write out the result now that it's ready. */
187 for (i = 0; i < p->n; i++) z[i] = v[i];
188 }
189
190 /*----- Table-based multiplication ----------------------------------------*/
191
192 /* --- @gcm_mktable@ --- *
193 *
194 * Arguments: @const gcm_params *p@ = pointer to the parameters
195 * @uint32 *ktab@ = where to write the table; there must be
196 * space for %$32 n$% $%n$%-word entries, i.e.,
197 * %$32 n^2$% 32-bit words in total, where %$n$% is
198 * @p->n@, the block size in words
199 * @const uint32 *k@ = input field element
200 *
201 * Returns: ---
202 *
203 * Use: Construct a table for use by @gcm_mulk_...@ below, to
204 * multiply (vaguely) efficiently by @k@.
205 */
206
207 static void simple_mktable(const gcm_params *p,
208 uint32 *ktab, const uint32 *k)
209 {
210 unsigned m = (p->f&GCMF_SWAP ? 0x18 : 0);
211 unsigned i, j, o = m*p->n;
212
213 /* As described above, the table stores entries %$K[i \xor m] = k t^i$%,
214 * where %$m = 0$% (big-endian cipher) or %$m = 24$% (little-endian).
215 * The first job is to store %$K[m] = k$%.
216 *
217 * We initially build the table with the entries in big-endian order, and
218 * then swap them if necessary. This makes the arithmetic functions more
219 * amenable for use by @gcm_concat@ below.
220 */
221 if (!(p->f&GCMF_SWAP)) for (i = 0; i < p->n; i++) ktab[o + i] = k[i];
222 else for (i = 0; i < p->n; i++) ktab[o + i] = ENDSWAP32(k[i]);
223
224 /* Fill in the rest of the table by repeatedly multiplying the previous
225 * entry by %$t$%.
226 */
227 for (i = 1; i < 32*p->n; i++)
228 { j = (i ^ m)*p->n; mult(p, ktab + j, ktab + o); o = j; }
229
230 /* Finally, if the cipher uses a little-endian convention, then swap all of
231 * the individual words.
232 */
233 if (p->f&GCMF_SWAP)
234 for (i = 0; i < 32*p->n*p->n; i++) ktab[i] = ENDSWAP32(ktab[i]);
235 }
236
237 CPU_DISPATCH(EMPTY, EMPTY, void, gcm_mktable,
238 (const gcm_params *p, uint32 *ktab, const uint32 *k),
239 (p, ktab, k),
240 pick_mktable, simple_mktable)
241
242 static gcm_mktable__functype *pick_mktable(void)
243 {
244 DISPATCH_PICK_FALLBACK(gcm_mktable, simple_mktable);
245 }
246
247 /* --- @recover_k@ --- *
248 *
249 * Arguments: @const gcm_params *p@ = pointer to the parameters
250 * @uint32 *k@ = block-sized vector in which to store %$k$%
251 * @const uint32 *ktab@ = the table encoding %$k$%
252 *
253 * Returns: ---
254 *
255 * Use: Recovers %$k$%, the secret from which @ktab@ was by
256 * @gcm_mktable@, from the table, and stores it in internal
257 * (big-endian) form in @k@.
258 */
259
260 static void simple_recover_k(const gcm_params *p,
261 uint32 *k, const uint32 *ktab)
262 {
263 unsigned i;
264
265 /* If the blockcipher is big-endian, then the key is simply in the first
266 * table element, in the right format. If the blockcipher is little-endian
267 * then it's in element 24, and the bytes need swapping.
268 */
269
270 if (!(p->f&GCMF_SWAP)) for (i = 0; i < p->n; i++) k[i] = ktab[i];
271 else for (i = 0; i < p->n; i++) k[i] = ENDSWAP32(ktab[24*p->n + i]);
272 }
273
274 CPU_DISPATCH(static, EMPTY, void, recover_k,
275 (const gcm_params *p, uint32 *k, const uint32 *ktab),
276 (p, k, ktab),
277 pick_recover_k, simple_recover_k)
278
279 static recover_k__functype *pick_recover_k(void)
280 { DISPATCH_PICK_FALLBACK(recover_k, simple_recover_k); }
281
282 /* --- @gcm_mulk_N{b,l}@ --- *
283 *
284 * Arguments: @uint32 *a@ = accumulator to multiply
285 * @const uint32 *ktab@ = table constructed by @gcm_mktable@
286 *
287 * Returns: ---
288 *
289 * Use: Multiply @a@ by @k@ (implicitly represented in @ktab@),
290 * updating @a@ in-place. There are separate functions for each
291 * supported block size and endianness because this is the
292 * function whose performance actually matters.
293 */
294
295 #define DEF_MULK(nbits) \
296 \
297 CPU_DISPATCH(EMPTY, EMPTY, void, gcm_mulk_##nbits##b, \
298 (uint32 *a, const uint32 *ktab), (a, ktab), \
299 pick_mulk_##nbits##b, simple_mulk_##nbits) \
300 CPU_DISPATCH(EMPTY, EMPTY, void, gcm_mulk_##nbits##l, \
301 (uint32 *a, const uint32 *ktab), (a, ktab), \
302 pick_mulk_##nbits##l, simple_mulk_##nbits) \
303 \
304 static void simple_mulk_##nbits(uint32 *a, const uint32 *ktab) \
305 { \
306 uint32 m, t; \
307 uint32 z[nbits/32]; \
308 unsigned i, j, k; \
309 \
310 for (i = 0; i < nbits/32; i++) z[i] = 0; \
311 \
312 for (i = 0; i < nbits/32; i++) { \
313 t = a[i]; \
314 for (j = 0; j < 32; j++) { \
315 m = -((t >> 31)&1u); \
316 for (k = 0; k < nbits/32; k++) z[k] ^= *ktab++&m; \
317 t <<= 1; \
318 } \
319 } \
320 \
321 for (i = 0; i < nbits/32; i++) a[i] = z[i]; \
322 } \
323 \
324 static gcm_mulk_##nbits##b##__functype *pick_mulk_##nbits##b(void) \
325 { DISPATCH_PICK_FALLBACK(gcm_mulk_##nbits##b, simple_mulk_##nbits); } \
326 static gcm_mulk_##nbits##l##__functype *pick_mulk_##nbits##l(void) \
327 { DISPATCH_PICK_FALLBACK(gcm_mulk_##nbits##l, simple_mulk_##nbits); }
328
329 GCM_WIDTHS(DEF_MULK)
330
331 #define GCM_MULK_CASE(nbits) \
332 case nbits/32: \
333 if (_f&GCMF_SWAP) gcm_mulk_##nbits##l(_a, _ktab); \
334 else gcm_mulk_##nbits##b(_a, _ktab); \
335 break;
336 #define MULK(n, f, a, ktab) do { \
337 uint32 *_a = (a); const uint32 *_ktab = (ktab); \
338 unsigned _f = (f); \
339 switch (n) { \
340 GCM_WIDTHS(GCM_MULK_CASE) \
341 default: abort(); \
342 } \
343 } while (0)
344
345 /*----- Other utilities ---------------------------------------------------*/
346
347 /* --- @putlen@ --- *
348 *
349 * Arguments: @octet *p@ = pointer to output buffer
350 * @unsigned w@ = size of output buffer
351 * @unsigned blksz@ = block size (assumed fairly small)
352 * @unsigned long nblocks@ = number of blocks
353 * @unsigned nbytes@ = tail size in bytes (assumed small)
354 *
355 * Returns: ---
356 *
357 * Use: Store the overall length in %$\emph{bits}$% (i.e.,
358 * @3*(nblocks*blksz + nbytes)@ in big-endian form in the
359 * buffer @p@.
360 */
361
362 static void putlen(octet *p, unsigned w, unsigned blksz,
363 unsigned long nblocks, unsigned nbytes)
364 {
365 unsigned long nblo = nblocks&((1ul << (ULONG_BITS/2)) - 1),
366 nbhi = nblocks >> ULONG_BITS/2;
367 unsigned long nlo = nblo*blksz + nbytes, nhi = nbhi*blksz;
368
369 /* This is fiddly. Split @nblocks@, which is the big number, into high and
370 * low halves, multiply those separately by @blksz@, propagate carries, and
371 * then multiply by eight.
372 */
373 nhi += nlo >> ULONG_BITS/2;
374 nlo &= (1ul << (ULONG_BITS/2)) - 1;
375 nlo <<= 3;
376
377 /* Now write out the size, feeding bits in from @nhi@ as necessary. */
378 p += w;
379 while (w--) {
380 *--p = U8(nlo);
381 nlo = (nlo >> 8) | ((nhi&0xff) << (ULONG_BITS/2 - 5));
382 nhi >>= 8;
383 }
384 }
385
386 /* --- @mix@ --- *
387 *
388 * Arguments: @const gcm_params *p@ = pointer to the parameters
389 * @uint32 *a@ = GHASH accumulator
390 * @const octet *q@ = pointer to an input block
391 * @const uint32 *ktab@ = multiplication table, built by
392 * @gcm_mktable@
393 *
394 * Returns: ---
395 *
396 * Use: Fold the block @q@ into the GHASH accumulator. The
397 * calculation is %$a' = k (a + q)$%.
398 */
399
400 static void mix(const gcm_params *p, uint32 *a,
401 const octet *q, const uint32 *ktab)
402 {
403 unsigned i;
404
405 if (p->f&GCMF_SWAP)
406 for (i = 0; i < p->n; i++) { a[i] ^= LOAD32_L(q); q += 4; }
407 else
408 for (i = 0; i < p->n; i++) { a[i] ^= LOAD32_B(q); q += 4; }
409 MULK(p->n, p->f, a, ktab);
410 }
411
412 /* --- @gcm_ghashdone@ --- *
413 *
414 * Arguments: @const gcm_params *p@ = pointer to the parameters
415 * @uint32 *a@ = GHASH accumulator
416 * @const uint32 *ktab@ = multiplication table, built by
417 * @gcm_mktable@
418 * @unsigned long xblocks, yblocks@ = number of whole blocks in
419 * the two inputs
420 * @unsigned xbytes, ybytes@ = number of trailing bytes in the
421 * two inputs
422 *
423 * Returns: ---
424 *
425 * Use: Finishes a GHASH operation by appending the appropriately
426 * encoded lengths of the two constituent messages.
427 */
428
429 void gcm_ghashdone(const gcm_params *p, uint32 *a, const uint32 *ktab,
430 unsigned long xblocks, unsigned xbytes,
431 unsigned long yblocks, unsigned ybytes)
432 {
433 octet b[4*GCM_NMAX];
434 unsigned w = p->n < 3 ? 4*p->n : 2*p->n;
435
436 /* Construct the encoded lengths. Note that smaller-block versions of GCM
437 * encode the lengths in separate blocks. GCM is only officially defined
438 * for 64- and 128-bit blocks; I've placed the cutoff somewhat arbitrarily
439 * at 96 bits.
440 */
441 putlen(b, w, 4*p->n, xblocks, xbytes);
442 putlen(b + w, w, 4*p->n, yblocks, ybytes);
443
444 /* Feed the lengths into the accumulator. */
445 mix(p, a, b, ktab);
446 if (p->n < 3) mix(p, a, b + w, ktab);
447 }
448
449 /* --- @gcm_concat@ --- *
450 *
451 * Arguments: @const gcm_params *p@ = pointer to the parameters
452 * @uint32 *z@ = GHASH accumulator for suffix, updated
453 * @const uint32 *x@ = GHASH accumulator for prefix
454 * @const uint32 *ktab@ = multiplication table, built by
455 * @gcm_mktable@
456 * @unsigned long n@ = length of suffix in whole blocks
457 *
458 * Returns: ---
459 *
460 * Use: On entry, @x@ and @z@ are the results of hashing two strings
461 * %$a$% and %$b$%, each a whole number of blocks long; in
462 * particular, %$b$% is @n@ blocks long. On exit, @z@ is
463 * updated to be the hash of %$a \cat b$%.
464 */
465
466 void gcm_concat(const gcm_params *p, uint32 *z, const uint32 *x,
467 const uint32 *ktab, unsigned long n)
468 {
469 uint32 t[GCM_NMAX], u[GCM_NMAX];
470 unsigned i, j;
471
472 if (!n) {
473 /* If @n@ is zero, then there's not much to do. The mathematics
474 * (explained below) still works, but the code takes a shortcut which
475 * doesn't handle this case: so set %$z' = z + x k^n = z + x$%.
476 */
477
478 for (j = 0; j < p->n; j++) z[j] ^= x[j];
479 } else {
480 /* We have %$x = a_0 t^m + \cdots + a_{m-2} t^2 + a_{m-1} t$% and
481 * %$z = b_0 t^n + \cdots + b_{n-2} t^2 + b_{n-1} t$%. What we'd like is
482 * the hash of %$a \cat b$%, which is %$z + x k^n$%.
483 *
484 * The first job, then, is to calculate %$k^n$%, and for this we use a
485 * simple left-to-right square-and-multiply algorithm. There's no need
486 * to keep %$n$% secret here.
487 */
488
489 /* Start by retrieving %$k$% from the table, and convert it to big-endian
490 * form.
491 */
492 recover_k(p, u, ktab);
493
494 /* Now calculate %$k^n$%. */
495 i = ULONG_BITS;
496 #define BIT (1ul << (ULONG_BITS - 1))
497 while (!(n&BIT)) { n <<= 1; i--; }
498 n <<= 1; i--; for (j = 0; j < p->n; j++) t[j] = u[j];
499 while (i--) { mul(p, t, t, t); if (n&BIT) mul(p, t, t, u); n <<= 1; }
500 #undef BIT
501
502 /* Next, calculate %$x k^n$%. If we're using a little-endian convention
503 * then we must convert %$x$%; otherwise we can just use it in place.
504 */
505 if (!(p->f&GCMF_SWAP))
506 mul(p, t, t, x);
507 else {
508 for (j = 0; j < p->n; j++) u[j] = ENDSWAP32(x[j]);
509 mul(p, t, t, u);
510 }
511
512 /* Finally, add %$x k^n$% onto %$z$%, converting back to little-endian if
513 * necessary.
514 */
515 if (!(p->f&GCMF_SWAP)) for (j = 0; j < p->n; j++) z[j] ^= t[j];
516 else for (j = 0; j < p->n; j++) z[j] ^= ENDSWAP32(t[j]);
517 }
518 }
519
520 /*----- Test rig ----------------------------------------------------------*/
521
522 #ifdef TEST_RIG
523
524 #include <mLib/quis.h>
525 #include <mLib/testrig.h>
526
527 static void report_failure(const char *test, unsigned nbits,
528 const char *ref, dstr v[], dstr *d)
529 {
530 printf("test %s failed (nbits = %u)", test, nbits);
531 printf("\n\tx = "); type_hex.dump(&v[0], stdout);
532 printf("\n\ty = "); type_hex.dump(&v[1], stdout);
533 printf("\n\tz = "); type_hex.dump(&v[2], stdout);
534 printf("\n\t%s' = ", ref); type_hex.dump(d, stdout);
535 putchar('\n');
536 }
537
538 static void mulk(unsigned nbits, unsigned f, uint32 *x, const uint32 *ktab)
539 { MULK(nbits/32, f, x, ktab); }
540
541 static int test_mul(uint32 poly, dstr v[])
542 {
543 uint32 x[GCM_NMAX], y[GCM_NMAX], z[GCM_NMAX], ktab[32*GCM_NMAX*GCM_NMAX];
544 gcm_params p;
545 dstr d = DSTR_INIT;
546 unsigned i, nbits;
547 int ok = 1;
548 enum { I_x, I_y, I_z };
549
550 nbits = 8*v[0].len; p.f = 0; p.n = nbits/32; p.poly = poly;
551 dstr_ensure(&d, nbits/8); d.len = nbits/8;
552
553 #define LOADXY(E) do { \
554 for (i = 0; i < nbits/32; i++) { \
555 x[i] = LOAD32_##E(v[I_x].buf + 4*i); \
556 y[i] = LOAD32_##E(v[I_y].buf + 4*i); \
557 } \
558 } while (0)
559
560 #define INITZ(x) do { \
561 for (i = 0; i < nbits/32; i++) z[i] = (x)[i]; \
562 } while (0)
563
564 #define CHECK(E, what, ref) do { \
565 for (i = 0; i < nbits/32; i++) STORE32_##E(d.buf + 4*i, z[i]); \
566 if (memcmp(d.buf, v[I_##ref].buf, nbits/8) != 0) \
567 { ok = 0; report_failure(what, nbits, #ref, v, &d); } \
568 } while (0)
569
570 #define TEST_PREP_1(E, x, y, what) do { \
571 gcm_mktable(&p, ktab, y); \
572 recover_k(&p, z, ktab); CHECK(B, "mktable/recover_k (" #y ")", y); \
573 INITZ(x); mulk(nbits, p.f, z, ktab); CHECK(E, what " (k = " #y ")", z); \
574 } while (0)
575
576 #define TEST_PREP(E, what) do { \
577 TEST_PREP_1(E, x, y, what); \
578 TEST_PREP_1(E, y, x, what); \
579 } while (0)
580
581 /* First, test plain multiply. */
582 LOADXY(B); mul(&p, z, x, y); CHECK(B, "gcm_mul", z);
583
584 /* Next, test big-endian prepared key. */
585 LOADXY(B); TEST_PREP(B, "gcm_kmul_b");
586
587 /* Finally, test little-endian prepared key. */
588 p.f = GCMF_SWAP; LOADXY(L);
589 TEST_PREP(L, "gcm_kmul_l");
590
591 #undef LOADXY
592 #undef INITZ
593 #undef CHECK
594 #undef TEST_PREP_1
595 #undef TEST_PREP
596
597 /* All done. */
598 return (ok);
599 }
600
601 #define TEST(nbits) \
602 static int test_mul_##nbits(dstr v[]) \
603 { return (test_mul(GCM_POLY_##nbits, v)); }
604 GCM_WIDTHS(TEST)
605 #undef TEST
606
607 static test_chunk defs[] = {
608 #define TEST(nbits) \
609 { "gcm-mul" #nbits, test_mul_##nbits, \
610 { &type_hex, &type_hex, &type_hex, 0 } },
611 GCM_WIDTHS(TEST)
612 #undef TEST
613 { 0, 0, { 0 } }
614 };
615
616 int main(int argc, char *argv[])
617 {
618 ego(argv[0]);
619 test_run(argc, argv, defs, SRCDIR"/t/gcm");
620 return (0);
621 }
622
623 #endif
624
625 /*----- That's all, folks -------------------------------------------------*/