progs/perftest.c: Use from Glibc syscall numbers.
[catacomb] / symm / poly1305.c
CommitLineData
57496a50
MW
1/* -*-c-*-
2 *
3 * Poly1305 message authentication code
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include "config.h"
31
32#include <assert.h>
33#include <string.h>
34
35#include "poly1305.h"
6a0eb244 36#include "rsvr.h"
57496a50
MW
37
38/*----- Global variables --------------------------------------------------*/
39
40const octet poly1305_keysz[] = { KSZ_SET, 16, 0 };
41
42/*----- Low-level implementation for 32/64-bit targets --------------------*/
43
44#if !defined(POLY1305_IMPL) && defined(HAVE_UINT64)
45# define POLY1305_IMPL 26
46#endif
47
48#if POLY1305_IMPL == 26
49
50/* Elements x of GF(2^130 - 5) are represented by five integers x_i: x =
51 * SUM_{0<=i<5} x_i 2^{26i}.
52 *
53 * Not all elements are represented canonically. We have 0 <= r_i, s_i <
54 * 2^26 by construction. We maintain 0 <= h_i < 2^27. When we read a
55 * message block m, we have 0 <= m_i < 2^26 by construction again. When we
56 * update the hash state, we calculate h' = r (h + m). Addition is done
57 * componentwise; let t = h + m, and we will have 0 <= t_i < 3*2^26.
58 */
59typedef uint32 felt[5];
60#define M26 0x03ffffff
61#define P p26
62
63/* Convert 32-bit words into field-element pieces. */
8c3c0886
MW
64#define P26W0(x) (((x##0) << 0)&0x03ffffff)
65#define P26W1(x) ((((x##1) << 6)&0x03ffffc0) | (((x##0) >> 26)&0x0000003f))
66#define P26W2(x) ((((x##2) << 12)&0x03ffffff) | (((x##1) >> 20)&0x00000fff))
67#define P26W3(x) ((((x##3) << 18)&0x03fc0000) | (((x##2) >> 14)&0x0003ffff))
6fb4ecfb 68#define P26W4(x) (((x##3) >> 8)&0x00ffffff)
57496a50
MW
69
70/* Propagate carries in parallel. If 0 <= u_i < 2^26 c_i, then we shall have
71 * 0 <= v_0 < 2^26 + 5 c_4, and 0 <= v_i < 2^26 + c_{i-1} for 1 <= i < 5.
72 */
73#define CARRY_REDUCE(v, u) do { \
74 (v##0) = ((u##0)&M26) + 5*((u##4) >> 26); \
75 (v##1) = ((u##1)&M26) + ((u##0) >> 26); \
76 (v##2) = ((u##2)&M26) + ((u##1) >> 26); \
77 (v##3) = ((u##3)&M26) + ((u##2) >> 26); \
78 (v##4) = ((u##4)&M26) + ((u##3) >> 26); \
79} while (0)
80
81/* General multiplication, used by `concat'. */
82static void mul(felt z, const felt x, const felt y)
83{
84 /* Initial bounds: we assume x_i, y_i < 2^27. On exit, z_i < 2^27. */
85
86 uint32 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
87 uint32 y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4];
88 uint64 u0, u1, u2, u3, u4;
89 uint64 v0, v1, v2, v3, v4;
90 uint32 z0, z1, z2, z3, z4;
91
92 /* Do the multiplication: u = h x mod 2^130 - 5. We will have u_i <
93 * 2^27 (5 (4 - i) + i + 1) 2^27 = 2^54 (21 - 4 i) = 2^52 (84 - 16 i). In
94 * all cases we have u_i < 84*2^52 < 2^59. Notably, u_4 < 5*2^54 =
95 * 20*2^52.
96 */
97#define M(x, y) ((uint64)(x)*(y))
98 u0 = M(x0, y0) + (M(x1, y4) + M(x2, y3) + M(x3, y2) + M(x4, y1))*5;
99 u1 = M(x0, y1) + M(x1, y0) + (M(x2, y4) + M(x3, y3) + M(x4, y2))*5;
100 u2 = M(x0, y2) + M(x1, y1) + M(x2, y0) + (M(x3, y4) + M(x4, y3))*5;
101 u3 = M(x0, y3) + M(x1, y2) + M(x2, y1) + M(x3, y0) + (M(x4, y4))*5;
102 u4 = M(x0, y4) + M(x1, y3) + M(x2, y2) + M(x3, y1) + M(x4, y0);
103#undef M
104
105 /* Now we must reduce the coefficients. We do this in an approximate
106 * manner which avoids long data-dependency chains, but requires two
107 * passes.
108 *
109 * The reduced carry down from u_4 to u_0 in the first pass will be c_0 <
110 * 100*2^26; the remaining c_i are smaller: c_i < 2^26 (84 - 16 i). This
111 * leaves 0 <= v_i < 101*2^26. The carries in the second pass are bounded
112 * above by 180.
113 */
114 CARRY_REDUCE(v, u); CARRY_REDUCE(z, v);
115 z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4;
116}
117
118/* General squaring, used by `concat'. */
119static void sqr(felt z, const felt x)
120{
121 /* Initial bounds: we assume x_i < 2^27. On exit, z_i < 2^27. */
122
123 uint32 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
124 uint64 u0, u1, u2, u3, u4;
125 uint64 v0, v1, v2, v3, v4;
126 uint32 z0, z1, z2, z3, z4;
127
128 /* Do the squaring. See `mul' for bounds. */
129#define M(x, y) ((uint64)(x)*(y))
130 u0 = M(x0, x0) + 10*(M(x1, x4) + M(x2, x3));
131 u1 = 2* M(x0, x1) + 5*(M(x3, x3) + 2*M(x2, x4));
132 u2 = M(x1, x1) + 2* M(x0, x2) + 10* M(x3, x4);
133 u3 = 2*(M(x0, x3) + M(x1, x2)) + 5* M(x4, x4);
134 u4 = M(x2, x2) + 2*(M(x0, x4) + M(x1, x3));
135#undef M
136
137 /* Now we must reduce the coefficients. See `mul' for bounds. */
138 CARRY_REDUCE(v, u); CARRY_REDUCE(z, v);
139 z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4;
140}
141
142/* Multiplication by r, using precomputation. */
143static void mul_r(const poly1305_ctx *ctx, felt z, const felt x)
144{
145 /* Initial bounds: by construction, r_i < 2^26. We assume x_i < 3*2^26.
146 * On exit, z_i < 2^27.
147 */
148
149 uint32
150 r0 = ctx->k.u.p26.r0,
151 r1 = ctx->k.u.p26.r1, rr1 = ctx->k.u.p26.rr1,
152 r2 = ctx->k.u.p26.r2, rr2 = ctx->k.u.p26.rr2,
153 r3 = ctx->k.u.p26.r3, rr3 = ctx->k.u.p26.rr3,
154 r4 = ctx->k.u.p26.r4, rr4 = ctx->k.u.p26.rr4;
155 uint32 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
156 uint64 u0, u1, u2, u3, u4;
157 uint64 v0, v1, v2, v3, v4;
158 uint32 z0, z1, z2, z3, z4;
159
160 /* Do the multiplication: u = h x mod 2^130 - 5. We will have u_i <
161 * 2^26 (5 (4 - i) + i + 1) 3*2^26 = 2^52 (63 - 12 i). In all cases
162 * we have u_i < 63*2^52 < 2^58. Notably, u_4 < 15*2^52.
163 */
164#define M(x, y) ((uint64)(x)*(y))
165 u0 = M(x0, r0) + M(x1, rr4) + M(x2, rr3) + M(x3, rr2) + M(x4, rr1);
166 u1 = M(x0, r1) + M(x1, r0) + M(x2, rr4) + M(x3, rr3) + M(x4, rr2);
167 u2 = M(x0, r2) + M(x1, r1) + M(x2, r0) + M(x3, rr4) + M(x4, rr3);
168 u3 = M(x0, r3) + M(x1, r2) + M(x2, r1) + M(x3, r0) + M(x4, rr4);
169 u4 = M(x0, r4) + M(x1, r3) + M(x2, r2) + M(x3, r1) + M(x4, r0);
170#undef M
171
172 /* Now we must reduce the coefficients. We do this in an approximate
173 * manner which avoids long data-dependency chains, but requires two
174 * passes.
175 *
176 * The reduced carry down from u_4 to u_0 in the first pass will be c_0 <
177 * 75*2^26; the remaining c_i are smaller: c_i < 2^26 (63 - 12 i). This
178 * leaves 0 <= v_i < 76*2^26. The carries in the second pass are bounded
179 * above by 135.
180 */
181 CARRY_REDUCE(v, u); CARRY_REDUCE(z, v);
182 z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4;
183}
184
185#endif
186
6ddc1550 187/*----- Low-level implementation for 16/32-bit targets --------------------*/
57496a50
MW
188
189#ifndef POLY1305_IMPL
190# define POLY1305_IMPL 11
191#endif
192
193#if POLY1305_IMPL == 11
194
195/* Elements x of GF(2^130 - 5) are represented by 12 integers x_i: x =
196 * SUM_{0<=i<12} x_i 2^P_i, where P_i = SUM_{0<=j<i} w_j, and w_5 = w_11 =
197 * 10, and w_i = 11 for i in { 0, 1, 2, 3, 4, 6, 7, 8, 9, 10 }.
198 *
199 * Not all elements are represented canonically. We have 0 <= r_i, s_i <
200 * 2^w_i <= 2^11 by construction. We maintain 0 <= h_i < 2^12. When we read
201 * a message block m, we have 0 <= m_i < 2^w_i by construction again. When
202 * we update the hash state, we calculate h' = r (h + m). Addition is done
203 * componentwise; let t = h + m, and we will have 0 <= t_i < 3*2^11.
204 */
205typedef uint16 felt[12];
206#define M10 0x3ff
207#define M11 0x7ff
208#define P p11
209
210/* Load a field element from an octet string. */
211static void load_p11(felt d, const octet *s)
212{
213 unsigned i, j, n, w;
214 uint16 m;
215 uint32 a;
216
217 for (i = j = n = 0, a = 0; j < 12; j++) {
218 if (j == 5 || j == 11) { w = 10; m = M10; }
219 else { w = 11; m = M11; }
220 while (n < w && i < 16) { a |= s[i++] << n; n += 8; }
221 d[j] = a&m; a >>= w; n -= w;
222 }
223}
224
225/* Reduce a field-element's pieces to manageable size. */
226static void carry_reduce(uint32 u[12])
227{
228 /* Initial bounds: we assume u_i < 636*2^22. On exit, u_i < 2^11. */
229
230 unsigned i;
231 uint32 c;
232
233 /* Do sequential carry propagation (16-bit CPUs are less likely to benefit
57e7040b
MW
234 * from instruction-level parallelism). Start at u_9; truncate it to 11
235 * bits, and add the carry onto u_10. Truncate u10 to 11 bits, and add the
236 * carry onto u_11. Truncate u_11 to 10 bits, and add five times the carry
237 * onto u_0. And so on.
57496a50
MW
238 *
239 * The carry is larger than the pieces we're leaving behind. Let c_i be
240 * the high portion of u_i, to be carried onto u_{i+1}. I claim that c_i <
241 * 2557*2^10. Then the carry /into/ any u_i is at most 12785*2^10 < 2^24
242 * (allowing for the reduction as we carry from u_11 to u_0), and u_i after
243 * carry is bounded above by 636*2^22 + 12785*2^10 < 2557*2^20. Hence, the
244 * carry out is at most 2557*2^10, as claimed.
245 *
57e7040b
MW
246 * Once we reach u_9 for the second time, we start with u_9 < 2^11. The
247 * carry into u_9 is at most 2557*2^10 < 1279*2^11 as calculated above; so
248 * the carry out into u_10 is at most 1280. Since u_10 < 2^11 prior to
249 * this carry in, we now have u_10 < 2^11 + 1280 < 2^12; so the carry out
250 * into u_11 is at most 1. The final reduction therefore only needs a
251 * conditional subtraction.
57496a50 252 */
57e7040b
MW
253 { c = u[9] >> 11; u[9] &= M11; }
254 { u[10] += c; c = u[10] >> 11; u[10] &= M11; }
57496a50
MW
255 { u[11] += c; c = u[11] >> 10; u[11] &= M10; }
256 { u[0] += 5*c; c = u[0] >> 11; u[0] &= M11; }
257 for (i = 1; i < 5; i++) { u[i] += c; c = u[i] >> 11; u[i] &= M11; }
258 { u[5] += c; c = u[5] >> 10; u[5] &= M10; }
259 for (i = 6; i < 11; i++) { u[i] += c; c = u[i] >> 11; u[i] &= M11; }
260 u[11] += c;
261}
262
263/* General multiplication. */
264static void mul(felt z, const felt x, const felt y)
265{
266 /* Initial bounds: we assume x_i < 3*2^11, and y_i < 2^12. On exit,
267 * z_i < 2^12.
268 */
269
270 uint32 u[12];
271 unsigned i, j, k;
272
273 /* Do the main multiplication. After this, we shall have
274 *
275 * { 2^22 (636 - 184 i) for 0 <= i < 6
276 * u_i < {
277 * { 2^22 (732 - 60 i) for 6 <= i < 12
278 *
279 * In particular, u_0 < 636*2^22 < 2^32, and u_11 < 72*2^22.
280 *
281 * The irregularly positioned pieces are annoying. Because we fold the
282 * reduction into the multiplication, it's also important to see where the
283 * reduced products fit. Finally, products don't align with the piece
284 * boundaries, and sometimes need to be doubled. The following table
285 * tracks all of this.
286 *
287 * piece width offset second
288 * 0 11 0 130
289 * 1 11 11 141
290 * 2 11 22 152
291 * 3 11 33 163
292 * 4 11 44 174
293 * 5 10 55 185
294 * 6 11 65 195
295 * 7 11 76 206
296 * 8 11 87 217
297 * 9 11 98 228
298 * 10 11 109 239
299 * 11 10 120 250
300 *
301 * The next table tracks exactly which products end up being multiplied by
302 * which constants and accumulated into which destination pieces.
303 *
304 * u_k = t_i r_j + 2 t_i r_j + 5 t_i r_j + 10 t_i r_j
305 * 0 0/0 -- 6/6 1-5/11-7 7-11/5-1
306 * 1 0-1/1-0 -- 6-7/7-6 2-5/11-8 8-11/5-2
307 * 2 0-2/2-0 -- 6-8/8-6 3-5/11-9 9-11/5-3
308 * 3 0-3/3-0 -- 6-9/9-6 4-5/11-10 10-11/5-4
309 * 4 0-4/4-0 -- 6-10/10-6 5/11 11/5
310 * 5 0-5/5-0 -- 6-11/11-6 --
311 * 6 0/6 6/0 1-5/5-1 -- 7-11/11-7
312 * 7 0-1/7-6 6-7/1-0 2-5/5-2 -- 8-11/11-8
313 * 8 0-2/8-6 6-8/2-0 3-5/5-3 -- 9-11/11-9
314 * 9 0-3/9-6 6-9/3-0 4-5/5-4 -- 10-11/11-10
315 * 10 0-4/10-6 6-10/4-0 5/5 -- 11/11
316 * 11 0-11/11-0 -- -- --
317 *
318 * And, finally, trying to bound the multiple of 6*2^22 in each destination
319 * piece is fiddly, so here's a tableau showing the calculation.
320 *
321 * k 1* + 2* + 5* +10* = 1* + 5* =
322 * 0 1 -- 1 10 1 21 106
323 * 1 2 -- 2 8 2 18 92
324 * 2 3 -- 3 6 3 15 78
325 * 3 4 -- 4 4 4 12 64
326 * 4 5 -- 5 2 5 9 50
327 * 5 6 -- 6 -- 6 6 36
328 * 6 2 5 -- 5 12 10 62
329 * 7 4 4 -- 4 12 8 52
330 * 8 6 3 -- 3 12 6 42
331 * 9 8 2 -- 2 12 4 32
332 * 10 10 1 -- 1 12 2 22
333 * 11 12 -- -- -- 12 0 12
334 */
335
336 for (i = 0; i < 12; i++) u[i] = 0;
337
338#define M(i, j) ((uint32)x[i]*y[j])
339
340 /* Product terms we must multiply by 10. */
341 for (k = 0; k < 5; k++) {
342 for (i = k + 1; i < 6; i++) {
343 j = 12 + k - i;
344 u[k] += M(i, j) + M(j, i);
345 u[k + 6] += M(i + 6, j);
346 }
347 }
348 for (k = 0; k < 5; k++) u[k] *= 2;
349 for (k = 6; k < 11; k++) u[k] *= 5;
350
351 /* Product terms we must multiply by 5. */
352 for (k = 0; k < 6; k++) {
353 for (i = k + 6; i >= 6; i--) {
354 j = 12 + k - i;
355 u[k] += M(i, j);
356 }
357 }
358 for (k = 0; k < 6; k++) u[k] *= 5;
359
360 /* Product terms we must multiply by 2. */
361 for (k = 6; k < 11; k++) {
362 for (i = k - 5; i < 6; i++) {
363 j = k - i;
364 u[k] += M(i, j);
365 }
366 }
367 for (k = 6; k < 11; k++) u[k] *= 2;
368
369 /* Remaining product terms. */
370 for (k = 0; k < 6; k++) {
371 for (i = k; i < 6; i--) {
372 j = k - i;
373 u[k] += M(i, j);
374 u[k + 6] += M(i + 6, j) + M(i, j + 6);
375 }
376 }
377
378#undef M
379
380 /* Do the reduction. Currently, `carry_reduce' does more than we need, but
381 * that's fine.
382 */
383 carry_reduce(u);
384
385 /* Done. Write out the answer. */
386 for (i = 0; i < 12; i++) z[i] = u[i];
387}
388
389/* General squaring, used by `concat'. */
390static void sqr(felt z, const felt x)
391 { mul(z, x, x); }
392
393/* Multiplication by r. */
394static void mul_r(const poly1305_ctx *ctx, felt z, const felt x)
395 { mul(z, x, ctx->k.u.p11.r); }
396
397#endif
398
399/*----- Interface functions -----------------------------------------------*/
400
401/* --- @poly1305_keyinit@ --- *
402 *
403 * Arguments: @poly1305_key *key@ = key structure to fill in
404 * @const void *k@ = pointer to key material
405 * @size_t ksz@ = length of key (must be @POLY1305_KEYSZ == 16@)
406 *
407 * Returns: ---
408 *
409 * Use: Records a Poly1305 key and performs (minimal)
410 * precomputations.
411 */
412
413void poly1305_keyinit(poly1305_key *key, const void *k, size_t ksz)
414{
415 const octet *r = k;
416#if POLY1305_IMPL == 11
417 octet rr[16];
418#endif
419
420 KSZ_ASSERT(poly1305, ksz);
421
422#if POLY1305_IMPL == 26
423 uint32 r0 = LOAD32_L(r + 0), r1 = LOAD32_L(r + 4),
424 r2 = LOAD32_L(r + 8), r3 = LOAD32_L(r + 12);
425
426 r0 &= 0x0fffffff; r1 &= 0x0ffffffc; r2 &= 0x0ffffffc; r3 &= 0x0ffffffc;
427 key->u.p26.r0 = P26W0(r); key->u.p26.r1 = P26W1(r);
428 key->u.p26.r2 = P26W2(r); key->u.p26.r3 = P26W3(r);
429 key->u.p26.r4 = P26W4(r);
430
431 key->u.p26.rr1 = 5*key->u.p26.r1; key->u.p26.rr2 = 5*key->u.p26.r2;
432 key->u.p26.rr3 = 5*key->u.p26.r3; key->u.p26.rr4 = 5*key->u.p26.r4;
433#else
434 memcpy(rr, r, 16);
435 rr[ 3] &= 0x0f;
436 rr[ 4] &= 0xfc; rr[ 7] &= 0x0f;
437 rr[ 8] &= 0xfc; rr[11] &= 0x0f;
438 rr[12] &= 0xfc; rr[15] &= 0x0f;
439 load_p11(key->u.p11.r, rr);
440#endif
441}
442
443/* --- @poly1305_macinit@ --- *
444 *
445 * Arguments: @poly1305_ctx *ctx@ = MAC context to fill in
446 * @const poly1305_key *key@ = pointer to key structure to use
447 * @const void *iv@ = pointer to mask string
448 *
449 * Returns: ---
450 *
451 * Use: Initializes a MAC context for use. The key can be discarded
452 * at any time.
453 *
454 * It is permitted for @iv@ to be null, though it is not then
455 * possible to complete the MAC computation on @ctx@. The
456 * resulting context may still be useful, e.g., as an operand to
457 * @poly1305_concat@.
458 */
459
460void poly1305_macinit(poly1305_ctx *ctx,
461 const poly1305_key *key, const void *iv)
462{
463 const octet *s = iv;
464#if POLY1305_IMPL == 26
465 uint32 s0, s1, s2, s3;
466#else
467 unsigned i;
468#endif
469
470#if POLY1305_IMPL == 26
471 if (s) {
472 s0 = LOAD32_L(s + 0); s1 = LOAD32_L(s + 4);
473 s2 = LOAD32_L(s + 8); s3 = LOAD32_L(s + 12);
474 ctx->u.p26.s0 = P26W0(s); ctx->u.p26.s1 = P26W1(s);
475 ctx->u.p26.s2 = P26W2(s); ctx->u.p26.s3 = P26W3(s);
476 ctx->u.p26.s4 = P26W4(s);
477 }
478 ctx->u.p26.h[0] = ctx->u.p26.h[1] = ctx->u.p26.h[2] =
479 ctx->u.p26.h[3] = ctx->u.p26.h[4] = 0;
480#else
481 if (s) load_p11(ctx->u.p11.s, s);
482 for (i = 0; i < 12; i++) ctx->u.p11.h[i] = 0;
483#endif
484 ctx->k = *key;
485 ctx->nbuf = 0;
486 ctx->count = 0;
487}
488
489/* --- @poly1305_copy@ --- *
490 *
491 * Arguments: @poly1305_ctx *to@ = destination context
492 * @const poly1305_ctx *from@ = source context
493 *
494 * Returns: ---
495 *
496 * Use: Duplicates a Poly1305 MAC context. The destination need not
497 * have been initialized. Both contexts can be used
498 * independently afterwards.
499 */
500
501void poly1305_copy(poly1305_ctx *ctx, const poly1305_ctx *from)
502 { *ctx = *from; }
503
504/* --- @poly1305_hash@ --- *
505 *
506 * Arguments: @poly1305_ctx *ctx@ = MAC context to update
507 * @const void *p@ = pointer to message data
508 * @size_t sz@ = length of message data
509 *
510 * Returns: ---
511 *
512 * Use: Processes a chunk of message. The message pieces may have
513 * arbitrary lengths, and may be empty.
514 */
515
516static void update_full(poly1305_ctx *ctx, const octet *p)
517{
518 felt t;
519#if POLY1305_IMPL == 26
520 uint32
521 m0 = LOAD32_L(p + 0), m1 = LOAD32_L(p + 4),
522 m2 = LOAD32_L(p + 8), m3 = LOAD32_L(p + 12);
523
524 t[0] = ctx->u.p26.h[0] + P26W0(m);
525 t[1] = ctx->u.p26.h[1] + P26W1(m);
526 t[2] = ctx->u.p26.h[2] + P26W2(m);
527 t[3] = ctx->u.p26.h[3] + P26W3(m);
528 t[4] = ctx->u.p26.h[4] + P26W4(m) + 0x01000000;
529#else
530 unsigned i;
531
532 load_p11(t, p); t[11] += 0x100;
533 for (i = 0; i < 12; i++) t[i] += ctx->u.p11.h[i];
534#endif
535
536 mul_r(ctx, ctx->u.P.h, t);
537 ctx->count++;
538}
539
6a0eb244
MW
540static const rsvr_policy pol = { 0, 16, 16 };
541
57496a50
MW
542void poly1305_hash(poly1305_ctx *ctx, const void *p, size_t sz)
543{
6a0eb244
MW
544 rsvr_state st;
545 const octet *q = p;
546
547 rsvr_setup(&st, &pol, &ctx->buf, &ctx->nbuf, p, sz);
548 RSVR_DO(&st) while ((q = RSVR_NEXT(&st, 16)) != 0) update_full(ctx, q);
57496a50
MW
549}
550
551/* --- @poly1305_flush@ --- *
552 *
553 * Arguments: @poly1305_ctx *ctx@ = MAC context to flush
554 *
555 * Returns: ---
556 *
557 * Use: Forces any buffered message data in the context to be
558 * processed. This has no effect if the message processed so
559 * far is a whole number of blocks. Flushing is performed
560 * automatically by @poly1305_done@, but it may be necessary to
561 * force it by hand when using @poly1305_concat@.
20400191 562 * (Alternatively, you might use @poly1305_flushzero@ instead.)
57496a50
MW
563 *
564 * Flushing a partial block has an observable effect on the
565 * computation: the resulting state is (with high probability)
566 * dissimilar to any state reachable with a message which is a
567 * whole number of blocks long.
568 */
569
570void poly1305_flush(poly1305_ctx *ctx)
571{
572 felt t;
573#if POLY1305_IMPL == 26
574 uint32 m0, m1, m2, m3;
575#else
576 unsigned i;
577#endif
578
579 if (!ctx->nbuf) return;
580 ctx->buf[ctx->nbuf++] = 1; memset(ctx->buf + ctx->nbuf, 0, 16 - ctx->nbuf);
581#if POLY1305_IMPL == 26
582 m0 = LOAD32_L(ctx->buf + 0); m1 = LOAD32_L(ctx->buf + 4);
583 m2 = LOAD32_L(ctx->buf + 8); m3 = LOAD32_L(ctx->buf + 12);
584
585 t[0] = ctx->u.p26.h[0] + P26W0(m);
586 t[1] = ctx->u.p26.h[1] + P26W1(m);
587 t[2] = ctx->u.p26.h[2] + P26W2(m);
588 t[3] = ctx->u.p26.h[3] + P26W3(m);
589 t[4] = ctx->u.p26.h[4] + P26W4(m);
590#else
591 load_p11(t, ctx->buf);
592 for (i = 0; i < 12; i++) t[i] += ctx->u.p11.h[i];
593#endif
594
595 mul_r(ctx, ctx->u.P.h, t);
20400191
MW
596 ctx->nbuf = 0; ctx->count++;
597}
598
599/* --- @poly1305_flushzero@ --- *
600 *
601 * Arguments: @poly1305_ctx *ctx@ = MAC context to flush
602 *
603 * Returns: ---
604 *
605 * Use: Forces any buffered message data in the context to be
606 * processed, by hashing between zero and fifteen additional
607 * zero bytes. Like @poly1305_flush@, this has no effect if the
608 * the message processed so far is a whole number of blocks.
609 * Unlike @poly1305_flush@, the behaviour if the message is not
610 * a whole number of blocks is equivalent to actually hashing
611 * some extra data.
612 */
613
614void poly1305_flushzero(poly1305_ctx *ctx)
615{
616 if (!ctx->nbuf) return;
617 memset(ctx->buf + ctx->nbuf, 0, 16 - ctx->nbuf);
618 update_full(ctx, ctx->buf);
57496a50
MW
619 ctx->nbuf = 0;
620}
621
622/* --- @poly1305_concat@ --- *
623 *
624 * Arguments: @poly1305_ctx *ctx@ = destination context
625 * @const poly1305_ctx *prefix, *suffix@ = two operand contexts
626 *
627 * Returns: ---
628 *
629 * Use: The two operand contexts @prefix@ and @suffix@ represent
630 * processing of two messages %$m$% and %$m'$%; the effect is to
631 * set @ctx@ to the state corresponding to their concatenation
632 * %$m \cat m'$%.
633 *
634 * All three contexts must have been initialized using the same
635 * key value (though not necessarily from the same key
636 * structure). The mask values associated with the input
637 * contexts are irrelevant. The @prefix@ message %$m$% must be
638 * a whole number of blocks long: this can be arranged by
639 * flushing the context. The @suffix@ message need not be a
640 * whole number of blocks long. All of the contexts remain
641 * operational and can be used independently afterwards.
642 */
643
644void poly1305_concat(poly1305_ctx *ctx,
645 const poly1305_ctx *prefix, const poly1305_ctx *suffix)
646{
647 /* Assume that lengths are public, so it's safe to behave conditionally on
648 * the bits of ctx->count.
649 */
650 unsigned long n;
651 unsigned i;
652 felt x;
653#if POLY1305_IMPL == 26
654 uint32 x0, x1, x2, x3, x4, y0, y1, y2, y3, y4;
655#else
656 uint32 y[12];
657#endif
658
659 /* We can only concatenate if the prefix is block-aligned. */
660 assert(!prefix->nbuf);
661
662 /* The hash for a message m = m_{k-1} m_{k-2} ... m_1 m_0 is h_r(m) =
663 * SUM_{0<=i<k} m_i r^{i+1}. If we have two messages, m, m', of lengths k
664 * and k' blocks respectively, then
665 *
666 * h_r(m || m') = SUM_{0<=i<k} m_i r^{k'+i+1} +
667 * SUM_{0<=i<k'} m'_i r^{i+1}
668 * = r^{k'} h_r(m) + h_r(m')
669 *
670 * This is simple left-to-right square-and-multiply exponentiation.
671 */
672 n = suffix->count;
673 x[0] = 1;
674#if POLY1305_IMPL == 26
675 x[1] = x[2] = x[3] = x[4] = 0;
676#else
677 for (i = 1; i < 12; i++) x[i] = 0;
678#endif
ac082cc9 679#define BIT (1ul << (ULONG_BITS - 1))
57496a50
MW
680 if (n) {
681 i = ULONG_BITS;
ae65b7a2 682 while (!(n&BIT)) { n <<= 1; i--; }
57496a50 683 mul_r(prefix, x, x); n <<= 1; i--;
ae65b7a2 684 while (i--) { sqr(x, x); if (n&BIT) mul_r(prefix, x, x); n <<= 1; }
57496a50
MW
685 }
686#undef BIT
687 mul(x, prefix->u.P.h, x);
688
689 /* Add on the suffix hash. */
690#if POLY1305_IMPL == 26
691 /* We're going to add the two hashes elementwise. Both h' = h_r(m') and
692 * x = r^{k'} h_r(m) are bounded above by 2^27, so the sum will be bounded
693 * by 2^28; but this is too large to leave in the accumulator. (Strictly,
694 * we could get away with it, but the caller can in theory chain an
695 * arbitrary number of concatenations and expect us to cope, and we'd
696 * definitely overflow eventually.) So we reduce. Since the excess is so
697 * small, a single round of `CARRY_REDUCE' is enough.
698 */
699 x0 = x[0] + suffix->u.p26.h[0]; x1 = x[1] + suffix->u.p26.h[1];
700 x2 = x[2] + suffix->u.p26.h[2]; x3 = x[3] + suffix->u.p26.h[3];
701 x4 = x[4] + suffix->u.p26.h[4];
702 CARRY_REDUCE(y, x);
703 ctx->u.p26.h[0] = y0; ctx->u.p26.h[1] = y1; ctx->u.p26.h[2] = y2;
704 ctx->u.p26.h[3] = y3; ctx->u.p26.h[4] = y4;
705#else
706 /* We'll add the two hashes elementwise and have to reduce again. The
707 * numbers are different, but the reasoning is basically the same.
708 */
709 for (i = 0; i < 12; i++) y[i] = x[i] + suffix->u.p11.h[i];
710 carry_reduce(y);
711 for (i = 0; i < 12; i++) ctx->u.p11.h[i] = y[i];
712#endif
713
714 /* Copy the remaining pieces of the context to set up the result. */
715 if (ctx != suffix) {
716 memcpy(ctx->buf, suffix->buf, suffix->nbuf);
717 ctx->nbuf = suffix->nbuf;
718 }
719 ctx->count = prefix->count + suffix->count;
720}
721
722/* --- @poly1305_done@ --- *
723 *
724 * Arguments: @poly1305_ctx *ctx@ = MAC context to finish
725 * @void *h@ = buffer to write the tag to
726 *
727 * Returns: ---
728 *
729 * Use: Completes a Poly1305 MAC tag computation.
730 */
731
732void poly1305_done(poly1305_ctx *ctx, void *h)
733{
734 octet *p = h;
735
736#if POLY1305_IMPL == 26
737 uint32 m_sub, t, c;
738 uint32 h0, h1, h2, h3, h4, hh0, hh1, hh2, hh3, hh4;
739
740 /* If there's anything left over in the buffer, pad it to form a final
741 * coefficient and update the evaluation one last time.
742 */
743 poly1305_flush(ctx);
744
745 /* Collect the final hash state. */
746 h0 = ctx->u.p26.h[0];
747 h1 = ctx->u.p26.h[1];
748 h2 = ctx->u.p26.h[2];
749 h3 = ctx->u.p26.h[3];
750 h4 = ctx->u.p26.h[4];
751
752 /* Reduce the final value mod 2^130 - 5. First pass: set h <- h +
753 * 5 floor(h/2^130). After this, the low pieces of h will be normalized:
754 * 0 <= h_i < 2^26 for 0 <= i < 4; and 0 <= h_4 < 2^26 + 1. In the
755 * (highly unlikely) event that h_4 >= 2^26, set c and truncate to 130
756 * bits.
757 */
758 c = h4 >> 26; h4 &= M26;
759 h0 += 5*c; c = h0 >> 26; h0 &= M26;
760 h1 += c; c = h1 >> 26; h1 &= M26;
761 h2 += c; c = h2 >> 26; h2 &= M26;
762 h3 += c; c = h3 >> 26; h3 &= M26;
763 h4 += c; c = h4 >> 26; h4 &= M26;
764
765 /* Calculate h' = h - (2^130 - 5). If h' >= 0 then t ends up 1; otherwise
766 * it's zero.
767 */
768 t = h0 + 5; hh0 = t&M26; t >>= 26;
769 t += h1; hh1 = t&M26; t >>= 26;
770 t += h2; hh2 = t&M26; t >>= 26;
771 t += h3; hh3 = t&M26; t >>= 26;
772 t += h4; hh4 = t&M26; t >>= 26;
773
774 /* Keep the subtraction result above if t or c is set. */
775 m_sub = -(t | c);
776 h0 = (hh0&m_sub) | (h0&~m_sub);
777 h1 = (hh1&m_sub) | (h1&~m_sub);
778 h2 = (hh2&m_sub) | (h2&~m_sub);
779 h3 = (hh3&m_sub) | (h3&~m_sub);
780 h4 = (hh4&m_sub) | (h4&~m_sub);
781
782 /* Add the mask onto the hash result. */
783 t = h0 + ctx->u.p26.s0; h0 = t&M26; t >>= 26;
784 t += h1 + ctx->u.p26.s1; h1 = t&M26; t >>= 26;
785 t += h2 + ctx->u.p26.s2; h2 = t&M26; t >>= 26;
786 t += h3 + ctx->u.p26.s3; h3 = t&M26; t >>= 26;
787 t += h4 + ctx->u.p26.s4; h4 = t&M26; t >>= 26;
788
789 /* Convert this mess back into 32-bit words. We lose the top two bits,
790 * but that's fine.
791 */
ae65b7a2
MW
792 h0 = (h0 >> 0) | ((h1&0x0000003f) << 26);
793 h1 = (h1 >> 6) | ((h2&0x00000fff) << 20);
794 h2 = (h2 >> 12) | ((h3&0x0003ffff) << 14);
795 h3 = (h3 >> 18) | ((h4&0x00ffffff) << 8);
57496a50
MW
796
797 /* All done. */
798 STORE32_L(p + 0, h0); STORE32_L(p + 4, h1);
799 STORE32_L(p + 8, h2); STORE32_L(p + 12, h3);
800#else
801 uint16 hh[12], hi[12], c, t, m_sub;
802 uint32 a;
803 unsigned i, j, n;
804
805 /* If there's anything left over in the buffer, pad it to form a final
806 * coefficient and update the evaluation one last time.
807 */
808 poly1305_flush(ctx);
809
810 /* Collect the final hash state. */
811 for (i = 0; i < 12; i++) hh[i] = ctx->u.p11.h[i];
812
813 /* Reduce the final value mod 2^130 - 5. First pass: set h <- h +
814 * 5 floor(h/2^130). After this, the low pieces of h will be normalized:
815 * 0 <= h_i < 2^{w_i} for 0 <= i < 11; and 0 <= h_{11} < 2^10 + 1. In the
816 * (highly unlikely) event that h_{11} >= 2^10, set c and truncate to 130
817 * bits.
818 */
819 c = 5*(hh[11] >> 10); hh[11] &= M10;
820 for (i = 0; i < 12; i++) {
821 if (i == 5 || i == 11) { c += hh[i]; hh[i] = c&M10; c >>= 10; }
822 else { c += hh[i]; hh[i] = c&M11; c >>= 11; }
823 }
824
825 /* Calculate h' = h - (2^130 - 5). If h' >= 0 then t ends up 1; otherwise
826 * it's zero.
827 */
828 for (i = 0, t = 5; i < 12; i++) {
829 t += hh[i];
830 if (i == 5 || i == 11) { hi[i] = t&M10; t >>= 10; }
831 else { hi[i] = t&M11; t >>= 11; }
832 }
833
834 /* Keep the subtraction result above if t or c is set. */
835 m_sub = -(t | c);
836 for (i = 0; i < 12; i++) hh[i] = (hi[i]&m_sub) | (hh[i]&~m_sub);
837
838 /* Add the mask onto the hash result. */
839 for (i = 0, t = 0; i < 12; i++) {
840 t += hh[i] + ctx->u.p11.s[i];
841 if (i == 5 || i == 11) { hh[i] = t&M10; t >>= 10; }
842 else { hh[i] = t&M11; t >>= 11; }
843 }
844
845 /* Convert this mess back into bytes. We lose the top two bits, but that's
846 * fine.
847 */
848 for (i = j = n = 0, a = 0; i < 16; i++) {
849 if (n < 8) {
850 a |= hh[j] << n;
851 n += (j == 5 || j == 11) ? 10 : 11;
852 j++;
853 }
854 p[i] = a&0xff; a >>= 8; n -= 8;
855 }
856
857#endif
858}
859
860/*----- Test rig ----------------------------------------------------------*/
861
862#ifdef TEST_RIG
863
141c1284 864#include <mLib/macros.h>
57496a50
MW
865#include <mLib/testrig.h>
866
1aaccf40 867#include "ct.h"
47af781c
MW
868#include "rijndael-ecb.h"
869
57496a50
MW
870static int vrf_hash(dstr v[])
871{
872 poly1305_key k;
873 poly1305_ctx ctx;
874 dstr t = DSTR_INIT;
875 unsigned i, j;
876
877 if (v[0].len != 16) { fprintf(stderr, "bad key length\n"); exit(2); }
878 if (v[1].len != 16) { fprintf(stderr, "bad mask length\n"); exit(2); }
879 if (v[3].len != 16) { fprintf(stderr, "bad tag length\n"); exit(2); }
880 dstr_ensure(&t, 16); t.len = 16;
881
1aaccf40 882 ct_poison(v[0].buf, v[0].len);
57496a50
MW
883 poly1305_keyinit(&k, v[0].buf, v[0].len);
884 for (i = 0; i < v[2].len; i++) {
885 for (j = i; j < v[2].len; j++) {
886 poly1305_macinit(&ctx, &k, v[1].buf);
887 poly1305_hash(&ctx, v[2].buf, i);
888 poly1305_hash(&ctx, v[2].buf + i, j - i);
889 poly1305_hash(&ctx, v[2].buf + j, v[2].len - j);
890 poly1305_done(&ctx, t.buf);
1aaccf40 891 ct_remedy(t.buf, t.len);
141c1284 892 if (MEMCMP(t.buf, !=, v[3].buf, 16)) {
57496a50
MW
893 fprintf(stderr, "failed...");
894 fprintf(stderr, "\n\tkey = "); type_hex.dump(&v[0], stderr);
895 fprintf(stderr, "\n\tmask = "); type_hex.dump(&v[1], stderr);
896 fprintf(stderr, "\n\tmsg = "); type_hex.dump(&v[2], stderr);
897 fprintf(stderr, "\n\texp = "); type_hex.dump(&v[3], stderr);
898 fprintf(stderr, "\n\tcalc = "); type_hex.dump(&t, stderr);
899 fprintf(stderr, "\n\tsplits = 0 .. %u .. %u .. %lu\n",
900 i, j, (unsigned long)v[1].len);
901 return (0);
902 }
903 }
904 }
905 return (1);
906}
907
908static int vrf_cat(dstr v[])
909{
910 poly1305_key k;
911 poly1305_ctx ctx, cc[3];
912 dstr t = DSTR_INIT;
913 unsigned i;
914 int ok = 1;
915
916 if (v[0].len != 16) { fprintf(stderr, "bad key length\n"); exit(2); }
917 if (v[1].len != 16) { fprintf(stderr, "bad mask length\n"); exit(2); }
918 if (v[5].len != 16) { fprintf(stderr, "bad tag length\n"); exit(2); }
919 dstr_ensure(&t, 16); t.len = 16;
920
921 poly1305_keyinit(&k, v[0].buf, v[0].len);
922 poly1305_macinit(&ctx, &k, v[1].buf);
923 for (i = 0; i < 3; i++) {
924 poly1305_macinit(&cc[i], &k, 0);
925 poly1305_hash(&cc[i], v[i + 2].buf, v[i + 2].len);
926 }
927 for (i = 0; i < 2; i++) {
928 if (!i) {
929 poly1305_concat(&ctx, &cc[1], &cc[2]);
930 poly1305_concat(&ctx, &cc[0], &ctx);
931 } else {
932 poly1305_concat(&ctx, &cc[0], &cc[1]);
933 poly1305_concat(&ctx, &ctx, &cc[2]);
934 }
935 poly1305_done(&ctx, t.buf);
141c1284 936 if (MEMCMP(t.buf, !=, v[5].buf, 16)) {
57496a50
MW
937 fprintf(stderr, "failed...");
938 fprintf(stderr, "\n\tkey = "); type_hex.dump(&v[0], stderr);
939 fprintf(stderr, "\n\tmask = "); type_hex.dump(&v[1], stderr);
940 fprintf(stderr, "\n\tmsg[0] = "); type_hex.dump(&v[2], stderr);
941 fprintf(stderr, "\n\tmsg[1] = "); type_hex.dump(&v[3], stderr);
942 fprintf(stderr, "\n\tmsg[2] = "); type_hex.dump(&v[4], stderr);
943 fprintf(stderr, "\n\texp = "); type_hex.dump(&v[5], stderr);
944 fprintf(stderr, "\n\tcalc = "); type_hex.dump(&t, stderr);
945 fprintf(stderr, "\n\tassoc = %s\n",
946 !i ? "msg[0] || (msg[1] || msg[2])" :
947 "(msg[0] || msg[1]) || msg[2]");
948 ok = 0;
949 }
950 }
951 return (ok);
952}
953
47af781c
MW
954#define MSZMAX 1000
955
956static int vrf_mct(dstr v[])
957{
958 unsigned j, msz;
2e572d83 959 unsigned long i, start_iter, end_iter;
47af781c
MW
960 rijndael_ecbctx rij;
961 poly1305_key key;
962 poly1305_ctx mac;
a9a5bfa0
MW
963 dstr dk = DSTR_INIT, dr = DSTR_INIT, dn = DSTR_INIT,
964 dt = DSTR_INIT, dm = DSTR_INIT;
965 octet *k, *r, s[16], *n, *t, *m;
47af781c
MW
966 int ok = 1;
967
a9a5bfa0
MW
968 DENSURE(&dk, 16); k = (octet *)dk.buf; dk.len = 16;
969 DENSURE(&dr, 16); r = (octet *)dr.buf; dr.len = 16;
970 DENSURE(&dn, 16); n = (octet *)dn.buf; dn.len = 16;
971 DENSURE(&dt, 16); t = (octet *)dt.buf; dt.len = 16;
972 DENSURE(&dm, MSZMAX); m = (octet *)dm.buf; dm.len = MSZMAX;
973 memset(m, 0, MSZMAX);
974
2e572d83
MW
975 if (v[0].len != 16) { fprintf(stderr, "AES key len\n"); exit(2); }
976 if (v[1].len != 16) { fprintf(stderr, "poly key len\n"); exit(2); }
977 if (v[2].len != 16) { fprintf(stderr, "nonce len\n"); exit(2); }
978 if (v[3].len != MSZMAX) { fprintf(stderr, "msgbuf len\n"); exit(2); }
979 if (v[6].len != 16) { fprintf(stderr, "result len\n"); exit(2); }
4741bd9f
MW
980 memcpy(k, v[0].buf, 16);
981 memcpy(r, v[1].buf, 16);
982 memcpy(n, v[2].buf, 16);
2e572d83
MW
983 memcpy(m, v[3].buf, MSZMAX);
984 start_iter = *(unsigned long *)v[4].buf;
985 end_iter = *(unsigned long *)v[5].buf;
986 if (end_iter < start_iter) { fprintf(stderr, "iter bounds\n"); exit(2); }
47af781c 987
4741bd9f
MW
988 rijndael_ecbinit(&rij, k, 16, 0);
989 poly1305_keyinit(&key, r, 16);
2e572d83 990 for (i = start_iter; i < end_iter; i++) {
47af781c
MW
991 msz = 0;
992 for (;;) {
993 rijndael_ecbencrypt(&rij, n, s, 16);
994 poly1305_macinit(&mac, &key, s);
995 poly1305_hash(&mac, m, msz);
996 poly1305_done(&mac, t);
997 if (msz >= MSZMAX) break;
998 n[0] ^= i&0xff;
999 for (j = 0; j < 16; j++) n[j] ^= t[j];
1000 if (msz%2) {
1001 for (j = 0; j < 16; j++) k[j] ^= t[j];
4741bd9f 1002 rijndael_ecbinit(&rij, k, 16, 0);
47af781c
MW
1003 }
1004 if (msz%3) {
1005 for (j = 0; j < 16; j++) r[j] ^= t[j];
4741bd9f 1006 poly1305_keyinit(&key, r, 16);
47af781c
MW
1007 }
1008 m[msz++] ^= t[0];
1009 }
1010 }
1011
2e572d83 1012 if (MEMCMP(t, !=, v[6].buf, 16)) {
47af781c
MW
1013 ok = 0;
1014 fprintf(stderr, "failed...");
1015 fprintf(stderr, "\n\tinitial k = "); type_hex.dump(&v[0], stderr);
1016 fprintf(stderr, "\n\tinitial r = "); type_hex.dump(&v[1], stderr);
1017 fprintf(stderr, "\n\tinitial n = "); type_hex.dump(&v[2], stderr);
2e572d83
MW
1018 fprintf(stderr, "\n\tinitial m = "); type_hex.dump(&v[3], stderr);
1019 fprintf(stderr, "\n\tstart iter = %lu", start_iter);
1020 fprintf(stderr, "\n\tend iter = %lu", end_iter);
1021 fprintf(stderr, "\n\tfinal k = "); type_hex.dump(&dk, stderr);
1022 fprintf(stderr, "\n\tfinal r = "); type_hex.dump(&dr, stderr);
1023 fprintf(stderr, "\n\tfinal n = "); type_hex.dump(&dn, stderr);
1024 fprintf(stderr, "\n\tfinal m = "); type_hex.dump(&dm, stderr);
1025 fprintf(stderr, "\n\texpected = "); type_hex.dump(&v[6], stderr);
a9a5bfa0 1026 fprintf(stderr, "\n\tcalculated = "); type_hex.dump(&dt, stderr);
47af781c
MW
1027 fputc('\n', stderr);
1028 }
1029
a9a5bfa0
MW
1030 dstr_destroy(&dk);
1031 dstr_destroy(&dr);
1032 dstr_destroy(&dn);
1033 dstr_destroy(&dt);
1034 dstr_destroy(&dm);
47af781c
MW
1035 return (ok);
1036}
1037
57496a50
MW
1038static const struct test_chunk tests[] = {
1039 { "poly1305-hash", vrf_hash,
1040 { &type_hex, &type_hex, &type_hex, &type_hex } },
1041 { "poly1305-cat", vrf_cat,
1042 { &type_hex, &type_hex, &type_hex, &type_hex, &type_hex, &type_hex } },
47af781c 1043 { "poly1305-mct", vrf_mct,
2e572d83
MW
1044 { &type_hex, &type_hex, &type_hex, &type_hex,
1045 &type_ulong, &type_ulong, &type_hex } },
57496a50
MW
1046 { 0, 0, { 0 } }
1047};
1048
1049int main(int argc, char *argv[])
1050{
1051 test_run(argc, argv, tests, SRCDIR "/t/poly1305");
1052 return (0);
1053}
1054
1055#endif
1056
1057/*----- That's all, folks -------------------------------------------------*/