From 57496a500309c335ff9c60d3462757c0c5f04801 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Thu, 26 May 2016 09:26:09 +0100 Subject: [PATCH] symm/: Implement Daniel Bernstein's `Poly1305' message authentication code. --- symm/Makefile.am | 14 + symm/poly1305.c | 950 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ symm/poly1305.h | 225 +++++++++++++ symm/t/poly1305 | 61 ++++ 4 files changed, 1250 insertions(+) create mode 100644 symm/poly1305.c create mode 100644 symm/poly1305.h create mode 100644 symm/t/poly1305 diff --git a/symm/Makefile.am b/symm/Makefile.am index 2c1cc06f..ee3d4172 100644 --- a/symm/Makefile.am +++ b/symm/Makefile.am @@ -454,6 +454,20 @@ STUBS_HDR += XChaCha20,xchacha20,chacha STUBS_HDR += XChaCha12,xchacha12,chacha STUBS_HDR += XChaCha8,xchacha8,chacha +## Bernstein's `Poly1305' message authentication code. +pkginclude_HEADERS += poly1305.h +libsymm_la_SOURCES += poly1305.c +TESTS += poly1305.t$(EXEEXT) +TESTS += poly1305-p11.t$(EXEEXT) +EXTRA_DIST += t/poly1305 + +check_PROGRAMS += poly1305-p11.t +poly1305_p11_t_SOURCES = poly1305.c +poly1305_p11_t_CPPFLAGS = $(AM_CPPFLAGS) -DTEST_RIG -DSRCDIR="\"$(srcdir)\"" +poly1305_p11_t_CPPFLAGS += -DPOLY1305_IMPL=11 +poly1305_p11_t_LDADD = $(TEST_LIBS) $(top_builddir)/libcatacomb.la +poly1305_p11_t_LDADD += $(mLib_LIBS) $(CATACOMB_LIBS) $(LIBS) + ###-------------------------------------------------------------------------- ### Autogenerated mode implementations. diff --git a/symm/poly1305.c b/symm/poly1305.c new file mode 100644 index 00000000..b2b2c55a --- /dev/null +++ b/symm/poly1305.c @@ -0,0 +1,950 @@ +/* -*-c-*- + * + * Poly1305 message authentication code + * + * (c) 2017 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include "config.h" + +#include +#include + +#include "poly1305.h" + +/*----- Global variables --------------------------------------------------*/ + +const octet poly1305_keysz[] = { KSZ_SET, 16, 0 }; + +/*----- Low-level implementation for 32/64-bit targets --------------------*/ + +#if !defined(POLY1305_IMPL) && defined(HAVE_UINT64) +# define POLY1305_IMPL 26 +#endif + +#if POLY1305_IMPL == 26 + +/* Elements x of GF(2^130 - 5) are represented by five integers x_i: x = + * SUM_{0<=i<5} x_i 2^{26i}. + * + * Not all elements are represented canonically. We have 0 <= r_i, s_i < + * 2^26 by construction. We maintain 0 <= h_i < 2^27. When we read a + * message block m, we have 0 <= m_i < 2^26 by construction again. When we + * update the hash state, we calculate h' = r (h + m). Addition is done + * componentwise; let t = h + m, and we will have 0 <= t_i < 3*2^26. + */ +typedef uint32 felt[5]; +#define M26 0x03ffffff +#define P p26 + +/* Convert 32-bit words into field-element pieces. */ +#define P26W0(x) ((x##0)&0x03ffffff) +#define P26W1(x) ((((x##1)&0x000fffff) << 6) | (((x##0) >> 26)&0x0000003f)) +#define P26W2(x) ((((x##2)&0x00003fff) << 12) | (((x##1) >> 20)&0x00000fff)) +#define P26W3(x) ((((x##3)&0x000000ff) << 18) | (((x##2) >> 14)&0x0003ffff)) +#define P26W4(x) (((x##3) >> 8)&0x00ffffff) + +/* Propagate carries in parallel. If 0 <= u_i < 2^26 c_i, then we shall have + * 0 <= v_0 < 2^26 + 5 c_4, and 0 <= v_i < 2^26 + c_{i-1} for 1 <= i < 5. + */ +#define CARRY_REDUCE(v, u) do { \ + (v##0) = ((u##0)&M26) + 5*((u##4) >> 26); \ + (v##1) = ((u##1)&M26) + ((u##0) >> 26); \ + (v##2) = ((u##2)&M26) + ((u##1) >> 26); \ + (v##3) = ((u##3)&M26) + ((u##2) >> 26); \ + (v##4) = ((u##4)&M26) + ((u##3) >> 26); \ +} while (0) + +/* General multiplication, used by `concat'. */ +static void mul(felt z, const felt x, const felt y) +{ + /* Initial bounds: we assume x_i, y_i < 2^27. On exit, z_i < 2^27. */ + + uint32 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4]; + uint32 y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4]; + uint64 u0, u1, u2, u3, u4; + uint64 v0, v1, v2, v3, v4; + uint32 z0, z1, z2, z3, z4; + + /* Do the multiplication: u = h x mod 2^130 - 5. We will have u_i < + * 2^27 (5 (4 - i) + i + 1) 2^27 = 2^54 (21 - 4 i) = 2^52 (84 - 16 i). In + * all cases we have u_i < 84*2^52 < 2^59. Notably, u_4 < 5*2^54 = + * 20*2^52. + */ +#define M(x, y) ((uint64)(x)*(y)) + u0 = M(x0, y0) + (M(x1, y4) + M(x2, y3) + M(x3, y2) + M(x4, y1))*5; + u1 = M(x0, y1) + M(x1, y0) + (M(x2, y4) + M(x3, y3) + M(x4, y2))*5; + u2 = M(x0, y2) + M(x1, y1) + M(x2, y0) + (M(x3, y4) + M(x4, y3))*5; + u3 = M(x0, y3) + M(x1, y2) + M(x2, y1) + M(x3, y0) + (M(x4, y4))*5; + u4 = M(x0, y4) + M(x1, y3) + M(x2, y2) + M(x3, y1) + M(x4, y0); +#undef M + + /* Now we must reduce the coefficients. We do this in an approximate + * manner which avoids long data-dependency chains, but requires two + * passes. + * + * The reduced carry down from u_4 to u_0 in the first pass will be c_0 < + * 100*2^26; the remaining c_i are smaller: c_i < 2^26 (84 - 16 i). This + * leaves 0 <= v_i < 101*2^26. The carries in the second pass are bounded + * above by 180. + */ + CARRY_REDUCE(v, u); CARRY_REDUCE(z, v); + z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4; +} + +/* General squaring, used by `concat'. */ +static void sqr(felt z, const felt x) +{ + /* Initial bounds: we assume x_i < 2^27. On exit, z_i < 2^27. */ + + uint32 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4]; + uint64 u0, u1, u2, u3, u4; + uint64 v0, v1, v2, v3, v4; + uint32 z0, z1, z2, z3, z4; + + /* Do the squaring. See `mul' for bounds. */ +#define M(x, y) ((uint64)(x)*(y)) + u0 = M(x0, x0) + 10*(M(x1, x4) + M(x2, x3)); + u1 = 2* M(x0, x1) + 5*(M(x3, x3) + 2*M(x2, x4)); + u2 = M(x1, x1) + 2* M(x0, x2) + 10* M(x3, x4); + u3 = 2*(M(x0, x3) + M(x1, x2)) + 5* M(x4, x4); + u4 = M(x2, x2) + 2*(M(x0, x4) + M(x1, x3)); +#undef M + + /* Now we must reduce the coefficients. See `mul' for bounds. */ + CARRY_REDUCE(v, u); CARRY_REDUCE(z, v); + z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4; +} + +/* Multiplication by r, using precomputation. */ +static void mul_r(const poly1305_ctx *ctx, felt z, const felt x) +{ + /* Initial bounds: by construction, r_i < 2^26. We assume x_i < 3*2^26. + * On exit, z_i < 2^27. + */ + + uint32 + r0 = ctx->k.u.p26.r0, + r1 = ctx->k.u.p26.r1, rr1 = ctx->k.u.p26.rr1, + r2 = ctx->k.u.p26.r2, rr2 = ctx->k.u.p26.rr2, + r3 = ctx->k.u.p26.r3, rr3 = ctx->k.u.p26.rr3, + r4 = ctx->k.u.p26.r4, rr4 = ctx->k.u.p26.rr4; + uint32 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4]; + uint64 u0, u1, u2, u3, u4; + uint64 v0, v1, v2, v3, v4; + uint32 z0, z1, z2, z3, z4; + + /* Do the multiplication: u = h x mod 2^130 - 5. We will have u_i < + * 2^26 (5 (4 - i) + i + 1) 3*2^26 = 2^52 (63 - 12 i). In all cases + * we have u_i < 63*2^52 < 2^58. Notably, u_4 < 15*2^52. + */ +#define M(x, y) ((uint64)(x)*(y)) + u0 = M(x0, r0) + M(x1, rr4) + M(x2, rr3) + M(x3, rr2) + M(x4, rr1); + u1 = M(x0, r1) + M(x1, r0) + M(x2, rr4) + M(x3, rr3) + M(x4, rr2); + u2 = M(x0, r2) + M(x1, r1) + M(x2, r0) + M(x3, rr4) + M(x4, rr3); + u3 = M(x0, r3) + M(x1, r2) + M(x2, r1) + M(x3, r0) + M(x4, rr4); + u4 = M(x0, r4) + M(x1, r3) + M(x2, r2) + M(x3, r1) + M(x4, r0); +#undef M + + /* Now we must reduce the coefficients. We do this in an approximate + * manner which avoids long data-dependency chains, but requires two + * passes. + * + * The reduced carry down from u_4 to u_0 in the first pass will be c_0 < + * 75*2^26; the remaining c_i are smaller: c_i < 2^26 (63 - 12 i). This + * leaves 0 <= v_i < 76*2^26. The carries in the second pass are bounded + * above by 135. + */ + CARRY_REDUCE(v, u); CARRY_REDUCE(z, v); + z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4; +} + +#endif + +/*----- Low-level implementation for 32/64-bit targets --------------------*/ + +#ifndef POLY1305_IMPL +# define POLY1305_IMPL 11 +#endif + +#if POLY1305_IMPL == 11 + +/* Elements x of GF(2^130 - 5) are represented by 12 integers x_i: x = + * SUM_{0<=i<12} x_i 2^P_i, where P_i = SUM_{0<=j>= w; n -= w; + } +} + +/* Reduce a field-element's pieces to manageable size. */ +static void carry_reduce(uint32 u[12]) +{ + /* Initial bounds: we assume u_i < 636*2^22. On exit, u_i < 2^11. */ + + unsigned i; + uint32 c; + + /* Do sequential carry propagation (16-bit CPUs are less likely to benefit + * from instruction-level parallelism). Start at u_10; truncate it to 11 + * bits, and add the carry onto u_11. Truncate u_11 to 10 bits, and add + * five times the carry onto u_0. And so on. + * + * The carry is larger than the pieces we're leaving behind. Let c_i be + * the high portion of u_i, to be carried onto u_{i+1}. I claim that c_i < + * 2557*2^10. Then the carry /into/ any u_i is at most 12785*2^10 < 2^24 + * (allowing for the reduction as we carry from u_11 to u_0), and u_i after + * carry is bounded above by 636*2^22 + 12785*2^10 < 2557*2^20. Hence, the + * carry out is at most 2557*2^10, as claimed. + * + * Once we reach u_10 for the second time, we start with u_10 < 2^11. The + * carry into u_10 is at most 2557*2^10 < 1279*2^11 as calculated above; so + * the carry out into u_11 is at most 1280. Since u_11 < 2^10 prior to + * this carry in, all of the u_i are now bounded above by 2^11. The final + * reduction therefore only needs a conditional subtraction. + */ + { c = u[10] >> 11; u[10] &= M11; } + { u[11] += c; c = u[11] >> 10; u[11] &= M10; } + { u[0] += 5*c; c = u[0] >> 11; u[0] &= M11; } + for (i = 1; i < 5; i++) { u[i] += c; c = u[i] >> 11; u[i] &= M11; } + { u[5] += c; c = u[5] >> 10; u[5] &= M10; } + for (i = 6; i < 11; i++) { u[i] += c; c = u[i] >> 11; u[i] &= M11; } + u[11] += c; +} + +/* General multiplication. */ +static void mul(felt z, const felt x, const felt y) +{ + /* Initial bounds: we assume x_i < 3*2^11, and y_i < 2^12. On exit, + * z_i < 2^12. + */ + + uint32 u[12]; + unsigned i, j, k; + + /* Do the main multiplication. After this, we shall have + * + * { 2^22 (636 - 184 i) for 0 <= i < 6 + * u_i < { + * { 2^22 (732 - 60 i) for 6 <= i < 12 + * + * In particular, u_0 < 636*2^22 < 2^32, and u_11 < 72*2^22. + * + * The irregularly positioned pieces are annoying. Because we fold the + * reduction into the multiplication, it's also important to see where the + * reduced products fit. Finally, products don't align with the piece + * boundaries, and sometimes need to be doubled. The following table + * tracks all of this. + * + * piece width offset second + * 0 11 0 130 + * 1 11 11 141 + * 2 11 22 152 + * 3 11 33 163 + * 4 11 44 174 + * 5 10 55 185 + * 6 11 65 195 + * 7 11 76 206 + * 8 11 87 217 + * 9 11 98 228 + * 10 11 109 239 + * 11 10 120 250 + * + * The next table tracks exactly which products end up being multiplied by + * which constants and accumulated into which destination pieces. + * + * u_k = t_i r_j + 2 t_i r_j + 5 t_i r_j + 10 t_i r_j + * 0 0/0 -- 6/6 1-5/11-7 7-11/5-1 + * 1 0-1/1-0 -- 6-7/7-6 2-5/11-8 8-11/5-2 + * 2 0-2/2-0 -- 6-8/8-6 3-5/11-9 9-11/5-3 + * 3 0-3/3-0 -- 6-9/9-6 4-5/11-10 10-11/5-4 + * 4 0-4/4-0 -- 6-10/10-6 5/11 11/5 + * 5 0-5/5-0 -- 6-11/11-6 -- + * 6 0/6 6/0 1-5/5-1 -- 7-11/11-7 + * 7 0-1/7-6 6-7/1-0 2-5/5-2 -- 8-11/11-8 + * 8 0-2/8-6 6-8/2-0 3-5/5-3 -- 9-11/11-9 + * 9 0-3/9-6 6-9/3-0 4-5/5-4 -- 10-11/11-10 + * 10 0-4/10-6 6-10/4-0 5/5 -- 11/11 + * 11 0-11/11-0 -- -- -- + * + * And, finally, trying to bound the multiple of 6*2^22 in each destination + * piece is fiddly, so here's a tableau showing the calculation. + * + * k 1* + 2* + 5* +10* = 1* + 5* = + * 0 1 -- 1 10 1 21 106 + * 1 2 -- 2 8 2 18 92 + * 2 3 -- 3 6 3 15 78 + * 3 4 -- 4 4 4 12 64 + * 4 5 -- 5 2 5 9 50 + * 5 6 -- 6 -- 6 6 36 + * 6 2 5 -- 5 12 10 62 + * 7 4 4 -- 4 12 8 52 + * 8 6 3 -- 3 12 6 42 + * 9 8 2 -- 2 12 4 32 + * 10 10 1 -- 1 12 2 22 + * 11 12 -- -- -- 12 0 12 + */ + + for (i = 0; i < 12; i++) u[i] = 0; + +#define M(i, j) ((uint32)x[i]*y[j]) + + /* Product terms we must multiply by 10. */ + for (k = 0; k < 5; k++) { + for (i = k + 1; i < 6; i++) { + j = 12 + k - i; + u[k] += M(i, j) + M(j, i); + u[k + 6] += M(i + 6, j); + } + } + for (k = 0; k < 5; k++) u[k] *= 2; + for (k = 6; k < 11; k++) u[k] *= 5; + + /* Product terms we must multiply by 5. */ + for (k = 0; k < 6; k++) { + for (i = k + 6; i >= 6; i--) { + j = 12 + k - i; + u[k] += M(i, j); + } + } + for (k = 0; k < 6; k++) u[k] *= 5; + + /* Product terms we must multiply by 2. */ + for (k = 6; k < 11; k++) { + for (i = k - 5; i < 6; i++) { + j = k - i; + u[k] += M(i, j); + } + } + for (k = 6; k < 11; k++) u[k] *= 2; + + /* Remaining product terms. */ + for (k = 0; k < 6; k++) { + for (i = k; i < 6; i--) { + j = k - i; + u[k] += M(i, j); + u[k + 6] += M(i + 6, j) + M(i, j + 6); + } + } + +#undef M + + /* Do the reduction. Currently, `carry_reduce' does more than we need, but + * that's fine. + */ + carry_reduce(u); + + /* Done. Write out the answer. */ + for (i = 0; i < 12; i++) z[i] = u[i]; +} + +/* General squaring, used by `concat'. */ +static void sqr(felt z, const felt x) + { mul(z, x, x); } + +/* Multiplication by r. */ +static void mul_r(const poly1305_ctx *ctx, felt z, const felt x) + { mul(z, x, ctx->k.u.p11.r); } + +#endif + +/*----- Interface functions -----------------------------------------------*/ + +/* --- @poly1305_keyinit@ --- * + * + * Arguments: @poly1305_key *key@ = key structure to fill in + * @const void *k@ = pointer to key material + * @size_t ksz@ = length of key (must be @POLY1305_KEYSZ == 16@) + * + * Returns: --- + * + * Use: Records a Poly1305 key and performs (minimal) + * precomputations. + */ + +void poly1305_keyinit(poly1305_key *key, const void *k, size_t ksz) +{ + const octet *r = k; +#if POLY1305_IMPL == 11 + octet rr[16]; +#endif + + KSZ_ASSERT(poly1305, ksz); + +#if POLY1305_IMPL == 26 + uint32 r0 = LOAD32_L(r + 0), r1 = LOAD32_L(r + 4), + r2 = LOAD32_L(r + 8), r3 = LOAD32_L(r + 12); + + r0 &= 0x0fffffff; r1 &= 0x0ffffffc; r2 &= 0x0ffffffc; r3 &= 0x0ffffffc; + key->u.p26.r0 = P26W0(r); key->u.p26.r1 = P26W1(r); + key->u.p26.r2 = P26W2(r); key->u.p26.r3 = P26W3(r); + key->u.p26.r4 = P26W4(r); + + key->u.p26.rr1 = 5*key->u.p26.r1; key->u.p26.rr2 = 5*key->u.p26.r2; + key->u.p26.rr3 = 5*key->u.p26.r3; key->u.p26.rr4 = 5*key->u.p26.r4; +#else + memcpy(rr, r, 16); + rr[ 3] &= 0x0f; + rr[ 4] &= 0xfc; rr[ 7] &= 0x0f; + rr[ 8] &= 0xfc; rr[11] &= 0x0f; + rr[12] &= 0xfc; rr[15] &= 0x0f; + load_p11(key->u.p11.r, rr); +#endif +} + +/* --- @poly1305_macinit@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = MAC context to fill in + * @const poly1305_key *key@ = pointer to key structure to use + * @const void *iv@ = pointer to mask string + * + * Returns: --- + * + * Use: Initializes a MAC context for use. The key can be discarded + * at any time. + * + * It is permitted for @iv@ to be null, though it is not then + * possible to complete the MAC computation on @ctx@. The + * resulting context may still be useful, e.g., as an operand to + * @poly1305_concat@. + */ + +void poly1305_macinit(poly1305_ctx *ctx, + const poly1305_key *key, const void *iv) +{ + const octet *s = iv; +#if POLY1305_IMPL == 26 + uint32 s0, s1, s2, s3; +#else + unsigned i; +#endif + +#if POLY1305_IMPL == 26 + if (s) { + s0 = LOAD32_L(s + 0); s1 = LOAD32_L(s + 4); + s2 = LOAD32_L(s + 8); s3 = LOAD32_L(s + 12); + ctx->u.p26.s0 = P26W0(s); ctx->u.p26.s1 = P26W1(s); + ctx->u.p26.s2 = P26W2(s); ctx->u.p26.s3 = P26W3(s); + ctx->u.p26.s4 = P26W4(s); + } + ctx->u.p26.h[0] = ctx->u.p26.h[1] = ctx->u.p26.h[2] = + ctx->u.p26.h[3] = ctx->u.p26.h[4] = 0; +#else + if (s) load_p11(ctx->u.p11.s, s); + for (i = 0; i < 12; i++) ctx->u.p11.h[i] = 0; +#endif + ctx->k = *key; + ctx->nbuf = 0; + ctx->count = 0; +} + +/* --- @poly1305_copy@ --- * + * + * Arguments: @poly1305_ctx *to@ = destination context + * @const poly1305_ctx *from@ = source context + * + * Returns: --- + * + * Use: Duplicates a Poly1305 MAC context. The destination need not + * have been initialized. Both contexts can be used + * independently afterwards. + */ + +void poly1305_copy(poly1305_ctx *ctx, const poly1305_ctx *from) + { *ctx = *from; } + +/* --- @poly1305_hash@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = MAC context to update + * @const void *p@ = pointer to message data + * @size_t sz@ = length of message data + * + * Returns: --- + * + * Use: Processes a chunk of message. The message pieces may have + * arbitrary lengths, and may be empty. + */ + +static void update_full(poly1305_ctx *ctx, const octet *p) +{ + felt t; +#if POLY1305_IMPL == 26 + uint32 + m0 = LOAD32_L(p + 0), m1 = LOAD32_L(p + 4), + m2 = LOAD32_L(p + 8), m3 = LOAD32_L(p + 12); + + t[0] = ctx->u.p26.h[0] + P26W0(m); + t[1] = ctx->u.p26.h[1] + P26W1(m); + t[2] = ctx->u.p26.h[2] + P26W2(m); + t[3] = ctx->u.p26.h[3] + P26W3(m); + t[4] = ctx->u.p26.h[4] + P26W4(m) + 0x01000000; +#else + unsigned i; + + load_p11(t, p); t[11] += 0x100; + for (i = 0; i < 12; i++) t[i] += ctx->u.p11.h[i]; +#endif + + mul_r(ctx, ctx->u.P.h, t); + ctx->count++; +} + +void poly1305_hash(poly1305_ctx *ctx, const void *p, size_t sz) +{ + const octet *pp = p; + size_t n; + + if (ctx->nbuf) { + if (sz < 16 - ctx->nbuf) { + memcpy(ctx->buf + ctx->nbuf, p, sz); + ctx->nbuf += sz; + return; + } + n = 16 - ctx->nbuf; + memcpy(ctx->buf + ctx->nbuf, pp, n); + update_full(ctx, ctx->buf); + pp += n; sz -= n; + } + while (sz >= 16) { + update_full(ctx, pp); + pp += 16; sz -= 16; + } + if (sz) memcpy(ctx->buf, pp, sz); + ctx->nbuf = sz; +} + +/* --- @poly1305_flush@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = MAC context to flush + * + * Returns: --- + * + * Use: Forces any buffered message data in the context to be + * processed. This has no effect if the message processed so + * far is a whole number of blocks. Flushing is performed + * automatically by @poly1305_done@, but it may be necessary to + * force it by hand when using @poly1305_concat@. + * + * Flushing a partial block has an observable effect on the + * computation: the resulting state is (with high probability) + * dissimilar to any state reachable with a message which is a + * whole number of blocks long. + */ + +void poly1305_flush(poly1305_ctx *ctx) +{ + felt t; +#if POLY1305_IMPL == 26 + uint32 m0, m1, m2, m3; +#else + unsigned i; +#endif + + if (!ctx->nbuf) return; + ctx->buf[ctx->nbuf++] = 1; memset(ctx->buf + ctx->nbuf, 0, 16 - ctx->nbuf); +#if POLY1305_IMPL == 26 + m0 = LOAD32_L(ctx->buf + 0); m1 = LOAD32_L(ctx->buf + 4); + m2 = LOAD32_L(ctx->buf + 8); m3 = LOAD32_L(ctx->buf + 12); + + t[0] = ctx->u.p26.h[0] + P26W0(m); + t[1] = ctx->u.p26.h[1] + P26W1(m); + t[2] = ctx->u.p26.h[2] + P26W2(m); + t[3] = ctx->u.p26.h[3] + P26W3(m); + t[4] = ctx->u.p26.h[4] + P26W4(m); +#else + load_p11(t, ctx->buf); + for (i = 0; i < 12; i++) t[i] += ctx->u.p11.h[i]; +#endif + + mul_r(ctx, ctx->u.P.h, t); + ctx->count++; + ctx->nbuf = 0; +} + +/* --- @poly1305_concat@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = destination context + * @const poly1305_ctx *prefix, *suffix@ = two operand contexts + * + * Returns: --- + * + * Use: The two operand contexts @prefix@ and @suffix@ represent + * processing of two messages %$m$% and %$m'$%; the effect is to + * set @ctx@ to the state corresponding to their concatenation + * %$m \cat m'$%. + * + * All three contexts must have been initialized using the same + * key value (though not necessarily from the same key + * structure). The mask values associated with the input + * contexts are irrelevant. The @prefix@ message %$m$% must be + * a whole number of blocks long: this can be arranged by + * flushing the context. The @suffix@ message need not be a + * whole number of blocks long. All of the contexts remain + * operational and can be used independently afterwards. + */ + +void poly1305_concat(poly1305_ctx *ctx, + const poly1305_ctx *prefix, const poly1305_ctx *suffix) +{ + /* Assume that lengths are public, so it's safe to behave conditionally on + * the bits of ctx->count. + */ + unsigned long n; + unsigned i; + felt x; +#if POLY1305_IMPL == 26 + uint32 x0, x1, x2, x3, x4, y0, y1, y2, y3, y4; +#else + uint32 y[12]; +#endif + + /* We can only concatenate if the prefix is block-aligned. */ + assert(!prefix->nbuf); + + /* The hash for a message m = m_{k-1} m_{k-2} ... m_1 m_0 is h_r(m) = + * SUM_{0<=icount; + x[0] = 1; +#if POLY1305_IMPL == 26 + x[1] = x[2] = x[3] = x[4] = 0; +#else + for (i = 1; i < 12; i++) x[i] = 0; +#endif +#define BIT (1 << (ULONG_BITS - 1)) + if (n) { + i = ULONG_BITS; + while (!(n & BIT)) { n <<= 1; i--; } + mul_r(prefix, x, x); n <<= 1; i--; + while (i--) { sqr(x, x); if (n & BIT) mul_r(prefix, x, x); n <<= 1; } + } +#undef BIT + mul(x, prefix->u.P.h, x); + + /* Add on the suffix hash. */ +#if POLY1305_IMPL == 26 + /* We're going to add the two hashes elementwise. Both h' = h_r(m') and + * x = r^{k'} h_r(m) are bounded above by 2^27, so the sum will be bounded + * by 2^28; but this is too large to leave in the accumulator. (Strictly, + * we could get away with it, but the caller can in theory chain an + * arbitrary number of concatenations and expect us to cope, and we'd + * definitely overflow eventually.) So we reduce. Since the excess is so + * small, a single round of `CARRY_REDUCE' is enough. + */ + x0 = x[0] + suffix->u.p26.h[0]; x1 = x[1] + suffix->u.p26.h[1]; + x2 = x[2] + suffix->u.p26.h[2]; x3 = x[3] + suffix->u.p26.h[3]; + x4 = x[4] + suffix->u.p26.h[4]; + CARRY_REDUCE(y, x); + ctx->u.p26.h[0] = y0; ctx->u.p26.h[1] = y1; ctx->u.p26.h[2] = y2; + ctx->u.p26.h[3] = y3; ctx->u.p26.h[4] = y4; +#else + /* We'll add the two hashes elementwise and have to reduce again. The + * numbers are different, but the reasoning is basically the same. + */ + for (i = 0; i < 12; i++) y[i] = x[i] + suffix->u.p11.h[i]; + carry_reduce(y); + for (i = 0; i < 12; i++) ctx->u.p11.h[i] = y[i]; +#endif + + /* Copy the remaining pieces of the context to set up the result. */ + if (ctx != suffix) { + memcpy(ctx->buf, suffix->buf, suffix->nbuf); + ctx->nbuf = suffix->nbuf; + } + ctx->count = prefix->count + suffix->count; +} + +/* --- @poly1305_done@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = MAC context to finish + * @void *h@ = buffer to write the tag to + * + * Returns: --- + * + * Use: Completes a Poly1305 MAC tag computation. + */ + +void poly1305_done(poly1305_ctx *ctx, void *h) +{ + octet *p = h; + +#if POLY1305_IMPL == 26 + uint32 m_sub, t, c; + uint32 h0, h1, h2, h3, h4, hh0, hh1, hh2, hh3, hh4; + + /* If there's anything left over in the buffer, pad it to form a final + * coefficient and update the evaluation one last time. + */ + poly1305_flush(ctx); + + /* Collect the final hash state. */ + h0 = ctx->u.p26.h[0]; + h1 = ctx->u.p26.h[1]; + h2 = ctx->u.p26.h[2]; + h3 = ctx->u.p26.h[3]; + h4 = ctx->u.p26.h[4]; + + /* Reduce the final value mod 2^130 - 5. First pass: set h <- h + + * 5 floor(h/2^130). After this, the low pieces of h will be normalized: + * 0 <= h_i < 2^26 for 0 <= i < 4; and 0 <= h_4 < 2^26 + 1. In the + * (highly unlikely) event that h_4 >= 2^26, set c and truncate to 130 + * bits. + */ + c = h4 >> 26; h4 &= M26; + h0 += 5*c; c = h0 >> 26; h0 &= M26; + h1 += c; c = h1 >> 26; h1 &= M26; + h2 += c; c = h2 >> 26; h2 &= M26; + h3 += c; c = h3 >> 26; h3 &= M26; + h4 += c; c = h4 >> 26; h4 &= M26; + + /* Calculate h' = h - (2^130 - 5). If h' >= 0 then t ends up 1; otherwise + * it's zero. + */ + t = h0 + 5; hh0 = t&M26; t >>= 26; + t += h1; hh1 = t&M26; t >>= 26; + t += h2; hh2 = t&M26; t >>= 26; + t += h3; hh3 = t&M26; t >>= 26; + t += h4; hh4 = t&M26; t >>= 26; + + /* Keep the subtraction result above if t or c is set. */ + m_sub = -(t | c); + h0 = (hh0&m_sub) | (h0&~m_sub); + h1 = (hh1&m_sub) | (h1&~m_sub); + h2 = (hh2&m_sub) | (h2&~m_sub); + h3 = (hh3&m_sub) | (h3&~m_sub); + h4 = (hh4&m_sub) | (h4&~m_sub); + + /* Add the mask onto the hash result. */ + t = h0 + ctx->u.p26.s0; h0 = t&M26; t >>= 26; + t += h1 + ctx->u.p26.s1; h1 = t&M26; t >>= 26; + t += h2 + ctx->u.p26.s2; h2 = t&M26; t >>= 26; + t += h3 + ctx->u.p26.s3; h3 = t&M26; t >>= 26; + t += h4 + ctx->u.p26.s4; h4 = t&M26; t >>= 26; + + /* Convert this mess back into 32-bit words. We lose the top two bits, + * but that's fine. + */ + h0 = (h0 >> 0) | ((h1 & 0x0000003f) << 26); + h1 = (h1 >> 6) | ((h2 & 0x00000fff) << 20); + h2 = (h2 >> 12) | ((h3 & 0x0003ffff) << 14); + h3 = (h3 >> 18) | ((h4 & 0x00ffffff) << 8); + + /* All done. */ + STORE32_L(p + 0, h0); STORE32_L(p + 4, h1); + STORE32_L(p + 8, h2); STORE32_L(p + 12, h3); +#else + uint16 hh[12], hi[12], c, t, m_sub; + uint32 a; + unsigned i, j, n; + + /* If there's anything left over in the buffer, pad it to form a final + * coefficient and update the evaluation one last time. + */ + poly1305_flush(ctx); + + /* Collect the final hash state. */ + for (i = 0; i < 12; i++) hh[i] = ctx->u.p11.h[i]; + + /* Reduce the final value mod 2^130 - 5. First pass: set h <- h + + * 5 floor(h/2^130). After this, the low pieces of h will be normalized: + * 0 <= h_i < 2^{w_i} for 0 <= i < 11; and 0 <= h_{11} < 2^10 + 1. In the + * (highly unlikely) event that h_{11} >= 2^10, set c and truncate to 130 + * bits. + */ + c = 5*(hh[11] >> 10); hh[11] &= M10; + for (i = 0; i < 12; i++) { + if (i == 5 || i == 11) { c += hh[i]; hh[i] = c&M10; c >>= 10; } + else { c += hh[i]; hh[i] = c&M11; c >>= 11; } + } + + /* Calculate h' = h - (2^130 - 5). If h' >= 0 then t ends up 1; otherwise + * it's zero. + */ + for (i = 0, t = 5; i < 12; i++) { + t += hh[i]; + if (i == 5 || i == 11) { hi[i] = t&M10; t >>= 10; } + else { hi[i] = t&M11; t >>= 11; } + } + + /* Keep the subtraction result above if t or c is set. */ + m_sub = -(t | c); + for (i = 0; i < 12; i++) hh[i] = (hi[i]&m_sub) | (hh[i]&~m_sub); + + /* Add the mask onto the hash result. */ + for (i = 0, t = 0; i < 12; i++) { + t += hh[i] + ctx->u.p11.s[i]; + if (i == 5 || i == 11) { hh[i] = t&M10; t >>= 10; } + else { hh[i] = t&M11; t >>= 11; } + } + + /* Convert this mess back into bytes. We lose the top two bits, but that's + * fine. + */ + for (i = j = n = 0, a = 0; i < 16; i++) { + if (n < 8) { + a |= hh[j] << n; + n += (j == 5 || j == 11) ? 10 : 11; + j++; + } + p[i] = a&0xff; a >>= 8; n -= 8; + } + +#endif +} + +/*----- Test rig ----------------------------------------------------------*/ + +#ifdef TEST_RIG + +#include + +static int vrf_hash(dstr v[]) +{ + poly1305_key k; + poly1305_ctx ctx; + dstr t = DSTR_INIT; + unsigned i, j; + + if (v[0].len != 16) { fprintf(stderr, "bad key length\n"); exit(2); } + if (v[1].len != 16) { fprintf(stderr, "bad mask length\n"); exit(2); } + if (v[3].len != 16) { fprintf(stderr, "bad tag length\n"); exit(2); } + dstr_ensure(&t, 16); t.len = 16; + + poly1305_keyinit(&k, v[0].buf, v[0].len); + for (i = 0; i < v[2].len; i++) { + for (j = i; j < v[2].len; j++) { + poly1305_macinit(&ctx, &k, v[1].buf); + poly1305_hash(&ctx, v[2].buf, i); + poly1305_hash(&ctx, v[2].buf + i, j - i); + poly1305_hash(&ctx, v[2].buf + j, v[2].len - j); + poly1305_done(&ctx, t.buf); + if (memcmp(t.buf, v[3].buf, 16) != 0) { + fprintf(stderr, "failed..."); + fprintf(stderr, "\n\tkey = "); type_hex.dump(&v[0], stderr); + fprintf(stderr, "\n\tmask = "); type_hex.dump(&v[1], stderr); + fprintf(stderr, "\n\tmsg = "); type_hex.dump(&v[2], stderr); + fprintf(stderr, "\n\texp = "); type_hex.dump(&v[3], stderr); + fprintf(stderr, "\n\tcalc = "); type_hex.dump(&t, stderr); + fprintf(stderr, "\n\tsplits = 0 .. %u .. %u .. %lu\n", + i, j, (unsigned long)v[1].len); + return (0); + } + } + } + return (1); +} + +static int vrf_cat(dstr v[]) +{ + poly1305_key k; + poly1305_ctx ctx, cc[3]; + dstr t = DSTR_INIT; + unsigned i; + int ok = 1; + + if (v[0].len != 16) { fprintf(stderr, "bad key length\n"); exit(2); } + if (v[1].len != 16) { fprintf(stderr, "bad mask length\n"); exit(2); } + if (v[5].len != 16) { fprintf(stderr, "bad tag length\n"); exit(2); } + dstr_ensure(&t, 16); t.len = 16; + + poly1305_keyinit(&k, v[0].buf, v[0].len); + poly1305_macinit(&ctx, &k, v[1].buf); + for (i = 0; i < 3; i++) { + poly1305_macinit(&cc[i], &k, 0); + poly1305_hash(&cc[i], v[i + 2].buf, v[i + 2].len); + } + for (i = 0; i < 2; i++) { + if (!i) { + poly1305_concat(&ctx, &cc[1], &cc[2]); + poly1305_concat(&ctx, &cc[0], &ctx); + } else { + poly1305_concat(&ctx, &cc[0], &cc[1]); + poly1305_concat(&ctx, &ctx, &cc[2]); + } + poly1305_done(&ctx, t.buf); + if (memcmp(t.buf, v[5].buf, 16) != 0) { + fprintf(stderr, "failed..."); + fprintf(stderr, "\n\tkey = "); type_hex.dump(&v[0], stderr); + fprintf(stderr, "\n\tmask = "); type_hex.dump(&v[1], stderr); + fprintf(stderr, "\n\tmsg[0] = "); type_hex.dump(&v[2], stderr); + fprintf(stderr, "\n\tmsg[1] = "); type_hex.dump(&v[3], stderr); + fprintf(stderr, "\n\tmsg[2] = "); type_hex.dump(&v[4], stderr); + fprintf(stderr, "\n\texp = "); type_hex.dump(&v[5], stderr); + fprintf(stderr, "\n\tcalc = "); type_hex.dump(&t, stderr); + fprintf(stderr, "\n\tassoc = %s\n", + !i ? "msg[0] || (msg[1] || msg[2])" : + "(msg[0] || msg[1]) || msg[2]"); + ok = 0; + } + } + return (ok); +} + +static const struct test_chunk tests[] = { + { "poly1305-hash", vrf_hash, + { &type_hex, &type_hex, &type_hex, &type_hex } }, + { "poly1305-cat", vrf_cat, + { &type_hex, &type_hex, &type_hex, &type_hex, &type_hex, &type_hex } }, + { 0, 0, { 0 } } +}; + +int main(int argc, char *argv[]) +{ + test_run(argc, argv, tests, SRCDIR "/t/poly1305"); + return (0); +} + +#endif + +/*----- That's all, folks -------------------------------------------------*/ diff --git a/symm/poly1305.h b/symm/poly1305.h new file mode 100644 index 00000000..bcc81c29 --- /dev/null +++ b/symm/poly1305.h @@ -0,0 +1,225 @@ +/* -*-c-*- + * + * Poly1305 message authentication code + * + * (c) 2017 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Notes on Poly1305 -------------------------------------------------* + * + * The Poly1305 message authentication code was designed by Daniel Bernstein + * in 2004. It's a heavily performance-engineered Carter--Wegman MAC, based + * on polynomial evaluation in %$\F = \mathrm{GF}(2^{130} - 5)$%. Some of + * the performance engineering is out-of-date, being there to support + * implementation techniques which are no longer relevant, but it still runs + * very quickly. + * + * The key %$r$% is an element of %$\F$%. Messages are encoded as a sequence + * %$m_0, m_1, \ldots, m_{n-1}$% of of elements of %$\F$%. A raw hash is + * calculated as %$h_0 = \sum_{0\le i + +#ifndef CATACOMB_KEYSZ_H +# include "keysz.h" +#endif + +/*----- Constants ---------------------------------------------------------*/ + +extern const octet poly1305_keysz[]; + +#define POLY1305_BLKSZ 16u +#define POLY1305_KEYSZ 16u +#define POLY1305_MASKSZ 16u + +/*----- Data structures ---------------------------------------------------*/ + +typedef struct poly1305_key { + union { + struct { uint32 r0, r1, r2, r3, r4, rr1, rr2, rr3, rr4; } p26; + struct { uint16 r[12]; } p11; + } u; +} poly1305_key; + +typedef struct poly1305_ctx { + poly1305_key k; + union { + struct { uint32 s0, s1, s2, s3, s4; uint32 h[5]; } p26; + struct { uint16 s[12], h[12]; } p11; + } u; + unsigned long count; + unsigned nbuf; + octet buf[16]; +} poly1305_ctx; + +/*----- Functions provided ------------------------------------------------*/ + +/* --- @poly1305_keyinit@ --- * + * + * Arguments: @poly1305_key *key@ = key structure to fill in + * @const void *k@ = pointer to key material + * @size_t ksz@ = length of key (must be @POLY1305_KEYSZ == 16@) + * + * Returns: --- + * + * Use: Records a Poly1305 key and performs (minimal) + * precomputations. + */ + +extern void poly1305_keyinit(poly1305_key */*key*/, + const void */*k*/, size_t /*sz*/); + +/* --- @poly1305_macinit@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = MAC context to fill in + * @const poly1305_key *key@ = pointer to key structure to use + * @const void *iv@ = pointer to mask string + * + * Returns: --- + * + * Use: Initializes a MAC context for use. The key can be discarded + * at any time. + * + * It is permitted for @iv@ to be null, though it is not then + * possible to complete the MAC computation on @ctx@. The + * resulting context may still be useful, e.g., as an operand to + * @poly1305_concat@. + */ + +extern void poly1305_macinit(poly1305_ctx */*ctx*/, + const poly1305_key */*key*/, + const void */*iv*/); + +/* --- @poly1305_copy@ --- * + * + * Arguments: @poly1305_ctx *to@ = destination context + * @const poly1305_ctx *from@ = source context + * + * Returns: --- + * + * Use: Duplicates a Poly1305 MAC context. The destination need not + * have been initialized. Both contexts can be used + * independently afterwards. + */ + +extern void poly1305_copy(poly1305_ctx */*to*/, + const poly1305_ctx */*from*/); + +/* --- @poly1305_hash@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = MAC context to update + * @const void *p@ = pointer to message data + * @size_t sz@ = length of message data + * + * Returns: --- + * + * Use: Processes a chunk of message. The message pieces may have + * arbitrary lengths, and may be empty. + */ + +extern void poly1305_hash(poly1305_ctx */*ctx*/, + const void */*p*/, size_t /*sz*/); + +/* --- @poly1305_flush@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = MAC context to flush + * + * Returns: --- + * + * Use: Forces any buffered message data in the context to be + * processed. This has no effect if the message processed so + * far is a whole number of blocks. Flushing is performed + * automatically by @poly1305_done@, but it may be necessary to + * force it by hand when using @poly1305_concat@. + * + * Flushing a partial block has an observable effect on the + * computation: the resulting state is (with high probability) + * dissimilar to any state reachable with a message which is a + * whole number of blocks long. + */ + +extern void poly1305_flush(poly1305_ctx */*ctx*/); + +/* --- @poly1305_concat@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = destination context + * @const poly1305_ctx *prefix, *suffix@ = two operand contexts + * + * Returns: --- + * + * Use: The two operand contexts @prefix@ and @suffix@ represent + * processing of two messages %$m$% and %$m'$%; the effect is to + * set @ctx@ to the state corresponding to their concatenation + * %$m \cat m'$%. + * + * All three contexts must have been initialized using the same + * key value (though not necessarily from the same key + * structure). The mask values associated with the input + * contexts are irrelevant. The @prefix@ message %$m$% must be + * a whole number of blocks long: this can be arranged by + * flushing the context. The @suffix@ message need not be a + * whole number of blocks long. All of the contexts remain + * operational and can be used independently afterwards. + */ + +extern void poly1305_concat(poly1305_ctx */*ctx*/, + const poly1305_ctx */*prefix*/, + const poly1305_ctx */*suffix*/); + +/* --- @poly1305_done@ --- * + * + * Arguments: @poly1305_ctx *ctx@ = MAC context to finish + * @void *h@ = buffer to write the tag to + * + * Returns: --- + * + * Use: Completes a Poly1305 MAC tag computation. + */ + +extern void poly1305_done(poly1305_ctx */*ctx*/, void */*h*/); + +/*----- That's all, folks -------------------------------------------------*/ + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/symm/t/poly1305 b/symm/t/poly1305 new file mode 100644 index 00000000..6f743966 --- /dev/null +++ b/symm/t/poly1305 @@ -0,0 +1,61 @@ +poly1305-hash { + ## This one's from Daniel J. Bernstein, `Cryptography in NaCL', 2009-03-10, + ## https://cr.yp.to/highspeed/naclcrypto-20090310.pdf + eea6a7251c1e72916d11c2cb214d3c25 2539121d8e234e652d651fa4c8cff880 + 8e993b9f48681273c29650ba32fc76ce48332ea7164d96a4476fb8c531a1186ac0dfc17c98dce87b4da7f011ec48c97271d2c20f9b928fe2270d6fb863d51738b48eeee314a7cc8ab932164548e526ae90224368517acfeabd6bb3732bc0e9da99832b61ca01b6de56244a9e88d5f9b37973f622a43d14a6599b1f654cb45a74e355a5 + f3ffc7703f9400e52a7dfb4b3d3305d9; + + ## Test vectors from RFC7539. + 85d6be7857556d337f4452fe42d506a8 0103808afb0db2fd4abff6af4149f51b + 43727970746f6772617068696320466f72756d2052657365617263682047726f7570 + a8061dc1305136c6c22b8baf0c0127a9; + 00000000000000000000000000000000 00000000000000000000000000000000 + 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + 00000000000000000000000000000000; + 00000000000000000000000000000000 36e5f6b5c5e06070f0efca96227a863e + 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f + 36e5f6b5c5e06070f0efca96227a863e; + 36e5f6b5c5e06070f0efca96227a863e 00000000000000000000000000000000 + 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f + f3477e7cd95417af89a6b8794c310cf0; + 1c9240a5eb55d38af333888604f6b5f0 473917c1402b80099dca5cbc207075c0 + 2754776173206272696c6c69672c20616e642074686520736c6974687920746f7665730a446964206779726520616e642067696d626c6520696e2074686520776162653a0a416c6c206d696d737920776572652074686520626f726f676f7665732c0a416e6420746865206d6f6d65207261746873206f757467726162652e + 4541669a7eaaee61e708dc7cbcc5eb62; + 02000000000000000000000000000000 00000000000000000000000000000000 + ffffffffffffffffffffffffffffffff + 03000000000000000000000000000000; + 02000000000000000000000000000000 ffffffffffffffffffffffffffffffff + 02000000000000000000000000000000 + 03000000000000000000000000000000; + 01000000000000000000000000000000 00000000000000000000000000000000 + fffffffffffffffffffffffffffffffff0ffffffffffffffffffffffffffffff11000000000000000000000000000000 + 05000000000000000000000000000000; + 01000000000000000000000000000000 00000000000000000000000000000000 + fffffffffffffffffffffffffffffffffbfefefefefefefefefefefefefefefe01010101010101010101010101010101 + 00000000000000000000000000000000; + 02000000000000000000000000000000 00000000000000000000000000000000 + fdffffffffffffffffffffffffffffff + faffffffffffffffffffffffffffffff; + 01000000000000000400000000000000 00000000000000000000000000000000 + e33594d7505e43b900000000000000003394d7505e4379cd01000000000000000000000000000000000000000000000001000000000000000000000000000000 + 14000000000000005500000000000000; + 01000000000000000400000000000000 00000000000000000000000000000000 + e33594d7505e43b900000000000000003394d7505e4379cd010000000000000000000000000000000000000000000000 + 13000000000000000000000000000000; + + ## This is a test of reduction which I constructed by hand. + ab59ca0d7cb5fb09b8065a01f03f310a 00000000000000000000000000000000 + 8daf7e633cf2fc399143a09fd109aa4d + 04000000000000000000000000000000; +} + +poly1305-cat { + 36e5f6b5c5e06070f0efca96227a863e 00000000000000000000000000000000 + 416e79207375626d697373696f6e2074 6f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c 207768696368206172652061646472657373656420746f + f3477e7cd95417af89a6b8794c310cf0; + eea6a7251c1e72916d11c2cb214d3c25 2539121d8e234e652d651fa4c8cff880 + 8e993b9f48681273c29650ba32fc76ce48332ea7164d96a4476fb8c531a1186ac0dfc17c98dce87b4da7f011ec48c97271d2c20f9b928fe2270d6fb863d51738 + b48eeee314a7cc8ab932164548e526ae90224368517acfeabd6bb3732bc0e9da99832b61ca01b6de56244a9e88d5f9b3 + 7973f622a43d14a6599b1f654cb45a74e355a5 + f3ffc7703f9400e52a7dfb4b3d3305d9; +} -- 2.11.0