X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/ee39a683a2b623a1da0747ec20f20b63470a2db6..HEAD:/math/f25519.c diff --git a/math/f25519.c b/math/f25519.c index 220a73c8..a0f40fa0 100644 --- a/math/f25519.c +++ b/math/f25519.c @@ -29,17 +29,20 @@ #include "config.h" +#include "ct.h" #include "f25519.h" /*----- Basic setup -------------------------------------------------------*/ +typedef f25519_piece piece; + #if F25519_IMPL == 26 /* Elements x of GF(2^255 - 19) are represented by ten signed integers x_i: x * = SUM_{0<=i<10} x_i 2^ceil(51i/2), mostly following Bernstein's original * paper. */ -typedef int32 piece; typedef int64 dblpiece; + typedef int64 dblpiece; typedef uint32 upiece; typedef uint64 udblpiece; #define P p26 #define PIECEWD(i) ((i)%2 ? 25 : 26) @@ -47,7 +50,6 @@ typedef uint32 upiece; typedef uint64 udblpiece; #define M26 0x03ffffffu #define M25 0x01ffffffu -#define B26 0x04000000u #define B25 0x02000000u #define B24 0x01000000u @@ -73,18 +75,17 @@ typedef uint32 upiece; typedef uint64 udblpiece; * except for pieces 5, 10, 15, 20, and 25 which have 9 bits. */ -typedef int16 piece; typedef int32 dblpiece; + typedef int32 dblpiece; typedef uint16 upiece; typedef uint32 udblpiece; #define P p10 #define PIECEWD(i) \ ((i) == 5 || (i) == 10 || (i) == 15 || (i) == 20 || (i) == 25 ? 9 : 10) #define NPIECE 26 -#define B10 0x0400 -#define B9 0x200 -#define B8 0x100 #define M10 0x3ff #define M9 0x1ff +#define B9 0x200 +#define B8 0x100 #endif @@ -182,18 +183,18 @@ void f25519_load(f25519 *z, const octet xv[32]) * and lower bounds are achievable. * * All of the x_i at this point are positive, so we don't need to do - * anything wierd when masking them. + * anything weird when masking them. */ - b = x9&B24; c = 19&((b >> 19) - (b >> 24)); x9 -= b << 1; - b = x8&B25; x9 += b >> 25; x8 -= b << 1; - b = x7&B24; x8 += b >> 24; x7 -= b << 1; - b = x6&B25; x7 += b >> 25; x6 -= b << 1; - b = x5&B24; x6 += b >> 24; x5 -= b << 1; - b = x4&B25; x5 += b >> 25; x4 -= b << 1; - b = x3&B24; x4 += b >> 24; x3 -= b << 1; - b = x2&B25; x3 += b >> 25; x2 -= b << 1; - b = x1&B24; x2 += b >> 24; x1 -= b << 1; - b = x0&B25; x1 += (b >> 25) + (x0 >> 26); x0 = (x0&M26) - (b << 1); + b = x9&B24; c = 19&((b >> 19) - (b >> 24)); x9 -= b << 1; + b = x8&B25; x9 += b >> 25; x8 -= b << 1; + b = x7&B24; x8 += b >> 24; x7 -= b << 1; + b = x6&B25; x7 += b >> 25; x6 -= b << 1; + b = x5&B24; x6 += b >> 24; x5 -= b << 1; + b = x4&B25; x5 += b >> 25; x4 -= b << 1; + b = x3&B24; x4 += b >> 24; x3 -= b << 1; + b = x2&B25; x3 += b >> 25; x2 -= b << 1; + b = x1&B24; x2 += b >> 24; x1 -= b << 1; + b = x0&B25; x1 += (b >> 25) + (x0 >> 26); x0 = (x0&M26) - (b << 1); x0 += c; /* And with that, we're done. */ @@ -498,8 +499,114 @@ void f25519_sub(f25519 *z, const f25519 *x, const f25519 *y) #endif } +/* --- @f25519_neg@ --- * + * + * Arguments: @f25519 *z@ = where to put the result (may alias @x@) + * @const f25519 *x@ = an operand + * + * Returns: --- + * + * Use: Set @z = -x@. + */ + +void f25519_neg(f25519 *z, const f25519 *x) +{ +#if F25519_IMPL == 26 + z->P[0] = -x->P[0]; z->P[1] = -x->P[1]; + z->P[2] = -x->P[2]; z->P[3] = -x->P[3]; + z->P[4] = -x->P[4]; z->P[5] = -x->P[5]; + z->P[6] = -x->P[6]; z->P[7] = -x->P[7]; + z->P[8] = -x->P[8]; z->P[9] = -x->P[9]; +#elif F25519_IMPL == 10 + unsigned i; + for (i = 0; i < NPIECE; i++) z->P[i] = -x->P[i]; +#endif +} + /*----- Constant-time utilities -------------------------------------------*/ +/* --- @f25519_pick2@ --- * + * + * Arguments: @f25519 *z@ = where to put the result (may alias @x@ or @y@) + * @const f25519 *x, *y@ = two operands + * @uint32 m@ = a mask + * + * Returns: --- + * + * Use: If @m@ is zero, set @z = y@; if @m@ is all-bits-set, then set + * @z = x@. If @m@ has some other value, then scramble @z@ in + * an unhelpful way. + */ + +void f25519_pick2(f25519 *z, const f25519 *x, const f25519 *y, uint32 m) +{ + mask32 mm = FIX_MASK32(m); + +#if F25519_IMPL == 26 + z->P[0] = PICK2(x->P[0], y->P[0], mm); + z->P[1] = PICK2(x->P[1], y->P[1], mm); + z->P[2] = PICK2(x->P[2], y->P[2], mm); + z->P[3] = PICK2(x->P[3], y->P[3], mm); + z->P[4] = PICK2(x->P[4], y->P[4], mm); + z->P[5] = PICK2(x->P[5], y->P[5], mm); + z->P[6] = PICK2(x->P[6], y->P[6], mm); + z->P[7] = PICK2(x->P[7], y->P[7], mm); + z->P[8] = PICK2(x->P[8], y->P[8], mm); + z->P[9] = PICK2(x->P[9], y->P[9], mm); +#elif F25519_IMPL == 10 + unsigned i; + for (i = 0; i < NPIECE; i++) z->P[i] = PICK2(x->P[i], y->P[i], mm); +#endif +} + +/* --- @f25519_pickn@ --- * + * + * Arguments: @f25519 *z@ = where to put the result + * @const f25519 *v@ = a table of entries + * @size_t n@ = the number of entries in @v@ + * @size_t i@ = an index + * + * Returns: --- + * + * Use: If @0 <= i < n < 32@ then set @z = v[i]@. If @n >= 32@ then + * do something unhelpful; otherwise, if @i >= n@ then set @z@ + * to zero. + */ + +void f25519_pickn(f25519 *z, const f25519 *v, size_t n, size_t i) +{ + uint32 b = (uint32)1 << (31 - i); + mask32 m; + +#if F25519_IMPL == 26 + z->P[0] = z->P[1] = z->P[2] = z->P[3] = z->P[4] = + z->P[5] = z->P[6] = z->P[7] = z->P[8] = z->P[9] = 0; + while (n--) { + m = SIGN(b); + CONDPICK(z->P[0], v->P[0], m); + CONDPICK(z->P[1], v->P[1], m); + CONDPICK(z->P[2], v->P[2], m); + CONDPICK(z->P[3], v->P[3], m); + CONDPICK(z->P[4], v->P[4], m); + CONDPICK(z->P[5], v->P[5], m); + CONDPICK(z->P[6], v->P[6], m); + CONDPICK(z->P[7], v->P[7], m); + CONDPICK(z->P[8], v->P[8], m); + CONDPICK(z->P[9], v->P[9], m); + v++; b <<= 1; + } +#elif F25519_IMPL == 10 + unsigned j; + + for (j = 0; j < NPIECE; j++) z->P[j] = 0; + while (n--) { + m = SIGN(b); + for (j = 0; j < NPIECE; j++) CONDPICK(z->P[j], v->P[j], m); + v++; b <<= 1; + } +#endif +} + /* --- @f25519_condswap@ --- * * * Arguments: @f25519 *x, *y@ = two operands @@ -533,6 +640,49 @@ void f25519_condswap(f25519 *x, f25519 *y, uint32 m) #endif } +/* --- @f25519_condneg@ --- * + * + * Arguments: @f25519 *z@ = where to put the result (may alias @x@) + * @const f25519 *x@ = an operand + * @uint32 m@ = a mask + * + * Returns: --- + * + * Use: If @m@ is zero, set @z = x@; if @m@ is all-bits-set, then set + * @z = -x@. If @m@ has some other value then scramble @z@ in + * an unhelpful way. + */ + +void f25519_condneg(f25519 *z, const f25519 *x, uint32 m) +{ +#ifdef NEG_TWOC + mask32 m_xor = FIX_MASK32(m); + piece m_add = m&1; +# define CONDNEG(x) (((x) ^ m_xor) + m_add) +#else + int s = PICK2(-1, +1, m); +# define CONDNEG(x) (s*(x)) +#endif + +#if F25519_IMPL == 26 + z->P[0] = CONDNEG(x->P[0]); + z->P[1] = CONDNEG(x->P[1]); + z->P[2] = CONDNEG(x->P[2]); + z->P[3] = CONDNEG(x->P[3]); + z->P[4] = CONDNEG(x->P[4]); + z->P[5] = CONDNEG(x->P[5]); + z->P[6] = CONDNEG(x->P[6]); + z->P[7] = CONDNEG(x->P[7]); + z->P[8] = CONDNEG(x->P[8]); + z->P[9] = CONDNEG(x->P[9]); +#elif F25519_IMPL == 10 + unsigned i; + for (i = 0; i < NPIECE; i++) z->P[i] = CONDNEG(x->P[i]); +#endif + +#undef CONDNEG +} + /*----- Multiplication ----------------------------------------------------*/ #if F25519_IMPL == 26 @@ -606,7 +756,7 @@ static void carry_reduce(dblpiece x[NPIECE]) * signed. */ - /*For each piece, we bias it so that floor division (as done by an + /* For each piece, we bias it so that floor division (as done by an * arithmetic right shift) and modulus (as done by bitwise-AND) does the * right thing. */ @@ -913,11 +1063,124 @@ void f25519_inv(f25519 *z, const f25519 *x) #undef SQRN } +/* --- @f25519_quosqrt@ --- * + * + * Arguments: @f25519 *z@ = where to put the result (may alias @x@ or @y@) + * @const f25519 *x, *y@ = two operands + * + * Returns: Zero if successful, @-1@ if %$x/y$% is not a square. + * + * Use: Stores in @z@ the one of the square roots %$\pm\sqrt{x/y}$%. + * If %$x = y = 0% then the result is zero; if %$y = 0$% but %$x + * \ne 0$% then the operation fails. If you wanted a specific + * square root then you'll have to pick it yourself. + */ + +static const piece sqrtm1_pieces[NPIECE] = { +#if F25519_IMPL == 26 + -32595792, -7943725, 9377950, 3500415, 12389472, + -272473, -25146209, -2005654, 326686, 11406482 +#elif F25519_IMPL == 10 + 176, -88, 161, 157, -485, -196, -231, -220, -416, + -169, -255, 50, 189, -89, -266, -32, 202, -511, + 423, 357, 248, -249, 80, 288, 50, 174 +#endif +}; +#define SQRTM1 ((const f25519 *)sqrtm1_pieces) + +int f25519_quosqrt(f25519 *z, const f25519 *x, const f25519 *y) +{ + f25519 t, u, v, w, t15; + octet xb[32], b0[32], b1[32]; + int32 rc = -1; + mask32 m; + unsigned i; + +#define SQRN(z, x, n) do { \ + f25519_sqr((z), (x)); \ + for (i = 1; i < (n); i++) f25519_sqr((z), (z)); \ +} while (0) + + /* This is a bit tricky; the algorithm is loosely based on Bernstein, Duif, + * Lange, Schwabe, and Yang, `High-speed high-security signatures', + * 2011-09-26, https://ed25519.cr.yp.to/ed25519-20110926.pdf. + */ + f25519_mul(&v, x, y); + + /* Now for an addition chain. */ /* step | value */ + f25519_sqr(&u, &v); /* 1 | 2 */ + f25519_mul(&t, &u, &v); /* 2 | 3 */ + SQRN(&u, &t, 2); /* 4 | 12 */ + f25519_mul(&t15, &u, &t); /* 5 | 15 */ + f25519_sqr(&u, &t15); /* 6 | 30 */ + f25519_mul(&t, &u, &v); /* 7 | 31 = 2^5 - 1 */ + SQRN(&u, &t, 5); /* 12 | 2^10 - 2^5 */ + f25519_mul(&t, &u, &t); /* 13 | 2^10 - 1 */ + SQRN(&u, &t, 10); /* 23 | 2^20 - 2^10 */ + f25519_mul(&u, &u, &t); /* 24 | 2^20 - 1 */ + SQRN(&u, &u, 10); /* 34 | 2^30 - 2^10 */ + f25519_mul(&t, &u, &t); /* 35 | 2^30 - 1 */ + f25519_sqr(&u, &t); /* 36 | 2^31 - 2 */ + f25519_mul(&t, &u, &v); /* 37 | 2^31 - 1 */ + SQRN(&u, &t, 31); /* 68 | 2^62 - 2^31 */ + f25519_mul(&t, &u, &t); /* 69 | 2^62 - 1 */ + SQRN(&u, &t, 62); /* 131 | 2^124 - 2^62 */ + f25519_mul(&t, &u, &t); /* 132 | 2^124 - 1 */ + SQRN(&u, &t, 124); /* 256 | 2^248 - 2^124 */ + f25519_mul(&t, &u, &t); /* 257 | 2^248 - 1 */ + f25519_sqr(&u, &t); /* 258 | 2^249 - 2 */ + f25519_mul(&t, &u, &v); /* 259 | 2^249 - 1 */ + SQRN(&t, &t, 3); /* 262 | 2^252 - 8 */ + f25519_sqr(&u, &t); /* 263 | 2^253 - 16 */ + f25519_mul(&t, &u, &t); /* 264 | 3*2^252 - 24 */ + f25519_mul(&t, &t, &t15); /* 265 | 3*2^252 - 9 */ + f25519_mul(&w, &t, &v); /* 266 | 3*2^252 - 8 */ + + /* Awesome. Now let me explain. Let v be a square in GF(p), and let w = + * v^(3*2^252 - 8). In particular, let's consider + * + * v^2 w^4 = v^2 v^{3*2^254 - 32} = (v^{2^254 - 10})^3 + * + * But 2^254 - 10 = ((2^255 - 19) - 1)/2 = (p - 1)/2. Since v is a square, + * it has order dividing (p - 1)/2, and therefore v^2 w^4 = 1 and + * + * w^4 = 1/v^2 + * + * That in turn implies that w^2 = ±1/v. Now, recall that v = x y, and let + * w' = w x. Then w'^2 = ±x^2/v = ±x/y. If y w'^2 = x then we set + * z = w', since we have z^2 = x/y; otherwise let z = i w', where i^2 = -1, + * so z^2 = -w^2 = x/y, and we're done. + * + * The easiest way to compare is to encode. This isn't as wasteful as it + * sounds: the hard part is normalizing the representations, which we have + * to do anyway. + */ + f25519_mul(&w, &w, x); + f25519_sqr(&t, &w); + f25519_mul(&t, &t, y); + f25519_neg(&u, &t); + f25519_store(xb, x); + f25519_store(b0, &t); + f25519_store(b1, &u); + f25519_mul(&u, &w, SQRTM1); + + m = -ct_memeq(b0, xb, 32); + rc = PICK2(0, rc, m); + f25519_pick2(z, &w, &u, m); + m = -ct_memeq(b1, xb, 32); + rc = PICK2(0, rc, m); + + /* And we're done. */ + return (rc); +} + /*----- Test rig ----------------------------------------------------------*/ #ifdef TEST_RIG +#include #include +#include #include static void fixdstr(dstr *d) @@ -956,7 +1219,7 @@ static void dump_f25519_ref(dstr *d, FILE *fp) } static int eq(const f25519 *x, dstr *d) - { octet b[32]; f25519_store(b, x); return (memcmp(b, d->buf, 32) == 0); } + { octet b[32]; f25519_store(b, x); return (MEMCMP(b, ==, d->buf, 32)); } static const test_type type_f25519 = { cvt_f25519, dump_f25519 }, @@ -982,6 +1245,7 @@ static const test_type return (ok); \ } +TEST_UNOP(neg) TEST_UNOP(sqr) TEST_UNOP(inv) @@ -1031,6 +1295,79 @@ static int vrf_mulc(dstr dv[]) return (ok); } +static int vrf_condneg(dstr dv[]) +{ + f25519 *x = (f25519 *)dv[0].buf; + uint32 m = *(uint32 *)dv[1].buf; + f25519 z; + int ok = 1; + + f25519_condneg(&z, x, m); + if (!eq(&z, &dv[2])) { + ok = 0; + fprintf(stderr, "failed!\n"); + fdump(stderr, "x", x->P); + fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m); + fdump(stderr, "calc z", z.P); + f25519_load(&z, (const octet *)dv[1].buf); + fdump(stderr, "want z", z.P); + } + + return (ok); +} + +static int vrf_pick2(dstr dv[]) +{ + f25519 *x = (f25519 *)dv[0].buf, *y = (f25519 *)dv[1].buf; + uint32 m = *(uint32 *)dv[2].buf; + f25519 z; + int ok = 1; + + f25519_pick2(&z, x, y, m); + if (!eq(&z, &dv[3])) { + ok = 0; + fprintf(stderr, "failed!\n"); + fdump(stderr, "x", x->P); + fdump(stderr, "y", y->P); + fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m); + fdump(stderr, "calc z", z.P); + f25519_load(&z, (const octet *)dv[3].buf); + fdump(stderr, "want z", z.P); + } + + return (ok); +} + +static int vrf_pickn(dstr dv[]) +{ + dstr d = DSTR_INIT; + f25519 v[32], z; + size_t i = *(uint32 *)dv[1].buf, j, n; + const char *p; + char *q; + int ok = 1; + + for (q = dv[0].buf, n = 0; (p = str_qword(&q, 0)) != 0; n++) + { cvt_f25519(p, &d); v[n] = *(f25519 *)d.buf; } + + f25519_pickn(&z, v, n, i); + if (!eq(&z, &dv[2])) { + ok = 0; + fprintf(stderr, "failed!\n"); + for (j = 0; j < n; j++) { + fprintf(stderr, "v[%2u]", (unsigned)j); + fdump(stderr, "", v[j].P); + } + fprintf(stderr, "i = %u\n", (unsigned)i); + fdump(stderr, "calc z", z.P); + f25519_load(&z, (const octet *)dv[2].buf); + fdump(stderr, "want z", z.P); + } + + dstr_destroy(&d); + return (ok); +} + static int vrf_condswap(dstr dv[]) { f25519 *x = (f25519 *)dv[0].buf, *y = (f25519 *)dv[1].buf; @@ -1056,6 +1393,35 @@ static int vrf_condswap(dstr dv[]) return (ok); } +static int vrf_quosqrt(dstr dv[]) +{ + f25519 *x = (f25519 *)dv[0].buf, *y = (f25519 *)dv[1].buf; + f25519 z, zz; + int rc; + int ok = 1; + + if (dv[2].len) { fixdstr(&dv[2]); fixdstr(&dv[3]); } + rc = f25519_quosqrt(&z, x, y); + if (!dv[2].len ? !rc : (rc || (!eq(&z, &dv[2]) && !eq(&z, &dv[3])))) { + ok = 0; + fprintf(stderr, "failed!\n"); + fdump(stderr, "x", x->P); + fdump(stderr, "y", y->P); + if (rc) fprintf(stderr, "calc: FAIL\n"); + else fdump(stderr, "calc", z.P); + if (!dv[2].len) + fprintf(stderr, "exp: FAIL\n"); + else { + f25519_load(&zz, (const octet *)dv[2].buf); + fdump(stderr, "z", zz.P); + f25519_load(&zz, (const octet *)dv[3].buf); + fdump(stderr, "z'", zz.P); + } + } + + return (ok); +} + static int vrf_sub_mulc_add_sub_mul(dstr dv[]) { f25519 *u = (f25519 *)dv[0].buf, *v = (f25519 *)dv[1].buf, @@ -1093,13 +1459,22 @@ static int vrf_sub_mulc_add_sub_mul(dstr dv[]) static test_chunk tests[] = { { "add", vrf_add, { &type_f25519, &type_f25519, &type_f25519_ref } }, { "sub", vrf_sub, { &type_f25519, &type_f25519, &type_f25519_ref } }, + { "neg", vrf_neg, { &type_f25519, &type_f25519_ref } }, + { "condneg", vrf_condneg, + { &type_f25519, &type_uint32, &type_f25519_ref } }, { "mul", vrf_mul, { &type_f25519, &type_f25519, &type_f25519_ref } }, { "mulconst", vrf_mulc, { &type_f25519, &type_long, &type_f25519_ref } }, + { "pick2", vrf_pick2, + { &type_f25519, &type_f25519, &type_uint32, &type_f25519_ref } }, + { "pickn", vrf_pickn, + { &type_string, &type_uint32, &type_f25519_ref } }, { "condswap", vrf_condswap, { &type_f25519, &type_f25519, &type_uint32, &type_f25519_ref, &type_f25519_ref } }, { "sqr", vrf_sqr, { &type_f25519, &type_f25519_ref } }, { "inv", vrf_inv, { &type_f25519, &type_f25519_ref } }, + { "quosqrt", vrf_quosqrt, + { &type_f25519, &type_f25519, &type_hex, &type_hex } }, { "sub-mulc-add-sub-mul", vrf_sub_mulc_add_sub_mul, { &type_f25519, &type_f25519, &type_long, &type_f25519, &type_f25519, &type_f25519, &type_f25519_ref } },