X-Git-Url: https://git.distorted.org.uk/~mdw/secnet/blobdiff_plain/137d3b36ce3d121855364d3a28383ab86fbb9d7b..ae8cd973b817f81c075ab20e84d3239125146f24:/fgoldi.c diff --git a/fgoldi.c b/fgoldi.c deleted file mode 100644 index 6a8d35b..0000000 --- a/fgoldi.c +++ /dev/null @@ -1,606 +0,0 @@ -/* - * fgoldi.c: arithmetic modulo 2^448 - 2^224 - 1 - */ -/* - * This file is Free Software. It has been modified to as part of its - * incorporation into secnet. - * - * Copyright 2017 Mark Wooding - * - * You may redistribute this file and/or modify it under the terms of - * the permissive licence shown below. - * - * You may redistribute secnet as a whole and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3, or (at your option) any - * later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see - * https://www.gnu.org/licenses/gpl.html. - */ -/* - * Imported from Catacomb, and modified for Secnet (2017-04-30): - * - * * Use `fake-mLib-bits.h' in place of the real . - * - * * Remove the 16/32-bit implementation, since C99 always has 64-bit - * arithmetic. - * - * * Remove the test rig code: a replacement is in a separate source file. - * - * The file's original comment headers are preserved below. - */ -/* -*-c-*- - * - * Arithmetic in the Goldilocks field GF(2^448 - 2^224 - 1) - * - * (c) 2017 Straylight/Edgeware - */ - -/*----- Licensing notice --------------------------------------------------* - * - * This file is part of Catacomb. - * - * Catacomb is free software; you can redistribute it and/or modify - * it under the terms of the GNU Library General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * Catacomb is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with Catacomb; if not, write to the Free - * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - * MA 02111-1307, USA. - */ - -/*----- Header files ------------------------------------------------------*/ - -#include "fgoldi.h" - -/*----- Basic setup -------------------------------------------------------* - * - * Let φ = 2^224; then p = φ^2 - φ - 1, and, in GF(p), we have φ^2 = φ + 1 - * (hence the name). - */ - -/* We represent an element of GF(p) as 16 28-bit signed integer pieces x_i: - * x = SUM_{0<=i<16} x_i 2^(28i). - */ - -typedef int32 piece; typedef int64 dblpiece; -typedef uint32 upiece; typedef uint64 udblpiece; -#define PIECEWD(i) 28 -#define NPIECE 16 -#define P p28 - -#define B28 0x10000000u -#define B27 0x08000000u -#define M28 0x0fffffffu -#define M27 0x07ffffffu -#define M32 0xffffffffu - -/*----- Debugging machinery -----------------------------------------------*/ - -#if defined(FGOLDI_DEBUG) - -#include - -#include "mp.h" -#include "mptext.h" - -static mp *get_pgoldi(void) -{ - mp *p = MP_NEW, *t = MP_NEW; - - p = mp_setbit(p, MP_ZERO, 448); - t = mp_setbit(t, MP_ZERO, 224); - p = mp_sub(p, p, t); - p = mp_sub(p, p, MP_ONE); - mp_drop(t); - return (p); -} - -DEF_FDUMP(fdump, piece, PIECEWD, NPIECE, 56, get_pgoldi()) - -#endif - -/*----- Loading and storing -----------------------------------------------*/ - -/* --- @fgoldi_load@ --- * - * - * Arguments: @fgoldi *z@ = where to store the result - * @const octet xv[56]@ = source to read - * - * Returns: --- - * - * Use: Reads an element of %$\gf{2^{448} - 2^{224} - 19}$% in - * external representation from @xv@ and stores it in @z@. - * - * External representation is little-endian base-256. Some - * elements have multiple encodings, which are not produced by - * correct software; use of noncanonical encodings is not an - * error, and toleration of them is considered a performance - * feature. - */ - -void fgoldi_load(fgoldi *z, const octet xv[56]) -{ - unsigned i; - uint32 xw[14]; - piece b, c; - - /* First, read the input value as words. */ - for (i = 0; i < 14; i++) xw[i] = LOAD32_L(xv + 4*i); - - /* Extract unsigned 28-bit pieces from the words. */ - z->P[ 0] = (xw[ 0] >> 0)&M28; - z->P[ 7] = (xw[ 6] >> 4)&M28; - z->P[ 8] = (xw[ 7] >> 0)&M28; - z->P[15] = (xw[13] >> 4)&M28; - for (i = 1; i < 7; i++) { - z->P[i + 0] = ((xw[i + 0] << (4*i)) | (xw[i - 1] >> (32 - 4*i)))&M28; - z->P[i + 8] = ((xw[i + 7] << (4*i)) | (xw[i + 6] >> (32 - 4*i)))&M28; - } - - /* Convert the nonnegative pieces into a balanced signed representation, so - * each piece ends up in the interval |z_i| <= 2^27. For each piece, if - * its top bit is set, lend a bit leftwards; in the case of z_15, reduce - * this bit by adding it onto z_0 and z_8, since this is the φ^2 bit, and - * φ^2 = φ + 1. We delay this carry until after all of the pieces have - * been balanced. If we don't do this, then we have to do a more expensive - * test for nonzeroness to decide whether to lend a bit leftwards rather - * than just testing a single bit. - * - * Note that we don't try for a canonical representation here: both upper - * and lower bounds are achievable. - */ - b = z->P[15]&B27; z->P[15] -= b << 1; c = b >> 27; - for (i = NPIECE - 1; i--; ) - { b = z->P[i]&B27; z->P[i] -= b << 1; z->P[i + 1] += b >> 27; } - z->P[0] += c; z->P[8] += c; -} - -/* --- @fgoldi_store@ --- * - * - * Arguments: @octet zv[56]@ = where to write the result - * @const fgoldi *x@ = the field element to write - * - * Returns: --- - * - * Use: Stores a field element in the given octet vector in external - * representation. A canonical encoding is always stored. - */ - -void fgoldi_store(octet zv[56], const fgoldi *x) -{ - piece y[NPIECE], yy[NPIECE], c, d; - uint32 u, v; - mask32 m; - unsigned i; - - for (i = 0; i < NPIECE; i++) y[i] = x->P[i]; - - /* First, propagate the carries. By the end of this, we'll have all of the - * the pieces canonically sized and positive, and maybe there'll be - * (signed) carry out. The carry c is in { -1, 0, +1 }, and the remaining - * value will be in the half-open interval [0, φ^2). The whole represented - * value is then y + φ^2 c. - * - * Assume that we start out with |y_i| <= 2^30. We start off by cutting - * off and reducing the carry c_15 from the topmost piece, y_15. This - * leaves 0 <= y_15 < 2^28; and we'll have |c_15| <= 4. We'll add this - * onto y_0 and y_8, and propagate the carries. It's very clear that we'll - * end up with |y + (φ + 1) c_15 - φ^2/2| << φ^2. - * - * Here, the y_i are signed, so we must be cautious about bithacking them. - */ - c = ASR(piece, y[15], 28); y[15] = (upiece)y[15]&M28; y[8] += c; - for (i = 0; i < NPIECE; i++) - { y[i] += c; c = ASR(piece, y[i], 28); y[i] = (upiece)y[i]&M28; } - - /* Now we have a slightly fiddly job to do. If c = +1, or if c = 0 and - * y >= p, then we should subtract p from the whole value; if c = -1 then - * we should add p; and otherwise we should do nothing. - * - * But conditional behaviour is bad, m'kay. So here's what we do instead. - * - * The first job is to sort out what we wanted to do. If c = -1 then we - * want to (a) invert the constant addend and (b) feed in a carry-in; - * otherwise, we don't. - */ - m = SIGN(c)&M28; - d = m&1; - - /* Now do the addition/subtraction. Remember that all of the y_i are - * nonnegative, so shifting and masking are safe and easy. - */ - d += y[0] + (1 ^ m); yy[0] = d&M28; d >>= 28; - for (i = 1; i < 8; i++) - { d += y[i] + m; yy[i] = d&M28; d >>= 28; } - d += y[8] + (1 ^ m); yy[8] = d&M28; d >>= 28; - for (i = 9; i < 16; i++) - { d += y[i] + m; yy[i] = d&M28; d >>= 28; } - - /* The final carry-out is in d; since we only did addition, and the y_i are - * nonnegative, then d is in { 0, 1 }. We want to keep y', rather than y, - * if (a) c /= 0 (in which case we know that the old value was - * unsatisfactory), or (b) if d = 1 (in which case, if c = 0, we know that - * the subtraction didn't cause a borrow, so we must be in the case where - * p <= y < φ^2. - */ - m = NONZEROP(c) | ~NONZEROP(d - 1); - for (i = 0; i < NPIECE; i++) y[i] = (yy[i]&m) | (y[i]&~m); - - /* Extract 32-bit words from the value. */ - for (i = 0; i < 7; i++) { - u = ((y[i + 0] >> (4*i)) | ((uint32)y[i + 1] << (28 - 4*i)))&M32; - v = ((y[i + 8] >> (4*i)) | ((uint32)y[i + 9] << (28 - 4*i)))&M32; - STORE32_L(zv + 4*i, u); - STORE32_L(zv + 4*i + 28, v); - } -} - -/* --- @fgoldi_set@ --- * - * - * Arguments: @fgoldi *z@ = where to write the result - * @int a@ = a small-ish constant - * - * Returns: --- - * - * Use: Sets @z@ to equal @a@. - */ - -void fgoldi_set(fgoldi *x, int a) -{ - unsigned i; - - x->P[0] = a; - for (i = 1; i < NPIECE; i++) x->P[i] = 0; -} - -/*----- Basic arithmetic --------------------------------------------------*/ - -/* --- @fgoldi_add@ --- * - * - * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@) - * @const fgoldi *x, *y@ = two operands - * - * Returns: --- - * - * Use: Set @z@ to the sum %$x + y$%. - */ - -void fgoldi_add(fgoldi *z, const fgoldi *x, const fgoldi *y) -{ - unsigned i; - for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] + y->P[i]; -} - -/* --- @fgoldi_sub@ --- * - * - * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@) - * @const fgoldi *x, *y@ = two operands - * - * Returns: --- - * - * Use: Set @z@ to the difference %$x - y$%. - */ - -void fgoldi_sub(fgoldi *z, const fgoldi *x, const fgoldi *y) -{ - unsigned i; - for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] - y->P[i]; -} - -/*----- Constant-time utilities -------------------------------------------*/ - -/* --- @fgoldi_condswap@ --- * - * - * Arguments: @fgoldi *x, *y@ = two operands - * @uint32 m@ = a mask - * - * Returns: --- - * - * Use: If @m@ is zero, do nothing; if @m@ is all-bits-set, then - * exchange @x@ and @y@. If @m@ has some other value, then - * scramble @x@ and @y@ in an unhelpful way. - */ - -void fgoldi_condswap(fgoldi *x, fgoldi *y, uint32 m) -{ - unsigned i; - mask32 mm = FIX_MASK32(m); - - for (i = 0; i < NPIECE; i++) CONDSWAP(x->P[i], y->P[i], mm); -} - -/*----- Multiplication ----------------------------------------------------*/ - -/* Let B = 2^63 - 1 be the largest value such that +B and -B can be - * represented in a double-precision piece. On entry, it must be the case - * that |X_i| <= M <= B - 2^27 for some M. If this is the case, then, on - * exit, we will have |Z_i| <= 2^27 + M/2^27. - */ -#define CARRY_REDUCE(z, x) do { \ - dblpiece _t[NPIECE], _c; \ - unsigned _i; \ - \ - /* Bias the input pieces. This keeps the carries and so on centred \ - * around zero rather than biased positive. \ - */ \ - for (_i = 0; _i < NPIECE; _i++) _t[_i] = (x)[_i] + B27; \ - \ - /* Calculate the reduced pieces. Careful with the bithacking. */ \ - _c = ASR(dblpiece, _t[15], 28); \ - (z)[0] = (dblpiece)((udblpiece)_t[0]&M28) - B27 + _c; \ - for (_i = 1; _i < NPIECE; _i++) { \ - (z)[_i] = (dblpiece)((udblpiece)_t[_i]&M28) - B27 + \ - ASR(dblpiece, _t[_i - 1], 28); \ - } \ - (z)[8] += _c; \ -} while (0) - -/* --- @fgoldi_mulconst@ --- * - * - * Arguments: @fgoldi *z@ = where to put the result (may alias @x@) - * @const fgoldi *x@ = an operand - * @long a@ = a small-ish constant; %$|a| < 2^{20}$%. - * - * Returns: --- - * - * Use: Set @z@ to the product %$a x$%. - */ - -void fgoldi_mulconst(fgoldi *z, const fgoldi *x, long a) -{ - unsigned i; - dblpiece zz[NPIECE], aa = a; - - for (i = 0; i < NPIECE; i++) zz[i] = aa*x->P[i]; - CARRY_REDUCE(z->P, zz); -} - -/* --- @fgoldi_mul@ --- * - * - * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@) - * @const fgoldi *x, *y@ = two operands - * - * Returns: --- - * - * Use: Set @z@ to the product %$x y$%. - */ - -void fgoldi_mul(fgoldi *z, const fgoldi *x, const fgoldi *y) -{ - dblpiece zz[NPIECE], u[NPIECE]; - piece ab[NPIECE/2], cd[NPIECE/2]; - const piece - *a = x->P + NPIECE/2, *b = x->P, - *c = y->P + NPIECE/2, *d = y->P; - unsigned i, j; - -# define M(x,i, y,j) ((dblpiece)(x)[i]*(y)[j]) - - /* Behold the magic. - * - * Write x = a φ + b, and y = c φ + d. Then x y = a c φ^2 + - * (a d + b c) φ + b d. Karatsuba and Ofman observed that a d + b c = - * (a + b) (c + d) - a c - b d, saving a multiplication, and Hamburg chose - * the prime p so that φ^2 = φ + 1. So - * - * x y = ((a + b) (c + d) - b d) φ + a c + b d - */ - - for (i = 0; i < NPIECE; i++) zz[i] = 0; - - /* Our first job will be to calculate (1 - φ) b d, and write the result - * into z. As we do this, an interesting thing will happen. Write - * b d = u φ + v; then (1 - φ) b d = u φ + v - u φ^2 - v φ = (1 - φ) v - u. - * So, what we do is to write the product end-swapped and negated, and then - * we'll subtract the (negated, remember) high half from the low half. - */ - for (i = 0; i < NPIECE/2; i++) { - for (j = 0; j < NPIECE/2 - i; j++) - zz[i + j + NPIECE/2] -= M(b,i, d,j); - for (; j < NPIECE/2; j++) - zz[i + j - NPIECE/2] -= M(b,i, d,j); - } - for (i = 0; i < NPIECE/2; i++) - zz[i] -= zz[i + NPIECE/2]; - - /* Next, we add on a c. There are no surprises here. */ - for (i = 0; i < NPIECE/2; i++) - for (j = 0; j < NPIECE/2; j++) - zz[i + j] += M(a,i, c,j); - - /* Now, calculate a + b and c + d. */ - for (i = 0; i < NPIECE/2; i++) - { ab[i] = a[i] + b[i]; cd[i] = c[i] + d[i]; } - - /* Finally (for the multiplication) we must add on (a + b) (c + d) φ. - * Write (a + b) (c + d) as u φ + v; then we actually want u φ^2 + v φ = - * v φ + (1 + φ) u. We'll store u in a temporary place and add it on - * twice. - */ - for (i = 0; i < NPIECE; i++) u[i] = 0; - for (i = 0; i < NPIECE/2; i++) { - for (j = 0; j < NPIECE/2 - i; j++) - zz[i + j + NPIECE/2] += M(ab,i, cd,j); - for (; j < NPIECE/2; j++) - u[i + j - NPIECE/2] += M(ab,i, cd,j); - } - for (i = 0; i < NPIECE/2; i++) - { zz[i] += u[i]; zz[i + NPIECE/2] += u[i]; } - -#undef M - - /* That wraps it up for the multiplication. Let's figure out some bounds. - * Fortunately, Karatsuba is a polynomial identity, so all of the pieces - * end up the way they'd be if we'd done the thing the easy way, which - * simplifies the analysis. Suppose we started with |x_i|, |y_i| <= 9/5 - * 2^28. The overheads in the result are given by the coefficients of - * - * ((u^16 - 1)/(u - 1))^2 mod u^16 - u^8 - 1 - * - * the greatest of which is 38. So |z_i| <= 38*81/25*2^56 < 2^63. - * - * Anyway, a round of `CARRY_REDUCE' will leave us with |z_i| < 2^27 + - * 2^36; and a second round will leave us with |z_i| < 2^27 + 512. - */ - for (i = 0; i < 2; i++) CARRY_REDUCE(zz, zz); - for (i = 0; i < NPIECE; i++) z->P[i] = zz[i]; -} - -/* --- @fgoldi_sqr@ --- * - * - * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@) - * @const fgoldi *x@ = an operand - * - * Returns: --- - * - * Use: Set @z@ to the square %$x^2$%. - */ - -void fgoldi_sqr(fgoldi *z, const fgoldi *x) -{ - dblpiece zz[NPIECE], u[NPIECE]; - piece ab[NPIECE]; - const piece *a = x->P + NPIECE/2, *b = x->P; - unsigned i, j; - -# define M(x,i, y,j) ((dblpiece)(x)[i]*(y)[j]) - - /* The magic is basically the same as `fgoldi_mul' above. We write - * x = a φ + b and use Karatsuba and the special prime shape. This time, - * we have - * - * x^2 = ((a + b)^2 - b^2) φ + a^2 + b^2 - */ - - for (i = 0; i < NPIECE; i++) zz[i] = 0; - - /* Our first job will be to calculate (1 - φ) b^2, and write the result - * into z. Again, this interacts pleasantly with the prime shape. - */ - for (i = 0; i < NPIECE/4; i++) { - zz[2*i + NPIECE/2] -= M(b,i, b,i); - for (j = i + 1; j < NPIECE/2 - i; j++) - zz[i + j + NPIECE/2] -= 2*M(b,i, b,j); - for (; j < NPIECE/2; j++) - zz[i + j - NPIECE/2] -= 2*M(b,i, b,j); - } - for (; i < NPIECE/2; i++) { - zz[2*i - NPIECE/2] -= M(b,i, b,i); - for (j = i + 1; j < NPIECE/2; j++) - zz[i + j - NPIECE/2] -= 2*M(b,i, b,j); - } - for (i = 0; i < NPIECE/2; i++) - zz[i] -= zz[i + NPIECE/2]; - - /* Next, we add on a^2. There are no surprises here. */ - for (i = 0; i < NPIECE/2; i++) { - zz[2*i] += M(a,i, a,i); - for (j = i + 1; j < NPIECE/2; j++) - zz[i + j] += 2*M(a,i, a,j); - } - - /* Now, calculate a + b. */ - for (i = 0; i < NPIECE/2; i++) - ab[i] = a[i] + b[i]; - - /* Finally (for the multiplication) we must add on (a + b)^2 φ. - * Write (a + b)^2 as u φ + v; then we actually want (u + v) φ + u. We'll - * store u in a temporary place and add it on twice. - */ - for (i = 0; i < NPIECE; i++) u[i] = 0; - for (i = 0; i < NPIECE/4; i++) { - zz[2*i + NPIECE/2] += M(ab,i, ab,i); - for (j = i + 1; j < NPIECE/2 - i; j++) - zz[i + j + NPIECE/2] += 2*M(ab,i, ab,j); - for (; j < NPIECE/2; j++) - u[i + j - NPIECE/2] += 2*M(ab,i, ab,j); - } - for (; i < NPIECE/2; i++) { - u[2*i - NPIECE/2] += M(ab,i, ab,i); - for (j = i + 1; j < NPIECE/2; j++) - u[i + j - NPIECE/2] += 2*M(ab,i, ab,j); - } - for (i = 0; i < NPIECE/2; i++) - { zz[i] += u[i]; zz[i + NPIECE/2] += u[i]; } - -#undef M - - /* Finally, carrying. */ - for (i = 0; i < 2; i++) CARRY_REDUCE(zz, zz); - for (i = 0; i < NPIECE; i++) z->P[i] = zz[i]; - -} - -/*----- More advanced operations ------------------------------------------*/ - -/* --- @fgoldi_inv@ --- * - * - * Arguments: @fgoldi *z@ = where to put the result (may alias @x@) - * @const fgoldi *x@ = an operand - * - * Returns: --- - * - * Use: Stores in @z@ the multiplicative inverse %$x^{-1}$%. If - * %$x = 0$% then @z@ is set to zero. This is considered a - * feature. - */ - -void fgoldi_inv(fgoldi *z, const fgoldi *x) -{ - fgoldi t, u; - unsigned i; - -#define SQRN(z, x, n) do { \ - fgoldi_sqr((z), (x)); \ - for (i = 1; i < (n); i++) fgoldi_sqr((z), (z)); \ -} while (0) - - /* Calculate x^-1 = x^(p - 2) = x^(2^448 - 2^224 - 3), which also handles - * x = 0 as intended. The addition chain is home-made. - */ /* step | value */ - fgoldi_sqr(&u, x); /* 1 | 2 */ - fgoldi_mul(&t, &u, x); /* 2 | 3 */ - SQRN(&u, &t, 2); /* 4 | 12 */ - fgoldi_mul(&t, &u, &t); /* 5 | 15 */ - SQRN(&u, &t, 4); /* 9 | 240 */ - fgoldi_mul(&u, &u, &t); /* 10 | 255 = 2^8 - 1 */ - SQRN(&u, &u, 4); /* 14 | 2^12 - 16 */ - fgoldi_mul(&t, &u, &t); /* 15 | 2^12 - 1 */ - SQRN(&u, &t, 12); /* 27 | 2^24 - 2^12 */ - fgoldi_mul(&u, &u, &t); /* 28 | 2^24 - 1 */ - SQRN(&u, &u, 12); /* 40 | 2^36 - 2^12 */ - fgoldi_mul(&t, &u, &t); /* 41 | 2^36 - 1 */ - fgoldi_sqr(&t, &t); /* 42 | 2^37 - 2 */ - fgoldi_mul(&t, &t, x); /* 43 | 2^37 - 1 */ - SQRN(&u, &t, 37); /* 80 | 2^74 - 2^37 */ - fgoldi_mul(&u, &u, &t); /* 81 | 2^74 - 1 */ - SQRN(&u, &u, 37); /* 118 | 2^111 - 2^37 */ - fgoldi_mul(&t, &u, &t); /* 119 | 2^111 - 1 */ - SQRN(&u, &t, 111); /* 230 | 2^222 - 2^111 */ - fgoldi_mul(&t, &u, &t); /* 231 | 2^222 - 1 */ - fgoldi_sqr(&u, &t); /* 232 | 2^223 - 2 */ - fgoldi_mul(&u, &u, x); /* 233 | 2^223 - 1 */ - SQRN(&u, &u, 223); /* 456 | 2^446 - 2^223 */ - fgoldi_mul(&t, &u, &t); /* 457 | 2^446 - 2^222 - 1 */ - SQRN(&t, &t, 2); /* 459 | 2^448 - 2^224 - 4 */ - fgoldi_mul(z, &t, x); /* 460 | 2^448 - 2^224 - 3 */ - -#undef SQRN -} - -/*----- That's all, folks -------------------------------------------------*/