X-Git-Url: https://git.distorted.org.uk/~mdw/secnet/blobdiff_plain/1047c205103e6da9fc6a317f41583147dbc11aa3..a1a6042e24c9873aa6abf668bcb68d39d0eb4190:/scaf.c diff --git a/scaf.c b/scaf.c new file mode 100644 index 0000000..96c3744 --- /dev/null +++ b/scaf.c @@ -0,0 +1,341 @@ +/* -*-c-*- + * + * Simple scalar fields + * + * (c) 2017 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of secnet. + * See README for full list of copyright holders. + * + * secnet is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version d of the License, or + * (at your option) any later version. + * + * secnet is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * version 3 along with secnet; if not, see + * https://www.gnu.org/licenses/gpl.html. + * + * This file was originally part of Catacomb, but has been automatically + * modified for incorporation into secnet: see `import-catacomb-crypto' + * for details. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include + +#include "scaf.h" + +/*----- Debugging utilties ------------------------------------------------*/ + +#ifdef SCAF_DEBUG + +#include + +#include "mp.h" +#include "mpint.h" +#include "mptext.h" + +static void scaf_dump(const char *what, const scaf_piece *x, + size_t npiece, size_t piecewd) +{ + mp *y = MP_ZERO, *t = MP_NEW; + size_t i; + unsigned o = 0; + + for (i = 0; i < npiece; i++) { + t = mp_fromuint64(t, x[i]); + t = mp_lsl(t, t, o); + y = mp_add(y, y, t); + o += piecewd; + } + printf(";; %s", what); MP_PRINT("", y); putchar('\n'); + mp_drop(y); mp_drop(t); +} + +static void scaf_dumpdbl(const char *what, const scaf_dblpiece *x, + size_t npiece, size_t piecewd) +{ + mp *y = MP_ZERO, *t = MP_NEW; + size_t i; + unsigned o = 0; + + for (i = 0; i < npiece; i++) { + t = mp_fromuint64(t, x[i]); + t = mp_lsl(t, t, o); + y = mp_add(y, y, t); + o += piecewd; + } + printf(";; %s", what); MP_PRINT("", y); putchar('\n'); + mp_drop(y); mp_drop(t); +} + +#endif + +/*----- Main code ---------------------------------------------------------*/ + +/* --- @scaf_load@ --- * + * + * Arguments: @scaf_piece *z@ = where to write the result + * @const octet *b@ = source buffer to read + * @size_t sz@ = size of the source buffer + * @size_t npiece@ = number of pieces to read + * @unsigned piecewd@ = nominal width of pieces in bits + * + * Returns: --- + * + * Use: Loads a little-endian encoded scalar into a vector @z@ of + * single-precision pieces. + */ + +void scaf_load(scaf_piece *z, const octet *b, size_t sz, + size_t npiece, unsigned piecewd) +{ + uint32 a, m = ((scaf_piece)1 << piecewd) - 1; + unsigned i, j, n; + + for (i = j = n = 0, a = 0; i < sz; i++) { + a |= b[i] << n; n += 8; + if (n >= piecewd) { + z[j++] = a&m; a >>= piecewd; n -= piecewd; + if (j >= npiece) return; + } + } + z[j++] = a; + while (j < npiece) z[j++] = 0; +} + +/* --- @scaf_loaddbl@ --- * + * + * Arguments: @scaf_dblpiece *z@ = where to write the result + * @const octet *b@ = source buffer to read + * @size_t sz@ = size of the source buffer + * @size_t npiece@ = number of pieces to read + * @unsigned piecewd@ = nominal width of pieces in bits + * + * Returns: --- + * + * Use: Loads a little-endian encoded scalar into a vector @z@ of + * double-precision pieces. + */ + +void scaf_loaddbl(scaf_dblpiece *z, const octet *b, size_t sz, + size_t npiece, unsigned piecewd) +{ + uint32 a, m = ((scaf_piece)1 << piecewd) - 1; + unsigned i, j, n; + + for (i = j = n = 0, a = 0; i < sz; i++) { + a |= b[i] << n; n += 8; + if (n >= piecewd) { + z[j++] = a&m; a >>= piecewd; n -= piecewd; + if (j >= npiece) return; + } + } + z[j++] = a; + while (j < npiece) z[j++] = 0; +} + +/* --- @scaf_store@ --- * + * + * Arguments: @octet *b@ = buffer to fill in + * @size_t sz@ = size of the buffer + * @const scaf_piece *x@ = scalar to store + * @size_t npiece@ = number of pieces in @x@ + * @unsigned piecewd@ = nominal width of pieces in bits + * + * Returns: --- + * + * Use: Stores a scalar in a vector of single-precison pieces as a + * little-endian vector of bytes. + */ + +void scaf_store(octet *b, size_t sz, const scaf_piece *x, + size_t npiece, unsigned piecewd) +{ + uint32 a; + unsigned i, j, n; + + for (i = j = n = 0, a = 0; i < npiece; i++) { + a |= x[i] << n; n += piecewd; + while (n >= 8) { + b[j++] = a&0xffu; a >>= 8; n -= 8; + if (j >= sz) return; + } + } + b[j++] = a; + memset(b + j, 0, sz - j); +} + +/* --- @scaf_mul@ --- * + * + * Arguments: @scaf_dblpiece *z@ = where to put the answer + * @const scaf_piece *x, *y@ = the operands + * @size_t npiece@ = the length of the operands + * + * Returns: --- + * + * Use: Multiply two scalars. The destination must have space for + * @2*npiece@ pieces (though the last one will always be zero). + * The result is not reduced. + */ + +void scaf_mul(scaf_dblpiece *z, const scaf_piece *x, const scaf_piece *y, + size_t npiece) +{ + unsigned i, j; + + for (i = 0; i < 2*npiece; i++) z[i] = 0; + + for (i = 0; i < npiece; i++) + for (j = 0; j < npiece; j++) + z[i + j] += (scaf_dblpiece)x[i]*y[j]; +} + +/* --- @scaf_reduce@ --- * + * + * Arguments: @scaf_piece *z@ = where to write the result + * @const scaf_dblpiece *x@ = the operand to reduce + * @const scaf_piece *l@ = the modulus, in internal format + * @const scaf_piece *mu@ = scaled approximation to @1/l@ + * @size_t npiece@ = number of pieces in @l@ + * @unsigned piecewd@ = nominal width of a piece in bits + * @scaf_piece *scratch@ = @3*npiece@ scratch pieces + * + * Returns: --- + * + * Use: Reduce @x@ (a vector of @2*npiece@ double-precision pieces) + * modulo @l@ (a vector of @npiece@ single-precision pieces), + * writing the result to @z@. + * + * Write @n = npiece@, @w = piecewd@, and %$B = 2^w$%. The + * operand @mu@ must contain %$\lfloor B^{2n}/l \rfloor$%, in + * @npiece + 1@ pieces. Furthermore, we must have + * %$3 l < B^n$%. (Fiddle with %$w$% if necessary.) + */ + +void scaf_reduce(scaf_piece *z, const scaf_dblpiece *x, + const scaf_piece *l, const scaf_piece *mu, + size_t npiece, unsigned piecewd, scaf_piece *scratch) +{ + unsigned i, j; + scaf_piece *t = scratch, *q = scratch + 2*npiece; + scaf_piece u, m = ((scaf_piece)1 << piecewd) - 1; + scaf_dblpiece c; + + /* This here is the hard part. + * + * Let w = PIECEWD, let n = NPIECE, and let B = 2^w. We must have + * B^(n-1) <= l < B^n. + * + * The argument MU contains pieces of the quantity µ = floor(B^2n/l), which + * is a scaled approximation to 1/l. We'll calculate + * + * q = floor(µ floor(x/B^(n-1))/B^(n+1)) + * + * which is an underestimate of x/l. + * + * With a bit more precision: by definition, u - 1 < floor(u) <= u. Hence, + * + * B^2n/l - 1 < µ <= B^2/l + * + * and + * + * x/B^(n-1) - 1 < floor(x/B^(n-1)) <= x/B^(n-1) + * + * Multiplying these together, and dividing through by B^(n+1), gives + * + * floor(x/l - B^(n-1)/l - x/B^2n + 1/B^(n+1)) <= + * q <= µ floor(x/B^(n-1))/B^(n+1) <= floor(x/l) + * + * Now, noticing that x < B^2n and l > B^(n-1) shows that x/B^2n and + * B^(n-1)/l are each less than 1; hence + * + * floor(x/l) - 2 <= q <= floor(x/l) <= x/l + * + * Now we set r = x - q l. Certainly, r == x (mod l); and + * + * 0 <= r < x - l floor(x/l) + 2 l < 3 l < B^n + */ + + /* Before we start on the fancy stuff, we need to resolve the pending + * carries in x. We'll be doing the floor division by just ignoring some + * of the pieces, and it would be bad if we missed some significant bits. + * Of course, this means that we don't actually have to store the low + * NPIECE - 1 pieces of the result. + */ + for (i = 0, c = 0; i < 2*npiece; i++) + { c += x[i]; t[i] = c&m; c >>= piecewd; } + + /* Now we calculate q. If we calculate this in product-scanning order, we + * can avoid having to store the low NPIECE + 1 pieces of the product as + * long as we keep track of the carry out properly. Conveniently, NMU = + * NPIECE + 1, which keeps the loop bounds easy in the first pass. + * + * Furthermore, because we know that r fits in NPIECE pieces, we only need + * the low NPIECE pieces of q. + */ + for (i = 0, c = 0; i < npiece + 1; i++) { + for (j = 0; j <= i; j++) + c += (scaf_dblpiece)t[j + npiece - 1]*mu[i - j]; + c >>= piecewd; + } + for (i = 0; i < npiece; i++) { + for (j = i + 1; j < npiece + 1; j++) + c += (scaf_dblpiece)t[j + npiece - 1]*mu[npiece + 1 + i - j]; + q[i] = c&m; c >>= piecewd; + } + + /* Next, we calculate r - q l in z. Product-scanning seems to be working + * out for us, and this time it will save us needing a large temporary + * space for the product q l as we go. On the downside, we have to track + * the carries from the multiplication and subtraction separately. + * + * Notice that the result r is at most NPIECE pieces long, so we can stop + * once we have that many. + */ + u = 1; c = 0; + for (i = 0; i < npiece; i++) { + for (j = 0; j <= i; j++) c += (scaf_dblpiece)q[j]*l[i - j]; + u += t[i] + ((scaf_piece)(c&m) ^ m); + z[i] = u&m; u >>= piecewd; c >>= piecewd; + } + + /* Finally, two passes of conditional subtraction. Calculate t = z - l; if + * there's no borrow out the top, then update z = t; otherwise leave t + * alone. + */ + for (i = 0; i < 2; i++) { + for (j = 0, u = 1; j < npiece; j++) { + u += z[j] + (l[j] ^ m); + t[j] = u&m; u >>= piecewd; + } + for (j = 0, u = -u; j < npiece; j++) z[j] = (t[j]&u) | (z[j]&~u); + } +} + +/*----- That's all, folks -------------------------------------------------*/