X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/blobdiff_plain/343509982ee8c88ddafd0129b4dcf97e3c7a672d..ceb3f0c0a3b7bb3fa3250d31b04c382894095e52:/gfreduce.c diff --git a/gfreduce.c b/gfreduce.c new file mode 100644 index 0000000..3969f11 --- /dev/null +++ b/gfreduce.c @@ -0,0 +1,652 @@ +/* -*-c-*- + * + * $Id: gfreduce.c,v 1.1.2.1 2004/03/21 22:39:46 mdw Exp $ + * + * Efficient reduction modulo sparse binary polynomials + * + * (c) 2004 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Revision history --------------------------------------------------* + * + * $Log: gfreduce.c,v $ + * Revision 1.1.2.1 2004/03/21 22:39:46 mdw + * Elliptic curves on binary fields work. + * + */ + +/*----- Header files ------------------------------------------------------*/ + +#include +#include +#include + +#include "gf.h" +#include "gfreduce.h" +#include "gfreduce-exp.h" +#include "fibrand.h" +#include "mprand.h" + +/*----- Data structures ---------------------------------------------------*/ + +DA_DECL(instr_v, gfreduce_instr); + +/*----- Main code ---------------------------------------------------------*/ + +/* --- What's going on here? --- * + * + * Let's face it, @gfx_div@ sucks. It works (I hope), but it's not in any + * sense fast. Here, we do efficient reduction modulo sparse polynomials. + * + * Suppose we have a polynomial @X@ we're trying to reduce mod @P@. If we + * take the topmost nonzero word of @X@, call it @w@, then we can eliminate + * it by subtracting off @w P x^{k}@ for an appropriate value of @k@. The + * trick is in observing that if @P@ is sparse we can do this multiplication + * and subtraction efficiently, just by XORing appropriate shifts of @w@ into + * @X@. + * + * The first tricky bit is in working out when to stop. I'll use eight-bit + * words to demonstrate what I'm talking about. + * + * xxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx + * 001ppppp pppppppp pppppppp pppppppp + * || + * |<------------ bp ------------->| + * |<------------ nw --------------->| + * + * The trick of taking whole words off of @X@ stops working when there are + * only @nw@ words left. Then we have to mask off the bottom bits of @w@ + * before continuing. + */ + +/* --- @gfreduce_create@ --- * + * + * Arguments: @gfreduce *r@ = structure to fill in + * @mp *x@ = a (hopefully sparse) polynomial + * + * Returns: --- + * + * Use: Initializes a context structure for reduction. + */ + +void gfreduce_create(gfreduce *r, mp *p) +{ + instr_v iv = DA_INIT; + unsigned long d, dw; + mpscan sc; + unsigned long i; + gfreduce_instr *ip; + unsigned f = 0; + size_t w, ww, wi, wl, ll; + + /* --- Sort out the easy stuff --- */ + + d = mp_bits(p); assert(d); d--; + r->lim = d/MPW_BITS; + dw = d%MPW_BITS; + if (!dw) + r->mask = 0; + else { + r->mask = MPW(((mpw)-1) << dw); + r->lim++; + } + r->p = mp_copy(p); + + /* --- Stash a new instruction --- */ + +#define INSTR(op_, arg_) do { \ + DA_ENSURE(&iv, 1); \ + DA(&iv)[DA_LEN(&iv)].op = (op_); \ + DA(&iv)[DA_LEN(&iv)].arg = (arg_); \ + DA_EXTEND(&iv, 1); \ +} while (0) + +#define f_lsr 1u + + w = (d + MPW_BITS - 1)/MPW_BITS; + INSTR(GFRI_LOAD, w); + wi = DA_LEN(&iv); + f = 0; + ll = 0; + for (i = 0, mp_scan(&sc, p); mp_step(&sc) && i < d; i++) { + if (!mp_bit(&sc)) + continue; + ww = (d - i + MPW_BITS - 1)/MPW_BITS; + if (ww != w) { + wl = DA_LEN(&iv); + INSTR(GFRI_STORE, w); + if (!ll) + ll = DA_LEN(&iv); + if (!(f & f_lsr)) + INSTR(GFRI_LOAD, ww); + else { + INSTR(GFRI_LOAD, w - 1); + for (; wi < wl; wi++) { + ip = &DA(&iv)[wi]; + assert(ip->op == GFRI_LSL); + if (ip->arg) + INSTR(GFRI_LSR, MPW_BITS - ip->arg); + } + if (w - 1 != ww) { + INSTR(GFRI_STORE, w - 1); + INSTR(GFRI_LOAD, ww); + } + f &= ~f_lsr; + } + w = ww; + wi = DA_LEN(&iv); + } + INSTR(GFRI_LSL, (i - d)%MPW_BITS); + if ((i - d)%MPW_BITS) + f |= f_lsr; + } + wl = DA_LEN(&iv); + INSTR(GFRI_STORE, w); + if (!ll) + ll = DA_LEN(&iv); + if (f & f_lsr) { + INSTR(GFRI_LOAD, w - 1); + for (; wi < wl; wi++) { + ip = &DA(&iv)[wi]; + assert(ip->op == GFRI_LSL); + if (ip->arg) + INSTR(GFRI_LSR, MPW_BITS - ip->arg); + } + INSTR(GFRI_STORE, w - 1); + } + +#undef INSTR + + r->in = DA_LEN(&iv); + r->iv = xmalloc(r->in * sizeof(gfreduce_instr)); + r->liv = r->iv + ll; + memcpy(r->iv, DA(&iv), r->in * sizeof(gfreduce_instr)); + DA_DESTROY(&iv); +} + +/* --- @gfreduce_destroy@ --- * + * + * Arguments: @gfreduce *r@ = structure to free + * + * Returns: --- + * + * Use: Reclaims the resources from a reduction context. + */ + +void gfreduce_destroy(gfreduce *r) +{ + mp_drop(r->p); + xfree(r->iv); +} + +/* --- @gfreduce_dump@ --- * + * + * Arguments: @gfreduce *r@ = structure to dump + * @FILE *fp@ = file to dump on + * + * Returns: --- + * + * Use: Dumps a reduction context. + */ + +void gfreduce_dump(gfreduce *r, FILE *fp) +{ + size_t i; + + fprintf(fp, "poly = "); mp_writefile(r->p, fp, 16); + fprintf(fp, "\n lim = %lu; mask = %lx\n", + (unsigned long)r->lim, (unsigned long)r->mask); + for (i = 0; i < r->in; i++) { + static const char *opname[] = { "load", "lsl", "lsr", "store" }; + assert(r->iv[i].op < N(opname)); + fprintf(fp, " %s %lu\n", + opname[r->iv[i].op], + (unsigned long)r->iv[i].arg); + } +} + +/* --- @gfreduce_do@ --- * + * + * Arguments: @gfreduce *r@ = reduction context + * @mp *d@ = destination + * @mp *x@ = source + * + * Returns: Destination, @x@ reduced modulo the reduction poly. + */ + +static void run(const gfreduce_instr *i, const gfreduce_instr *il, + mpw *v, mpw z) +{ + mpw w = 0; + + for (; i < il; i++) { + switch (i->op) { + case GFRI_LOAD: w = *(v - i->arg); break; + case GFRI_LSL: w ^= z << i->arg; break; + case GFRI_LSR: w ^= z >> i->arg; break; + case GFRI_STORE: *(v - i->arg) = MPW(w); break; + default: abort(); + } + } +} + +mp *gfreduce_do(gfreduce *r, mp *d, mp *x) +{ + mpw *v, *vl; + const gfreduce_instr *il; + mpw z; + + /* --- Try to reuse the source's space --- */ + + MP_COPY(x); + if (d) MP_DROP(d); + MP_DEST(x, MP_LEN(x), x->f); + + /* --- Do the reduction --- */ + + il = r->iv + r->in; + if (MP_LEN(x) >= r->lim) { + v = x->v + r->lim; + vl = x->vl; + while (vl-- > v) { + while (*vl) { + z = *vl; + *vl = 0; + run(r->iv, il, vl, z); + } + } + if (r->mask) { + while (*vl & r->mask) { + z = *vl & r->mask; + *vl &= ~r->mask; + run(r->iv, il, vl, z); + } + } + } + + /* --- Done --- */ + + MP_SHRINK(x); + return (x); +} + +/* --- @gfreduce_sqrt@ --- * + * + * Arguments: @gfreduce *r@ = pointer to reduction context + * @mp *d@ = destination + * @mp *x@ = some polynomial + * + * Returns: The square root of @x@ modulo @r->p@, or null. + */ + +mp *gfreduce_sqrt(gfreduce *r, mp *d, mp *x) +{ + mp *y = MP_COPY(x); + mp *z, *spare = MP_NEW; + unsigned long m = mp_bits(r->p) - 1; + unsigned long i; + + for (i = 0; i < m - 1; i++) { + mp *t = gf_sqr(spare, y); + spare = y; + y = gfreduce_do(r, t, t); + } + z = gf_sqr(spare, y); + z = gfreduce_do(r, z, z); + if (!MP_EQ(x, z)) { + mp_drop(y); + y = 0; + } + mp_drop(z); + mp_drop(d); + return (y); +} + +/* --- @gfreduce_trace@ --- * + * + * Arguments: @gfreduce *r@ = pointer to reduction context + * @mp *x@ = some polynomial + * + * Returns: The trace of @x@. (%$\Tr(x)=x + x^2 + \cdots + x^{2^{m-1}}$% + * if %$x \in \gf{2^m}$%). + */ + +int gfreduce_trace(gfreduce *r, mp *x) +{ + mp *y = MP_COPY(x); + mp *spare = MP_NEW; + unsigned long m = mp_bits(r->p) - 1; + unsigned long i; + int rc; + + for (i = 0; i < m - 1; i++) { + mp *t = gf_sqr(spare, y); + spare = y; + y = gfreduce_do(r, t, t); + y = gf_add(y, y, x); + } + rc = !MP_ISZERO(y); + mp_drop(spare); + mp_drop(y); + return (rc); +} + +/* --- @gfreduce_halftrace@ --- * + * + * Arguments: @gfreduce *r@ = pointer to reduction context + * @mp *d@ = destination + * @mp *x@ = some polynomial + * + * Returns: The half-trace of @x@. + * (%$\HfTr(x)= x + x^{2^2} + \cdots + x^{2^{m-1}}$% + * if %$x \in \gf{2^m}$% with %$m$% odd). + */ + +mp *gfreduce_halftrace(gfreduce *r, mp *d, mp *x) +{ + mp *y = MP_COPY(x); + mp *spare = MP_NEW; + unsigned long m = mp_bits(r->p) - 1; + unsigned long i; + + mp_drop(d); + for (i = 0; i < m - 1; i += 2) { + mp *t = gf_sqr(spare, y); + spare = y; + y = gfreduce_do(r, t, t); + t = gf_sqr(spare, y); + spare = y; + y = gfreduce_do(r, t, t); + y = gf_add(y, y, x); + } + mp_drop(spare); + return (y); +} + +/* --- @gfreduce_quadsolve@ --- * + * + * Arguments: @gfreduce *r@ = pointer to reduction context + * @mp *d@ = destination + * @mp *x@ = some polynomial + * + * Returns: A polynomial @y@ such that %$y^2 + y = x$%, or null. + */ + +mp *gfreduce_quadsolve(gfreduce *r, mp *d, mp *x) +{ + unsigned long m = mp_bits(r->p) - 1; + mp *t; + + MP_COPY(x); + if (m & 1) + d = gfreduce_halftrace(r, d, x); + else { + mp *z, *w, *rho = MP_NEW; + mp *spare = MP_NEW; + grand *fr = fibrand_create(0); + unsigned long i; + + for (;;) { + rho = mprand(rho, m, fr, 0); + z = MP_ZERO; + w = MP_COPY(rho); + for (i = 0; i < m - 1; i++) { + t = gf_sqr(spare, z); spare = z; z = gfreduce_do(r, t, t); + t = gf_sqr(spare, w); spare = w; w = gfreduce_do(r, t, t); + t = gf_mul(spare, w, x); t = gfreduce_do(r, t, t); spare = t; + z = gf_add(z, z, t); + w = gf_add(w, w, rho); + } + if (!MP_ISZERO(w)) + break; + MP_DROP(z); + MP_DROP(w); + } + if (d) MP_DROP(d); + MP_DROP(w); + MP_DROP(spare); + MP_DROP(rho); + fr->ops->destroy(fr); + d = z; + } + + t = gf_sqr(MP_NEW, d); t = gfreduce_do(r, t, t); t = gf_add(t, t, d); + if (!MP_EQ(t, x)) { + MP_DROP(d); + d = 0; + } + MP_DROP(t); + MP_DROP(x); + d->v[0] &= ~(mpw)1; + return (d); +} + +/* --- @gfreduce_exp@ --- * + * + * Arguments: @gfreduce *gr@ = pointer to reduction context + * @mp *d@ = fake destination + * @mp *a@ = base + * @mp *e@ = exponent + * + * Returns: Result, %$a^e \bmod m$%. + */ + +mp *gfreduce_exp(gfreduce *gr, mp *d, mp *a, mp *e) +{ + mp *x = MP_ONE; + mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW; + + MP_SHRINK(e); + if (!MP_LEN(e)) + ; + else if (MP_LEN(e) < EXP_THRESH) + EXP_SIMPLE(x, a, e); + else + EXP_WINDOW(x, a, e); + mp_drop(d); + mp_drop(spare); + return (x); +} + +/*----- Test rig ----------------------------------------------------------*/ + +#ifdef TEST_RIG + +#define MP(x) mp_readstring(MP_NEW, #x, 0, 0) + +static int vreduce(dstr *v) +{ + mp *d = *(mp **)v[0].buf; + mp *n = *(mp **)v[1].buf; + mp *r = *(mp **)v[2].buf; + mp *c; + int ok = 1; + gfreduce rr; + + gfreduce_create(&rr, d); + c = gfreduce_do(&rr, MP_NEW, n); + if (!MP_EQ(c, r)) { + fprintf(stderr, "\n*** reduction failed\n*** "); + gfreduce_dump(&rr, stderr); + fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 16); + fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); + fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); + fprintf(stderr, "\n"); + ok = 0; + } + gfreduce_destroy(&rr); + mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c); + assert(mparena_count(MPARENA_GLOBAL) == 0); + return (ok); +} + +static int vmodexp(dstr *v) +{ + mp *p = *(mp **)v[0].buf; + mp *g = *(mp **)v[1].buf; + mp *x = *(mp **)v[2].buf; + mp *r = *(mp **)v[3].buf; + mp *c; + int ok = 1; + gfreduce rr; + + gfreduce_create(&rr, p); + c = gfreduce_exp(&rr, MP_NEW, g, x); + if (!MP_EQ(c, r)) { + fprintf(stderr, "\n*** modexp failed\n*** "); + fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); + fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 16); + fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); + fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); + fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); + fprintf(stderr, "\n"); + ok = 0; + } + gfreduce_destroy(&rr); + mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c); + assert(mparena_count(MPARENA_GLOBAL) == 0); + return (ok); +} + +static int vsqrt(dstr *v) +{ + mp *p = *(mp **)v[0].buf; + mp *x = *(mp **)v[1].buf; + mp *r = *(mp **)v[2].buf; + mp *c; + int ok = 1; + gfreduce rr; + + gfreduce_create(&rr, p); + c = gfreduce_sqrt(&rr, MP_NEW, x); + if (!MP_EQ(c, r)) { + fprintf(stderr, "\n*** sqrt failed\n*** "); + fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); + fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); + fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); + fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); + fprintf(stderr, "\n"); + ok = 0; + } + gfreduce_destroy(&rr); + mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); + assert(mparena_count(MPARENA_GLOBAL) == 0); + return (ok); +} + +static int vtr(dstr *v) +{ + mp *p = *(mp **)v[0].buf; + mp *x = *(mp **)v[1].buf; + int r = *(int *)v[2].buf, c; + int ok = 1; + gfreduce rr; + + gfreduce_create(&rr, p); + c = gfreduce_trace(&rr, x); + if (c != r) { + fprintf(stderr, "\n*** trace failed\n*** "); + fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); + fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); + fprintf(stderr, "\n*** c = %d", c); + fprintf(stderr, "\n*** r = %d", r); + fprintf(stderr, "\n"); + ok = 0; + } + gfreduce_destroy(&rr); + mp_drop(p); mp_drop(x); + assert(mparena_count(MPARENA_GLOBAL) == 0); + return (ok); +} + +static int vhftr(dstr *v) +{ + mp *p = *(mp **)v[0].buf; + mp *x = *(mp **)v[1].buf; + mp *r = *(mp **)v[2].buf; + mp *c; + int ok = 1; + gfreduce rr; + + gfreduce_create(&rr, p); + c = gfreduce_halftrace(&rr, MP_NEW, x); + if (!MP_EQ(c, r)) { + fprintf(stderr, "\n*** halftrace failed\n*** "); + fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); + fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); + fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); + fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); + fprintf(stderr, "\n"); + ok = 0; + } + gfreduce_destroy(&rr); + mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); + assert(mparena_count(MPARENA_GLOBAL) == 0); + return (ok); +} + +static int vquad(dstr *v) +{ + mp *p = *(mp **)v[0].buf; + mp *x = *(mp **)v[1].buf; + mp *r = *(mp **)v[2].buf; + mp *c; + int ok = 1; + gfreduce rr; + + gfreduce_create(&rr, p); + c = gfreduce_quadsolve(&rr, MP_NEW, x); + if (!MP_EQ(c, r)) { + fprintf(stderr, "\n*** quadsolve failed\n*** "); + fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 16); + fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 16); + fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 16); + fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 16); + fprintf(stderr, "\n"); + ok = 0; + } + gfreduce_destroy(&rr); + mp_drop(p); mp_drop(r); mp_drop(x); mp_drop(c); + assert(mparena_count(MPARENA_GLOBAL) == 0); + return (ok); +} + +static test_chunk defs[] = { + { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } }, + { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } }, + { "sqrt", vsqrt, { &type_mp, &type_mp, &type_mp, 0 } }, + { "trace", vtr, { &type_mp, &type_mp, &type_int, 0 } }, + { "halftrace", vhftr, { &type_mp, &type_mp, &type_mp, 0 } }, + { "quadsolve", vquad, { &type_mp, &type_mp, &type_mp, 0 } }, + { 0, 0, { 0 } } +}; + +int main(int argc, char *argv[]) +{ + test_run(argc, argv, defs, SRCDIR"/tests/gfreduce"); + return (0); +} + +#endif + +/*----- That's all, folks -------------------------------------------------*/