X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/blobdiff_plain/ba6e6b64033b1f9de49feccb5c9cd438354481f7..0f00dc4c8eb47e67bc0f148c2dd109f73a451e0a:/math/mpreduce.c diff --git a/math/mpreduce.c b/math/mpreduce.c new file mode 100644 index 0000000..669f516 --- /dev/null +++ b/math/mpreduce.c @@ -0,0 +1,438 @@ +/* -*-c-*- + * + * Efficient reduction modulo nice primes + * + * (c) 2004 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include +#include + +#include "mp.h" +#include "mpreduce.h" +#include "mpreduce-exp.h" + +/*----- Data structures ---------------------------------------------------*/ + +DA_DECL(instr_v, mpreduce_instr); + +/*----- Main code ---------------------------------------------------------*/ + +/* --- @mpreduce_create@ --- * + * + * Arguments: @gfreduce *r@ = structure to fill in + * @mp *x@ = an integer + * + * Returns: Zero if successful; nonzero on failure. + * + * Use: Initializes a context structure for reduction. + */ + +int mpreduce_create(mpreduce *r, mp *p) +{ + mpscan sc; + enum { Z = 0, Z1 = 2, X = 4, X0 = 6 }; + unsigned st = Z; + instr_v iv = DA_INIT; + unsigned long d, i; + unsigned op; + size_t w, b, bb; + + /* --- Fill in the easy stuff --- */ + + if (!MP_POSP(p)) + return (-1); + d = mp_bits(p); + r->lim = d/MPW_BITS; + r->s = d%MPW_BITS; + if (r->s) + r->lim++; + r->p = mp_copy(p); + + /* --- Stash a new instruction --- */ + +#define INSTR(op_, argx_, argy_) do { \ + DA_ENSURE(&iv, 1); \ + DA(&iv)[DA_LEN(&iv)].op = (op_); \ + DA(&iv)[DA_LEN(&iv)].argx = (argx_); \ + DA(&iv)[DA_LEN(&iv)].argy = (argy_); \ + DA_EXTEND(&iv, 1); \ +} while (0) + + /* --- Main loop --- * + * + * A simple state machine decomposes @p@ conveniently into positive and + * negative powers of 2. The pure form of the state machine is left below + * for reference (and in case I need inspiration for a NAF exponentiator). + */ + +#ifdef DEBUG + for (i = 0, mp_scan(&sc, p); mp_step(&sc); i++) { + switch (st | mp_bit(&sc)) { + case Z | 1: st = Z1; break; + case Z1 | 0: st = Z; printf("+ %lu\n", i - 1); break; + case Z1 | 1: st = X; printf("- %lu\n", i - 1); break; + case X | 0: st = X0; break; + case X0 | 1: st = X; printf("- %lu\n", i - 1); break; + case X0 | 0: st = Z; printf("+ %lu\n", i - 1); break; + } + } + if (st >= X) printf("+ %lu\n", i - 1); + st = Z; +#endif + + bb = MPW_BITS - (d + 1)%MPW_BITS; + for (i = 0, mp_scan(&sc, p); i < d && mp_step(&sc); i++) { + switch (st | mp_bit(&sc)) { + case Z | 1: st = Z1; break; + case Z1 | 0: st = Z; op = MPRI_SUB; goto instr; + case Z1 | 1: st = X; op = MPRI_ADD; goto instr; + case X | 0: st = X0; break; + case X0 | 1: st = X; op = MPRI_ADD; goto instr; + case X0 | 0: st = Z; op = MPRI_SUB; goto instr; + instr: + w = (d - i)/MPW_BITS + 1; + b = (bb + i)%MPW_BITS; + INSTR(op | !!b, w, b); + } + } + if (DA_LEN(&iv) && (DA(&iv)[DA_LEN(&iv) - 1].op & ~1u) == MPRI_SUB) { + mp_drop(r->p); + DA_DESTROY(&iv); + return (-1); + } + +#undef INSTR + + /* --- Wrap up --- */ + + r->in = DA_LEN(&iv); + if (!r->in) + r->iv = 0; + else if (!r->s) { + r->iv = xmalloc(r->in * sizeof(mpreduce_instr)); + memcpy(r->iv, DA(&iv), r->in * sizeof(mpreduce_instr)); + } else { + r->iv = xmalloc(r->in * 2 * sizeof(mpreduce_instr)); + for (i = 0; i < r->in; i++) { + r->iv[i] = DA(&iv)[i]; + op = r->iv[i].op & ~1u; + w = r->iv[i].argx; + b = r->iv[i].argy; + b += r->s; + if (b >= MPW_BITS) { + b -= MPW_BITS; + w--; + } + if (b) op |= 1; + r->iv[i + r->in].op = op; + r->iv[i + r->in].argx = w; + r->iv[i + r->in].argy = b; + } + } + DA_DESTROY(&iv); + +#ifdef DEBUG + mpreduce_dump(r, stdout); +#endif + return (0); +} + +/* --- @mpreduce_destroy@ --- * + * + * Arguments: @mpreduce *r@ = structure to free + * + * Returns: --- + * + * Use: Reclaims the resources from a reduction context. + */ + +void mpreduce_destroy(mpreduce *r) +{ + mp_drop(r->p); + if (r->iv) xfree(r->iv); +} + +/* --- @mpreduce_dump@ --- * + * + * Arguments: @mpreduce *r@ = structure to dump + * @FILE *fp@ = file to dump on + * + * Returns: --- + * + * Use: Dumps a reduction context. + */ + +void mpreduce_dump(mpreduce *r, FILE *fp) +{ + size_t i; + static const char *opname[] = { "add", "addshift", "sub", "subshift" }; + + fprintf(fp, "mod = "); mp_writefile(r->p, fp, 16); + fprintf(fp, "\n lim = %lu; s = %d\n", (unsigned long)r->lim, r->s); + for (i = 0; i < r->in; i++) { + assert(r->iv[i].op < N(opname)); + fprintf(fp, " %s %lu %lu\n", + opname[r->iv[i].op], + (unsigned long)r->iv[i].argx, + (unsigned long)r->iv[i].argy); + } + if (r->s) { + fprintf(fp, "tail end charlie\n"); + for (i = r->in; i < 2 * r->in; i++) { + assert(r->iv[i].op < N(opname)); + fprintf(fp, " %s %lu %lu\n", + opname[r->iv[i].op], + (unsigned long)r->iv[i].argx, + (unsigned long)r->iv[i].argy); + } + } +} + +/* --- @mpreduce_do@ --- * + * + * Arguments: @mpreduce *r@ = reduction context + * @mp *d@ = destination + * @mp *x@ = source + * + * Returns: Destination, @x@ reduced modulo the reduction poly. + */ + +static void run(const mpreduce_instr *i, const mpreduce_instr *il, + mpw *v, mpw z) +{ + for (; i < il; i++) { +#ifdef DEBUG + mp vv; + mp_build(&vv, v - i->argx, v + 1); + printf(" 0x"); mp_writefile(&vv, stdout, 16); + printf(" %c (0x%lx << %u) == 0x", + (i->op & ~1u) == MPRI_ADD ? '+' : '-', + (unsigned long)z, + i->argy); +#endif + switch (i->op) { + case MPRI_ADD: MPX_UADDN(v - i->argx, v + 1, z); break; + case MPRI_ADDLSL: mpx_uaddnlsl(v - i->argx, v + 1, z, i->argy); break; + case MPRI_SUB: MPX_USUBN(v - i->argx, v + 1, z); break; + case MPRI_SUBLSL: mpx_usubnlsl(v - i->argx, v + 1, z, i->argy); break; + default: + abort(); + } +#ifdef DEBUG + mp_build(&vv, v - i->argx, v + 1); + mp_writefile(&vv, stdout, 16); + printf("\n"); +#endif + } +} + +mp *mpreduce_do(mpreduce *r, mp *d, mp *x) +{ + mpw *v, *vl; + const mpreduce_instr *il; + mpw z; + +#ifdef DEBUG + mp *_r = 0, *_rr = 0; +#endif + + /* --- If source is negative, divide --- */ + + if (MP_NEGP(x)) { + mp_div(0, &d, x, r->p); + return (d); + } + + /* --- Try to reuse the source's space --- */ + + MP_COPY(x); + if (d) MP_DROP(d); + MP_DEST(x, MP_LEN(x), x->f); + + /* --- Do the reduction --- */ + +#ifdef DEBUG + _r = MP_NEW; + mp_div(0, &_r, x, r->p); + MP_PRINTX("x", x); + _rr = 0; +#endif + + il = r->iv + r->in; + if (MP_LEN(x) >= r->lim) { + v = x->v + r->lim; + vl = x->vl; + while (vl-- > v) { + while (*vl) { + z = *vl; + *vl = 0; + run(r->iv, il, vl, z); +#ifdef DEBUG + MP_PRINTX("x", x); + mp_div(0, &_rr, x, r->p); + assert(MP_EQ(_r, _rr)); +#endif + } + } + if (r->s) { + while (*vl >> r->s) { + z = *vl >> r->s; + *vl &= ((1 << r->s) - 1); + run(r->iv + r->in, il + r->in, vl, z); +#ifdef DEBUG + MP_PRINTX("x", x); + mp_div(0, &_rr, x, r->p); + assert(MP_EQ(_r, _rr)); +#endif + } + } + } + + /* --- Finishing touches --- */ + + MP_SHRINK(x); + if (MP_CMP(x, >=, r->p)) + x = mp_sub(x, x, r->p); + + /* --- Done --- */ + +#ifdef DEBUG + assert(MP_EQ(_r, x)); + mp_drop(_r); + mp_drop(_rr); +#endif + return (x); +} + +/* --- @mpreduce_exp@ --- * + * + * Arguments: @mpreduce *mr@ = pointer to reduction context + * @mp *d@ = fake destination + * @mp *a@ = base + * @mp *e@ = exponent + * + * Returns: Result, %$a^e \bmod m$%. + */ + +mp *mpreduce_exp(mpreduce *mr, mp *d, mp *a, mp *e) +{ + mp *x = MP_ONE; + mp *spare = (e->f & MP_BURN) ? MP_NEWSEC : MP_NEW; + + MP_SHRINK(e); + MP_COPY(a); + if (MP_ZEROP(e)) + ; + else { + if (MP_NEGP(e)) + a = mp_modinv(a, a, mr->p); + if (MP_LEN(e) < EXP_THRESH) + EXP_SIMPLE(x, a, e); + else + EXP_WINDOW(x, a, e); + } + mp_drop(a); + mp_drop(d); + mp_drop(spare); + return (x); +} + +/*----- Test rig ----------------------------------------------------------*/ + + +#ifdef TEST_RIG + +#define MP(x) mp_readstring(MP_NEW, #x, 0, 0) + +static int vreduce(dstr *v) +{ + mp *d = *(mp **)v[0].buf; + mp *n = *(mp **)v[1].buf; + mp *r = *(mp **)v[2].buf; + mp *c; + int ok = 1; + mpreduce rr; + + mpreduce_create(&rr, d); + c = mpreduce_do(&rr, MP_NEW, n); + if (!MP_EQ(c, r)) { + fprintf(stderr, "\n*** reduction failed\n*** "); + mpreduce_dump(&rr, stderr); + fprintf(stderr, "\n*** n = "); mp_writefile(n, stderr, 10); + fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 10); + fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 10); + fprintf(stderr, "\n"); + ok = 0; + } + mpreduce_destroy(&rr); + mp_drop(n); mp_drop(d); mp_drop(r); mp_drop(c); + assert(mparena_count(MPARENA_GLOBAL) == 0); + return (ok); +} + +static int vmodexp(dstr *v) +{ + mp *p = *(mp **)v[0].buf; + mp *g = *(mp **)v[1].buf; + mp *x = *(mp **)v[2].buf; + mp *r = *(mp **)v[3].buf; + mp *c; + int ok = 1; + mpreduce rr; + + mpreduce_create(&rr, p); + c = mpreduce_exp(&rr, MP_NEW, g, x); + if (!MP_EQ(c, r)) { + fprintf(stderr, "\n*** modexp failed\n*** "); + fprintf(stderr, "\n*** p = "); mp_writefile(p, stderr, 10); + fprintf(stderr, "\n*** g = "); mp_writefile(g, stderr, 10); + fprintf(stderr, "\n*** x = "); mp_writefile(x, stderr, 10); + fprintf(stderr, "\n*** c = "); mp_writefile(c, stderr, 10); + fprintf(stderr, "\n*** r = "); mp_writefile(r, stderr, 10); + fprintf(stderr, "\n"); + ok = 0; + } + mpreduce_destroy(&rr); + mp_drop(p); mp_drop(g); mp_drop(r); mp_drop(x); mp_drop(c); + assert(mparena_count(MPARENA_GLOBAL) == 0); + return (ok); +} + +static test_chunk defs[] = { + { "reduce", vreduce, { &type_mp, &type_mp, &type_mp, 0 } }, + { "modexp", vmodexp, { &type_mp, &type_mp, &type_mp, &type_mp, 0 } }, + { 0, 0, { 0 } } +}; + +int main(int argc, char *argv[]) +{ + test_run(argc, argv, defs, SRCDIR"/t/mpreduce"); + return (0); +} + +#endif + +/*----- That's all, folks -------------------------------------------------*/