From 0e70bd46b0d4f50d5ec23e23e646d58f5ba0b23d Mon Sep 17 00:00:00 2001
From: Mark Wooding <mdw@distorted.org.uk>
Date: Mon, 12 Sep 2016 22:32:37 +0100
Subject: [PATCH] math/mpmont.c: Factor out the computational core of the
 algorithm.

Surprisingly, this makes everything a little simpler.
---
 math/mpmont.c | 226 ++++++++++++++++++++++++++++++++++------------------------
 math/t/mpmont |  10 +++
 2 files changed, 142 insertions(+), 94 deletions(-)
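[Editor's note -- not part of the patch.  Before the diff itself, here is
a self-contained toy model of the word-at-a-time REDC loop which
`redccore' below implements, shrunk to 16-bit words and a two-word
modulus so that all the arithmetic fits in ordinary C integers.  Every
name in it (`umlan', `mi0', `WBITS', ...) is an invented stand-in for
the corresponding Catacomb machinery (`MPX_UMLAN', `mm->mi->v[0]', ...),
not the library's actual API.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint16_t word;          /* toy `mpw' */
#define WBITS 16                /* bits per word; the base is b = 2^16 */
#define N 2                     /* modulus length; R = b^N = 2^32 */
#define DLEN (2*N + 1)          /* result space, as the patch requires */

/* Add y*m into d starting at word i, carrying to the end of d:
 * a simplified stand-in for MPX_UMLAN. */
static void umlan(word *d, size_t i, const word *m, word y)
{
  uint32_t c = 0;
  size_t j;

  for (j = 0; j < N; j++) {
    c += d[i + j] + (uint32_t)y*m[j];
    d[i + j] = (word)c; c >>= WBITS;
  }
  for (j = i + N; c && j < DLEN; j++) {
    c += d[j]; d[j] = (word)c; c >>= WBITS;
  }
}

int main(void)
{
  const uint64_t m64 = 4294967291u;     /* m = 2^32 - 5: odd, near 2^32 */
  const uint64_t a64 = 0x0123456789abcdefu; /* operand; note a < R m */
  word m[N], d[DLEN] = { 0 };
  uint32_t inv;
  word mi0;
  uint64_t r = 0;
  size_t i;

  for (i = 0; i < N; i++) m[i] = (word)(m64 >> (WBITS*i));
  for (i = 0; i < 2*N; i++) d[i] = (word)(a64 >> (WBITS*i));

  /* mi0 = -1/m mod b, by Hensel lifting: each step x <- x (2 - m x)
   * doubles the number of correct low-order bits. */
  inv = 1;
  for (i = 0; i < 4; i++) inv *= 2 - (uint32_t)m[0]*inv;
  mi0 = (word)(0u - inv);

  /* The `redccore' loop: clear each low word of d in turn by adding a
   * suitable multiple of m; d stays congruent to a mod m throughout. */
  for (i = 0; i < N; i++)
    umlan(d, i, m, (word)((uint32_t)d[i]*mi0));

  /* The `finish' step: divide by R, then subtract m at most once. */
  for (i = DLEN; i-- > N; ) r = (r << WBITS) | d[i];
  if (r >= m64) r -= m64;

  /* Montgomery's defining property: REDC(a)*R = a (mod m). */
  assert(r < m64 && (r << 32) % m64 == a64 % m64);
  printf("REDC(%llx) = %llx\n",
         (unsigned long long)a64, (unsigned long long)r);
  return (0);
}

[The assertion holds because each pass adds a multiple of m chosen to
clear the bottom word, so the final value is divisible by R and still
congruent to a mod m; dividing by R therefore yields a R^{-1} mod m.
One conditional subtraction suffices because a < R m implies the
shifted value is below 2m -- which is also why `finish' in the patch
needs at most one `mpx_usub'.]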
diff --git a/math/mpmont.c b/math/mpmont.c
index 1f9143b6..b95e1cfe 100644
--- a/math/mpmont.c
+++ b/math/mpmont.c
@@ -40,6 +40,125 @@
 
 /* #define MPMONT_DISABLE */
 
+/*----- Low-level implementation ------------------------------------------*/
+
+#ifndef MPMONT_DISABLE
+
+/* --- @redccore@ --- *
+ *
+ * Arguments:   @mpw *dv, *dvl@ = base and limit of source/destination
+ *              @const mpw *mv@ = base of modulus %$m$%
+ *              @size_t n@ = length of modulus
+ *              @const mpw *mi@ = base of REDC coefficient %$m'$%
+ *
+ * Returns:     ---
+ *
+ * Use:         Let %$a$% be the input operand.  Store in %$d$% the value
+ *              %$a + (m' a \bmod R) m$%.  The destination has space for at
+ *              least %$2 n + 1$% words of result.
+ */
+
+static void redccore(mpw *dv, mpw *dvl, const mpw *mv,
+                     size_t n, const mpw *mi)
+{
+  mpw mi0 = *mi;
+  size_t i;
+
+  for (i = 0; i < n; i++) {
+    MPX_UMLAN(dv, dvl, mv, mv + n, MPW(*dv*mi0));
+    dv++;
+  }
+}
+
+/* --- @mulcore@ --- *
+ *
+ * Arguments:   @mpw *dv, *dvl@ = base and limit of source/destination
+ *              @const mpw *av, *avl@ = base and limit of first multiplicand
+ *              @const mpw *bv, *bvl@ = base and limit of second multiplicand
+ *              @const mpw *mv@ = base of modulus %$m$%
+ *              @size_t n@ = length of modulus
+ *              @const mpw *mi@ = base of REDC coefficient %$m'$%
+ *
+ * Returns:     ---
+ *
+ * Use:         Let %$a$% and %$b$% be the multiplicands.  Let %$w = a b$%.
+ *              Store in %$d$% the value %$w + (m' w \bmod R) m$%.
+ */
+
+static void mulcore(mpw *dv, mpw *dvl,
+                    const mpw *av, const mpw *avl,
+                    const mpw *bv, const mpw *bvl,
+                    const mpw *mv, size_t n, const mpw *mi)
+{
+  mpw ai, b0, y, mi0 = *mi;
+  const mpw *tv, *tvl;
+  const mpw *mvl = mv + n;
+  size_t i = 0;
+
+  /* --- Initial setup --- */
+
+  MPX_ZERO(dv, dvl);
+  if (avl - av > bvl - bv) {
+    tv = av; av = bv; bv = tv;
+    tvl = avl; avl = bvl; bvl = tvl;
+  }
+  b0 = *bv;
+
+  /* --- Multiply, until we run out of multiplicand --- */
+
+  while (i < n && av < avl) {
+    ai = *av++;
+    y = MPW((*dv + ai*b0)*mi0);
+    MPX_UMLAN(dv, dvl, bv, bvl, ai);
+    MPX_UMLAN(dv, dvl, mv, mvl, y);
+    dv++; i++;
+  }
+
+  /* --- Continue reducing until we run out of modulus --- */
+
+  while (i < n) {
+    y = MPW(*dv*mi0);
+    MPX_UMLAN(dv, dvl, mv, mvl, y);
+    dv++; i++;
+  }
+}
+
+/* --- @finish@ --- *
+ *
+ * Arguments:   @mpmont *mm@ = pointer to a Montgomery reduction context
+ *              @mp *d@ = pointer to mostly-reduced operand
+ *
+ * Returns:     ---
+ *
+ * Use:         Applies the finishing touches to Montgomery reduction.  The
+ *              operand @d@ is a multiple of %$R$% at this point, so it
+ *              needs to be shifted down; the result might need a further
+ *              subtraction to get it into the right interval; and we may
+ *              need to do an additional subtraction if %$d$% is negative.
+ */
+
+static void finish(mpmont *mm, mp *d)
+{
+  mpw *dv = d->v, *dvl = d->vl;
+  size_t n = mm->n;
+
+  memmove(dv, dv + n, MPWS(dvl - (dv + n)));
+  dvl -= n;
+
+  if (MPX_UCMP(dv, dvl, >=, mm->m->v, mm->m->vl))
+    mpx_usub(dv, dvl, dv, dvl, mm->m->v, mm->m->vl);
+
+  if (d->f & MP_NEG) {
+    mpx_usub(dv, dvl, mm->m->v, mm->m->vl, dv, dvl);
+    d->f &= ~MP_NEG;
+  }
+
+  d->vl = dvl;
+  MP_SHRINK(d);
+}
+
+#endif
+
 /*----- Reduction and multiplication --------------------------------------*/
 
 /* --- @mpmont_create@ --- *
@@ -150,14 +269,11 @@ mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
     mpw *vl;
     mp *u;
 
-    if (MP_LEN(a) >= n)
-      vl = a->v + n;
-    else
-      vl = a->vl;
+    if (MP_LEN(a) >= n) vl = a->v + n;
+    else vl = a->vl;
     mp_build(&al, a->v, vl);
     u = mp_mul(MP_NEW, &al, mm->mi);
-    if (MP_LEN(u) > n)
-      u->vl = u->v + n;
+    if (MP_LEN(u) > n) u->vl = u->v + n;
     u = mp_mul(u, u, mm->m);
     d = mp_add(d, a, u);
     MP_ENSURE(d, n);
@@ -167,43 +283,16 @@ mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
   /* --- Otherwise do it the hard way --- */
 
   else {
-    mpw *dv, *dvl;
-    mpw *mv, *mvl;
-    mpw mi;
-    size_t k = n;
-
-    /* --- Initial conditioning of the arguments --- */
-
     a = MP_COPY(a);
-    if (d)
-      MP_DROP(d);
+    if (d) MP_DROP(d);
     d = a;
-    MP_DEST(d, 2 * n + 1, a->f);
-
-    dv = d->v; dvl = d->vl;
-    mv = mm->m->v; mvl = mm->m->vl;
-
-    /* --- Let's go to work --- */
-
-    mi = mm->mi->v[0];
-    while (k--) {
-      mpw u = MPW(*dv * mi);
-      MPX_UMLAN(dv, dvl, mv, mvl, u);
-      dv++;
-    }
+    MP_DEST(d, 2*mm->n + 1, a->f);
+    redccore(d->v, d->vl, mm->m->v, mm->n, mm->mi->v);
   }
 
   /* --- Wrap everything up --- */
 
-  memmove(d->v, d->v + n, MPWS(MP_LEN(d) - n));
-  d->vl -= n;
-  if (MPX_UCMP(d->v, d->vl, >=, mm->m->v, mm->m->vl))
-    mpx_usub(d->v, d->vl, d->v, d->vl, mm->m->v, mm->m->vl);
-  if (d->f & MP_NEG) {
-    mpx_usub(d->v, d->vl, mm->m->v, mm->m->vl, d->v, d->vl);
-    d->f &= ~MP_NEG;
-  }
-  MP_SHRINK(d);
+  finish(mm, d);
   return (d);
 }
 
@@ -235,65 +324,14 @@ mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b)
     d = mp_mul(d, a, b);
     d = mpmont_reduce(mm, d, d);
   } else {
-    mpw *dv, *dvl;
-    mpw *av, *avl;
-    mpw *bv, *bvl;
-    mpw *mv, *mvl;
-    mpw y;
-    size_t n, i;
-    mpw mi;
-
-    /* --- Initial conditioning of the arguments --- */
-
-    if (MP_LEN(a) > MP_LEN(b)) {
-      mp *t = a; a = b; b = t;
-    }
-    n = MP_LEN(mm->m);
-
     a = MP_COPY(a); b = MP_COPY(b);
-    MP_DEST(d, 2 * n + 1, a->f | b->f | MP_UNDEF);
-    dv = d->v; dvl = d->vl;
-    MPX_ZERO(dv, dvl);
-    av = a->v; avl = a->vl;
-    bv = b->v; bvl = b->vl;
-    mv = mm->m->v; mvl = mm->m->vl;
-    y = *bv;
-
-    /* --- Montgomery multiplication phase --- */
-
-    i = 0;
-    mi = mm->mi->v[0];
-    while (i < n && av < avl) {
-      mpw x = *av++;
-      mpw u = MPW((*dv + x * y) * mi);
-      MPX_UMLAN(dv, dvl, bv, bvl, x);
-      MPX_UMLAN(dv, dvl, mv, mvl, u);
-      dv++;
-      i++;
-    }
-
-    /* --- Simpler Montgomery reduction phase --- */
-
-    while (i < n) {
-      mpw u = MPW(*dv * mi);
-      MPX_UMLAN(dv, dvl, mv, mvl, u);
-      dv++;
-      i++;
-    }
-
-    /* --- Done --- */
-
-    memmove(d->v, dv, MPWS(dvl - dv));
-    d->vl -= dv - d->v;
-    if (MPX_UCMP(d->v, d->vl, >=, mm->m->v, mm->m->vl))
-      mpx_usub(d->v, d->vl, d->v, d->vl, mm->m->v, mm->m->vl);
-    if ((a->f ^ b->f) & MP_NEG)
-      mpx_usub(d->v, d->vl, mm->m->v, mm->m->vl, d->v, d->vl);
-    MP_SHRINK(d);
-    d->f = (a->f | b->f) & MP_BURN;
-    MP_DROP(a);
-    MP_DROP(b);
+    MP_DEST(d, 2*mm->n + 1, a->f | b->f | MP_UNDEF);
+    mulcore(d->v, d->vl, a->v, a->vl, b->v, b->vl,
+            mm->m->v, mm->n, mm->mi->v);
+    d->f = ((a->f | b->f) & MP_BURN) | ((a->f ^ b->f) & MP_NEG);
+    finish(mm, d);
+    MP_DROP(a); MP_DROP(b);
  }
 
   return (d);
 }
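[Editor's note -- not part of the patch.  The interleaved loop in
`mulcore' above can be modelled in the same toy setting as the REDC
sketch after the diffstat (16-bit words, N = 2, R = 2^32), reusing the
invented `word', `WBITS', `DLEN' and `umlan' from there; `monty_mul' is
likewise a made-up name.  For simplicity both operands are exactly N
words, so the separate tail-reduction loop of `mulcore' never runs.
The point of the interleaving is that each pass adds one word-multiple
of b and immediately a cancelling multiple of m, so the low words
vanish as soon as they are complete and the accumulator never needs
more than 2N + 1 words.]

/* Interleaved multiply-and-reduce, as in `mulcore': at step i, add
 * a[i]*b shifted up i words, then y*m with y chosen to clear d[i].
 * Returns a*b/R mod m, for a, b < m and mi0 = -1/m mod 2^16. */
static uint64_t monty_mul(const word *a, const word *b,
                          const word *m, word mi0, uint64_t m64)
{
  word d[DLEN] = { 0 };
  uint32_t t;
  word y;
  uint64_t r = 0;
  size_t i;

  for (i = 0; i < N; i++) {
    /* Predict the next low word, d[i] + a[i]*b[0] mod 2^16, and pick
     * y to cancel it -- the analogue of MPW((*dv + ai*b0)*mi0). */
    t = d[i] + (uint32_t)a[i]*b[0];
    y = (word)((uint32_t)(word)t*mi0);
    umlan(d, i, b, a[i]);       /* d += a[i] b 2^(16 i) */
    umlan(d, i, m, y);          /* d += y m 2^(16 i); d[i] is now 0 */
  }

  for (i = DLEN; i-- > N; ) r = (r << WBITS) | d[i];    /* divide by R */
  if (r >= m64) r -= m64;       /* a b < m^2 < R m, so once is enough */
  return (r);
}

[With m = 2^32 - 5 as before and operands a, b < m split into word
arrays av[], bv[], the check
  assert((monty_mul(av, bv, m, mi0, m64) << 32) % m64 == a*b % m64);
passes, the product a*b fitting comfortably in 64 bits.]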
diff --git a/math/t/mpmont b/math/t/mpmont
index 37535af7..81fbe634 100644
--- a/math/t/mpmont
+++ b/math/t/mpmont
@@ -5,6 +5,11 @@ create {
   266454859                            # -m^{-1} mod b
   130655606683780235388773757767708    # R mod m
   237786678640282040194246459306177;   # R^2 mod m
+
+  6277101735386680763835789423207666416083908700390324961279
+  340282366920938463444927863358058659841
+  18446744073709551617
+  340282366920938463500268095579187314689;
 }
 
 mul {
@@ -13,6 +18,11 @@ mul {
   6456542564
   10807149256;
 
+  6277101735386680763835789423207666416083908700390324961279
+  2455155546008943817740293915197451784769108058161191238065
+  340282366920938463500268095579187314689
+  5646741895976341600220572388698743135318229029826089708489;
+
   51518627314818829164222247085233898246715229794943812733936714788310185005015428803253311691709787911812368198649776769324928993075889524373913555618270874746833913595051625422038974326537979654635530320271853851973343513053953211672797425464186157719021174955241645388345195723368057041032310152242301620397
   7041548659011846562361842096561083537784928869240554198760844555642215260669458833049231069318370838770180094409088437631986867239713464317243824963669990014087444248250948204574690463940534304651099653802302150197753463246181762684347288736386534346725039618007392334267637262008343417972878515511486456037
   21451817224897484023627307128311082613304580637202546848860538836010530320943159719981586919811151828606838777812233053319458755053306547823820900602281867134174742586071226220962576712633552196944784360512851517812225731562588375896089193406088239903885470354101095713609394462435076126493339021945199401247
-- 
2.11.0
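[Editor's note -- not part of the patch.  The new `create' vector can be
checked by hand: the modulus is the NIST P-192 prime
m = 2^192 - 2^64 - 1, and taking R = 2^192,

  2^192 = m + 2^64 + 1,  so  R mod m = 2^64 + 1
                                     = 18446744073709551617;

  (2^64 + 1)^2 = 2^128 + 2^65 + 1,  so  R^2 mod m
                                     = 340282366920938463500268095579187314689;

  (2^64 + 1)(2^128 - 2^64 + 1) = 2^192 + 1 = 1 (mod R),
  and m = -(2^64 + 1) (mod R),  so  -m^{-1} mod R = 2^128 - 2^64 + 1
                                     = 340282366920938463444927863358058659841.

These are exactly the three values following the modulus.  In the new
`mul' vector the second multiplicand is this same R^2 mod m, so the
expected result is the first multiplicand carried into Montgomery form,
i.e. a R mod m.]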