X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/17de5b2eddc93fee07c7619847e7ee72b41c0476..444083aef7e70ce9afe893a36d72e1a1a976f1ed:/math/mpmont.c diff --git a/math/mpmont.c b/math/mpmont.c index a86623b2..968766d3 100644 --- a/math/mpmont.c +++ b/math/mpmont.c @@ -27,6 +27,8 @@ /*----- Header files ------------------------------------------------------*/ +#include "config.h" +#include "dispatch.h" #include "mp.h" #include "mpmont.h" @@ -58,8 +60,12 @@ * least %$2 n + 1$% words of result. */ -static void redccore(mpw *dv, mpw *dvl, const mpw *mv, - size_t n, const mpw *mi) +CPU_DISPATCH(static, (void), void, redccore, + (mpw *dv, mpw *dvl, const mpw *mv, size_t n, const mpw *mi), + (dv, dvl, mv, n, mi), pick_redccore, simple_redccore); + +static void simple_redccore(mpw *dv, mpw *dvl, const mpw *mv, + size_t n, const mpw *mi) { mpw mi0 = *mi; size_t i; @@ -70,6 +76,29 @@ static void redccore(mpw *dv, mpw *dvl, const mpw *mv, } } +#define MAYBE_REDC4(impl) \ + extern void mpxmont_redc4_##impl(mpw *dv, mpw *dvl, const mpw *mv, \ + size_t n, const mpw *mi); \ + static void maybe_redc4_##impl(mpw *dv, mpw *dvl, const mpw *mv, \ + size_t n, const mpw *mi) \ + { \ + if (n%4) simple_redccore(dv, dvl, mv, n, mi); \ + else mpxmont_redc4_##impl(dv, dvl, mv, n, mi); \ + } + +#if CPUFAM_X86 + MAYBE_REDC4(x86_sse2) +#endif + +static redccore__functype *pick_redccore(void) +{ +#if CPUFAM_X86 + DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_x86_sse2, + cpu_feature_p(CPUFEAT_X86_SSE2)); +#endif + DISPATCH_PICK_FALLBACK(mpmont_reduce, simple_redccore); +} + /* --- @redccore@ --- * * * Arguments: @mpw *dv, *dvl@ = base and limit of source/destination @@ -85,10 +114,17 @@ static void redccore(mpw *dv, mpw *dvl, const mpw *mv, * Store in %$d$% the value %$a b + (m' a b \bmod R) m$%. */ -static void mulcore(mpw *dv, mpw *dvl, - const mpw *av, const mpw *avl, - const mpw *bv, const mpw *bvl, - const mpw *mv, size_t n, const mpw *mi) +CPU_DISPATCH(static, (void), void, mulcore, + (mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, + const mpw *bv, const mpw *bvl, const mpw *mv, + size_t n, const mpw *mi), + (dv, dvl, av, avl, bv, bvl, mv, n, mi), + pick_mulcore, simple_mulcore); + +static void simple_mulcore(mpw *dv, mpw *dvl, + const mpw *av, const mpw *avl, + const mpw *bv, const mpw *bvl, + const mpw *mv, size_t n, const mpw *mi) { mpw ai, b0, y, mi0 = *mi; const mpw *tv, *tvl; @@ -123,6 +159,38 @@ static void mulcore(mpw *dv, mpw *dvl, } } +#define MAYBE_MUL4(impl) \ + extern void mpxmont_mul4_##impl(mpw *dv, \ + const mpw *av, const mpw *bv, \ + const mpw *mv, \ + size_t n, const mpw *mi); \ + static void maybe_mul4_##impl(mpw *dv, mpw *dvl, \ + const mpw *av, const mpw *avl, \ + const mpw *bv, const mpw *bvl, \ + const mpw *mv, size_t n, const mpw *mi) \ + { \ + size_t an = avl - av, bn = bvl - bv; \ + if (n%4 || an != n || bn != n) \ + simple_mulcore(dv, dvl, av, avl, bv, bvl, mv, n, mi); \ + else { \ + mpxmont_mul4_##impl(dv, av, bv, mv, n, mi); \ + MPX_ZERO(dv + 2*n + 1, dvl); \ + } \ + } + +#if CPUFAM_X86 + MAYBE_MUL4(x86_sse2) +#endif + +static mulcore__functype *pick_mulcore(void) +{ +#if CPUFAM_X86 + DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_sse2, + cpu_feature_p(CPUFEAT_X86_SSE2)); +#endif + DISPATCH_PICK_FALLBACK(mpmont_mul, simple_mulcore); +} + /* --- @finish@ --- * * * Arguments: @mpmont *mm@ = pointer to a Montgomery reduction context @@ -321,13 +389,14 @@ mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b) mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b) { - if (mm->n > MPK_THRESH * 3) { + size_t n = mm->n; + + if (n > MPK_THRESH * 3) { d = mp_mul(d, a, b); d = mpmont_reduce(mm, d, d); } else { - a = MP_COPY(a); - b = MP_COPY(b); - MP_DEST(d, 2*mm->n + 1, a->f | b->f | MP_UNDEF); + a = MP_COPY(a); b = MP_COPY(b); + MP_DEST(d, 2*n + 1, a->f | b->f | MP_UNDEF); mulcore(d->v, d->vl, a->v, a->vl, b->v, b->vl, mm->m->v, mm->n, mm->mi->v); d->f = ((a->f | b->f) & MP_BURN) | ((a->f ^ b->f) & MP_NEG);