X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/0e70bd46b0d4f50d5ec23e23e646d58f5ba0b23d..a1a9ee0a7240087e202a7855e470573de0e59c09:/math/mpmont.c

diff --git a/math/mpmont.c b/math/mpmont.c
index b95e1cfe..2ed1e113 100644
--- a/math/mpmont.c
+++ b/math/mpmont.c
@@ -27,6 +27,8 @@
 
 /*----- Header files ------------------------------------------------------*/
 
+#include "config.h"
+#include "dispatch.h"
 #include "mp.h"
 #include "mpmont.h"
 
@@ -40,6 +42,8 @@
 
 /* #define MPMONT_DISABLE */
 
+#define MPMONT_KTHRESH (16*MPK_THRESH)
+
 /*----- Low-level implementation ------------------------------------------*/
 
 #ifndef MPMONT_DISABLE
@@ -58,8 +62,12 @@
  *              least %$2 n + 1$% words of result.
  */
 
-static void redccore(mpw *dv, mpw *dvl, const mpw *mv,
-                     size_t n, const mpw *mi)
+CPU_DISPATCH(static, (void), void, redccore,
+             (mpw *dv, mpw *dvl, const mpw *mv, size_t n, const mpw *mi),
+             (dv, dvl, mv, n, mi), pick_redccore, simple_redccore);
+
+static void simple_redccore(mpw *dv, mpw *dvl, const mpw *mv,
+                            size_t n, const mpw *mi)
 {
   mpw mi0 = *mi;
   size_t i;
@@ -70,6 +78,43 @@ static void redccore(mpw *dv, mpw *dvl, const mpw *mv,
   }
 }
 
+#define MAYBE_REDC4(impl) \
+  extern void mpxmont_redc4_##impl(mpw *dv, mpw *dvl, const mpw *mv, \
+                                   size_t n, const mpw *mi); \
+  static void maybe_redc4_##impl(mpw *dv, mpw *dvl, const mpw *mv, \
+                                 size_t n, const mpw *mi) \
+  { \
+    if (n%4) simple_redccore(dv, dvl, mv, n, mi); \
+    else mpxmont_redc4_##impl(dv, dvl, mv, n, mi); \
+  }
+
+#if CPUFAM_X86
+  MAYBE_REDC4(x86_sse2)
+  MAYBE_REDC4(x86_avx)
+#endif
+
+#if CPUFAM_AMD64
+  MAYBE_REDC4(amd64_sse2)
+  MAYBE_REDC4(amd64_avx)
+#endif
+
+static redccore__functype *pick_redccore(void)
+{
+#if CPUFAM_X86
+  DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_x86_avx,
+                     cpu_feature_p(CPUFEAT_X86_AVX));
+  DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_x86_sse2,
+                     cpu_feature_p(CPUFEAT_X86_SSE2));
+#endif
+#if CPUFAM_AMD64
+  DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_amd64_avx,
+                     cpu_feature_p(CPUFEAT_X86_AVX));
+  DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_amd64_sse2,
+                     cpu_feature_p(CPUFEAT_X86_SSE2));
+#endif
+  DISPATCH_PICK_FALLBACK(mpmont_reduce, simple_redccore);
+}
+
 /* --- @redccore@ --- *
  *
  * Arguments:   @mpw *dv, *dvl@ = base and limit of source/destination
@@ -85,10 +130,17 @@ static void redccore(mpw *dv, mpw *dvl, const mpw *mv,
  *              Store in %$d$% the value %$a b + (m' a b \bmod R) m$%.
  */
 
-static void mulcore(mpw *dv, mpw *dvl,
-                    const mpw *av, const mpw *avl,
-                    const mpw *bv, const mpw *bvl,
-                    const mpw *mv, size_t n, const mpw *mi)
+CPU_DISPATCH(static, (void), void, mulcore,
+             (mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
+              const mpw *bv, const mpw *bvl, const mpw *mv,
+              size_t n, const mpw *mi),
+             (dv, dvl, av, avl, bv, bvl, mv, n, mi),
+             pick_mulcore, simple_mulcore);
+
+static void simple_mulcore(mpw *dv, mpw *dvl,
+                           const mpw *av, const mpw *avl,
+                           const mpw *bv, const mpw *bvl,
+                           const mpw *mv, size_t n, const mpw *mi)
 {
   mpw ai, b0, y, mi0 = *mi;
   const mpw *tv, *tvl;
@@ -123,9 +175,56 @@ static void mulcore(mpw *dv, mpw *dvl,
   }
 }
 
+#define MAYBE_MUL4(impl) \
+  extern void mpxmont_mul4_##impl(mpw *dv, \
+                                  const mpw *av, const mpw *bv, \
+                                  const mpw *mv, \
+                                  size_t n, const mpw *mi); \
+  static void maybe_mul4_##impl(mpw *dv, mpw *dvl, \
+                                const mpw *av, const mpw *avl, \
+                                const mpw *bv, const mpw *bvl, \
+                                const mpw *mv, size_t n, const mpw *mi) \
+  { \
+    size_t an = avl - av, bn = bvl - bv; \
+    if (n%4 || an != n || bn != n) \
+      simple_mulcore(dv, dvl, av, avl, bv, bvl, mv, n, mi); \
+    else { \
+      mpxmont_mul4_##impl(dv, av, bv, mv, n, mi); \
+      MPX_ZERO(dv + 2*n + 1, dvl); \
+    } \
+  }
+
+#if CPUFAM_X86
+  MAYBE_MUL4(x86_sse2)
+  MAYBE_MUL4(x86_avx)
+#endif
+
+#if CPUFAM_AMD64
+  MAYBE_MUL4(amd64_sse2)
+  MAYBE_MUL4(amd64_avx)
+#endif
+
+static mulcore__functype *pick_mulcore(void)
+{
+#if CPUFAM_X86
+  DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_avx,
+                     cpu_feature_p(CPUFEAT_X86_AVX));
+  DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_sse2,
+                     cpu_feature_p(CPUFEAT_X86_SSE2));
+#endif
+#if CPUFAM_AMD64
+  DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_amd64_avx,
+                     cpu_feature_p(CPUFEAT_X86_AVX));
+  DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_amd64_sse2,
+                     cpu_feature_p(CPUFEAT_X86_SSE2));
+#endif
+  DISPATCH_PICK_FALLBACK(mpmont_mul, simple_mulcore);
+}
+
 /* --- @finish@ --- *
  *
- * Arguments:   @mpmont *mm@ = pointer to a Montgomery reduction context
+ * Arguments:   @const mpmont *mm@ = pointer to a Montgomery reduction
+ *                      context
  *              *mp *d@ = pointer to mostly-reduced operand
  *
  * Returns:     ---
@@ -137,7 +236,7 @@ static void mulcore(mpw *dv, mpw *dvl,
  *              need to do an additional subtraction if %$d$% is negative.
  */
 
-static void finish(mpmont *mm, mp *d)
+static void finish(const mpmont *mm, mp *d)
 {
   mpw *dv = d->v, *dvl = d->vl;
   size_t n = mm->n;
@@ -209,6 +308,7 @@ int mpmont_create(mpmont *mm, mp *m)
   mp_build(&r, r2->v + n, r2->vl);
   mm->mi = mp_modinv(MP_NEW, m, &r);
   mm->mi = mp_sub(mm->mi, &r, mm->mi);
+  MP_ENSURE(mm->mi, n);
 
   /* --- Discover the values %$R \bmod m$% and %$R^2 \bmod m$% --- */
 
@@ -241,7 +341,7 @@ void mpmont_destroy(mpmont *mm)
 
 /* --- @mpmont_reduce@ --- *
  *
- * Arguments:   @mpmont *mm@ = pointer to Montgomery reduction context
+ * Arguments:   @const mpmont *mm@ = pointer to Montgomery reduction context
  *              @mp *d@ = destination
  *              @mp *a@ = source, assumed positive
 *
@@ -250,7 +350,7 @@ void mpmont_destroy(mpmont *mm)
 
 #ifdef MPMONT_DISABLE
 
-mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
+mp *mpmont_reduce(const mpmont *mm, mp *d, mp *a)
 {
   mp_div(0, &d, a, mm->m);
   return (d);
@@ -258,13 +358,13 @@ mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
 
 #else
 
-mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
+mp *mpmont_reduce(const mpmont *mm, mp *d, mp *a)
 {
   size_t n = mm->n;
 
   /* --- Check for serious Karatsuba reduction --- */
 
-  if (n > MPK_THRESH * 3) {
+  if (n > MPMONT_KTHRESH) {
     mp al;
     mpw *vl;
     mp *u;
@@ -300,7 +400,7 @@ mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
 
 /* --- @mpmont_mul@ --- *
  *
- * Arguments:   @mpmont *mm@ = pointer to Montgomery reduction context
+ * Arguments:   @const mpmont *mm@ = pointer to Montgomery reduction context
 *               @mp *d@ = destination
 *               @mp *a, *b@ = sources, assumed positive
 *
@@ -309,7 +409,7 @@ mp *mpmont_reduce(mpmont *mm, mp *d, mp *a)
 
 #ifdef MPMONT_DISABLE
 
-mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b)
+mp *mpmont_mul(const mpmont *mm, mp *d, mp *a, mp *b)
 {
   d = mp_mul(d, a, b);
   mp_div(0, &d, d, mm->m);
@@ -318,15 +418,16 @@ mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b)
 
 #else
 
-mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b)
+mp *mpmont_mul(const mpmont *mm, mp *d, mp *a, mp *b)
 {
-  if (mm->n > MPK_THRESH * 3) {
+  size_t n = mm->n;
+
+  if (n > MPMONT_KTHRESH) {
     d = mp_mul(d, a, b);
     d = mpmont_reduce(mm, d, d);
   } else {
-    a = MP_COPY(a);
-    b = MP_COPY(b);
-    MP_DEST(d, 2*mm->n + 1, a->f | b->f | MP_UNDEF);
+    a = MP_COPY(a); b = MP_COPY(b);
+    MP_DEST(d, 2*n + 1, a->f | b->f | MP_UNDEF);
     mulcore(d->v, d->vl, a->v, a->vl, b->v, b->vl,
             mm->m->v, mm->n, mm->mi->v);
     d->f = ((a->f | b->f) & MP_BURN) | ((a->f ^ b->f) & MP_NEG);
@@ -343,6 +444,10 @@ mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b)
 
 #ifdef TEST_RIG
 
+#ifdef ENABLE_ASM_DEBUG
+#  include "regdump.h"
+#endif
+
 static int tcreate(dstr *v)
 {
   mp *m = *(mp **)v[0].buf;
@@ -438,7 +543,6 @@ static int tmul(dstr *v)
     mp_drop(mr);
   }
 
-  MP_DROP(m);
   MP_DROP(a);
   MP_DROP(b);
 
@@ -457,6 +561,9 @@ static test_chunk tests[] = {
 int main(int argc, char *argv[])
 {
   sub_init();
+#ifdef ENABLE_ASM_DEBUG
+  regdump_init();
+#endif
   test_run(argc, argv, tests, SRCDIR "/t/mpmont");
   return (0);
 }
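A note on the dispatch machinery used above: CPU_DISPATCH, DISPATCH_PICK_COND and DISPATCH_PICK_FALLBACK come from Catacomb's "dispatch.h" (hence the new #include in the first hunk), and their exact expansion is not shown in this diff.  The general idiom they implement (probe the CPU once, remember the chosen implementation in a function pointer, and call through it thereafter) can be sketched in portable C roughly as follows.  All names in the sketch are invented for illustration and are not Catacomb's API.

    /* Illustrative only: a lazy function-pointer dispatcher in portable C.
     * This is not Catacomb's actual CPU_DISPATCH expansion. */

    #include <stddef.h>
    #include <stdio.h>

    typedef void op_fn(unsigned *v, size_t n);

    /* Portable fallback, always correct. */
    static void op_portable(unsigned *v, size_t n)
      { size_t i; for (i = 0; i < n; i++) v[i]++; }

    /* Accelerated variant, only usable if the CPU supports the feature. */
    static void op_fancy(unsigned *v, size_t n)
      { size_t i; for (i = 0; i < n; i++) v[i]++; }

    /* Stand-in for a runtime probe such as cpu_feature_p(...). */
    static int have_fancy_feature(void) { return (0); }

    /* Decide once which implementation to use. */
    static op_fn *pick_op(void)
      { return (have_fancy_feature() ? op_fancy : op_portable); }

    /* Entry point: resolve on first call, then forward through the pointer.
     * (A real dispatcher must also consider thread-safety at this point.) */
    static void op(unsigned *v, size_t n)
    {
      static op_fn *ptr = 0;
      if (!ptr) ptr = pick_op();
      ptr(v, n);
    }

    int main(void)
    {
      unsigned v[4] = { 1, 2, 3, 4 };
      op(v, 4);
      printf("%u %u %u %u\n", v[0], v[1], v[2], v[3]);
      return (0);
    }

The maybe_redc4_* and maybe_mul4_* wrappers in the patch add a second layer of fallback on top of this: even once an SSE2 or AVX kernel has been selected, operands whose length is not a multiple of four words (or, for multiplication, whose factors are not exactly n words long) are still routed to the portable simple_* routine.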
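The comments above describe mulcore's output as %$a b + (m' a b \bmod R) m$%, which is the classic Montgomery (REDC) construction: add a multiple of %$m$% chosen so that the sum is divisible by %$R$%, then shift and conditionally subtract.  The following standalone sketch shows the same step on a single 32-bit word, so that %$R = 2^{32}$%; it does not use Catacomb's mp types, and the names negmodinv32 and redc are invented for the example.

    /* Illustrative only: Montgomery reduction (REDC) on a single 32-bit
     * word, with R = 2^32.  Not Catacomb code. */

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Return -m^{-1} mod 2^32 for odd m, by Hensel/Newton iteration: each
     * step doubles the number of correct low-order bits (3, 6, 12, 24, 48). */
    static uint32_t negmodinv32(uint32_t m)
    {
      uint32_t x = m;                       /* m*m == 1 (mod 8) */
      x *= 2 - m*x; x *= 2 - m*x;
      x *= 2 - m*x; x *= 2 - m*x;
      return (-x);
    }

    /* Given 0 <= a < m*2^32, return a/2^32 mod m.  By choice of q, the sum
     * a + q*m is divisible by 2^32, so the shift loses nothing; the sum is
     * formed with an explicit carry so it cannot overflow 64 bits. */
    static uint32_t redc(uint64_t a, uint32_t m, uint32_t mi)
    {
      uint32_t q = (uint32_t)a*mi;                       /* q = -a/m mod 2^32 */
      uint64_t qm = (uint64_t)q*m;
      uint64_t lo = (uint32_t)a + (uint64_t)(uint32_t)qm; /* low 32 bits are 0 */
      uint64_t t = (a >> 32) + (qm >> 32) + (lo >> 32);   /* (a + q m)/2^32 */
      return ((uint32_t)(t >= m ? t - m : t));     /* t < 2 m: one subtraction */
    }

    int main(void)
    {
      uint32_t m = 0xfffffffbu;             /* any odd modulus will do */
      uint32_t mi = negmodinv32(m);
      uint32_t a = 0x12345678u, b = 0x0abcdef1u;

      /* Montgomery forms of a and b: aR mod m, bR mod m. */
      uint32_t aR = (uint32_t)(((uint64_t)a << 32)%m);
      uint32_t bR = (uint32_t)(((uint64_t)b << 32)%m);

      /* One REDC turns aR*bR into abR (still in Montgomery form); a second
       * strips the remaining factor of R. */
      uint32_t abR = redc((uint64_t)aR*bR, m, mi);
      uint32_t ab = redc(abR, m, mi);

      assert(ab == (uint32_t)(((uint64_t)a*b)%m));
      printf("a*b mod m = 0x%08lx\n", (unsigned long)ab);
      return (0);
    }

The multiprecision routines in the patch do the corresponding work a word at a time on mp values, with mpmont_reduce and finish supplying the shift by %$R$% and the final conditional subtraction.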