X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/444083aef7e70ce9afe893a36d72e1a1a976f1ed..a1a9ee0a7240087e202a7855e470573de0e59c09:/math/mpmont.c diff --git a/math/mpmont.c b/math/mpmont.c index 968766d3..2ed1e113 100644 --- a/math/mpmont.c +++ b/math/mpmont.c @@ -42,6 +42,8 @@ /* #define MPMONT_DISABLE */ +#define MPMONT_KTHRESH (16*MPK_THRESH) + /*----- Low-level implementation ------------------------------------------*/ #ifndef MPMONT_DISABLE @@ -88,14 +90,28 @@ static void simple_redccore(mpw *dv, mpw *dvl, const mpw *mv, #if CPUFAM_X86 MAYBE_REDC4(x86_sse2) + MAYBE_REDC4(x86_avx) +#endif + +#if CPUFAM_AMD64 + MAYBE_REDC4(amd64_sse2) + MAYBE_REDC4(amd64_avx) #endif static redccore__functype *pick_redccore(void) { #if CPUFAM_X86 + DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_x86_avx, + cpu_feature_p(CPUFEAT_X86_AVX)); DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_x86_sse2, cpu_feature_p(CPUFEAT_X86_SSE2)); #endif +#if CPUFAM_AMD64 + DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_amd64_avx, + cpu_feature_p(CPUFEAT_X86_AVX)); + DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_amd64_sse2, + cpu_feature_p(CPUFEAT_X86_SSE2)); +#endif DISPATCH_PICK_FALLBACK(mpmont_reduce, simple_redccore); } @@ -180,20 +196,35 @@ static void simple_mulcore(mpw *dv, mpw *dvl, #if CPUFAM_X86 MAYBE_MUL4(x86_sse2) + MAYBE_MUL4(x86_avx) +#endif + +#if CPUFAM_AMD64 + MAYBE_MUL4(amd64_sse2) + MAYBE_MUL4(amd64_avx) #endif static mulcore__functype *pick_mulcore(void) { #if CPUFAM_X86 + DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_avx, + cpu_feature_p(CPUFEAT_X86_AVX)); DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_sse2, cpu_feature_p(CPUFEAT_X86_SSE2)); #endif +#if CPUFAM_AMD64 + DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_amd64_avx, + cpu_feature_p(CPUFEAT_X86_AVX)); + DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_amd64_sse2, + cpu_feature_p(CPUFEAT_X86_SSE2)); +#endif DISPATCH_PICK_FALLBACK(mpmont_mul, simple_mulcore); } /* --- @finish@ --- * * - * Arguments: @mpmont *mm@ = pointer to a Montgomery reduction context + * Arguments: @const mpmont *mm@ = pointer to a Montgomery reduction + * context * *mp *d@ = pointer to mostly-reduced operand * * Returns: --- @@ -205,7 +236,7 @@ static mulcore__functype *pick_mulcore(void) * need to do an additional subtraction if %$d$% is negative. */ -static void finish(mpmont *mm, mp *d) +static void finish(const mpmont *mm, mp *d) { mpw *dv = d->v, *dvl = d->vl; size_t n = mm->n; @@ -310,7 +341,7 @@ void mpmont_destroy(mpmont *mm) /* --- @mpmont_reduce@ --- * * - * Arguments: @mpmont *mm@ = pointer to Montgomery reduction context + * Arguments: @const mpmont *mm@ = pointer to Montgomery reduction context * @mp *d@ = destination * @mp *a@ = source, assumed positive * @@ -319,7 +350,7 @@ void mpmont_destroy(mpmont *mm) #ifdef MPMONT_DISABLE -mp *mpmont_reduce(mpmont *mm, mp *d, mp *a) +mp *mpmont_reduce(const mpmont *mm, mp *d, mp *a) { mp_div(0, &d, a, mm->m); return (d); @@ -327,13 +358,13 @@ mp *mpmont_reduce(mpmont *mm, mp *d, mp *a) #else -mp *mpmont_reduce(mpmont *mm, mp *d, mp *a) +mp *mpmont_reduce(const mpmont *mm, mp *d, mp *a) { size_t n = mm->n; /* --- Check for serious Karatsuba reduction --- */ - if (n > MPK_THRESH * 3) { + if (n > MPMONT_KTHRESH) { mp al; mpw *vl; mp *u; @@ -369,7 +400,7 @@ mp *mpmont_reduce(mpmont *mm, mp *d, mp *a) /* --- @mpmont_mul@ --- * * - * Arguments: @mpmont *mm@ = pointer to Montgomery reduction context + * Arguments: @const mpmont *mm@ = pointer to Montgomery reduction context * @mp *d@ = destination * @mp *a, *b@ = sources, assumed positive * @@ -378,7 +409,7 @@ mp *mpmont_reduce(mpmont *mm, mp *d, mp *a) #ifdef MPMONT_DISABLE -mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b) +mp *mpmont_mul(const mpmont *mm, mp *d, mp *a, mp *b) { d = mp_mul(d, a, b); mp_div(0, &d, d, mm->m); @@ -387,11 +418,11 @@ mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b) #else -mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b) +mp *mpmont_mul(const mpmont *mm, mp *d, mp *a, mp *b) { size_t n = mm->n; - if (n > MPK_THRESH * 3) { + if (n > MPMONT_KTHRESH) { d = mp_mul(d, a, b); d = mpmont_reduce(mm, d, d); } else { @@ -413,6 +444,10 @@ mp *mpmont_mul(mpmont *mm, mp *d, mp *a, mp *b) #ifdef TEST_RIG +#ifdef ENABLE_ASM_DEBUG +# include "regdump.h" +#endif + static int tcreate(dstr *v) { mp *m = *(mp **)v[0].buf; @@ -508,7 +543,6 @@ static int tmul(dstr *v) mp_drop(mr); } - MP_DROP(m); MP_DROP(a); MP_DROP(b); @@ -527,6 +561,9 @@ static test_chunk tests[] = { int main(int argc, char *argv[]) { sub_init(); +#ifdef ENABLE_ASM_DEBUG + regdump_init(); +#endif test_run(argc, argv, tests, SRCDIR "/t/mpmont"); return (0); }