X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/609affae0305784d87f2357488fba35699a04098..a90d420cbe87490c844ae422c966e746d3134b07:/math/mpx.c diff --git a/math/mpx.c b/math/mpx.c index 18baf2f2..07a6c20f 100644 --- a/math/mpx.c +++ b/math/mpx.c @@ -545,15 +545,21 @@ MPX_SHIFTOP(lsr, { size_t nr = MPW_BITS - nb; mpw w; - av += nw; - w = av < avl ? *av++ : 0; - while (av < avl) { - mpw t; - if (dv >= dvl) goto done; - t = *av++; - *dv++ = MPW((w >> nb) | (t << nr)); - w = t; + if (nw >= avl - av) + w = 0; + else { + av += nw; + w = *av++; + + while (av < avl) { + mpw t; + if (dv >= dvl) goto done; + t = *av++; + *dv++ = MPW((w >> nb) | (t << nr)); + w = t; + } } + if (dv < dvl) { *dv++ = MPW(w >> nb); MPX_ZERO(dv, dvl); @@ -812,7 +818,7 @@ void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); } -/* --- @mpx_uaddnlsl@ --- * +/* --- @mpx_usubnlsl@ --- * * * Arguments: @mpw *dv, *dvl@ = destination and first argument vector * @mpw a@ = second argument @@ -917,19 +923,25 @@ static void simple_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, #if CPUFAM_X86 MAYBE_UMUL4(x86_sse2) + MAYBE_UMUL4(x86_avx) #endif #if CPUFAM_AMD64 MAYBE_UMUL4(amd64_sse2) + MAYBE_UMUL4(amd64_avx) #endif static mpx_umul__functype *pick_umul(void) { #if CPUFAM_X86 + DISPATCH_PICK_COND(mpx_umul, maybe_umul4_x86_avx, + cpu_feature_p(CPUFEAT_X86_AVX)); DISPATCH_PICK_COND(mpx_umul, maybe_umul4_x86_sse2, cpu_feature_p(CPUFEAT_X86_SSE2)); #endif #if CPUFAM_AMD64 + DISPATCH_PICK_COND(mpx_umul, maybe_umul4_amd64_avx, + cpu_feature_p(CPUFEAT_X86_AVX)); DISPATCH_PICK_COND(mpx_umul, maybe_umul4_amd64_sse2, cpu_feature_p(CPUFEAT_X86_SSE2)); #endif @@ -1262,6 +1274,7 @@ mpw mpx_udivn(mpw *qv, mpw *qvl, const mpw *rv, const mpw *rvl, mpw d) #include #include +#include #include #include @@ -1354,7 +1367,7 @@ static int loadstore(dstr *v) ok = 0; MPX_OCTETS(oct, m, ml); mpx_storel(m, ml, d.buf, d.sz); - if (memcmp(d.buf, v->buf, oct) != 0) { + if (MEMCMP(d.buf, !=, v->buf, oct)) { dumpbits("\n*** storel failed", d.buf, d.sz); ok = 0; } @@ -1364,7 +1377,7 @@ static int loadstore(dstr *v) ok = 0; MPX_OCTETS(oct, m, ml); mpx_storeb(m, ml, d.buf, d.sz); - if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) { + if (MEMCMP(d.buf + d.sz - oct, !=, v->buf + v->len - oct, oct)) { dumpbits("\n*** storeb failed", d.buf, d.sz); ok = 0; } @@ -1398,14 +1411,14 @@ static int twocl(dstr *v) mpx_loadl(m, ml0, v[0].buf, v[0].len); mpx_storel2cn(m, ml0, d.buf, v[1].len); - if (memcmp(d.buf, v[1].buf, v[1].len)) { + if (MEMCMP(d.buf, !=, v[1].buf, v[1].len)) { dumpbits("\n*** storel2cn failed", d.buf, v[1].len); ok = 0; } mpx_loadl2cn(m, ml1, v[1].buf, v[1].len); mpx_storel(m, ml1, d.buf, v[0].len); - if (memcmp(d.buf, v[0].buf, v[0].len)) { + if (MEMCMP(d.buf, !=, v[0].buf, v[0].len)) { dumpbits("\n*** loadl2cn failed", d.buf, v[0].len); ok = 0; } @@ -1442,14 +1455,14 @@ static int twocb(dstr *v) mpx_loadb(m, ml0, v[0].buf, v[0].len); mpx_storeb2cn(m, ml0, d.buf, v[1].len); - if (memcmp(d.buf, v[1].buf, v[1].len)) { + if (MEMCMP(d.buf, !=, v[1].buf, v[1].len)) { dumpbits("\n*** storeb2cn failed", d.buf, v[1].len); ok = 0; } mpx_loadb2cn(m, ml1, v[1].buf, v[1].len); mpx_storeb(m, ml1, d.buf, v[0].len); - if (memcmp(d.buf, v[0].buf, v[0].len)) { + if (MEMCMP(d.buf, !=, v[0].buf, v[0].len)) { dumpbits("\n*** loadb2cn failed", d.buf, v[0].len); ok = 0; }