/*----- Header files ------------------------------------------------------*/
+#include "config.h"
+
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <mLib/bits.h>
#include <mLib/macros.h>
+#include "dispatch.h"
#include "mptypes.h"
#include "mpx.h"
#include "bitops.h"
* vectors in any way.
*/
-void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
- const mpw *bv, const mpw *bvl)
+CPU_DISPATCH(EMPTY, (void), void, mpx_umul, /* takes over the name @mpx_umul@ from the removed direct definition */
+ (mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, /* same parameter list as before */
+ const mpw *bv, const mpw *bvl),
+ (dv, dvl, av, avl, bv, bvl), pick_umul, simple_umul); /* NOTE(review): presumably @pick_umul@ chooses the implementation and @simple_umul@ is the default -- confirm against "dispatch.h" */
+
+static void simple_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
+ const mpw *bv, const mpw *bvl)
{
/* --- This is probably worthwhile on a multiply --- */
}
}
+/* --- @MAYBE_UMUL4@ --- *
+ *
+ * Arguments:	@impl@ = suffix identifying an implementation
+ *
+ * Use:		Declares the external routine @mpx_umul4_impl@ and defines
+ *		a static wrapper @maybe_umul4_impl@ taking the same
+ *		arguments as @simple_umul@.  The wrapper calls
+ *		@mpx_umul4_impl@ only when both operands are nonempty, each
+ *		operand length is a multiple of four words, and the
+ *		destination is at least as long as the two operands put
+ *		together; afterwards it passes the rest of the destination,
+ *		from @dv + an + bn@ up to @dvl@, to @MPX_ZERO@.  In every
+ *		other case the whole job is handed to @simple_umul@.
+ */
+
+#define MAYBE_UMUL4(impl)                                               \
+  extern void mpx_umul4_##impl(mpw */*dv*/,                             \
+                               const mpw */*av*/, const mpw */*avl*/,   \
+                               const mpw */*bv*/, const mpw */*bvl*/);  \
+  static void maybe_umul4_##impl(mpw *dv, mpw *dvl,                     \
+                                 const mpw *av, const mpw *avl,         \
+                                 const mpw *bv, const mpw *bvl)         \
+  {                                                                     \
+    size_t alen = avl - av;                                             \
+    size_t blen = bvl - bv;                                             \
+    size_t dlen = dvl - dv;                                             \
+                                                                        \
+    /* --- Take the special routine only if the shapes suit it --- */   \
+    if (alen && alen%4 == 0 && blen && blen%4 == 0 &&                   \
+        dlen >= alen + blen) {                                          \
+      mpx_umul4_##impl(dv, av, avl, bv, bvl);                           \
+      MPX_ZERO(dv + alen + blen, dvl);                                  \
+    } else                                                              \
+      simple_umul(dv, dvl, av, avl, bv, bvl);                           \
+  }
+
+#if CPUFAM_X86
+ MAYBE_UMUL4(x86_sse2) /* declares @mpx_umul4_x86_sse2@; defines the wrapper @maybe_umul4_x86_sse2@ */
+#endif
+
+static mpx_umul__functype *pick_umul(void) /* chooser named in the CPU_DISPATCH for @mpx_umul@ */
+{
+#if CPUFAM_X86
+ DISPATCH_PICK_COND(mpx_umul, maybe_umul4_x86_sse2, /* candidate, subject to the condition on the next line */
+ cpu_feature_p(CPUFEAT_X86_SSE2));
+#endif
+ DISPATCH_PICK_FALLBACK(mpx_umul, simple_umul); /* NOTE(review): presumably yields @simple_umul@ when nothing above was picked -- confirm against "dispatch.h" */
+}
+
/* --- @mpx_umuln@ --- *
*
* Arguments: @mpw *dv, *dvl@ = destination vector base and limit
size_t _sz = (sz); \
mpw *_vv = xmalloc(MPWS(_sz)); \
mpw *_vvl = _vv + _sz; \
+ memset(_vv, 0xa5, MPWS(_sz)); \
(v) = _vv; \
(vl) = _vvl; \
} while (0)