+#define MAYBE_MUL4(impl) /* declare mpxmont_mul4_<impl>; define guard maybe_mul4_<impl> */ \
+  extern void mpxmont_mul4_##impl(mpw *dv, /* kernel defined outside this file section */ \
+                                  const mpw *av, const mpw *bv, \
+                                  const mpw *mv, \
+                                  size_t n, const mpw *mi); \
+  static void maybe_mul4_##impl(mpw *dv, mpw *dvl, \
+                                const mpw *av, const mpw *avl, \
+                                const mpw *bv, const mpw *bvl, \
+                                const mpw *mv, size_t n, const mpw *mi) \
+  { \
+    const size_t alen = avl - av, blen = bvl - bv; /* operand lengths, in mpw elements */ \
+    if (!(n%4) && alen == n && blen == n) { \
+      /* n is a multiple of 4 and both operands are exactly n long: use the kernel */ \
+      mpxmont_mul4_##impl(dv, av, bv, mv, n, mi); \
+      MPX_ZERO(dv + 2*n + 1, dvl); /* presumably clears the tail of dv past 2n+1 -- confirm */ \
+    } else /* every other shape is handed to the general routine */ \
+      simple_mulcore(dv, dvl, av, avl, bv, bvl, mv, n, mi); \
+  }
+
+#if CPUFAM_X86 /* wrappers generated only when CPUFAM_X86 is nonzero */
+ MAYBE_MUL4(x86_sse2) /* declares mpxmont_mul4_x86_sse2, defines maybe_mul4_x86_sse2 */
+ MAYBE_MUL4(x86_avx) /* declares mpxmont_mul4_x86_avx, defines maybe_mul4_x86_avx */
+#endif
+
+#if CPUFAM_AMD64 /* wrappers generated only when CPUFAM_AMD64 is nonzero */
+ MAYBE_MUL4(amd64_sse2) /* declares mpxmont_mul4_amd64_sse2, defines maybe_mul4_amd64_sse2 */
+ MAYBE_MUL4(amd64_avx) /* declares mpxmont_mul4_amd64_avx, defines maybe_mul4_amd64_avx */
+#endif
+
+static mulcore__functype *pick_mulcore(void) /* select the multiply core to use on this CPU */
+{
+#if CPUFAM_X86 /* candidates compiled in when CPUFAM_X86 is nonzero */
+ DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_avx, /* AVX wrapper is offered first */
+ cpu_feature_p(CPUFEAT_X86_AVX));
+ DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_sse2, /* then the SSE2 wrapper */
+ cpu_feature_p(CPUFEAT_X86_SSE2));
+#endif
+#if CPUFAM_AMD64 /* candidates compiled in when CPUFAM_AMD64 is nonzero */
+ DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_amd64_avx, /* tested with the same CPUFEAT_X86_* flags as above */
+ cpu_feature_p(CPUFEAT_X86_AVX));
+ DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_amd64_sse2, /* then the SSE2 wrapper */
+ cpu_feature_p(CPUFEAT_X86_SSE2));
+#endif
+ DISPATCH_PICK_FALLBACK(mpmont_mul, simple_mulcore); /* NOTE(review): presumably returns simple_mulcore when no test above matched -- macro not visible here, confirm */
+}
+