(x86 asm): Zero the high parts of the ?MM registers if available.
[catacomb] / math / mpmont.c
index f8a2611..094ac40 100644 (file)
@@ -90,19 +90,25 @@ static void simple_redccore(mpw *dv, mpw *dvl, const mpw *mv,
 
 #if CPUFAM_X86
   MAYBE_REDC4(x86_sse2)
+  MAYBE_REDC4(x86_avx)
 #endif
 
 #if CPUFAM_AMD64
   MAYBE_REDC4(amd64_sse2)
+  MAYBE_REDC4(amd64_avx)
 #endif
 
 static redccore__functype *pick_redccore(void)
 {
 #if CPUFAM_X86
+  DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_x86_avx,
+                    cpu_feature_p(CPUFEAT_X86_AVX));
   DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_x86_sse2,
                     cpu_feature_p(CPUFEAT_X86_SSE2));
 #endif
 #if CPUFAM_AMD64
+  DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_amd64_avx,
+                    cpu_feature_p(CPUFEAT_X86_AVX));
   DISPATCH_PICK_COND(mpmont_reduce, maybe_redc4_amd64_sse2,
                     cpu_feature_p(CPUFEAT_X86_SSE2));
 #endif
@@ -190,19 +196,25 @@ static void simple_mulcore(mpw *dv, mpw *dvl,
 
 #if CPUFAM_X86
   MAYBE_MUL4(x86_sse2)
+  MAYBE_MUL4(x86_avx)
 #endif
 
 #if CPUFAM_AMD64
   MAYBE_MUL4(amd64_sse2)
+  MAYBE_MUL4(amd64_avx)
 #endif
 
 static mulcore__functype *pick_mulcore(void)
 {
 #if CPUFAM_X86
+  DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_avx,
+                    cpu_feature_p(CPUFEAT_X86_AVX));
   DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_x86_sse2,
                     cpu_feature_p(CPUFEAT_X86_SSE2));
 #endif
 #if CPUFAM_AMD64
+  DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_amd64_avx,
+                    cpu_feature_p(CPUFEAT_X86_AVX));
   DISPATCH_PICK_COND(mpmont_mul, maybe_mul4_amd64_sse2,
                     cpu_feature_p(CPUFEAT_X86_SSE2));
 #endif