Moved the Karatsuba macros into a separate file for better sharing.
authormdw <mdw>
Sat, 17 Jun 2000 11:42:54 +0000 (11:42 +0000)
committermdw <mdw>
Sat, 17 Jun 2000 11:42:54 +0000 (11:42 +0000)
Fixed some comments.  Use an improved technique so that all the
operations are squarings.

mpx-ksqr.c

index 3ca145d..226b99a 100644 (file)
@@ -1,6 +1,6 @@
 /* -*-c-*-
  *
- * $Id: mpx-ksqr.c,v 1.2 1999/12/13 15:35:01 mdw Exp $
+ * $Id: mpx-ksqr.c,v 1.3 2000/06/17 11:42:54 mdw Exp $
  *
  * Karatsuba-based squaring algorithm
  *
 /*----- Revision history --------------------------------------------------* 
  *
  * $Log: mpx-ksqr.c,v $
+ * Revision 1.3  2000/06/17 11:42:54  mdw
+ * Moved the Karatsuba macros into a separate file for better sharing.
+ * Fixed some comments.  Use an improved technique so that all the
+ * operations are squarings.
+ *
  * Revision 1.2  1999/12/13 15:35:01  mdw
  * Simplify and improve.
  *
@@ -44,6 +49,7 @@
 #include <stdio.h>
 
 #include "mpx.h"
+#include "mpx-kmac.h"
 
 /*----- Tweakables --------------------------------------------------------*/
 
 #  define KARATSUBA_CUTOFF 2
 #endif
 
-/*----- Addition macros ---------------------------------------------------*/
-
-#define ULSL1(dv, av, avl) do {                                                \
-  mpw *_dv = (dv);                                                     \
-  const mpw *_av = (av), *_avl = (avl);                                        \
-  mpw _c = 0;                                                          \
-                                                                       \
-  while (_av < _avl) {                                                 \
-    mpw _x = *_av++;                                                   \
-    *_dv++ = MPW(_c | (_x << 1));                                      \
-    _c = MPW(_x >> (MPW_BITS - 1));                                    \
-  }                                                                    \
-  *_dv++ = _c;                                                         \
-} while (0)
-
-#define UADD(dv, av, avl) do {                                         \
-  mpw *_dv = (dv);                                                     \
-  const mpw *_av = (av), *_avl = (avl);                                        \
-  mpw _c = 0;                                                          \
-                                                                       \
-  while (_av < _avl) {                                                 \
-    mpw _a, _b;                                                                \
-    mpd _x;                                                            \
-    _a = *_av++;                                                       \
-    _b = *_dv;                                                         \
-    _x = (mpd)_a + (mpd)_b + _c;                                       \
-    *_dv++ = MPW(_x);                                                  \
-    _c = _x >> MPW_BITS;                                               \
-  }                                                                    \
-  while (_c) {                                                         \
-    mpd _x = (mpd)*_dv + (mpd)_c;                                      \
-    *_dv++ = MPW(_x);                                                  \
-    _c = _x >> MPW_BITS;                                               \
-  }                                                                    \
-} while (0)
-
 /*----- Main code ---------------------------------------------------------*/
 
 /* --- @mpx_ksqr@ --- *
@@ -133,10 +103,10 @@ void mpx_ksqr(mpw *dv, mpw *dvl,
 
   /* --- How the algorithm works --- *
    *
-   * Unlike Karatsuba's identity for multiplication which isn't particularly
-   * obvious, the identity for multiplication is known to all schoolchildren.
-   * Let %$A = xb + y$%.  Then %$A^2 = x^2 b^x + 2 x y b + y^2$%.  So now I
-   * have three multiplications, each four times easier, and that's a win.
+   * The identity for squaring is known to all schoolchildren.
+   * Let %$A = xb + y$%.  Then %$A^2 = x^2 b^2 + 2 x y b + y^2$%.  Now,
+   * %$(x + y)^2 - x^2 - y^2 = 2 x y$%, which means I only need to do three
+   * squarings.
    */
 
   /* --- First things --- *
@@ -159,17 +129,11 @@ void mpx_ksqr(mpw *dv, mpw *dvl,
     mpw *tdv = dv + m;
     mpw *rdv = tdv + m;
 
-    /* --- The cross term in the middle needs a multiply --- *
-     *
-     * This isn't actually true, since %$x y = ((x + y)^2 - (x - y)^2)/4%.
-     * But that's two squarings, versus one multiplication.
-     */
-
+    UADD2(sv, svm, av, avm, avm, avl);
     if (m > KARATSUBA_CUTOFF)
-      mpx_kmul(sv, ssv, av, avm, avm, avl, ssv, svl);
+      mpx_ksqr(tdv, rdv + m + 4, sv, svm + 1, ssv, svl);
     else
-      mpx_umul(sv, ssv, av, avm, avm, avl);
-    ULSL1(tdv, sv, svn);
+      mpx_usqr(tdv, rdv + m + 4, sv, svm + 1);
 
     if (m > KARATSUBA_CUTOFF)
       mpx_ksqr(sv, ssv, avm, avl, ssv, svl);
@@ -177,6 +141,7 @@ void mpx_ksqr(mpw *dv, mpw *dvl,
       mpx_usqr(sv, ssv, avm, avl);
     MPX_COPY(rdv + m + 1, dvl, svm + 1, svn);
     UADD(rdv, sv, svm + 1);
+    USUB(tdv, sv, svn);
     
     if (m > KARATSUBA_CUTOFF)
       mpx_ksqr(sv, ssv, av, avm, ssv, svl);
@@ -184,6 +149,7 @@ void mpx_ksqr(mpw *dv, mpw *dvl,
       mpx_usqr(sv, ssv, av, avm);
     MPX_COPY(dv, tdv, sv, svm);
     UADD(tdv, svm, svn);
+    USUB(tdv, sv, svn);
   }
 }