X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/blobdiff_plain/7765e926307a21c737a6f8ca64ba3ddc44175de5..4739c68a6cb81cae53cd7df694ecd6176d6f11f8:/exp.h

diff --git a/exp.h b/exp.h
index 6cfdfd8..a8b3812 100644
--- a/exp.h
+++ b/exp.h
@@ -1,6 +1,6 @@
 /* -*-c-*-
  *
- * $Id: exp.h,v 1.1 2001/06/16 13:00:59 mdw Exp $
+ * $Id: exp.h,v 1.4 2004/04/08 01:36:15 mdw Exp $
  *
  * Generalized exponentiation
  *
@@ -27,15 +27,6 @@
  * MA 02111-1307, USA.
  */
 
-/*----- Revision history --------------------------------------------------* 
- *
- * $Log: exp.h,v $
- * Revision 1.1  2001/06/16 13:00:59  mdw
- * New generic exponentation code.  Includes sliding-window simultaneous
- * exponentiation.
- *
- */
-
 #ifdef CATACOMB_EXP_H
 #  error "Multiple inclusion of <catacomb/exp.h>"
 #endif
@@ -78,10 +69,19 @@ typedef struct exp_simul {
 #  define EXP_WINSZ 4			/* Predefine if you need to */
 #endif
 
-/* --- These are determined from the window size --- */
+/* --- These are determined from the window size --- *
+ *
+ * Given a %$k$%-bit exponent, I expect to do %$k/2$% multiplies if I use the
+ * simple way.  If I use an n-bit sliding window, then I do %$2^n$%
+ * multiplies up front, but I only do %$(2^n - 1)/2^n k/n$% multiplies for
+ * the exponentiation.  This is a win when
+ *
+ *   %$k \ge \frac{n 2^{n+1}}{n - 2}$%
+ */
 
 #define EXP_TABSZ (1 << EXP_WINSZ)
-#define EXP_THRESH (((MPW_BITS / EXP_WINSZ) << 2) + 1)
+#define EXP_THRESH							\
+    ((EXP_WINSZ * (2 << EXP_WINSZ))/((EXP_WINSZ - 2) * MPW_BITS))
 
 /* --- Required operations --- *
  *
@@ -99,6 +99,10 @@ typedef struct exp_simul {
  * @EXP_MUL(a, x)@		Multiplies @a@ by @x@ (writing the result
  *				back to @a@).
  *
+ * @EXP_FIX(x)@			Makes @x@ be a canonical representation of
+ *				its value.  All multiplications have the
+ *				right argument canonical.
+ *
  * @EXP_SQR(a)@			Multiplies @a@ by itself.
  *
  * @EXP_SETMUL(d, x, y)@	Sets @d@ to be the product of @x@ and @y@.
@@ -140,6 +144,7 @@ typedef struct exp_simul {
 									\
   /* --- Do the main body of the work --- */				\
 									\
+  EXP_FIX(g);								\
   for (;;) {								\
     EXP_MUL(a, g);							\
     sq = 0;								\
@@ -184,11 +189,15 @@ exp_simple_exit:;							\
 									\
   /* --- Do the precomputation --- */					\
 									\
+  EXP_FIX(g);								\
   EXP_SETSQR(g2, g);							\
+  EXP_FIX(g2);								\
   v = xmalloc(EXP_TABSZ * sizeof(EXP_TYPE));				\
   EXP_COPY(v[0], g);							\
-  for (i = 1; i < EXP_TABSZ; i++)					\
+  for (i = 1; i < EXP_TABSZ; i++) {					\
     EXP_SETMUL(v[i], v[i - 1], g2);					\
+    EXP_FIX(v[i]);							\
+  }									\
   EXP_DROP(g2);								\
 									\
   /* --- Skip top-end zero bits --- *					\
@@ -286,17 +295,21 @@ exp_window_exit:;							\
   j = 1;								\
   for (i = 0; i < n; i++) {						\
     EXP_COPY(v[j], f[n - 1 - i].base);					\
+    EXP_FIX(v[j]);							\
     j <<= 1;								\
   }									\
   k = n * EXP_WINSZ;							\
   jj = 1;								\
   for (; i < k; i++) {							\
     EXP_SETSQR(v[j], v[jj]);						\
+    EXP_FIX(v[j]);							\
     j <<= 1; jj <<= 1;							\
   }									\
   for (i = 1; i < vn; i <<= 1) {					\
-    for (j = 1; j < i; j++)						\
+    for (j = 1; j < i; j++) {						\
       EXP_SETMUL(v[j + i], v[j], v[i]);					\
+      EXP_FIX(v[j + i]);						\
+    }									\
   }									\
 									\
   /* --- Set up the bitscanners --- *					\
@@ -381,7 +394,7 @@ exp_window_exit:;							\
 									\
 exp_simul_done:								\
   while (sq--) EXP_SQR(a);						\
-  for (i = 1; i < vn; i++)					\
+  for (i = 1; i < vn; i++)						\
     EXP_DROP(v[i]);							\
   xfree(v);								\
 } while (0)