From: Mark Wooding <mdw@distorted.org.uk>
Date: Mon, 1 May 2017 00:38:30 +0000 (+0100)
Subject: math/scmul.h, pub/ed25519.c: Abstract out scalar multiplication code.
X-Git-Tag: 2.4.0~3
X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/581ac808d0e8ee74c2c97bd41721d572bca90ed6?ds=sidebyside

math/scmul.h, pub/ed25519.c: Abstract out scalar multiplication code.

Because what it needed was to be embedded in a hairy macro.
---

diff --git a/math/Makefile.am b/math/Makefile.am
index 1eb0534b..59bcaea0 100644
--- a/math/Makefile.am
+++ b/math/Makefile.am
@@ -448,5 +448,6 @@ pkginclude_HEADERS	+= scaf.h
 libmath_la_SOURCES	+= scaf.c
 
 pkginclude_HEADERS	+= montladder.h
+pkginclude_HEADERS	+= scmul.h
 
 ###----- That's all, folks --------------------------------------------------
diff --git a/math/scmul.h b/math/scmul.h
new file mode 100644
index 00000000..caf096ee
--- /dev/null
+++ b/math/scmul.h
@@ -0,0 +1,189 @@
+/* -*-c-*-
+ *
+ * Scalar multiplication on elliptic curves
+ *
+ * (c) 2017 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of Catacomb.
+ *
+ * Catacomb is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * Catacomb is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with Catacomb; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+#ifndef CATACOMB_SCMUL_H
+#define CATACOMB_SCMUL_H
+
+#ifdef __cplusplus
+  extern "C" {
+#endif
+
+/*----- Macros provided ---------------------------------------------------*/
+
+#define SCMUL_WINLIM(winwd) (1 << (winwd))
+#define SCMUL_WINMASK(winwd) (SCMUL_WINLIM(winwd) - 1)
+#define SCMUL_TABSZ(winwd) (SCMUL_WINLIM(winwd)/2 + 1)
+
+#define DEFINE_SCMUL(mulfn, f, winwd, scafwd, npiece, addfn, dblfn)	\
+void mulfn(f *X, f *Y, f *Z, const scaf_piece n[npiece],		\
+	   const f *X0, const f *Y0, const f *Z0)			\
+{									\
+  f VX[SCMUL_TABSZ(winwd)],						\
+    VY[SCMUL_TABSZ(winwd)],						\
+    VZ[SCMUL_TABSZ(winwd)];						\
+  f TX, TY, TZ, UX, UY, UZ;						\
+  unsigned i, j, k, w;							\
+  uint32 m_neg;								\
+  scaf_piece ni;							\
+									\
+  /* Build a table of small multiples. */				\
+  f##_set(&VX[0], 0); f##_set(&VY[0], 1); f##_set(&VZ[0], 1);		\
+  VX[1] = *X0; VY[1] = *Y0; VZ[1] = *Z0;				\
+  ptdbl(&VX[2], &VY[2], &VZ[2], &VX[1], &VY[1], &VZ[1]);		\
+  for (i = 3; i < SCMUL_TABSZ(winwd); i += 2) {				\
+    addfn(&VX[i], &VY[i], &VZ[i],					\
+	  &VX[i - 1], &VY[i - 1], &VZ[i - 1], X0, Y0, Z0);		\
+    dblfn(&VX[i + 1], &VY[i + 1], &VZ[i + 1],				\
+	  &VX[(i + 1)/2], &VY[(i + 1)/2], &VZ[(i + 1)/2]);		\
+  }									\
+									\
+  /* Now do the multiplication.  We lag a window behind the cursor	\
+   * position because of the scalar recoding we do.			\
+   */									\
+  f##_set(&TX, 0); f##_set(&TY, 1); f##_set(&TZ, 1);			\
+  for (i = (npiece), w = 0, m_neg = 0; i--; ) {				\
+    ni = n[i];								\
+									\
+    /* Work through each window in the scalar piece. */			\
+    for (j = 0; j < (scafwd); j += (winwd)) {				\
+									\
+      /* Shift along by a window. */					\
+      for (k = 0; k < (winwd); k++)					\
+	dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ);				\
+									\
+      /* Peek at the next window of four bits.  If the top bit is set	\
+       * we lend a bit leftwards, into w.  It's too late for this to	\
+       * affect the sign now, but if we negated earlier then the	\
+       * addition would be wrong.					\
+       */								\
+      w += (ni >> ((scafwd) - 1))&0x1u;					\
+      w = ((SCMUL_WINLIM(winwd) - w)&m_neg) | (w&~m_neg);		\
+									\
+      /* Collect the entry from the table, and add or subtract. */	\
+      f##_pickn(&UX, VX, SCMUL_TABSZ(winwd), w);			\
+      f##_pickn(&UY, VY, SCMUL_TABSZ(winwd), w);			\
+      f##_pickn(&UZ, VZ, SCMUL_TABSZ(winwd), w);			\
+      f##_condneg(&UX, &UX, m_neg);					\
+      addfn(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ);		\
+									\
+      /* Move the next window into the delay slot.  If its top bit is	\
+       * set, then negate it and set m_neg.				\
+       */								\
+      w = (ni >> ((scafwd) - (winwd)))&SCMUL_WINMASK(winwd);		\
+      m_neg = -(uint32)((w >> ((winwd) - 1))&0x1u);			\
+      ni <<= (winwd);							\
+    }									\
+  }									\
+									\
+  /* Do the final window.  Just fix the sign and go. */			\
+  for (k = 0; k < (winwd); k++)						\
+    dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ);				\
+  w = ((SCMUL_WINLIM(winwd) - w)&m_neg) | (w&~m_neg);			\
+  f##_pickn(&UX, VX, SCMUL_TABSZ(winwd), w);				\
+  f##_pickn(&UY, VY, SCMUL_TABSZ(winwd), w);				\
+  f##_pickn(&UZ, VZ, SCMUL_TABSZ(winwd), w);				\
+  f##_condneg(&UX, &UX, m_neg);						\
+  addfn(X, Y, Z, &TX, &TY, &TZ, &UX, &UY, &UZ);				\
+}
+
+#define SCSIMMUL_WINLIM(winwd) (1 << (winwd))
+#define SCSIMMUL_WINMASK(winwd) (SCSIMMUL_WINLIM(winwd) - 1)
+#define SCSIMMUL_TABSZ(winwd) (1 << 2*(winwd))
+
+#define DEFINE_SCSIMMUL(simmulfn, f, winwd,				\
+			scafwd, npiece, addfn, dblfn)			\
+void simmulfn(f *X, f *Y, f *Z,						\
+	      const scaf_piece n0[npiece],				\
+	      const f *X0, const f *Y0, const f *Z0,			\
+	      const scaf_piece n1[npiece],				\
+	      const f *X1, const f *Y1, const f *Z1)			\
+{									\
+  f VX[SCSIMMUL_TABSZ(winwd)],						\
+    VY[SCSIMMUL_TABSZ(winwd)],						\
+    VZ[SCSIMMUL_TABSZ(winwd)];						\
+  f TX, TY, TZ, UX, UY, UZ;						\
+  unsigned i, j, k, w, ni0, ni1;					\
+									\
+  /* Build a table of small linear combinations. */			\
+  f##_set(&VX[0], 0); f##_set(&VY[0], 1); f##_set(&VZ[0], 1);		\
+  VX[1] = *X0; VX[SCSIMMUL_WINLIM(winwd)] = *X1;			\
+  VY[1] = *Y0; VY[SCSIMMUL_WINLIM(winwd)] = *Y1;			\
+  VZ[1] = *Z0; VZ[SCSIMMUL_WINLIM(winwd)] = *Z1;			\
+  for (i = 2; i < SCSIMMUL_WINLIM(winwd); i <<= 1) {			\
+    dblfn(&VX[i], &VY[i], &VZ[i],					\
+	  &VX[i/2], &VY[i/2], &VZ[i/2]);				\
+    dblfn(&VX[i*SCSIMMUL_WINLIM(winwd)],				\
+	  &VY[i*SCSIMMUL_WINLIM(winwd)],				\
+	  &VZ[i*SCSIMMUL_WINLIM(winwd)],				\
+	  &VX[i*SCSIMMUL_WINLIM(winwd)/2],				\
+	  &VY[i*SCSIMMUL_WINLIM(winwd)/2],				\
+	  &VZ[i*SCSIMMUL_WINLIM(winwd)/2]);				\
+  }									\
+  for (i = 2; i < SCSIMMUL_TABSZ(winwd); i <<= 1) {			\
+    for (j = 1; j < i; j++)						\
+      addfn(&VX[i + j], &VY[i + j], &VZ[i + j],				\
+	    &VX[i], &VY[i], &VZ[i], &VX[j], &VY[j], &VZ[j]);		\
+  }									\
+									\
+  /* Do the multiplication. */						\
+  f##_set(&TX, 0); f##_set(&TY, 1); f##_set(&TZ, 1);			\
+  for (i = (npiece); i--; ) {						\
+    ni0 = n0[i]; ni1 = n1[i];						\
+									\
+    /* Work through each window in the scalar pieces. */		\
+    for (j = 0; j < (scafwd); j += (winwd)) {				\
+									\
+      /* Shift along by a window. */					\
+      for (k = 0; k < (winwd); k++)					\
+	dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ);				\
+									\
+      /* Collect the next window from the scalars. */			\
+      w = (((ni0 >> ((scafwd) - (winwd)))&				\
+	    SCSIMMUL_WINMASK(winwd)) |					\
+	   ((ni1 >> ((scafwd) - 2*(winwd)))&				\
+	    (SCSIMMUL_WINMASK(winwd) << (winwd))));			\
+      ni0 <<= (winwd); ni1 <<= (winwd);					\
+									\
+      /* Collect the entry from the table, and add. */			\
+      f##_pickn(&UX, VX, SCSIMMUL_TABSZ(winwd), w);			\
+      f##_pickn(&UY, VY, SCSIMMUL_TABSZ(winwd), w);			\
+      f##_pickn(&UZ, VZ, SCSIMMUL_TABSZ(winwd), w);			\
+      addfn(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ);		\
+    }									\
+  }									\
+									\
+  /* Done. */								\
+  *X = TX; *Y = TY; *Z = TZ;						\
+}
+
+/*----- That's all, folks -------------------------------------------------*/
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif
diff --git a/pub/ed25519.c b/pub/ed25519.c
index 82099f2c..2dc11613 100644
--- a/pub/ed25519.c
+++ b/pub/ed25519.c
@@ -32,6 +32,7 @@
 #include "f25519.h"
 #include "ed25519.h"
 #include "scaf.h"
+#include "scmul.h"
 #include "sha512.h"
 
 /*----- Key fetching ------------------------------------------------------*/
@@ -239,144 +240,8 @@ static void ptdbl(f25519 *X, f25519 *Y, f25519 *Z,
   f25519_mul(Z, &t0, &t1);		/* Z = F J */
 }
 
-static void ptmul(f25519 *X, f25519 *Y, f25519 *Z,
-		  const scaf_piece n[NPIECE],
-		  const f25519 *X0, const f25519 *Y0, const f25519 *Z0)
-{
-  /* We assume that the window width divides the scalar piece width. */
-#define WINWD 4
-#define WINLIM (1 << WINWD)
-#define WINMASK (WINLIM - 1)
-#define TABSZ (WINLIM/2 + 1)
-
-  f25519 VX[TABSZ], VY[TABSZ], VZ[TABSZ];
-  f25519 TX, TY, TZ, UX, UY, UZ;
-  unsigned i, j, k, w;
-  uint32 m_neg;
-  scaf_piece ni;
-
-  /* Build a table of small multiples. */
-  f25519_set(&VX[0], 0); f25519_set(&VY[0], 1); f25519_set(&VZ[0], 1);
-  VX[1] = *X0; VY[1] = *Y0; VZ[1] = *Z0;
-  ptdbl(&VX[2], &VY[2], &VZ[2], &VX[1], &VY[1], &VZ[1]);
-  for (i = 3; i < TABSZ; i += 2) {
-    ptadd(&VX[i], &VY[i], &VZ[i],
-	  &VX[i - 1], &VY[i - 1], &VZ[i - 1], X0, Y0, Z0);
-    ptdbl(&VX[i + 1], &VY[i + 1], &VZ[i + 1],
-	  &VX[(i + 1)/2], &VY[(i + 1)/2], &VZ[(i + 1)/2]);
-  }
-
-  /* Now do the multiplication.  We lag a window behind the cursor position
-   * because of the scalar recoding we do.
-   */
-  f25519_set(&TX, 0); f25519_set(&TY, 1); f25519_set(&TZ, 1);
-  for (i = NPIECE, w = 0, m_neg = 0; i--; ) {
-    ni = n[i];
-
-    /* Work through each window in the scalar piece. */
-    for (j = 0; j < PIECEWD; j += WINWD) {
-
-      /* Shift along by a window. */
-      for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ);
-
-      /* Peek at the next window of four bits.  If the top bit is set we lend
-       * a bit leftwards, into w.  It's too late for this to affect the sign
-       * now, but if we negated earlier then the addition would be wrong.
-       */
-      w += (ni >> (PIECEWD - 1))&0x1u;
-      w = ((WINLIM - w)&m_neg) | (w&~m_neg);
-
-      /* Collect the entry from the table, and add or subtract. */
-      f25519_pickn(&UX, VX, TABSZ, w);
-      f25519_pickn(&UY, VY, TABSZ, w);
-      f25519_pickn(&UZ, VZ, TABSZ, w);
-      f25519_condneg(&UX, &UX, m_neg);
-      ptadd(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ);
-
-      /* Move the next window into the delay slot.  If its top bit is set,
-       * then negate it and set m_neg.
-       */
-      w = (ni >> (PIECEWD - WINWD))&WINMASK;
-      m_neg = -(uint32)((w >> (WINWD - 1))&0x1u);
-      ni <<= WINWD;
-    }
-  }
-
-  /* Do the final window.  Just fix the sign and go. */
-  for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ);
-  w = ((WINLIM - w)&m_neg) | (w&~m_neg);
-  f25519_pickn(&UX, VX, TABSZ, w);
-  f25519_pickn(&UY, VY, TABSZ, w);
-  f25519_pickn(&UZ, VZ, TABSZ, w);
-  f25519_condneg(&UX, &UX, m_neg);
-  ptadd(X, Y, Z, &TX, &TY, &TZ, &UX, &UY, &UZ);
-
-#undef WINWD
-#undef WINLIM
-#undef WINMASK
-#undef TABSZ
-}
-
-static void ptsimmul(f25519 *X, f25519 *Y, f25519 *Z,
-		     const scaf_piece n0[NPIECE],
-		     const f25519 *X0, const f25519 *Y0, const f25519 *Z0,
-		     const scaf_piece n1[NPIECE],
-		     const f25519 *X1, const f25519 *Y1, const f25519 *Z1)
-{
-  /* We assume that the window width divides the scalar piece width. */
-#define WINWD 2
-#define WINLIM (1 << WINWD)
-#define WINMASK (WINLIM - 1)
-#define TABSZ (1 << 2*WINWD)
-
-  f25519 VX[TABSZ], VY[TABSZ], VZ[TABSZ];
-  f25519 TX, TY, TZ, UX, UY, UZ;
-  unsigned i, j, k, w, ni0, ni1;
-
-  /* Build a table of small linear combinations. */
-  f25519_set(&VX[0], 0); f25519_set(&VY[0], 1); f25519_set(&VZ[0], 1);
-  VX[1] = *X0; VX[WINLIM] = *X1;
-  VY[1] = *Y0; VY[WINLIM] = *Y1;
-  VZ[1] = *Z0; VZ[WINLIM] = *Z1;
-  for (i = 2; i < WINLIM; i <<= 1) {
-    ptdbl(&VX[i], &VY[i], &VZ[i],
-	  &VX[i/2], &VY[i/2], &VZ[i/2]);
-    ptdbl(&VX[i*WINLIM], &VY[i*WINLIM], &VZ[i*WINLIM],
-	  &VX[i*WINLIM/2], &VY[i*WINLIM/2], &VZ[i*WINLIM/2]);
-  }
-  for (i = 2; i < TABSZ; i <<= 1) {
-    for (j = 1; j < i; j++)
-      ptadd(&VX[i + j], &VY[i + j], &VZ[i + j],
-	    &VX[i], &VY[i], &VZ[i], &VX[j], &VY[j], &VZ[j]);
-  }
-
-  /* Do the multiplication. */
-  f25519_set(&TX, 0); f25519_set(&TY, 1); f25519_set(&TZ, 1);
-  for (i = NPIECE; i--; ) {
-    ni0 = n0[i]; ni1 = n1[i];
-
-    /* Work through each window in the scalar pieces. */
-    for (j = 0; j < PIECEWD; j += WINWD) {
-
-      /* Shift along by a window. */
-      for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ);
-
-      /* Collect the next window from the scalars. */
-      w = ((ni0 >> (PIECEWD - WINWD))&WINMASK) |
-	((ni1 >> (PIECEWD - 2*WINWD))&(WINMASK << WINWD));
-      ni0 <<= WINWD; ni1 <<= WINWD;
-
-      /* Collect the entry from the table, and add. */
-      f25519_pickn(&UX, VX, TABSZ, w);
-      f25519_pickn(&UY, VY, TABSZ, w);
-      f25519_pickn(&UZ, VZ, TABSZ, w);
-      ptadd(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ);
-    }
-  }
-
-  /* Done. */
-  *X = TX; *Y = TY; *Z = TZ;
-}
+static DEFINE_SCMUL(ptmul, f25519, 4, PIECEWD, NPIECE, ptadd, ptdbl)
+static DEFINE_SCSIMMUL(ptsimmul, f25519, 2, PIECEWD, NPIECE, ptadd, ptdbl)
 
 /*----- Key derivation utilities ------------------------------------------*/