From: Mark Wooding Date: Mon, 1 May 2017 00:38:30 +0000 (+0100) Subject: math/scmul.h, pub/ed25519.c: Abstract out scalar multiplication code. X-Git-Tag: 2.4.0~3 X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/581ac808d0e8ee74c2c97bd41721d572bca90ed6?ds=sidebyside math/scmul.h, pub/ed25519.c: Abstract out scalar multiplication code. Because what it needed was to be embedded in a hairy macro. --- diff --git a/math/Makefile.am b/math/Makefile.am index 1eb0534b..59bcaea0 100644 --- a/math/Makefile.am +++ b/math/Makefile.am @@ -448,5 +448,6 @@ pkginclude_HEADERS += scaf.h libmath_la_SOURCES += scaf.c pkginclude_HEADERS += montladder.h +pkginclude_HEADERS += scmul.h ###----- That's all, folks -------------------------------------------------- diff --git a/math/scmul.h b/math/scmul.h new file mode 100644 index 00000000..caf096ee --- /dev/null +++ b/math/scmul.h @@ -0,0 +1,189 @@ +/* -*-c-*- + * + * Scalar multiplication on elliptic curves + * + * (c) 2017 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +#ifndef CATACOMB_SCMUL_H +#define CATACOMB_SCMUL_H + +#ifdef __cplusplus + extern "C" { +#endif + +/*----- Macros provided ---------------------------------------------------*/ + +#define SCMUL_WINLIM(winwd) (1 << (winwd)) +#define SCMUL_WINMASK(winwd) (SCMUL_WINLIM(winwd) - 1) +#define SCMUL_TABSZ(winwd) (SCMUL_WINLIM(winwd)/2 + 1) + +#define DEFINE_SCMUL(mulfn, f, winwd, scafwd, npiece, addfn, dblfn) \ +void mulfn(f *X, f *Y, f *Z, const scaf_piece n[npiece], \ + const f *X0, const f *Y0, const f *Z0) \ +{ \ + f VX[SCMUL_TABSZ(winwd)], \ + VY[SCMUL_TABSZ(winwd)], \ + VZ[SCMUL_TABSZ(winwd)]; \ + f TX, TY, TZ, UX, UY, UZ; \ + unsigned i, j, k, w; \ + uint32 m_neg; \ + scaf_piece ni; \ + \ + /* Build a table of small multiples. */ \ + f##_set(&VX[0], 0); f##_set(&VY[0], 1); f##_set(&VZ[0], 1); \ + VX[1] = *X0; VY[1] = *Y0; VZ[1] = *Z0; \ + ptdbl(&VX[2], &VY[2], &VZ[2], &VX[1], &VY[1], &VZ[1]); \ + for (i = 3; i < SCMUL_TABSZ(winwd); i += 2) { \ + addfn(&VX[i], &VY[i], &VZ[i], \ + &VX[i - 1], &VY[i - 1], &VZ[i - 1], X0, Y0, Z0); \ + dblfn(&VX[i + 1], &VY[i + 1], &VZ[i + 1], \ + &VX[(i + 1)/2], &VY[(i + 1)/2], &VZ[(i + 1)/2]); \ + } \ + \ + /* Now do the multiplication. We lag a window behind the cursor \ + * position because of the scalar recoding we do. \ + */ \ + f##_set(&TX, 0); f##_set(&TY, 1); f##_set(&TZ, 1); \ + for (i = (npiece), w = 0, m_neg = 0; i--; ) { \ + ni = n[i]; \ + \ + /* Work through each window in the scalar piece. */ \ + for (j = 0; j < (scafwd); j += (winwd)) { \ + \ + /* Shift along by a window. */ \ + for (k = 0; k < (winwd); k++) \ + dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ); \ + \ + /* Peek at the next window of four bits. If the top bit is set \ + * we lend a bit leftwards, into w. It's too late for this to \ + * affect the sign now, but if we negated earlier then the \ + * addition would be wrong. \ + */ \ + w += (ni >> ((scafwd) - 1))&0x1u; \ + w = ((SCMUL_WINLIM(winwd) - w)&m_neg) | (w&~m_neg); \ + \ + /* Collect the entry from the table, and add or subtract. */ \ + f##_pickn(&UX, VX, SCMUL_TABSZ(winwd), w); \ + f##_pickn(&UY, VY, SCMUL_TABSZ(winwd), w); \ + f##_pickn(&UZ, VZ, SCMUL_TABSZ(winwd), w); \ + f##_condneg(&UX, &UX, m_neg); \ + addfn(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ); \ + \ + /* Move the next window into the delay slot. If its top bit is \ + * set, then negate it and set m_neg. \ + */ \ + w = (ni >> ((scafwd) - (winwd)))&SCMUL_WINMASK(winwd); \ + m_neg = -(uint32)((w >> ((winwd) - 1))&0x1u); \ + ni <<= (winwd); \ + } \ + } \ + \ + /* Do the final window. Just fix the sign and go. */ \ + for (k = 0; k < (winwd); k++) \ + dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ); \ + w = ((SCMUL_WINLIM(winwd) - w)&m_neg) | (w&~m_neg); \ + f##_pickn(&UX, VX, SCMUL_TABSZ(winwd), w); \ + f##_pickn(&UY, VY, SCMUL_TABSZ(winwd), w); \ + f##_pickn(&UZ, VZ, SCMUL_TABSZ(winwd), w); \ + f##_condneg(&UX, &UX, m_neg); \ + addfn(X, Y, Z, &TX, &TY, &TZ, &UX, &UY, &UZ); \ +} + +#define SCSIMMUL_WINLIM(winwd) (1 << (winwd)) +#define SCSIMMUL_WINMASK(winwd) (SCSIMMUL_WINLIM(winwd) - 1) +#define SCSIMMUL_TABSZ(winwd) (1 << 2*(winwd)) + +#define DEFINE_SCSIMMUL(simmulfn, f, winwd, \ + scafwd, npiece, addfn, dblfn) \ +void simmulfn(f *X, f *Y, f *Z, \ + const scaf_piece n0[npiece], \ + const f *X0, const f *Y0, const f *Z0, \ + const scaf_piece n1[npiece], \ + const f *X1, const f *Y1, const f *Z1) \ +{ \ + f VX[SCSIMMUL_TABSZ(winwd)], \ + VY[SCSIMMUL_TABSZ(winwd)], \ + VZ[SCSIMMUL_TABSZ(winwd)]; \ + f TX, TY, TZ, UX, UY, UZ; \ + unsigned i, j, k, w, ni0, ni1; \ + \ + /* Build a table of small linear combinations. */ \ + f##_set(&VX[0], 0); f##_set(&VY[0], 1); f##_set(&VZ[0], 1); \ + VX[1] = *X0; VX[SCSIMMUL_WINLIM(winwd)] = *X1; \ + VY[1] = *Y0; VY[SCSIMMUL_WINLIM(winwd)] = *Y1; \ + VZ[1] = *Z0; VZ[SCSIMMUL_WINLIM(winwd)] = *Z1; \ + for (i = 2; i < SCSIMMUL_WINLIM(winwd); i <<= 1) { \ + dblfn(&VX[i], &VY[i], &VZ[i], \ + &VX[i/2], &VY[i/2], &VZ[i/2]); \ + dblfn(&VX[i*SCSIMMUL_WINLIM(winwd)], \ + &VY[i*SCSIMMUL_WINLIM(winwd)], \ + &VZ[i*SCSIMMUL_WINLIM(winwd)], \ + &VX[i*SCSIMMUL_WINLIM(winwd)/2], \ + &VY[i*SCSIMMUL_WINLIM(winwd)/2], \ + &VZ[i*SCSIMMUL_WINLIM(winwd)/2]); \ + } \ + for (i = 2; i < SCSIMMUL_TABSZ(winwd); i <<= 1) { \ + for (j = 1; j < i; j++) \ + addfn(&VX[i + j], &VY[i + j], &VZ[i + j], \ + &VX[i], &VY[i], &VZ[i], &VX[j], &VY[j], &VZ[j]); \ + } \ + \ + /* Do the multiplication. */ \ + f##_set(&TX, 0); f##_set(&TY, 1); f##_set(&TZ, 1); \ + for (i = (npiece); i--; ) { \ + ni0 = n0[i]; ni1 = n1[i]; \ + \ + /* Work through each window in the scalar pieces. */ \ + for (j = 0; j < (scafwd); j += (winwd)) { \ + \ + /* Shift along by a window. */ \ + for (k = 0; k < (winwd); k++) \ + dblfn(&TX, &TY, &TZ, &TX, &TY, &TZ); \ + \ + /* Collect the next window from the scalars. */ \ + w = (((ni0 >> ((scafwd) - (winwd)))& \ + SCSIMMUL_WINMASK(winwd)) | \ + ((ni1 >> ((scafwd) - 2*(winwd)))& \ + (SCSIMMUL_WINMASK(winwd) << (winwd)))); \ + ni0 <<= (winwd); ni1 <<= (winwd); \ + \ + /* Collect the entry from the table, and add. */ \ + f##_pickn(&UX, VX, SCSIMMUL_TABSZ(winwd), w); \ + f##_pickn(&UY, VY, SCSIMMUL_TABSZ(winwd), w); \ + f##_pickn(&UZ, VZ, SCSIMMUL_TABSZ(winwd), w); \ + addfn(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ); \ + } \ + } \ + \ + /* Done. */ \ + *X = TX; *Y = TY; *Z = TZ; \ +} + +/*----- That's all, folks -------------------------------------------------*/ + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/pub/ed25519.c b/pub/ed25519.c index 82099f2c..2dc11613 100644 --- a/pub/ed25519.c +++ b/pub/ed25519.c @@ -32,6 +32,7 @@ #include "f25519.h" #include "ed25519.h" #include "scaf.h" +#include "scmul.h" #include "sha512.h" /*----- Key fetching ------------------------------------------------------*/ @@ -239,144 +240,8 @@ static void ptdbl(f25519 *X, f25519 *Y, f25519 *Z, f25519_mul(Z, &t0, &t1); /* Z = F J */ } -static void ptmul(f25519 *X, f25519 *Y, f25519 *Z, - const scaf_piece n[NPIECE], - const f25519 *X0, const f25519 *Y0, const f25519 *Z0) -{ - /* We assume that the window width divides the scalar piece width. */ -#define WINWD 4 -#define WINLIM (1 << WINWD) -#define WINMASK (WINLIM - 1) -#define TABSZ (WINLIM/2 + 1) - - f25519 VX[TABSZ], VY[TABSZ], VZ[TABSZ]; - f25519 TX, TY, TZ, UX, UY, UZ; - unsigned i, j, k, w; - uint32 m_neg; - scaf_piece ni; - - /* Build a table of small multiples. */ - f25519_set(&VX[0], 0); f25519_set(&VY[0], 1); f25519_set(&VZ[0], 1); - VX[1] = *X0; VY[1] = *Y0; VZ[1] = *Z0; - ptdbl(&VX[2], &VY[2], &VZ[2], &VX[1], &VY[1], &VZ[1]); - for (i = 3; i < TABSZ; i += 2) { - ptadd(&VX[i], &VY[i], &VZ[i], - &VX[i - 1], &VY[i - 1], &VZ[i - 1], X0, Y0, Z0); - ptdbl(&VX[i + 1], &VY[i + 1], &VZ[i + 1], - &VX[(i + 1)/2], &VY[(i + 1)/2], &VZ[(i + 1)/2]); - } - - /* Now do the multiplication. We lag a window behind the cursor position - * because of the scalar recoding we do. - */ - f25519_set(&TX, 0); f25519_set(&TY, 1); f25519_set(&TZ, 1); - for (i = NPIECE, w = 0, m_neg = 0; i--; ) { - ni = n[i]; - - /* Work through each window in the scalar piece. */ - for (j = 0; j < PIECEWD; j += WINWD) { - - /* Shift along by a window. */ - for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ); - - /* Peek at the next window of four bits. If the top bit is set we lend - * a bit leftwards, into w. It's too late for this to affect the sign - * now, but if we negated earlier then the addition would be wrong. - */ - w += (ni >> (PIECEWD - 1))&0x1u; - w = ((WINLIM - w)&m_neg) | (w&~m_neg); - - /* Collect the entry from the table, and add or subtract. */ - f25519_pickn(&UX, VX, TABSZ, w); - f25519_pickn(&UY, VY, TABSZ, w); - f25519_pickn(&UZ, VZ, TABSZ, w); - f25519_condneg(&UX, &UX, m_neg); - ptadd(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ); - - /* Move the next window into the delay slot. If its top bit is set, - * then negate it and set m_neg. - */ - w = (ni >> (PIECEWD - WINWD))&WINMASK; - m_neg = -(uint32)((w >> (WINWD - 1))&0x1u); - ni <<= WINWD; - } - } - - /* Do the final window. Just fix the sign and go. */ - for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ); - w = ((WINLIM - w)&m_neg) | (w&~m_neg); - f25519_pickn(&UX, VX, TABSZ, w); - f25519_pickn(&UY, VY, TABSZ, w); - f25519_pickn(&UZ, VZ, TABSZ, w); - f25519_condneg(&UX, &UX, m_neg); - ptadd(X, Y, Z, &TX, &TY, &TZ, &UX, &UY, &UZ); - -#undef WINWD -#undef WINLIM -#undef WINMASK -#undef TABSZ -} - -static void ptsimmul(f25519 *X, f25519 *Y, f25519 *Z, - const scaf_piece n0[NPIECE], - const f25519 *X0, const f25519 *Y0, const f25519 *Z0, - const scaf_piece n1[NPIECE], - const f25519 *X1, const f25519 *Y1, const f25519 *Z1) -{ - /* We assume that the window width divides the scalar piece width. */ -#define WINWD 2 -#define WINLIM (1 << WINWD) -#define WINMASK (WINLIM - 1) -#define TABSZ (1 << 2*WINWD) - - f25519 VX[TABSZ], VY[TABSZ], VZ[TABSZ]; - f25519 TX, TY, TZ, UX, UY, UZ; - unsigned i, j, k, w, ni0, ni1; - - /* Build a table of small linear combinations. */ - f25519_set(&VX[0], 0); f25519_set(&VY[0], 1); f25519_set(&VZ[0], 1); - VX[1] = *X0; VX[WINLIM] = *X1; - VY[1] = *Y0; VY[WINLIM] = *Y1; - VZ[1] = *Z0; VZ[WINLIM] = *Z1; - for (i = 2; i < WINLIM; i <<= 1) { - ptdbl(&VX[i], &VY[i], &VZ[i], - &VX[i/2], &VY[i/2], &VZ[i/2]); - ptdbl(&VX[i*WINLIM], &VY[i*WINLIM], &VZ[i*WINLIM], - &VX[i*WINLIM/2], &VY[i*WINLIM/2], &VZ[i*WINLIM/2]); - } - for (i = 2; i < TABSZ; i <<= 1) { - for (j = 1; j < i; j++) - ptadd(&VX[i + j], &VY[i + j], &VZ[i + j], - &VX[i], &VY[i], &VZ[i], &VX[j], &VY[j], &VZ[j]); - } - - /* Do the multiplication. */ - f25519_set(&TX, 0); f25519_set(&TY, 1); f25519_set(&TZ, 1); - for (i = NPIECE; i--; ) { - ni0 = n0[i]; ni1 = n1[i]; - - /* Work through each window in the scalar pieces. */ - for (j = 0; j < PIECEWD; j += WINWD) { - - /* Shift along by a window. */ - for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ); - - /* Collect the next window from the scalars. */ - w = ((ni0 >> (PIECEWD - WINWD))&WINMASK) | - ((ni1 >> (PIECEWD - 2*WINWD))&(WINMASK << WINWD)); - ni0 <<= WINWD; ni1 <<= WINWD; - - /* Collect the entry from the table, and add. */ - f25519_pickn(&UX, VX, TABSZ, w); - f25519_pickn(&UY, VY, TABSZ, w); - f25519_pickn(&UZ, VZ, TABSZ, w); - ptadd(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ); - } - } - - /* Done. */ - *X = TX; *Y = TY; *Z = TZ; -} +static DEFINE_SCMUL(ptmul, f25519, 4, PIECEWD, NPIECE, ptadd, ptdbl) +static DEFINE_SCSIMMUL(ptsimmul, f25519, 2, PIECEWD, NPIECE, ptadd, ptdbl) /*----- Key derivation utilities ------------------------------------------*/