-static void ptmul(f25519 *X, f25519 *Y, f25519 *Z,
- const scaf_piece n[NPIECE],
- const f25519 *X0, const f25519 *Y0, const f25519 *Z0)
-{
- /* Multiply the point (X0, Y0, Z0) by the scalar n, writing the result to
- * (X, Y, Z). The point arithmetic is delegated to ptadd and ptdbl, which
- * are defined elsewhere (presumably point addition and doubling, called
- * with the output triple first -- confirm against their definitions).
- *
- * The scalar is an array of NPIECE pieces. Pieces are consumed from
- * n[NPIECE - 1] down to n[0], and within each piece WINWD bits are taken
- * at a time starting from bit PIECEWD - 1.
- *
- * Windows are recoded as signed digits: a window whose top bit is set is
- * handled as a negative digit and lends one to the window before it.
- * Digit magnitudes therefore never exceed WINLIM/2, which is why the
- * table holds only TABSZ = WINLIM/2 + 1 entries.
- *
- * The inputs are copied into the table before anything is stored through
- * X, Y or Z; those are written only by the final ptadd.
- *
- * We assume that the window width divides the scalar piece width.
- */
-#define WINWD 4
-#define WINLIM (1 << WINWD)
-#define WINMASK (WINLIM - 1)
-#define TABSZ (WINLIM/2 + 1)
-
- f25519 VX[TABSZ], VY[TABSZ], VZ[TABSZ];
- f25519 TX, TY, TZ, UX, UY, UZ;
- unsigned i, j, k, w;
- uint32 m_neg;
- scaf_piece ni;
-
- /* Build a table of small multiples. Entry 0 is (0, 1, 1) -- presumably
- * the neutral element -- and entry 1 is the input point. Entry 2 is the
- * double of entry 1. After that, each odd entry i is entry i - 1 plus the
- * input point, and each even entry i + 1 is the double of entry
- * (i + 1)/2. The last index written is TABSZ - 1.
- */
- f25519_set(&VX[0], 0); f25519_set(&VY[0], 1); f25519_set(&VZ[0], 1);
- VX[1] = *X0; VY[1] = *Y0; VZ[1] = *Z0;
- ptdbl(&VX[2], &VY[2], &VZ[2], &VX[1], &VY[1], &VZ[1]);
- for (i = 3; i < TABSZ; i += 2) {
- ptadd(&VX[i], &VY[i], &VZ[i],
- &VX[i - 1], &VY[i - 1], &VZ[i - 1], X0, Y0, Z0);
- ptdbl(&VX[i + 1], &VY[i + 1], &VZ[i + 1],
- &VX[(i + 1)/2], &VY[(i + 1)/2], &VZ[(i + 1)/2]);
- }
-
- /* Now do the multiplication. We lag a window behind the cursor position
- * because of the scalar recoding we do.
- *
- * The accumulator T starts as (0, 1, 1). The pending window w and its
- * sign mask m_neg both start at zero, so the first pass through the loop
- * adds table entry 0 or 1: just the bit lent by the topmost window.
- */
- f25519_set(&TX, 0); f25519_set(&TY, 1); f25519_set(&TZ, 1);
- for (i = NPIECE, w = 0, m_neg = 0; i--; ) {
- ni = n[i];
-
- /* Work through each window in the scalar piece. The current window is
- * always in the top WINWD bits of the low PIECEWD bits of ni; ni is
- * shifted left by WINWD at the end of each pass.
- */
- for (j = 0; j < PIECEWD; j += WINWD) {
-
- /* Shift along by a window: WINWD calls to ptdbl on the accumulator. */
- for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ);
-
- /* Peek at the next window of four bits. If the top bit is set we lend
- * a bit leftwards, into w. It's too late for this to affect the sign
- * now, but if we negated earlier then the addition would be wrong.
- *
- * So the lent bit is added to the raw pending window first, and only
- * then is the magnitude fixed: when m_neg is all-ones w becomes
- * WINLIM - w, otherwise w is kept. The choice is made with bit masks
- * rather than a branch. Either way the result lies in 0, ...,
- * WINLIM/2, i.e. inside the table.
- */
- w += (ni >> (PIECEWD - 1))&0x1u;
- w = ((WINLIM - w)&m_neg) | (w&~m_neg);
-
- /* Collect the entry from the table, and add or subtract. The entry is
- * fetched by f25519_pickn, which is handed the whole table, its size
- * and the index. Subtraction is done by passing the X coordinate
- * through f25519_condneg under the mask m_neg before the ptadd
- * (presumably a conditional field negation that negates the point --
- * confirm against those helpers).
- */
- f25519_pickn(&UX, VX, TABSZ, w);
- f25519_pickn(&UY, VY, TABSZ, w);
- f25519_pickn(&UZ, VZ, TABSZ, w);
- f25519_condneg(&UX, &UX, m_neg);
- ptadd(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ);
-
- /* Move the next window into the delay slot. If its top bit is set,
- * then negate it and set m_neg. Here w receives the raw WINWD-bit
- * value and m_neg becomes all-ones or zero according to its top bit;
- * the negation itself happens on the next pass, after the lent bit
- * has been added.
- */
- w = (ni >> (PIECEWD - WINWD))&WINMASK;
- m_neg = -(uint32)((w >> (WINWD - 1))&0x1u);
- ni <<= WINWD;
- }
- }
-
- /* Do the final window. Just fix the sign and go. There is no later
- * window to lend a bit, so w is not incremented before the sign fix.
- * This last ptadd stores straight into the caller's X, Y and Z.
- */
- for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ);
- w = ((WINLIM - w)&m_neg) | (w&~m_neg);
- f25519_pickn(&UX, VX, TABSZ, w);
- f25519_pickn(&UY, VY, TABSZ, w);
- f25519_pickn(&UZ, VZ, TABSZ, w);
- f25519_condneg(&UX, &UX, m_neg);
- ptadd(X, Y, Z, &TX, &TY, &TZ, &UX, &UY, &UZ);
-
-#undef WINWD
-#undef WINLIM
-#undef WINMASK
-#undef TABSZ
-}
-
-static void ptsimmul(f25519 *X, f25519 *Y, f25519 *Z,
- const scaf_piece n0[NPIECE],
- const f25519 *X0, const f25519 *Y0, const f25519 *Z0,
- const scaf_piece n1[NPIECE],
- const f25519 *X1, const f25519 *Y1, const f25519 *Z1)
-{
- /* Combine two scalar/point pairs in a single pass: scalar n0 with the
- * point (X0, Y0, Z0) and scalar n1 with the point (X1, Y1, Z1), storing
- * the result in (X, Y, Z). Presumably the result is n0 P0 + n1 P1,
- * given that ptadd and ptdbl (defined elsewhere) are point addition and
- * doubling -- confirm against their definitions.
- *
- * Both scalars are arrays of NPIECE pieces, consumed from index
- * NPIECE - 1 down to 0, WINWD bits at a time from bit PIECEWD - 1
- * downwards. The two windows are glued into one table index, so each
- * step costs WINWD doublings and one addition. The windows are used
- * as plain unsigned digits; there is no signed recoding here.
- *
- * The result is copied out only at the very end, after all reads of the
- * input points.
- *
- * We assume that the window width divides the scalar piece width.
- */
-#define WINWD 2
-#define WINLIM (1 << WINWD)
-#define WINMASK (WINLIM - 1)
-#define TABSZ (1 << 2*WINWD)
-
- f25519 VX[TABSZ], VY[TABSZ], VZ[TABSZ];
- f25519 TX, TY, TZ, UX, UY, UZ;
- unsigned i, j, k, w, ni0, ni1;
-
- /* Build a table of small linear combinations. Index a + WINLIM*b holds
- * the combination of a copies of point 0 and b copies of point 1, for
- * 0 <= a, b < WINLIM. Entry 0 is (0, 1, 1) -- presumably the neutral
- * element -- and entries 1 and WINLIM are the two input points. The
- * first loop fills the remaining power-of-two indices by doubling the
- * entry at half the index. The second loop fills every other index
- * i + j as entry i plus entry j, with i a power of two and 0 < j < i;
- * entry j has always been completed by the time it is read.
- */
- f25519_set(&VX[0], 0); f25519_set(&VY[0], 1); f25519_set(&VZ[0], 1);
- VX[1] = *X0; VX[WINLIM] = *X1;
- VY[1] = *Y0; VY[WINLIM] = *Y1;
- VZ[1] = *Z0; VZ[WINLIM] = *Z1;
- for (i = 2; i < WINLIM; i <<= 1) {
- ptdbl(&VX[i], &VY[i], &VZ[i],
- &VX[i/2], &VY[i/2], &VZ[i/2]);
- ptdbl(&VX[i*WINLIM], &VY[i*WINLIM], &VZ[i*WINLIM],
- &VX[i*WINLIM/2], &VY[i*WINLIM/2], &VZ[i*WINLIM/2]);
- }
- for (i = 2; i < TABSZ; i <<= 1) {
- for (j = 1; j < i; j++)
- ptadd(&VX[i + j], &VY[i + j], &VZ[i + j],
- &VX[i], &VY[i], &VZ[i], &VX[j], &VY[j], &VZ[j]);
- }
-
- /* Do the multiplication. The accumulator T starts as (0, 1, 1).
- *
- * NOTE(review): ni0 and ni1 are plain unsigned while the pieces are
- * scaf_piece, so this assumes the low PIECEWD bits of a piece fit in an
- * unsigned -- confirm against the definitions of scaf_piece and PIECEWD.
- */
- f25519_set(&TX, 0); f25519_set(&TY, 1); f25519_set(&TZ, 1);
- for (i = NPIECE; i--; ) {
- ni0 = n0[i]; ni1 = n1[i];
-
- /* Work through each window in the scalar pieces. */
- for (j = 0; j < PIECEWD; j += WINWD) {
-
- /* Shift along by a window: WINWD calls to ptdbl on the accumulator. */
- for (k = 0; k < WINWD; k++) ptdbl(&TX, &TY, &TZ, &TX, &TY, &TZ);
-
- /* Collect the next window from the scalars. The current top WINWD
- * bits of ni0 become the low WINWD bits of w, and the current top
- * WINWD bits of ni1 become the next WINWD bits, matching the table
- * layout a + WINLIM*b. Both working copies are then shifted left so
- * that the following window moves to the top.
- */
- w = ((ni0 >> (PIECEWD - WINWD))&WINMASK) |
- ((ni1 >> (PIECEWD - 2*WINWD))&(WINMASK << WINWD));
- ni0 <<= WINWD; ni1 <<= WINWD;
-
- /* Collect the entry from the table, and add. The entry is fetched by
- * f25519_pickn, which is handed the whole table, its size and the
- * index, rather than by indexing the arrays directly.
- */
- f25519_pickn(&UX, VX, TABSZ, w);
- f25519_pickn(&UY, VY, TABSZ, w);
- f25519_pickn(&UZ, VZ, TABSZ, w);
- ptadd(&TX, &TY, &TZ, &TX, &TY, &TZ, &UX, &UY, &UZ);
- }
- }
-
- /* Done. Copy the accumulator to the caller's output.
- *
- * NOTE(review): unlike ptmul, no #undef of WINWD, WINLIM, WINMASK or
- * TABSZ is visible at the end of this function, so these definitions
- * stay in force for whatever follows -- check whether that is intended.
- */
- *X = TX; *Y = TY; *Z = TZ;
-}