X-Git-Url: https://git.distorted.org.uk/~mdw/secnet/blobdiff_plain/1047c205103e6da9fc6a317f41583147dbc11aa3..a1a6042e24c9873aa6abf668bcb68d39d0eb4190:/f25519.c

diff --git a/f25519.c b/f25519.c
index d16943c..fc57ae4 100644
--- a/f25519.c
+++ b/f25519.c
@@ -50,12 +50,14 @@
 
 /*----- Basic setup -------------------------------------------------------*/
 
+typedef f25519_piece piece;
+
 /* Elements x of GF(2^255 - 19) are represented by ten signed integers x_i: x
  * = SUM_{0<=i<10} x_i 2^ceil(51i/2), mostly following Bernstein's original
  * paper.
  */
 
-typedef  int32  piece;  typedef  int64  dblpiece;
+			typedef  int64  dblpiece;
 typedef uint32 upiece;  typedef uint64 udblpiece;
 #define P p26
 #define PIECEWD(i) ((i)%2 ? 25 : 26)
@@ -63,7 +65,6 @@ typedef uint32 upiece;  typedef uint64 udblpiece;
 
 #define M26 0x03ffffffu
 #define M25 0x01ffffffu
-#define B26 0x04000000u
 #define B25 0x02000000u
 #define B24 0x01000000u
 
@@ -793,7 +794,7 @@ static const piece sqrtm1_pieces[NPIECE] = {
 
 int f25519_quosqrt(f25519 *z, const f25519 *x, const f25519 *y)
 {
-  f25519 t, u, w, beta, xy3, t2p50m1;
+  f25519 t, u, v, w, t15;
   octet xb[32], b0[32], b1[32];
   int32 rc = -1;
   mask32 m;
@@ -804,68 +805,72 @@ int f25519_quosqrt(f25519 *z, const f25519 *x, const f25519 *y)
   for (i = 1; i < (n); i++) f25519_sqr((z), (z));			\
 } while (0)
 
-  /* This is a bit tricky; the algorithm is from Bernstein, Duif, Lange,
-   * Schwabe, and Yang, `High-speed high-security signatures', 2011-09-26,
-   * https://ed25519.cr.yp.to/ed25519-20110926.pdf.
+  /* This is a bit tricky; the algorithm is loosely based on Bernstein, Duif,
+   * Lange, Schwabe, and Yang, `High-speed high-security signatures',
+   * 2011-09-26, https://ed25519.cr.yp.to/ed25519-20110926.pdf.
+   */
+  f25519_mul(&v, x, y);
+
+  /* Now for an addition chain. */	/* step | value */
+  f25519_sqr(&u, &v);			/*    1 | 2 */
+  f25519_mul(&t, &u, &v);		/*    2 | 3 */
+  SQRN(&u, &t, 2);			/*    4 | 12 */
+  f25519_mul(&t15, &u, &t);		/*    5 | 15 */
+  f25519_sqr(&u, &t15);			/*    6 | 30 */
+  f25519_mul(&t, &u, &v);		/*    7 | 31 = 2^5 - 1 */
+  SQRN(&u, &t, 5);			/*   12 | 2^10 - 2^5 */
+  f25519_mul(&t, &u, &t);		/*   13 | 2^10 - 1 */
+  SQRN(&u, &t, 10);			/*   23 | 2^20 - 2^10 */
+  f25519_mul(&u, &u, &t);		/*   24 | 2^20 - 1 */
+  SQRN(&u, &u, 10);			/*   34 | 2^30 - 2^10 */
+  f25519_mul(&t, &u, &t);		/*   35 | 2^30 - 1 */
+  f25519_sqr(&u, &t);			/*   36 | 2^31 - 2 */
+  f25519_mul(&t, &u, &v);		/*   37 | 2^31 - 1 */
+  SQRN(&u, &t, 31);			/*   68 | 2^62 - 2^31 */
+  f25519_mul(&t, &u, &t);		/*   69 | 2^62 - 1 */
+  SQRN(&u, &t, 62);			/*  131 | 2^124 - 2^62 */
+  f25519_mul(&t, &u, &t);		/*  132 | 2^124 - 1 */
+  SQRN(&u, &t, 124);			/*  256 | 2^248 - 2^124 */
+  f25519_mul(&t, &u, &t);		/*  257 | 2^248 - 1 */
+  f25519_sqr(&u, &t);			/*  258 | 2^249 - 2 */
+  f25519_mul(&t, &u, &v);		/*  259 | 2^249 - 1 */
+  SQRN(&t, &t, 3);			/*  262 | 2^252 - 8 */
+  f25519_sqr(&u, &t);			/*  263 | 2^253 - 16 */
+  f25519_mul(&t, &u, &t);		/*  264 | 3*2^252 - 24 */
+  f25519_mul(&t, &t, &t15);		/*  265 | 3*2^252 - 9 */
+  f25519_mul(&w, &t, &v);		/*  266 | 3*2^252 - 8 */
+
+  /* Awesome.  Now let me explain.  Let v be a square in GF(p), and let w =
+   * v^(3*2^252 - 8).  In particular, let's consider
    *
-   * First of all, a complicated exponentation.  The addition chain here is
-   * mine.  We start with some preliminary values.
-   */					/* step | value */
-  SQRN(&u, y, 1);			/*    1 | 0, 2 */
-  f25519_mul(&t, &u, y);		/*    2 | 0, 3 */
-  f25519_mul(&xy3, &t, x);		/*    3 | 1, 3 */
-  SQRN(&u, &u, 1);			/*    4 | 0, 4 */
-  f25519_mul(&w, &u, &xy3);		/*    5 | 1, 7 */
-
-  /* And now we calculate w^((p - 5)/8) = w^(252 - 3). */
-  SQRN(&u, &w, 1);			/*    6 | 2 */
-  f25519_mul(&t, &w, &u);		/*    7 | 3 */
-  SQRN(&u, &t, 1);			/*    8 | 6 */
-  f25519_mul(&t, &u, &w);		/*    9 | 7 */
-  SQRN(&u, &t, 3);			/*   12 | 56 */
-  f25519_mul(&t, &t, &u);		/*   13 | 63 = 2^6 - 1 */
-  SQRN(&u, &t, 6);			/*   19 | 2^12 - 2^6 */
-  f25519_mul(&t, &t, &u);		/*   20 | 2^12 - 1 */
-  SQRN(&u, &t, 12);			/*   32 | 2^24 - 2^12 */
-  f25519_mul(&t, &t, &u);		/*   33 | 2^24 - 1 */
-  SQRN(&u, &t, 1);			/*   34 | 2^25 - 2 */
-  f25519_mul(&t, &u, &w);		/*   35 | 2^25 - 1 */
-  SQRN(&u, &t, 25);			/*   60 | 2^50 - 2^25 */
-  f25519_mul(&t2p50m1, &t, &u);		/*   61 | 2^50 - 1 */
-  SQRN(&u, &t2p50m1, 50);		/*  111 | 2^100 - 2^50 */
-  f25519_mul(&t, &t2p50m1, &u);		/*  112 | 2^100 - 1 */
-  SQRN(&u, &t, 100);			/*  212 | 2^200 - 2^100 */
-  f25519_mul(&t, &t, &u);		/*  213 | 2^200 - 1 */
-  SQRN(&u, &t, 50);			/*  263 | 2^250 - 2^50 */
-  f25519_mul(&t, &t2p50m1, &u);		/*  264 | 2^250 - 1 */
-  SQRN(&u, &t, 2);			/*  266 | 2^252 - 4 */
-  f25519_mul(&t, &u, &w);		/*  267 | 2^252 - 3 */
-
-  /* And finally... */
-  f25519_mul(&beta, &t, &xy3);		/*  268 | ... */
-
-  /* Now we have beta = (x y^3) (x y^7)^((p - 5)/8) = (x/y)^((p + 3)/8), and
-   * we're ready to finish the computation.  Suppose that alpha^2 = u/w.
-   * Then beta^4 = (x/y)^((p + 3)/2) = alpha^(p + 3) = alpha^4 = (x/y)^2, so
-   * we have beta^2 = Â±x/y.  If y beta^2 = x then beta is the one we wanted;
-   * if -y beta^2 = x, then we want beta sqrt(-1), which we already know.  Of
-   * course, it might not match either, in which case we fail.
+   *    v^2 w^4 = v^2 v^{3*2^254 - 32} = (v^{2^254 - 10})^3
+   *
+   * But 2^254 - 10 = ((2^255 - 19) - 1)/2 = (p - 1)/2.  Since v is a square,
+   * it has order dividing (p - 1)/2, and therefore v^2 w^4 = 1 and
+   *
+   *    w^4 = 1/v^2
+   *
+   * That in turn implies that w^2 = Â±1/v.  Now, recall that v = x y, and let
+   * w' = w x.  Then w'^2 = Â±x^2/v = Â±x/y.  If y w'^2 = x then we set
+   * z = w', since we have z^2 = x/y; otherwise let z = i w', where i^2 = -1,
+   * so z^2 = -w^2 = x/y, and we're done.
    *
    * The easiest way to compare is to encode.  This isn't as wasteful as it
    * sounds: the hard part is normalizing the representations, which we have
    * to do anyway.
    */
-  f25519_sqr(&t, &beta);
+  f25519_mul(&w, &w, x);
+  f25519_sqr(&t, &w);
   f25519_mul(&t, &t, y);
   f25519_neg(&u, &t);
   f25519_store(xb, x);
   f25519_store(b0, &t);
   f25519_store(b1, &u);
-  f25519_mul(&u, &beta, SQRTM1);
+  f25519_mul(&u, &w, SQRTM1);
 
   m = -consttime_memeq(b0, xb, 32);
   rc = PICK2(0, rc, m);
-  f25519_pick2(z, &beta, &u, m);
+  f25519_pick2(z, &w, &u, m);
   m = -consttime_memeq(b1, xb, 32);
   rc = PICK2(0, rc, m);