From: Mark Wooding <mdw@distorted.org.uk>
Date: Mon, 1 May 2017 00:38:30 +0000 (+0100)
Subject: math/f25519.c, utils/curve25519.sage: Slightly improve `quosqrt' algorithm.
X-Git-Tag: 2.4.0~6
X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/e830bb692041c75eb29b8c511db21af81b3aae2d

math/f25519.c, utils/curve25519.sage: Slightly improve `quosqrt' algorithm.

The algorithm from the Bernstein et al. paper was somewhat ugly.
Replace it with a different one using the techniques I used in `fgoldi'
for the main calculation, but with the same end structure.
---

diff --git a/math/f25519.c b/math/f25519.c
index a58a68a9..78844be6 100644
--- a/math/f25519.c
+++ b/math/f25519.c
@@ -1092,7 +1092,7 @@ static const piece sqrtm1_pieces[NPIECE] = {
 
 int f25519_quosqrt(f25519 *z, const f25519 *x, const f25519 *y)
 {
-  f25519 t, u, w, beta, xy3, t2p50m1;
+  f25519 t, u, v, w, t15;
   octet xb[32], b0[32], b1[32];
   int32 rc = -1;
   mask32 m;
@@ -1103,68 +1103,72 @@ int f25519_quosqrt(f25519 *z, const f25519 *x, const f25519 *y)
   for (i = 1; i < (n); i++) f25519_sqr((z), (z));			\
 } while (0)
 
-  /* This is a bit tricky; the algorithm is from Bernstein, Duif, Lange,
-   * Schwabe, and Yang, `High-speed high-security signatures', 2011-09-26,
-   * https://ed25519.cr.yp.to/ed25519-20110926.pdf.
+  /* This is a bit tricky; the algorithm is loosely based on Bernstein, Duif,
+   * Lange, Schwabe, and Yang, `High-speed high-security signatures',
+   * 2011-09-26, https://ed25519.cr.yp.to/ed25519-20110926.pdf.
+   */
+  f25519_mul(&v, x, y);
+
+  /* Now for an addition chain. */	/* step | value */
+  f25519_sqr(&u, &v);			/*    1 | 2 */
+  f25519_mul(&t, &u, &v);		/*    2 | 3 */
+  SQRN(&u, &t, 2);			/*    4 | 12 */
+  f25519_mul(&t15, &u, &t);		/*    5 | 15 */
+  f25519_sqr(&u, &t15);			/*    6 | 30 */
+  f25519_mul(&t, &u, &v);		/*    7 | 31 = 2^5 - 1 */
+  SQRN(&u, &t, 5);			/*   12 | 2^10 - 2^5 */
+  f25519_mul(&t, &u, &t);		/*   13 | 2^10 - 1 */
+  SQRN(&u, &t, 10);			/*   23 | 2^20 - 2^10 */
+  f25519_mul(&u, &u, &t);		/*   24 | 2^20 - 1 */
+  SQRN(&u, &u, 10);			/*   34 | 2^30 - 2^10 */
+  f25519_mul(&t, &u, &t);		/*   35 | 2^30 - 1 */
+  f25519_sqr(&u, &t);			/*   36 | 2^31 - 2 */
+  f25519_mul(&t, &u, &v);		/*   37 | 2^31 - 1 */
+  SQRN(&u, &t, 31);			/*   68 | 2^62 - 2^31 */
+  f25519_mul(&t, &u, &t);		/*   69 | 2^62 - 1 */
+  SQRN(&u, &t, 62);			/*  131 | 2^124 - 2^62 */
+  f25519_mul(&t, &u, &t);		/*  132 | 2^124 - 1 */
+  SQRN(&u, &t, 124);			/*  256 | 2^248 - 2^124 */
+  f25519_mul(&t, &u, &t);		/*  257 | 2^248 - 1 */
+  f25519_sqr(&u, &t);			/*  258 | 2^249 - 2 */
+  f25519_mul(&t, &u, &v);		/*  259 | 2^249 - 1 */
+  SQRN(&t, &t, 3);			/*  262 | 2^252 - 8 */
+  f25519_sqr(&u, &t);			/*  263 | 2^253 - 16 */
+  f25519_mul(&t, &u, &t);		/*  264 | 3*2^252 - 24 */
+  f25519_mul(&t, &t, &t15);		/*  265 | 3*2^252 - 9 */
+  f25519_mul(&w, &t, &v);		/*  266 | 3*2^252 - 8 */
+
+  /* Awesome.  Now let me explain.  Let v be a square in GF(p), and let w =
+   * v^(3*2^252 - 8).  In particular, let's consider
    *
-   * First of all, a complicated exponentation.  The addition chain here is
-   * mine.  We start with some preliminary values.
-   */					/* step | value */
-  SQRN(&u, y, 1);			/*    1 | 0, 2 */
-  f25519_mul(&t, &u, y);		/*    2 | 0, 3 */
-  f25519_mul(&xy3, &t, x);		/*    3 | 1, 3 */
-  SQRN(&u, &u, 1);			/*    4 | 0, 4 */
-  f25519_mul(&w, &u, &xy3);		/*    5 | 1, 7 */
-
-  /* And now we calculate w^((p - 5)/8) = w^(252 - 3). */
-  SQRN(&u, &w, 1);			/*    6 | 2 */
-  f25519_mul(&t, &w, &u);		/*    7 | 3 */
-  SQRN(&u, &t, 1);			/*    8 | 6 */
-  f25519_mul(&t, &u, &w);		/*    9 | 7 */
-  SQRN(&u, &t, 3);			/*   12 | 56 */
-  f25519_mul(&t, &t, &u);		/*   13 | 63 = 2^6 - 1 */
-  SQRN(&u, &t, 6);			/*   19 | 2^12 - 2^6 */
-  f25519_mul(&t, &t, &u);		/*   20 | 2^12 - 1 */
-  SQRN(&u, &t, 12);			/*   32 | 2^24 - 2^12 */
-  f25519_mul(&t, &t, &u);		/*   33 | 2^24 - 1 */
-  SQRN(&u, &t, 1);			/*   34 | 2^25 - 2 */
-  f25519_mul(&t, &u, &w);		/*   35 | 2^25 - 1 */
-  SQRN(&u, &t, 25);			/*   60 | 2^50 - 2^25 */
-  f25519_mul(&t2p50m1, &t, &u);		/*   61 | 2^50 - 1 */
-  SQRN(&u, &t2p50m1, 50);		/*  111 | 2^100 - 2^50 */
-  f25519_mul(&t, &t2p50m1, &u);		/*  112 | 2^100 - 1 */
-  SQRN(&u, &t, 100);			/*  212 | 2^200 - 2^100 */
-  f25519_mul(&t, &t, &u);		/*  213 | 2^200 - 1 */
-  SQRN(&u, &t, 50);			/*  263 | 2^250 - 2^50 */
-  f25519_mul(&t, &t2p50m1, &u);		/*  264 | 2^250 - 1 */
-  SQRN(&u, &t, 2);			/*  266 | 2^252 - 4 */
-  f25519_mul(&t, &u, &w);		/*  267 | 2^252 - 3 */
-
-  /* And finally... */
-  f25519_mul(&beta, &t, &xy3);		/*  268 | ... */
-
-  /* Now we have beta = (x y^3) (x y^7)^((p - 5)/8) = (x/y)^((p + 3)/8), and
-   * we're ready to finish the computation.  Suppose that alpha^2 = u/w.
-   * Then beta^4 = (x/y)^((p + 3)/2) = alpha^(p + 3) = alpha^4 = (x/y)^2, so
-   * we have beta^2 = Â±x/y.  If y beta^2 = x then beta is the one we wanted;
-   * if -y beta^2 = x, then we want beta sqrt(-1), which we already know.  Of
-   * course, it might not match either, in which case we fail.
+   *    v^2 w^4 = v^2 v^{3*2^254 - 32} = (v^{2^254 - 10})^3
+   *
+   * But 2^254 - 10 = ((2^255 - 19) - 1)/2 = (p - 1)/2.  Since v is a square,
+   * it has order dividing (p - 1)/2, and therefore v^2 w^4 = 1 and
+   *
+   *    w^4 = 1/v^2
+   *
+   * That in turn implies that w^2 = Â±1/v.  Now, recall that v = x y, and let
+   * w' = w x.  Then w'^2 = Â±x^2/v = Â±x/y.  If y w'^2 = x then we set
+   * z = w', since we have z^2 = x/y; otherwise let z = i w', where i^2 = -1,
+   * so z^2 = -w^2 = x/y, and we're done.
    *
    * The easiest way to compare is to encode.  This isn't as wasteful as it
    * sounds: the hard part is normalizing the representations, which we have
    * to do anyway.
    */
-  f25519_sqr(&t, &beta);
+  f25519_mul(&w, &w, x);
+  f25519_sqr(&t, &w);
   f25519_mul(&t, &t, y);
   f25519_neg(&u, &t);
   f25519_store(xb, x);
   f25519_store(b0, &t);
   f25519_store(b1, &u);
-  f25519_mul(&u, &beta, SQRTM1);
+  f25519_mul(&u, &w, SQRTM1);
 
   m = -ct_memeq(b0, xb, 32);
   rc = PICK2(0, rc, m);
-  f25519_pick2(z, &beta, &u, m);
+  f25519_pick2(z, &w, &u, m);
   m = -ct_memeq(b1, xb, 32);
   rc = PICK2(0, rc, m);
 
diff --git a/utils/curve25519.sage b/utils/curve25519.sage
index 416f059f..24e74b12 100644
--- a/utils/curve25519.sage
+++ b/utils/curve25519.sage
@@ -115,7 +115,7 @@ def inv(x):
   t = u*t11              # 265 | 2^255 - 21
   return t
 
-def quosqrt(x, y):
+def quosqrt_djb(x, y):
 
   ## First, some preliminary values.
   y2 = sqrn(y, 1)        #   1 | 0, 2
@@ -162,6 +162,60 @@ def quosqrt(x, y):
   elif t == -x: return beta*sqrtm1
   else: raise ValueError, 'not a square'
 
+def quosqrt_mdw(x, y):
+  v = x*y
+
+  ## Now we calculate w = v^{3*2^252 - 8}.  This will be explained later.
+  u = sqrn(v, 1)         #   1 | 2
+  t = u*v                #   2 | 3
+  u = sqrn(t, 2)         #   4 | 12
+  t15 = u*t              #   5 | 15
+  u = sqrn(t15, 1)       #   6 | 30
+  t = u*v                #   7 | 31 = 2^5 - 1
+  u = sqrn(t, 5)         #  12 | 2^10 - 2^5
+  t = u*t                #  13 | 2^10 - 1
+  u = sqrn(t, 10)        #  23 | 2^20 - 2^10
+  u = u*t                #  24 | 2^20 - 1
+  u = sqrn(u, 10)        #  34 | 2^30 - 2^10
+  t = u*t                #  35 | 2^30 - 1
+  u = sqrn(t, 1)         #  36 | 2^31 - 2
+  t = u*v                #  37 | 2^31 - 1
+  u = sqrn(t, 31)        #  68 | 2^62 - 2^31
+  t = u*t                #  69 | 2^62 - 1
+  u = sqrn(t, 62)        # 131 | 2^124 - 2^62
+  t = u*t                # 132 | 2^124 - 1
+  u = sqrn(t, 124)       # 256 | 2^248 - 2^124
+  t = u*t                # 257 | 2^248 - 1
+  u = sqrn(t, 1)         # 258 | 2^249 - 2
+  t = u*v                # 259 | 2^249 - 1
+  t = sqrn(t, 3)         # 262 | 2^252 - 8
+  u = sqrn(t, 1)         # 263 | 2^253 - 16
+  t = u*t                # 264 | 3*2^252 - 24
+  t = t*t15              # 265 | 3*2^252 - 9
+  w = t*v                # 266 | 3*2^252 - 8
+
+  ## Awesome.  Now let me explain.  Let v be a square in GF(p), and let w =
+  ## v^(3*2^252 - 8).  In particular, let's consider
+  ##
+  ##    v^2 w^4 = v^2 v^{3*2^254 - 32} = (v^{2^254 - 10})^3
+  ##
+  ## But 2^254 - 10 = ((2^255 - 19) - 1)/2 = (p - 1)/2.  Since v is a square,
+  ## it has order dividing (p - 1)/2, and therefore v^2 w^4 = 1 and
+  ##
+  ##    w^4 = 1/v^2
+  ##
+  ## That in turn implies that w^2 = Â±1/v.  Now, recall that v = x y, and let
+  ## w' = w x.  Then w'^2 = Â±x^2/v = Â±x/y.  If y w'^2 = x then we set
+  ## z = w', since we have z^2 = x/y; otherwise let z = i w', where i^2 = -1,
+  ## so z^2 = -w^2 = x/y, and we're done.
+  t = w*x
+  u = y*t^2
+  if u == x: return t
+  elif u == -x: return t*sqrtm1
+  else: raise ValueError, 'not a square'
+
+quosqrt = quosqrt_mdw
+
 assert inv(k(9))*9 == 1
 assert 5*quosqrt(k(4), k(5))^2 == 4