X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/f521d4c7a97076db34681c598d7965c7d05713b0..HEAD:/math/fgoldi.c

diff --git a/math/fgoldi.c b/math/fgoldi.c
index afc43329..43309dcd 100644
--- a/math/fgoldi.c
+++ b/math/fgoldi.c
@@ -29,6 +29,7 @@
 
 #include "config.h"
 
+#include "ct.h"
 #include "fgoldi.h"
 
 /*----- Basic setup -------------------------------------------------------*
@@ -50,10 +51,8 @@ typedef uint32 upiece; typedef uint64 udblpiece;
 #define NPIECE 16
 #define P p28
 
-#define B28 0x10000000u
 #define B27 0x08000000u
 #define M28 0x0fffffffu
-#define M27 0x07ffffffu
 #define M32 0xffffffffu
 
 #elif FGOLDI_IMPL == 12
@@ -69,12 +68,10 @@ typedef uint16 upiece; typedef uint32 udblpiece;
 #define NPIECE 40
 #define P p12
 
-#define B12 0x1000u
 #define B11 0x0800u
 #define B10 0x0400u
 #define M12 0xfffu
 #define M11 0x7ffu
-#define M10 0x3ffu
 #define M8 0xffu
 
 #endif
@@ -399,8 +396,72 @@ void fgoldi_sub(fgoldi *z, const fgoldi *x, const fgoldi *y)
   for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] - y->P[i];
 }
 
+/* --- @fgoldi_neg@ --- *
+ *
+ * Arguments:	@fgoldi *z@ = where to put the result (may alias @x@)
+ *		@const fgoldi *x@ = an operand
+ *
+ * Returns:	---
+ *
+ * Use:		Set @z = -x@.
+ */
+
+void fgoldi_neg(fgoldi *z, const fgoldi *x)
+{
+  unsigned i;
+  for (i = 0; i < NPIECE; i++) z->P[i] = -x->P[i];
+}
+
 /*----- Constant-time utilities -------------------------------------------*/
 
+/* --- @fgoldi_pick2@ --- *
+ *
+ * Arguments:	@fgoldi *z@ = where to put the result (may alias @x@ or @y@)
+ *		@const fgoldi *x, *y@ = two operands
+ *		@uint32 m@ = a mask
+ *
+ * Returns:	---
+ *
+ * Use:		If @m@ is zero, set @z = y@; if @m@ is all-bits-set, then set
+ *		@z = x@.  If @m@ has some other value, then scramble @z@ in
+ *		an unhelpful way.
+ */
+
+void fgoldi_pick2(fgoldi *z, const fgoldi *x, const fgoldi *y, uint32 m)
+{
+  mask32 mm = FIX_MASK32(m);
+  unsigned i;
+  for (i = 0; i < NPIECE; i++) z->P[i] = PICK2(x->P[i], y->P[i], mm);
+}
+
+/* --- @fgoldi_pickn@ --- *
+ *
+ * Arguments:	@fgoldi *z@ = where to put the result
+ *		@const fgoldi *v@ = a table of entries
+ *		@size_t n@ = the number of entries in @v@
+ *		@size_t i@ = an index
+ *
+ * Returns:	---
+ *
+ * Use:		If @0 <= i < n < 32@ then set @z = v[i]@.  If @n >= 32@ then
+ *		do something unhelpful; otherwise, if @i >= n@ then set @z@
+ *		to zero.
+ */
+
+void fgoldi_pickn(fgoldi *z, const fgoldi *v, size_t n, size_t i)
+{
+  uint32 b = (uint32)1 << (31 - i);
+  mask32 m;
+  unsigned j;
+
+  for (j = 0; j < NPIECE; j++) z->P[j] = 0;
+  while (n--) {
+    m = SIGN(b);
+    for (j = 0; j < NPIECE; j++) CONDPICK(z->P[j], v->P[j], m);
+    v++; b <<= 1;
+  }
+}
+
 /* --- @fgoldi_condswap@ --- *
  *
  * Arguments:	@fgoldi *x, *y@ = two operands
@@ -421,6 +482,36 @@ void fgoldi_condswap(fgoldi *x, fgoldi *y, uint32 m)
   for (i = 0; i < NPIECE; i++) CONDSWAP(x->P[i], y->P[i], mm);
 }
 
+/* --- @fgoldi_condneg@ --- *
+ *
+ * Arguments:	@fgoldi *z@ = where to put the result (may alias @x@)
+ *		@const fgoldi *x@ = an operand
+ *		@uint32 m@ = a mask
+ *
+ * Returns:	---
+ *
+ * Use:		If @m@ is zero, set @z = x@; if @m@ is all-bits-set, then set
+ *		@z = -x@.  If @m@ has some other value then scramble @z@ in
+ *		an unhelpful way.
+ */
+
+void fgoldi_condneg(fgoldi *z, const fgoldi *x, uint32 m)
+{
+#ifdef NEG_TWOC
+  mask32 m_xor = FIX_MASK32(m);
+  piece m_add = m&1;
+# define CONDNEG(x) (((x) ^ m_xor) + m_add)
+#else
+  int s = PICK2(-1, +1, m);
+# define CONDNEG(x) (s*(x))
+#endif
+
+  unsigned i;
+  for (i = 0; i < NPIECE; i++) z->P[i] = CONDNEG(x->P[i]);
+
+#undef CONDNEG
+}
+
 /*----- Multiplication ----------------------------------------------------*/
 
 #if FGOLDI_IMPL == 28
@@ -785,10 +876,93 @@ void fgoldi_inv(fgoldi *z, const fgoldi *x)
 #undef SQRN
 }
 
+/* --- @fgoldi_quosqrt@ --- *
+ *
+ * Arguments:	@fgoldi *z@ = where to put the result (may alias @x@ or @y@)
+ *		@const fgoldi *x, *y@ = two operands
+ *
+ * Returns:	Zero if successful, @-1@ if %$x/y$% is not a square.
+ *
+ * Use:		Stores in @z@ the one of the square roots %$\pm\sqrt{x/y}$%.
+ *		If %$x = y = 0% then the result is zero; if %$y = 0$% but %$x
+ *		\ne 0$% then the operation fails.  If you wanted a specific
+ *		square root then you'll have to pick it yourself.
+ */
+
+int fgoldi_quosqrt(fgoldi *z, const fgoldi *x, const fgoldi *y)
+{
+  fgoldi t, u, v;
+  octet xb[56], b0[56];
+  int32 rc = -1;
+  mask32 m;
+  unsigned i;
+
+#define SQRN(z, x, n) do {						\
+  fgoldi_sqr((z), (x));							\
+  for (i = 1; i < (n); i++) fgoldi_sqr((z), (z));			\
+} while (0)
+
+  /* This is, fortunately, significantly easier than the equivalent problem
+   * in GF(2^255 - 19), since p == 3 (mod 4).
+   *
+   * If x/y is square, then so is v = y^2 x/y = x y, and therefore u has
+   * order r = (p - 1)/2.  Let w = v^{(p-3)/4}.  Then w^2 = v^{(p-3)/2} =
+   * u^{r-1} = 1/v = 1/x y.  Clearly, then, (x w)^2 = x^2/x y = x/y, so x w
+   * is one of the square roots we seek.
+   *
+   * The addition chain, then, is a prefix of the previous one.
+   */
+  fgoldi_mul(&v, x, y);
+
+  fgoldi_sqr(&u, &v);			/*    1 | 2 */
+  fgoldi_mul(&t, &u, &v);		/*    2 | 3 */
+  SQRN(&u, &t, 2);			/*    4 | 12 */
+  fgoldi_mul(&t, &u, &t);		/*    5 | 15 */
+  SQRN(&u, &t, 4);			/*    9 | 240 */
+  fgoldi_mul(&u, &u, &t);		/*   10 | 255 = 2^8 - 1 */
+  SQRN(&u, &u, 4);			/*   14 | 2^12 - 16 */
+  fgoldi_mul(&t, &u, &t);		/*   15 | 2^12 - 1 */
+  SQRN(&u, &t, 12);			/*   27 | 2^24 - 2^12 */
+  fgoldi_mul(&u, &u, &t);		/*   28 | 2^24 - 1 */
+  SQRN(&u, &u, 12);			/*   40 | 2^36 - 2^12 */
+  fgoldi_mul(&t, &u, &t);		/*   41 | 2^36 - 1 */
+  fgoldi_sqr(&t, &t);			/*   42 | 2^37 - 2 */
+  fgoldi_mul(&t, &t, &v);		/*   43 | 2^37 - 1 */
+  SQRN(&u, &t, 37);			/*   80 | 2^74 - 2^37 */
+  fgoldi_mul(&u, &u, &t);		/*   81 | 2^74 - 1 */
+  SQRN(&u, &u, 37);			/*  118 | 2^111 - 2^37 */
+  fgoldi_mul(&t, &u, &t);		/*  119 | 2^111 - 1 */
+  SQRN(&u, &t, 111);			/*  230 | 2^222 - 2^111 */
+  fgoldi_mul(&t, &u, &t);		/*  231 | 2^222 - 1 */
+  fgoldi_sqr(&u, &t);			/*  232 | 2^223 - 2 */
+  fgoldi_mul(&u, &u, &v);		/*  233 | 2^223 - 1 */
+  SQRN(&u, &u, 223);			/*  456 | 2^446 - 2^223 */
+  fgoldi_mul(&t, &u, &t);		/*  457 | 2^446 - 2^222 - 1 */
+
+#undef SQRN
+
+  /* Now we must decide whether the answer was right.  We should have z^2 =
+   * x/y, so y z^2 = x.
+   *
+   * The easiest way to compare is to encode.  This isn't as wasteful as it
+   * sounds: the hard part is normalizing the representations, which we have
+   * to do anyway.
+   */
+  fgoldi_mul(z, x, &t);
+  fgoldi_sqr(&t, z);
+  fgoldi_mul(&t, &t, y);
+  fgoldi_store(xb, x);
+  fgoldi_store(b0, &t);
+  m = -ct_memeq(xb, b0, 56);
+  rc = PICK2(0, rc, m);
+  return (rc);
+}
+
 /*----- Test rig ----------------------------------------------------------*/
 
 #ifdef TEST_RIG
 
+#include <mLib/macros.h>
 #include <mLib/report.h>
 #include <mLib/str.h>
 #include <mLib/testrig.h>
@@ -829,7 +1003,7 @@ static void dump_fgoldi_ref(dstr *d, FILE *fp)
 }
 
 static int eq(const fgoldi *x, dstr *d)
-  { octet b[56]; fgoldi_store(b, x); return (memcmp(b, d->buf, 56) == 0); }
+  { octet b[56]; fgoldi_store(b, x); return (MEMCMP(b, ==, d->buf, 56)); }
 
 static const test_type
   type_fgoldi = { cvt_fgoldi, dump_fgoldi },
@@ -857,6 +1031,7 @@ static const test_type
 
 TEST_UNOP(sqr)
 TEST_UNOP(inv)
+TEST_UNOP(neg)
 
 #define TEST_BINOP(op)							\
   static int vrf_##op(dstr dv[])					\
@@ -904,6 +1079,79 @@ static int vrf_mulc(dstr dv[])
   return (ok);
 }
 
+static int vrf_condneg(dstr dv[])
+{
+  fgoldi *x = (fgoldi *)dv[0].buf;
+  uint32 m = *(uint32 *)dv[1].buf;
+  fgoldi z;
+  int ok = 1;
+
+  fgoldi_condneg(&z, x, m);
+  if (!eq(&z, &dv[2])) {
+    ok = 0;
+    fprintf(stderr, "failed!\n");
+    fdump(stderr, "x", x->P);
+    fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
+    fdump(stderr, "calc z", z.P);
+    fgoldi_load(&z, (const octet *)dv[1].buf);
+    fdump(stderr, "want z", z.P);
+  }
+
+  return (ok);
+}
+
+static int vrf_pick2(dstr dv[])
+{
+  fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
+  uint32 m = *(uint32 *)dv[2].buf;
+  fgoldi z;
+  int ok = 1;
+
+  fgoldi_pick2(&z, x, y, m);
+  if (!eq(&z, &dv[3])) {
+    ok = 0;
+    fprintf(stderr, "failed!\n");
+    fdump(stderr, "x", x->P);
+    fdump(stderr, "y", y->P);
+    fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
+    fdump(stderr, "calc z", z.P);
+    fgoldi_load(&z, (const octet *)dv[3].buf);
+    fdump(stderr, "want z", z.P);
+  }
+
+  return (ok);
+}
+
+static int vrf_pickn(dstr dv[])
+{
+  dstr d = DSTR_INIT;
+  fgoldi v[32], z;
+  size_t i = *(uint32 *)dv[1].buf, j, n;
+  const char *p;
+  char *q;
+  int ok = 1;
+
+  for (q = dv[0].buf, n = 0; (p = str_qword(&q, 0)) != 0; n++)
+    { cvt_fgoldi(p, &d); v[n] = *(fgoldi *)d.buf; }
+
+  fgoldi_pickn(&z, v, n, i);
+  if (!eq(&z, &dv[2])) {
+    ok = 0;
+    fprintf(stderr, "failed!\n");
+    for (j = 0; j < n; j++) {
+      fprintf(stderr, "v[%2u]", (unsigned)j);
+      fdump(stderr, "", v[j].P);
+    }
+    fprintf(stderr, "i = %u\n", (unsigned)i);
+    fdump(stderr, "calc z", z.P);
+    fgoldi_load(&z, (const octet *)dv[2].buf);
+    fdump(stderr, "want z", z.P);
+  }
+
+  dstr_destroy(&d);
+  return (ok);
+}
+
 static int vrf_condswap(dstr dv[])
 {
   fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
@@ -929,6 +1177,35 @@ static int vrf_condswap(dstr dv[])
   return (ok);
 }
 
+static int vrf_quosqrt(dstr dv[])
+{
+  fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
+  fgoldi z, zz;
+  int rc;
+  int ok = 1;
+
+  if (dv[2].len) { fixdstr(&dv[2]); fixdstr(&dv[3]); }
+  rc = fgoldi_quosqrt(&z, x, y);
+  if (!dv[2].len ? !rc : (rc || (!eq(&z, &dv[2]) && !eq(&z, &dv[3])))) {
+    ok = 0;
+    fprintf(stderr, "failed!\n");
+    fdump(stderr, "x", x->P);
+    fdump(stderr, "y", y->P);
+    if (rc) fprintf(stderr, "calc: FAIL\n");
+    else fdump(stderr, "calc", z.P);
+    if (!dv[2].len)
+      fprintf(stderr, "exp: FAIL\n");
+    else {
+      fgoldi_load(&zz, (const octet *)dv[2].buf);
+      fdump(stderr, "z", zz.P);
+      fgoldi_load(&zz, (const octet *)dv[3].buf);
+      fdump(stderr, "z'", zz.P);
+    }
+  }
+
+  return (ok);
+}
+
 static int vrf_sub_mulc_add_sub_mul(dstr dv[])
 {
   fgoldi *u = (fgoldi *)dv[0].buf, *v = (fgoldi *)dv[1].buf,
@@ -966,13 +1243,22 @@ static int vrf_sub_mulc_add_sub_mul(dstr dv[])
 static test_chunk tests[] = {
   { "add", vrf_add, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
   { "sub", vrf_sub, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
+  { "neg", vrf_neg, { &type_fgoldi, &type_fgoldi_ref } },
+  { "condneg", vrf_condneg,
+    { &type_fgoldi, &type_uint32, &type_fgoldi_ref } },
   { "mul", vrf_mul, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
   { "mulconst", vrf_mulc, { &type_fgoldi, &type_long, &type_fgoldi_ref } },
+  { "pick2", vrf_pick2,
+    { &type_fgoldi, &type_fgoldi, &type_uint32, &type_fgoldi_ref } },
+  { "pickn", vrf_pickn,
+    { &type_string, &type_uint32, &type_fgoldi_ref } },
   { "condswap", vrf_condswap,
     { &type_fgoldi, &type_fgoldi, &type_uint32,
       &type_fgoldi_ref, &type_fgoldi_ref } },
   { "sqr", vrf_sqr, { &type_fgoldi, &type_fgoldi_ref } },
   { "inv", vrf_inv, { &type_fgoldi, &type_fgoldi_ref } },
+  { "quosqrt", vrf_quosqrt,
+    { &type_fgoldi, &type_fgoldi, &type_hex, &type_hex } },
   { "sub-mulc-add-sub-mul", vrf_sub_mulc_add_sub_mul,
     { &type_fgoldi, &type_fgoldi, &type_long, &type_fgoldi,
       &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },