progs/perftest.c: Use from Glibc syscall numbers.

[catacomb] / math / fgoldi.c
diff --git a/math/fgoldi.c b/math/fgoldi.c

index 5b2889f..43309dc 100644 (file)
--- a/math/fgoldi.c
+++ b/math/fgoldi.c
@@ -29,6 +29,7 @@
  
  #include "config.h"
  
+#include "ct.h"
  #include "fgoldi.h"
  
  /*----- Basic setup -------------------------------------------------------*
@@ -37,21 +38,21 @@
   * (hence the name).
   */
  
+typedef fgoldi_piece piece;
+
  #if FGOLDI_IMPL == 28
  /* We represent an element of GF(p) as 16 28-bit signed integer pieces x_i:
   * x = SUM_{0<=i<16} x_i 2^(28i).
   */
  
-typedef  int32  piece; typedef  int64  dblpiece;
+                      typedef  int64  dblpiece;
  typedef uint32 upiece; typedef uint64 udblpiece;
  #define PIECEWD(i) 28
  #define NPIECE 16
  #define P p28
  
-#define B28 0x10000000u
  #define B27 0x08000000u
  #define M28 0x0fffffffu
-#define M27 0x07ffffffu
  #define M32 0xffffffffu
  
  #elif FGOLDI_IMPL == 12
@@ -61,18 +62,16 @@ typedef uint32 upiece; typedef uint64 udblpiece;
   * bits.
   */
  
-typedef  int16  piece; typedef  int32  dblpiece;
+                      typedef  int32  dblpiece;
  typedef uint16 upiece; typedef uint32 udblpiece;
  #define PIECEWD(i) ((i)%5 ? 11 : 12)
  #define NPIECE 40
  #define P p12
  
-#define B12 0x1000u
  #define B11 0x0800u
  #define B10 0x0400u
  #define M12 0xfffu
  #define M11 0x7ffu
-#define M10 0x3ffu
  #define M8 0xffu
  
  #endif
@@ -397,8 +396,72 @@ void fgoldi_sub(fgoldi *z, const fgoldi *x, const fgoldi *y)
    for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] - y->P[i];
  }
  
+/* --- @fgoldi_neg@ --- *
+ *
+ * Arguments:  @fgoldi *z@ = where to put the result (may alias @x@)
+ *             @const fgoldi *x@ = an operand
+ *
+ * Returns:    ---
+ *
+ * Use:                Set @z = -x@.
+ */
+
+void fgoldi_neg(fgoldi *z, const fgoldi *x)
+{
+  unsigned i;
+  for (i = 0; i < NPIECE; i++) z->P[i] = -x->P[i];
+}
+
  /*----- Constant-time utilities -------------------------------------------*/
  
+/* --- @fgoldi_pick2@ --- *
+ *
+ * Arguments:  @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
+ *             @const fgoldi *x, *y@ = two operands
+ *             @uint32 m@ = a mask
+ *
+ * Returns:    ---
+ *
+ * Use:                If @m@ is zero, set @z = y@; if @m@ is all-bits-set, then set
+ *             @z = x@.  If @m@ has some other value, then scramble @z@ in
+ *             an unhelpful way.
+ */
+
+void fgoldi_pick2(fgoldi *z, const fgoldi *x, const fgoldi *y, uint32 m)
+{
+  mask32 mm = FIX_MASK32(m);
+  unsigned i;
+  for (i = 0; i < NPIECE; i++) z->P[i] = PICK2(x->P[i], y->P[i], mm);
+}
+
+/* --- @fgoldi_pickn@ --- *
+ *
+ * Arguments:  @fgoldi *z@ = where to put the result
+ *             @const fgoldi *v@ = a table of entries
+ *             @size_t n@ = the number of entries in @v@
+ *             @size_t i@ = an index
+ *
+ * Returns:    ---
+ *
+ * Use:                If @0 <= i < n < 32@ then set @z = v[i]@.  If @n >= 32@ then
+ *             do something unhelpful; otherwise, if @i >= n@ then set @z@
+ *             to zero.
+ */
+
+void fgoldi_pickn(fgoldi *z, const fgoldi *v, size_t n, size_t i)
+{
+  uint32 b = (uint32)1 << (31 - i);
+  mask32 m;
+  unsigned j;
+
+  for (j = 0; j < NPIECE; j++) z->P[j] = 0;
+  while (n--) {
+    m = SIGN(b);
+    for (j = 0; j < NPIECE; j++) CONDPICK(z->P[j], v->P[j], m);
+    v++; b <<= 1;
+  }
+}
+
  /* --- @fgoldi_condswap@ --- *
   *
   * Arguments:  @fgoldi *x, *y@ = two operands
@@ -419,6 +482,36 @@ void fgoldi_condswap(fgoldi *x, fgoldi *y, uint32 m)
    for (i = 0; i < NPIECE; i++) CONDSWAP(x->P[i], y->P[i], mm);
  }
  
+/* --- @fgoldi_condneg@ --- *
+ *
+ * Arguments:  @fgoldi *z@ = where to put the result (may alias @x@)
+ *             @const fgoldi *x@ = an operand
+ *             @uint32 m@ = a mask
+ *
+ * Returns:    ---
+ *
+ * Use:                If @m@ is zero, set @z = x@; if @m@ is all-bits-set, then set
+ *             @z = -x@.  If @m@ has some other value then scramble @z@ in
+ *             an unhelpful way.
+ */
+
+void fgoldi_condneg(fgoldi *z, const fgoldi *x, uint32 m)
+{
+#ifdef NEG_TWOC
+  mask32 m_xor = FIX_MASK32(m);
+  piece m_add = m&1;
+# define CONDNEG(x) (((x) ^ m_xor) + m_add)
+#else
+  int s = PICK2(-1, +1, m);
+# define CONDNEG(x) (s*(x))
+#endif
+
+  unsigned i;
+  for (i = 0; i < NPIECE; i++) z->P[i] = CONDNEG(x->P[i]);
+
+#undef CONDNEG
+}
+
  /*----- Multiplication ----------------------------------------------------*/
  
  #if FGOLDI_IMPL == 28
@@ -783,10 +876,93 @@ void fgoldi_inv(fgoldi *z, const fgoldi *x)
  #undef SQRN
  }
  
+/* --- @fgoldi_quosqrt@ --- *
+ *
+ * Arguments:  @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
+ *             @const fgoldi *x, *y@ = two operands
+ *
+ * Returns:    Zero if successful, @-1@ if %$x/y$% is not a square.
+ *
+ * Use:                Stores in @z@ the one of the square roots %$\pm\sqrt{x/y}$%.
+ *             If %$x = y = 0% then the result is zero; if %$y = 0$% but %$x
+ *             \ne 0$% then the operation fails.  If you wanted a specific
+ *             square root then you'll have to pick it yourself.
+ */
+
+int fgoldi_quosqrt(fgoldi *z, const fgoldi *x, const fgoldi *y)
+{
+  fgoldi t, u, v;
+  octet xb[56], b0[56];
+  int32 rc = -1;
+  mask32 m;
+  unsigned i;
+
+#define SQRN(z, x, n) do {                                             \
+  fgoldi_sqr((z), (x));                                                        \
+  for (i = 1; i < (n); i++) fgoldi_sqr((z), (z));                      \
+} while (0)
+
+  /* This is, fortunately, significantly easier than the equivalent problem
+   * in GF(2^255 - 19), since p == 3 (mod 4).
+   *
+   * If x/y is square, then so is v = y^2 x/y = x y, and therefore u has
+   * order r = (p - 1)/2.  Let w = v^{(p-3)/4}.  Then w^2 = v^{(p-3)/2} =
+   * u^{r-1} = 1/v = 1/x y.  Clearly, then, (x w)^2 = x^2/x y = x/y, so x w
+   * is one of the square roots we seek.
+   *
+   * The addition chain, then, is a prefix of the previous one.
+   */
+  fgoldi_mul(&v, x, y);
+
+  fgoldi_sqr(&u, &v);                  /*    1 | 2 */
+  fgoldi_mul(&t, &u, &v);              /*    2 | 3 */
+  SQRN(&u, &t, 2);                     /*    4 | 12 */
+  fgoldi_mul(&t, &u, &t);              /*    5 | 15 */
+  SQRN(&u, &t, 4);                     /*    9 | 240 */
+  fgoldi_mul(&u, &u, &t);              /*   10 | 255 = 2^8 - 1 */
+  SQRN(&u, &u, 4);                     /*   14 | 2^12 - 16 */
+  fgoldi_mul(&t, &u, &t);              /*   15 | 2^12 - 1 */
+  SQRN(&u, &t, 12);                    /*   27 | 2^24 - 2^12 */
+  fgoldi_mul(&u, &u, &t);              /*   28 | 2^24 - 1 */
+  SQRN(&u, &u, 12);                    /*   40 | 2^36 - 2^12 */
+  fgoldi_mul(&t, &u, &t);              /*   41 | 2^36 - 1 */
+  fgoldi_sqr(&t, &t);                  /*   42 | 2^37 - 2 */
+  fgoldi_mul(&t, &t, &v);              /*   43 | 2^37 - 1 */
+  SQRN(&u, &t, 37);                    /*   80 | 2^74 - 2^37 */
+  fgoldi_mul(&u, &u, &t);              /*   81 | 2^74 - 1 */
+  SQRN(&u, &u, 37);                    /*  118 | 2^111 - 2^37 */
+  fgoldi_mul(&t, &u, &t);              /*  119 | 2^111 - 1 */
+  SQRN(&u, &t, 111);                   /*  230 | 2^222 - 2^111 */
+  fgoldi_mul(&t, &u, &t);              /*  231 | 2^222 - 1 */
+  fgoldi_sqr(&u, &t);                  /*  232 | 2^223 - 2 */
+  fgoldi_mul(&u, &u, &v);              /*  233 | 2^223 - 1 */
+  SQRN(&u, &u, 223);                   /*  456 | 2^446 - 2^223 */
+  fgoldi_mul(&t, &u, &t);              /*  457 | 2^446 - 2^222 - 1 */
+
+#undef SQRN
+
+  /* Now we must decide whether the answer was right.  We should have z^2 =
+   * x/y, so y z^2 = x.
+   *
+   * The easiest way to compare is to encode.  This isn't as wasteful as it
+   * sounds: the hard part is normalizing the representations, which we have
+   * to do anyway.
+   */
+  fgoldi_mul(z, x, &t);
+  fgoldi_sqr(&t, z);
+  fgoldi_mul(&t, &t, y);
+  fgoldi_store(xb, x);
+  fgoldi_store(b0, &t);
+  m = -ct_memeq(xb, b0, 56);
+  rc = PICK2(0, rc, m);
+  return (rc);
+}
+
  /*----- Test rig ----------------------------------------------------------*/
  
  #ifdef TEST_RIG
  
+#include <mLib/macros.h>
  #include <mLib/report.h>
  #include <mLib/str.h>
  #include <mLib/testrig.h>
@@ -827,7 +1003,7 @@ static void dump_fgoldi_ref(dstr *d, FILE *fp)
  }
  
  static int eq(const fgoldi *x, dstr *d)
-  { octet b[56]; fgoldi_store(b, x); return (memcmp(b, d->buf, 56) == 0); }
+  { octet b[56]; fgoldi_store(b, x); return (MEMCMP(b, ==, d->buf, 56)); }
  
  static const test_type
    type_fgoldi = { cvt_fgoldi, dump_fgoldi },
@@ -855,6 +1031,7 @@ static const test_type
  
  TEST_UNOP(sqr)
  TEST_UNOP(inv)
+TEST_UNOP(neg)
  
  #define TEST_BINOP(op)                                                 \
    static int vrf_##op(dstr dv[])                                       \
@@ -902,6 +1079,79 @@ static int vrf_mulc(dstr dv[])
    return (ok);
  }
  
+static int vrf_condneg(dstr dv[])
+{
+  fgoldi *x = (fgoldi *)dv[0].buf;
+  uint32 m = *(uint32 *)dv[1].buf;
+  fgoldi z;
+  int ok = 1;
+
+  fgoldi_condneg(&z, x, m);
+  if (!eq(&z, &dv[2])) {
+    ok = 0;
+    fprintf(stderr, "failed!\n");
+    fdump(stderr, "x", x->P);
+    fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
+    fdump(stderr, "calc z", z.P);
+    fgoldi_load(&z, (const octet *)dv[1].buf);
+    fdump(stderr, "want z", z.P);
+  }
+
+  return (ok);
+}
+
+static int vrf_pick2(dstr dv[])
+{
+  fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
+  uint32 m = *(uint32 *)dv[2].buf;
+  fgoldi z;
+  int ok = 1;
+
+  fgoldi_pick2(&z, x, y, m);
+  if (!eq(&z, &dv[3])) {
+    ok = 0;
+    fprintf(stderr, "failed!\n");
+    fdump(stderr, "x", x->P);
+    fdump(stderr, "y", y->P);
+    fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
+    fdump(stderr, "calc z", z.P);
+    fgoldi_load(&z, (const octet *)dv[3].buf);
+    fdump(stderr, "want z", z.P);
+  }
+
+  return (ok);
+}
+
+static int vrf_pickn(dstr dv[])
+{
+  dstr d = DSTR_INIT;
+  fgoldi v[32], z;
+  size_t i = *(uint32 *)dv[1].buf, j, n;
+  const char *p;
+  char *q;
+  int ok = 1;
+
+  for (q = dv[0].buf, n = 0; (p = str_qword(&q, 0)) != 0; n++)
+    { cvt_fgoldi(p, &d); v[n] = *(fgoldi *)d.buf; }
+
+  fgoldi_pickn(&z, v, n, i);
+  if (!eq(&z, &dv[2])) {
+    ok = 0;
+    fprintf(stderr, "failed!\n");
+    for (j = 0; j < n; j++) {
+      fprintf(stderr, "v[%2u]", (unsigned)j);
+      fdump(stderr, "", v[j].P);
+    }
+    fprintf(stderr, "i = %u\n", (unsigned)i);
+    fdump(stderr, "calc z", z.P);
+    fgoldi_load(&z, (const octet *)dv[2].buf);
+    fdump(stderr, "want z", z.P);
+  }
+
+  dstr_destroy(&d);
+  return (ok);
+}
+
  static int vrf_condswap(dstr dv[])
  {
    fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
@@ -927,6 +1177,35 @@ static int vrf_condswap(dstr dv[])
    return (ok);
  }
  
+static int vrf_quosqrt(dstr dv[])
+{
+  fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
+  fgoldi z, zz;
+  int rc;
+  int ok = 1;
+
+  if (dv[2].len) { fixdstr(&dv[2]); fixdstr(&dv[3]); }
+  rc = fgoldi_quosqrt(&z, x, y);
+  if (!dv[2].len ? !rc : (rc || (!eq(&z, &dv[2]) && !eq(&z, &dv[3])))) {
+    ok = 0;
+    fprintf(stderr, "failed!\n");
+    fdump(stderr, "x", x->P);
+    fdump(stderr, "y", y->P);
+    if (rc) fprintf(stderr, "calc: FAIL\n");
+    else fdump(stderr, "calc", z.P);
+    if (!dv[2].len)
+      fprintf(stderr, "exp: FAIL\n");
+    else {
+      fgoldi_load(&zz, (const octet *)dv[2].buf);
+      fdump(stderr, "z", zz.P);
+      fgoldi_load(&zz, (const octet *)dv[3].buf);
+      fdump(stderr, "z'", zz.P);
+    }
+  }
+
+  return (ok);
+}
+
  static int vrf_sub_mulc_add_sub_mul(dstr dv[])
  {
    fgoldi *u = (fgoldi *)dv[0].buf, *v = (fgoldi *)dv[1].buf,
@@ -964,13 +1243,22 @@ static int vrf_sub_mulc_add_sub_mul(dstr dv[])
  static test_chunk tests[] = {
    { "add", vrf_add, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
    { "sub", vrf_sub, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
+  { "neg", vrf_neg, { &type_fgoldi, &type_fgoldi_ref } },
+  { "condneg", vrf_condneg,
+    { &type_fgoldi, &type_uint32, &type_fgoldi_ref } },
    { "mul", vrf_mul, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
    { "mulconst", vrf_mulc, { &type_fgoldi, &type_long, &type_fgoldi_ref } },
+  { "pick2", vrf_pick2,
+    { &type_fgoldi, &type_fgoldi, &type_uint32, &type_fgoldi_ref } },
+  { "pickn", vrf_pickn,
+    { &type_string, &type_uint32, &type_fgoldi_ref } },
    { "condswap", vrf_condswap,
      { &type_fgoldi, &type_fgoldi, &type_uint32,
        &type_fgoldi_ref, &type_fgoldi_ref } },
    { "sqr", vrf_sqr, { &type_fgoldi, &type_fgoldi_ref } },
    { "inv", vrf_inv, { &type_fgoldi, &type_fgoldi_ref } },
+  { "quosqrt", vrf_quosqrt,
+    { &type_fgoldi, &type_fgoldi, &type_hex, &type_hex } },
    { "sub-mulc-add-sub-mul", vrf_sub_mulc_add_sub_mul,
      { &type_fgoldi, &type_fgoldi, &type_long, &type_fgoldi,
        &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },