X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/f521d4c7a97076db34681c598d7965c7d05713b0..HEAD:/math/fgoldi.c diff --git a/math/fgoldi.c b/math/fgoldi.c index afc43329..43309dcd 100644 --- a/math/fgoldi.c +++ b/math/fgoldi.c @@ -29,6 +29,7 @@ #include "config.h" +#include "ct.h" #include "fgoldi.h" /*----- Basic setup -------------------------------------------------------* @@ -50,10 +51,8 @@ typedef uint32 upiece; typedef uint64 udblpiece; #define NPIECE 16 #define P p28 -#define B28 0x10000000u #define B27 0x08000000u #define M28 0x0fffffffu -#define M27 0x07ffffffu #define M32 0xffffffffu #elif FGOLDI_IMPL == 12 @@ -69,12 +68,10 @@ typedef uint16 upiece; typedef uint32 udblpiece; #define NPIECE 40 #define P p12 -#define B12 0x1000u #define B11 0x0800u #define B10 0x0400u #define M12 0xfffu #define M11 0x7ffu -#define M10 0x3ffu #define M8 0xffu #endif @@ -399,8 +396,72 @@ void fgoldi_sub(fgoldi *z, const fgoldi *x, const fgoldi *y) for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] - y->P[i]; } +/* --- @fgoldi_neg@ --- * + * + * Arguments: @fgoldi *z@ = where to put the result (may alias @x@) + * @const fgoldi *x@ = an operand + * + * Returns: --- + * + * Use: Set @z = -x@. + */ + +void fgoldi_neg(fgoldi *z, const fgoldi *x) +{ + unsigned i; + for (i = 0; i < NPIECE; i++) z->P[i] = -x->P[i]; +} + /*----- Constant-time utilities -------------------------------------------*/ +/* --- @fgoldi_pick2@ --- * + * + * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@) + * @const fgoldi *x, *y@ = two operands + * @uint32 m@ = a mask + * + * Returns: --- + * + * Use: If @m@ is zero, set @z = y@; if @m@ is all-bits-set, then set + * @z = x@. If @m@ has some other value, then scramble @z@ in + * an unhelpful way. + */ + +void fgoldi_pick2(fgoldi *z, const fgoldi *x, const fgoldi *y, uint32 m) +{ + mask32 mm = FIX_MASK32(m); + unsigned i; + for (i = 0; i < NPIECE; i++) z->P[i] = PICK2(x->P[i], y->P[i], mm); +} + +/* --- @fgoldi_pickn@ --- * + * + * Arguments: @fgoldi *z@ = where to put the result + * @const fgoldi *v@ = a table of entries + * @size_t n@ = the number of entries in @v@ + * @size_t i@ = an index + * + * Returns: --- + * + * Use: If @0 <= i < n < 32@ then set @z = v[i]@. If @n >= 32@ then + * do something unhelpful; otherwise, if @i >= n@ then set @z@ + * to zero. + */ + +void fgoldi_pickn(fgoldi *z, const fgoldi *v, size_t n, size_t i) +{ + uint32 b = (uint32)1 << (31 - i); + mask32 m; + unsigned j; + + for (j = 0; j < NPIECE; j++) z->P[j] = 0; + while (n--) { + m = SIGN(b); + for (j = 0; j < NPIECE; j++) CONDPICK(z->P[j], v->P[j], m); + v++; b <<= 1; + } +} + /* --- @fgoldi_condswap@ --- * * * Arguments: @fgoldi *x, *y@ = two operands @@ -421,6 +482,36 @@ void fgoldi_condswap(fgoldi *x, fgoldi *y, uint32 m) for (i = 0; i < NPIECE; i++) CONDSWAP(x->P[i], y->P[i], mm); } +/* --- @fgoldi_condneg@ --- * + * + * Arguments: @fgoldi *z@ = where to put the result (may alias @x@) + * @const fgoldi *x@ = an operand + * @uint32 m@ = a mask + * + * Returns: --- + * + * Use: If @m@ is zero, set @z = x@; if @m@ is all-bits-set, then set + * @z = -x@. If @m@ has some other value then scramble @z@ in + * an unhelpful way. + */ + +void fgoldi_condneg(fgoldi *z, const fgoldi *x, uint32 m) +{ +#ifdef NEG_TWOC + mask32 m_xor = FIX_MASK32(m); + piece m_add = m&1; +# define CONDNEG(x) (((x) ^ m_xor) + m_add) +#else + int s = PICK2(-1, +1, m); +# define CONDNEG(x) (s*(x)) +#endif + + unsigned i; + for (i = 0; i < NPIECE; i++) z->P[i] = CONDNEG(x->P[i]); + +#undef CONDNEG +} + /*----- Multiplication ----------------------------------------------------*/ #if FGOLDI_IMPL == 28 @@ -785,10 +876,93 @@ void fgoldi_inv(fgoldi *z, const fgoldi *x) #undef SQRN } +/* --- @fgoldi_quosqrt@ --- * + * + * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@) + * @const fgoldi *x, *y@ = two operands + * + * Returns: Zero if successful, @-1@ if %$x/y$% is not a square. + * + * Use: Stores in @z@ the one of the square roots %$\pm\sqrt{x/y}$%. + * If %$x = y = 0% then the result is zero; if %$y = 0$% but %$x + * \ne 0$% then the operation fails. If you wanted a specific + * square root then you'll have to pick it yourself. + */ + +int fgoldi_quosqrt(fgoldi *z, const fgoldi *x, const fgoldi *y) +{ + fgoldi t, u, v; + octet xb[56], b0[56]; + int32 rc = -1; + mask32 m; + unsigned i; + +#define SQRN(z, x, n) do { \ + fgoldi_sqr((z), (x)); \ + for (i = 1; i < (n); i++) fgoldi_sqr((z), (z)); \ +} while (0) + + /* This is, fortunately, significantly easier than the equivalent problem + * in GF(2^255 - 19), since p == 3 (mod 4). + * + * If x/y is square, then so is v = y^2 x/y = x y, and therefore u has + * order r = (p - 1)/2. Let w = v^{(p-3)/4}. Then w^2 = v^{(p-3)/2} = + * u^{r-1} = 1/v = 1/x y. Clearly, then, (x w)^2 = x^2/x y = x/y, so x w + * is one of the square roots we seek. + * + * The addition chain, then, is a prefix of the previous one. + */ + fgoldi_mul(&v, x, y); + + fgoldi_sqr(&u, &v); /* 1 | 2 */ + fgoldi_mul(&t, &u, &v); /* 2 | 3 */ + SQRN(&u, &t, 2); /* 4 | 12 */ + fgoldi_mul(&t, &u, &t); /* 5 | 15 */ + SQRN(&u, &t, 4); /* 9 | 240 */ + fgoldi_mul(&u, &u, &t); /* 10 | 255 = 2^8 - 1 */ + SQRN(&u, &u, 4); /* 14 | 2^12 - 16 */ + fgoldi_mul(&t, &u, &t); /* 15 | 2^12 - 1 */ + SQRN(&u, &t, 12); /* 27 | 2^24 - 2^12 */ + fgoldi_mul(&u, &u, &t); /* 28 | 2^24 - 1 */ + SQRN(&u, &u, 12); /* 40 | 2^36 - 2^12 */ + fgoldi_mul(&t, &u, &t); /* 41 | 2^36 - 1 */ + fgoldi_sqr(&t, &t); /* 42 | 2^37 - 2 */ + fgoldi_mul(&t, &t, &v); /* 43 | 2^37 - 1 */ + SQRN(&u, &t, 37); /* 80 | 2^74 - 2^37 */ + fgoldi_mul(&u, &u, &t); /* 81 | 2^74 - 1 */ + SQRN(&u, &u, 37); /* 118 | 2^111 - 2^37 */ + fgoldi_mul(&t, &u, &t); /* 119 | 2^111 - 1 */ + SQRN(&u, &t, 111); /* 230 | 2^222 - 2^111 */ + fgoldi_mul(&t, &u, &t); /* 231 | 2^222 - 1 */ + fgoldi_sqr(&u, &t); /* 232 | 2^223 - 2 */ + fgoldi_mul(&u, &u, &v); /* 233 | 2^223 - 1 */ + SQRN(&u, &u, 223); /* 456 | 2^446 - 2^223 */ + fgoldi_mul(&t, &u, &t); /* 457 | 2^446 - 2^222 - 1 */ + +#undef SQRN + + /* Now we must decide whether the answer was right. We should have z^2 = + * x/y, so y z^2 = x. + * + * The easiest way to compare is to encode. This isn't as wasteful as it + * sounds: the hard part is normalizing the representations, which we have + * to do anyway. + */ + fgoldi_mul(z, x, &t); + fgoldi_sqr(&t, z); + fgoldi_mul(&t, &t, y); + fgoldi_store(xb, x); + fgoldi_store(b0, &t); + m = -ct_memeq(xb, b0, 56); + rc = PICK2(0, rc, m); + return (rc); +} + /*----- Test rig ----------------------------------------------------------*/ #ifdef TEST_RIG +#include #include #include #include @@ -829,7 +1003,7 @@ static void dump_fgoldi_ref(dstr *d, FILE *fp) } static int eq(const fgoldi *x, dstr *d) - { octet b[56]; fgoldi_store(b, x); return (memcmp(b, d->buf, 56) == 0); } + { octet b[56]; fgoldi_store(b, x); return (MEMCMP(b, ==, d->buf, 56)); } static const test_type type_fgoldi = { cvt_fgoldi, dump_fgoldi }, @@ -857,6 +1031,7 @@ static const test_type TEST_UNOP(sqr) TEST_UNOP(inv) +TEST_UNOP(neg) #define TEST_BINOP(op) \ static int vrf_##op(dstr dv[]) \ @@ -904,6 +1079,79 @@ static int vrf_mulc(dstr dv[]) return (ok); } +static int vrf_condneg(dstr dv[]) +{ + fgoldi *x = (fgoldi *)dv[0].buf; + uint32 m = *(uint32 *)dv[1].buf; + fgoldi z; + int ok = 1; + + fgoldi_condneg(&z, x, m); + if (!eq(&z, &dv[2])) { + ok = 0; + fprintf(stderr, "failed!\n"); + fdump(stderr, "x", x->P); + fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m); + fdump(stderr, "calc z", z.P); + fgoldi_load(&z, (const octet *)dv[1].buf); + fdump(stderr, "want z", z.P); + } + + return (ok); +} + +static int vrf_pick2(dstr dv[]) +{ + fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf; + uint32 m = *(uint32 *)dv[2].buf; + fgoldi z; + int ok = 1; + + fgoldi_pick2(&z, x, y, m); + if (!eq(&z, &dv[3])) { + ok = 0; + fprintf(stderr, "failed!\n"); + fdump(stderr, "x", x->P); + fdump(stderr, "y", y->P); + fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m); + fdump(stderr, "calc z", z.P); + fgoldi_load(&z, (const octet *)dv[3].buf); + fdump(stderr, "want z", z.P); + } + + return (ok); +} + +static int vrf_pickn(dstr dv[]) +{ + dstr d = DSTR_INIT; + fgoldi v[32], z; + size_t i = *(uint32 *)dv[1].buf, j, n; + const char *p; + char *q; + int ok = 1; + + for (q = dv[0].buf, n = 0; (p = str_qword(&q, 0)) != 0; n++) + { cvt_fgoldi(p, &d); v[n] = *(fgoldi *)d.buf; } + + fgoldi_pickn(&z, v, n, i); + if (!eq(&z, &dv[2])) { + ok = 0; + fprintf(stderr, "failed!\n"); + for (j = 0; j < n; j++) { + fprintf(stderr, "v[%2u]", (unsigned)j); + fdump(stderr, "", v[j].P); + } + fprintf(stderr, "i = %u\n", (unsigned)i); + fdump(stderr, "calc z", z.P); + fgoldi_load(&z, (const octet *)dv[2].buf); + fdump(stderr, "want z", z.P); + } + + dstr_destroy(&d); + return (ok); +} + static int vrf_condswap(dstr dv[]) { fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf; @@ -929,6 +1177,35 @@ static int vrf_condswap(dstr dv[]) return (ok); } +static int vrf_quosqrt(dstr dv[]) +{ + fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf; + fgoldi z, zz; + int rc; + int ok = 1; + + if (dv[2].len) { fixdstr(&dv[2]); fixdstr(&dv[3]); } + rc = fgoldi_quosqrt(&z, x, y); + if (!dv[2].len ? !rc : (rc || (!eq(&z, &dv[2]) && !eq(&z, &dv[3])))) { + ok = 0; + fprintf(stderr, "failed!\n"); + fdump(stderr, "x", x->P); + fdump(stderr, "y", y->P); + if (rc) fprintf(stderr, "calc: FAIL\n"); + else fdump(stderr, "calc", z.P); + if (!dv[2].len) + fprintf(stderr, "exp: FAIL\n"); + else { + fgoldi_load(&zz, (const octet *)dv[2].buf); + fdump(stderr, "z", zz.P); + fgoldi_load(&zz, (const octet *)dv[3].buf); + fdump(stderr, "z'", zz.P); + } + } + + return (ok); +} + static int vrf_sub_mulc_add_sub_mul(dstr dv[]) { fgoldi *u = (fgoldi *)dv[0].buf, *v = (fgoldi *)dv[1].buf, @@ -966,13 +1243,22 @@ static int vrf_sub_mulc_add_sub_mul(dstr dv[]) static test_chunk tests[] = { { "add", vrf_add, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } }, { "sub", vrf_sub, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } }, + { "neg", vrf_neg, { &type_fgoldi, &type_fgoldi_ref } }, + { "condneg", vrf_condneg, + { &type_fgoldi, &type_uint32, &type_fgoldi_ref } }, { "mul", vrf_mul, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } }, { "mulconst", vrf_mulc, { &type_fgoldi, &type_long, &type_fgoldi_ref } }, + { "pick2", vrf_pick2, + { &type_fgoldi, &type_fgoldi, &type_uint32, &type_fgoldi_ref } }, + { "pickn", vrf_pickn, + { &type_string, &type_uint32, &type_fgoldi_ref } }, { "condswap", vrf_condswap, { &type_fgoldi, &type_fgoldi, &type_uint32, &type_fgoldi_ref, &type_fgoldi_ref } }, { "sqr", vrf_sqr, { &type_fgoldi, &type_fgoldi_ref } }, { "inv", vrf_inv, { &type_fgoldi, &type_fgoldi_ref } }, + { "quosqrt", vrf_quosqrt, + { &type_fgoldi, &type_fgoldi, &type_hex, &type_hex } }, { "sub-mulc-add-sub-mul", vrf_sub_mulc_add_sub_mul, { &type_fgoldi, &type_fgoldi, &type_long, &type_fgoldi, &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },