progs/perftest.c: Use from Glibc syscall numbers.
[catacomb] / math / fgoldi.c
CommitLineData
266efb73
MW
1/* -*-c-*-
2 *
3 * Arithmetic in the Goldilocks field GF(2^448 - 2^224 - 1)
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include "config.h"
31
1bc00e2a 32#include "ct.h"
266efb73
MW
33#include "fgoldi.h"
34
35/*----- Basic setup -------------------------------------------------------*
36 *
37 * Let φ = 2^224; then p = φ^2 - φ - 1, and, in GF(p), we have φ^2 = φ + 1
38 * (hence the name).
39 */
40
f521d4c7
MW
41typedef fgoldi_piece piece;
42
266efb73
MW
43#if FGOLDI_IMPL == 28
44/* We represent an element of GF(p) as 16 28-bit signed integer pieces x_i:
45 * x = SUM_{0<=i<16} x_i 2^(28i).
46 */
47
f521d4c7 48 typedef int64 dblpiece;
266efb73
MW
49typedef uint32 upiece; typedef uint64 udblpiece;
50#define PIECEWD(i) 28
51#define NPIECE 16
52#define P p28
53
266efb73
MW
54#define B27 0x08000000u
55#define M28 0x0fffffffu
266efb73
MW
56#define M32 0xffffffffu
57
58#elif FGOLDI_IMPL == 12
59/* We represent an element of GF(p) as 40 signed integer pieces x_i: x =
60 * SUM_{0<=i<40} x_i 2^ceil(224i/20). Pieces i with i == 0 (mod 5) are 12
61 * bits wide; the others are 11 bits wide, so they form eight groups of 56
62 * bits.
63 */
64
f521d4c7 65 typedef int32 dblpiece;
266efb73
MW
66typedef uint16 upiece; typedef uint32 udblpiece;
67#define PIECEWD(i) ((i)%5 ? 11 : 12)
68#define NPIECE 40
69#define P p12
70
266efb73
MW
71#define B11 0x0800u
72#define B10 0x0400u
73#define M12 0xfffu
74#define M11 0x7ffu
266efb73
MW
75#define M8 0xffu
76
77#endif
78
79/*----- Debugging machinery -----------------------------------------------*/
80
81#if defined(FGOLDI_DEBUG) || defined(TEST_RIG)
82
83#include <stdio.h>
84
85#include "mp.h"
86#include "mptext.h"
87
88static mp *get_pgoldi(void)
89{
90 mp *p = MP_NEW, *t = MP_NEW;
91
92 p = mp_setbit(p, MP_ZERO, 448);
93 t = mp_setbit(t, MP_ZERO, 224);
94 p = mp_sub(p, p, t);
95 p = mp_sub(p, p, MP_ONE);
96 mp_drop(t);
97 return (p);
98}
99
100DEF_FDUMP(fdump, piece, PIECEWD, NPIECE, 56, get_pgoldi())
101
102#endif
103
104/*----- Loading and storing -----------------------------------------------*/
105
106/* --- @fgoldi_load@ --- *
107 *
108 * Arguments: @fgoldi *z@ = where to store the result
109 * @const octet xv[56]@ = source to read
110 *
111 * Returns: ---
112 *
113 * Use: Reads an element of %$\gf{2^{448} - 2^{224} - 19}$% in
114 * external representation from @xv@ and stores it in @z@.
115 *
116 * External representation is little-endian base-256. Some
117 * elements have multiple encodings, which are not produced by
118 * correct software; use of noncanonical encodings is not an
119 * error, and toleration of them is considered a performance
120 * feature.
121 */
122
123void fgoldi_load(fgoldi *z, const octet xv[56])
124{
125#if FGOLDI_IMPL == 28
126
127 unsigned i;
128 uint32 xw[14];
129 piece b, c;
130
131 /* First, read the input value as words. */
132 for (i = 0; i < 14; i++) xw[i] = LOAD32_L(xv + 4*i);
133
134 /* Extract unsigned 28-bit pieces from the words. */
135 z->P[ 0] = (xw[ 0] >> 0)&M28;
136 z->P[ 7] = (xw[ 6] >> 4)&M28;
137 z->P[ 8] = (xw[ 7] >> 0)&M28;
138 z->P[15] = (xw[13] >> 4)&M28;
139 for (i = 1; i < 7; i++) {
140 z->P[i + 0] = ((xw[i + 0] << (4*i)) | (xw[i - 1] >> (32 - 4*i)))&M28;
141 z->P[i + 8] = ((xw[i + 7] << (4*i)) | (xw[i + 6] >> (32 - 4*i)))&M28;
142 }
143
144 /* Convert the nonnegative pieces into a balanced signed representation, so
145 * each piece ends up in the interval |z_i| <= 2^27. For each piece, if
146 * its top bit is set, lend a bit leftwards; in the case of z_15, reduce
147 * this bit by adding it onto z_0 and z_8, since this is the φ^2 bit, and
148 * φ^2 = φ + 1. We delay this carry until after all of the pieces have
149 * been balanced. If we don't do this, then we have to do a more expensive
150 * test for nonzeroness to decide whether to lend a bit leftwards rather
151 * than just testing a single bit.
152 *
153 * Note that we don't try for a canonical representation here: both upper
154 * and lower bounds are achievable.
155 */
156 b = z->P[15]&B27; z->P[15] -= b << 1; c = b >> 27;
157 for (i = NPIECE - 1; i--; )
158 { b = z->P[i]&B27; z->P[i] -= b << 1; z->P[i + 1] += b >> 27; }
159 z->P[0] += c; z->P[8] += c;
160
161#elif FGOLDI_IMPL == 12
162
163 unsigned i, j, n, w, b;
164 uint32 a;
165 int c;
166
167 /* First, convert the bytes into nonnegative pieces. */
168 for (i = j = a = n = 0, w = PIECEWD(0); i < 56; i++) {
169 a |= (uint32)xv[i] << n; n += 8;
170 if (n >= w) {
171 z->P[j++] = a&MASK(w);
172 a >>= w; n -= w; w = PIECEWD(j);
173 }
174 }
175
176 /* Convert the nonnegative pieces into a balanced signed representation, so
177 * each piece ends up in the interval |z_i| <= 2^11 + 1.
178 */
179 b = z->P[39]&B10; z->P[39] -= b << 1; c = b >> 10;
180 for (i = NPIECE - 1; i--; ) {
181 w = PIECEWD(i) - 1;
182 b = z->P[i]&BIT(w);
183 z->P[i] -= b << 1;
184 z->P[i + 1] += b >> w;
185 }
186 z->P[0] += c; z->P[20] += c;
187
188#endif
189}
190
191/* --- @fgoldi_store@ --- *
192 *
193 * Arguments: @octet zv[56]@ = where to write the result
194 * @const fgoldi *x@ = the field element to write
195 *
196 * Returns: ---
197 *
198 * Use: Stores a field element in the given octet vector in external
199 * representation. A canonical encoding is always stored.
200 */
201
202void fgoldi_store(octet zv[56], const fgoldi *x)
203{
204#if FGOLDI_IMPL == 28
205
206 piece y[NPIECE], yy[NPIECE], c, d;
207 uint32 u, v;
208 mask32 m;
209 unsigned i;
210
211 for (i = 0; i < NPIECE; i++) y[i] = x->P[i];
212
213 /* First, propagate the carries. By the end of this, we'll have all of the
214 * the pieces canonically sized and positive, and maybe there'll be
215 * (signed) carry out. The carry c is in { -1, 0, +1 }, and the remaining
216 * value will be in the half-open interval [0, φ^2). The whole represented
217 * value is then y + φ^2 c.
218 *
219 * Assume that we start out with |y_i| <= 2^30. We start off by cutting
220 * off and reducing the carry c_15 from the topmost piece, y_15. This
221 * leaves 0 <= y_15 < 2^28; and we'll have |c_15| <= 4. We'll add this
222 * onto y_0 and y_8, and propagate the carries. It's very clear that we'll
223 * end up with |y + (φ + 1) c_15 - φ^2/2| << φ^2.
224 *
225 * Here, the y_i are signed, so we must be cautious about bithacking them.
226 */
227 c = ASR(piece, y[15], 28); y[15] = (upiece)y[15]&M28; y[8] += c;
228 for (i = 0; i < NPIECE; i++)
229 { y[i] += c; c = ASR(piece, y[i], 28); y[i] = (upiece)y[i]&M28; }
230
231 /* Now we have a slightly fiddly job to do. If c = +1, or if c = 0 and
232 * y >= p, then we should subtract p from the whole value; if c = -1 then
233 * we should add p; and otherwise we should do nothing.
234 *
235 * But conditional behaviour is bad, m'kay. So here's what we do instead.
236 *
237 * The first job is to sort out what we wanted to do. If c = -1 then we
238 * want to (a) invert the constant addend and (b) feed in a carry-in;
239 * otherwise, we don't.
240 */
241 m = SIGN(c)&M28;
242 d = m&1;
243
244 /* Now do the addition/subtraction. Remember that all of the y_i are
245 * nonnegative, so shifting and masking are safe and easy.
246 */
247 d += y[0] + (1 ^ m); yy[0] = d&M28; d >>= 28;
248 for (i = 1; i < 8; i++)
249 { d += y[i] + m; yy[i] = d&M28; d >>= 28; }
250 d += y[8] + (1 ^ m); yy[8] = d&M28; d >>= 28;
251 for (i = 9; i < 16; i++)
252 { d += y[i] + m; yy[i] = d&M28; d >>= 28; }
253
254 /* The final carry-out is in d; since we only did addition, and the y_i are
255 * nonnegative, then d is in { 0, 1 }. We want to keep y', rather than y,
256 * if (a) c /= 0 (in which case we know that the old value was
257 * unsatisfactory), or (b) if d = 1 (in which case, if c = 0, we know that
258 * the subtraction didn't cause a borrow, so we must be in the case where
259 * p <= y < φ^2.
260 */
261 m = NONZEROP(c) | ~NONZEROP(d - 1);
262 for (i = 0; i < NPIECE; i++) y[i] = (yy[i]&m) | (y[i]&~m);
263
264 /* Extract 32-bit words from the value. */
265 for (i = 0; i < 7; i++) {
266 u = ((y[i + 0] >> (4*i)) | ((uint32)y[i + 1] << (28 - 4*i)))&M32;
267 v = ((y[i + 8] >> (4*i)) | ((uint32)y[i + 9] << (28 - 4*i)))&M32;
268 STORE32_L(zv + 4*i, u);
269 STORE32_L(zv + 4*i + 28, v);
270 }
271
272#elif FGOLDI_IMPL == 12
273
274 piece y[NPIECE], yy[NPIECE], c, d;
275 uint32 a;
276 mask32 m, mm;
277 unsigned i, j, n, w;
278
279 for (i = 0; i < NPIECE; i++) y[i] = x->P[i];
280
281 /* First, propagate the carries. By the end of this, we'll have all of the
282 * the pieces canonically sized and positive, and maybe there'll be
283 * (signed) carry out. The carry c is in { -1, 0, +1 }, and the remaining
284 * value will be in the half-open interval [0, φ^2). The whole represented
285 * value is then y + φ^2 c.
286 *
287 * Assume that we start out with |y_i| <= 2^14. We start off by cutting
288 * off and reducing the carry c_39 from the topmost piece, y_39. This
289 * leaves 0 <= y_39 < 2^11; and we'll have |c_39| <= 16. We'll add this
290 * onto y_0 and y_20, and propagate the carries. It's very clear that
291 * we'll end up with |y + (φ + 1) c_39 - φ^2/2| << φ^2.
292 *
293 * Here, the y_i are signed, so we must be cautious about bithacking them.
294 */
295 c = ASR(piece, y[39], 11); y[39] = (piece)y[39]&M11; y[20] += c;
296 for (i = 0; i < NPIECE; i++) {
297 w = PIECEWD(i); m = (1 << w) - 1;
298 y[i] += c; c = ASR(piece, y[i], w); y[i] = (upiece)y[i]&m;
299 }
300
301 /* Now we have a slightly fiddly job to do. If c = +1, or if c = 0 and
302 * y >= p, then we should subtract p from the whole value; if c = -1 then
303 * we should add p; and otherwise we should do nothing.
304 *
305 * But conditional behaviour is bad, m'kay. So here's what we do instead.
306 *
307 * The first job is to sort out what we wanted to do. If c = -1 then we
308 * want to (a) invert the constant addend and (b) feed in a carry-in;
309 * otherwise, we don't.
310 */
311 mm = SIGN(c);
312 d = m&1;
313
314 /* Now do the addition/subtraction. Remember that all of the y_i are
315 * nonnegative, so shifting and masking are safe and easy.
316 */
317 d += y[ 0] + (1 ^ (mm&M12)); yy[ 0] = d&M12; d >>= 12;
318 for (i = 1; i < 20; i++) {
319 w = PIECEWD(i); m = MASK(w);
320 d += y[ i] + (mm&m); yy[ i] = d&m; d >>= w;
321 }
322 d += y[20] + (1 ^ (mm&M12)); yy[20] = d&M12; d >>= 12;
323 for (i = 21; i < 40; i++) {
324 w = PIECEWD(i); m = MASK(w);
325 d += y[ i] + (mm&m); yy[ i] = d&m; d >>= w;
326 }
327
328 /* The final carry-out is in d; since we only did addition, and the y_i are
329 * nonnegative, then d is in { 0, 1 }. We want to keep y', rather than y,
330 * if (a) c /= 0 (in which case we know that the old value was
331 * unsatisfactory), or (b) if d = 1 (in which case, if c = 0, we know that
332 * the subtraction didn't cause a borrow, so we must be in the case where
333 * p <= y < φ^2.
334 */
335 m = NONZEROP(c) | ~NONZEROP(d - 1);
336 for (i = 0; i < NPIECE; i++) y[i] = (yy[i]&m) | (y[i]&~m);
337
338 /* Convert that back into octets. */
339 for (i = j = a = n = 0; i < NPIECE; i++) {
340 a |= (uint32)y[i] << n; n += PIECEWD(i);
341 while (n >= 8) { zv[j++] = a&M8; a >>= 8; n -= 8; }
342 }
343
344#endif
345}
346
347/* --- @fgoldi_set@ --- *
348 *
349 * Arguments: @fgoldi *z@ = where to write the result
350 * @int a@ = a small-ish constant
351 *
352 * Returns: ---
353 *
354 * Use: Sets @z@ to equal @a@.
355 */
356
357void fgoldi_set(fgoldi *x, int a)
358{
359 unsigned i;
360
361 x->P[0] = a;
362 for (i = 1; i < NPIECE; i++) x->P[i] = 0;
363}
364
365/*----- Basic arithmetic --------------------------------------------------*/
366
367/* --- @fgoldi_add@ --- *
368 *
369 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
370 * @const fgoldi *x, *y@ = two operands
371 *
372 * Returns: ---
373 *
374 * Use: Set @z@ to the sum %$x + y$%.
375 */
376
377void fgoldi_add(fgoldi *z, const fgoldi *x, const fgoldi *y)
378{
379 unsigned i;
380 for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] + y->P[i];
381}
382
383/* --- @fgoldi_sub@ --- *
384 *
385 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
386 * @const fgoldi *x, *y@ = two operands
387 *
388 * Returns: ---
389 *
390 * Use: Set @z@ to the difference %$x - y$%.
391 */
392
393void fgoldi_sub(fgoldi *z, const fgoldi *x, const fgoldi *y)
394{
395 unsigned i;
396 for (i = 0; i < NPIECE; i++) z->P[i] = x->P[i] - y->P[i];
397}
398
1bc00e2a
MW
399/* --- @fgoldi_neg@ --- *
400 *
401 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
402 * @const fgoldi *x@ = an operand
403 *
404 * Returns: ---
405 *
406 * Use: Set @z = -x@.
407 */
408
409void fgoldi_neg(fgoldi *z, const fgoldi *x)
410{
411 unsigned i;
412 for (i = 0; i < NPIECE; i++) z->P[i] = -x->P[i];
413}
414
266efb73
MW
415/*----- Constant-time utilities -------------------------------------------*/
416
1bc00e2a
MW
417/* --- @fgoldi_pick2@ --- *
418 *
419 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
420 * @const fgoldi *x, *y@ = two operands
421 * @uint32 m@ = a mask
422 *
423 * Returns: ---
424 *
425 * Use: If @m@ is zero, set @z = y@; if @m@ is all-bits-set, then set
426 * @z = x@. If @m@ has some other value, then scramble @z@ in
427 * an unhelpful way.
428 */
429
430void fgoldi_pick2(fgoldi *z, const fgoldi *x, const fgoldi *y, uint32 m)
431{
432 mask32 mm = FIX_MASK32(m);
433 unsigned i;
434 for (i = 0; i < NPIECE; i++) z->P[i] = PICK2(x->P[i], y->P[i], mm);
435}
436
437/* --- @fgoldi_pickn@ --- *
438 *
439 * Arguments: @fgoldi *z@ = where to put the result
440 * @const fgoldi *v@ = a table of entries
441 * @size_t n@ = the number of entries in @v@
442 * @size_t i@ = an index
443 *
444 * Returns: ---
445 *
446 * Use: If @0 <= i < n < 32@ then set @z = v[i]@. If @n >= 32@ then
447 * do something unhelpful; otherwise, if @i >= n@ then set @z@
448 * to zero.
449 */
450
451void fgoldi_pickn(fgoldi *z, const fgoldi *v, size_t n, size_t i)
452{
453 uint32 b = (uint32)1 << (31 - i);
454 mask32 m;
455 unsigned j;
456
457 for (j = 0; j < NPIECE; j++) z->P[j] = 0;
458 while (n--) {
459 m = SIGN(b);
460 for (j = 0; j < NPIECE; j++) CONDPICK(z->P[j], v->P[j], m);
461 v++; b <<= 1;
462 }
463}
464
266efb73
MW
465/* --- @fgoldi_condswap@ --- *
466 *
467 * Arguments: @fgoldi *x, *y@ = two operands
468 * @uint32 m@ = a mask
469 *
470 * Returns: ---
471 *
472 * Use: If @m@ is zero, do nothing; if @m@ is all-bits-set, then
473 * exchange @x@ and @y@. If @m@ has some other value, then
474 * scramble @x@ and @y@ in an unhelpful way.
475 */
476
477void fgoldi_condswap(fgoldi *x, fgoldi *y, uint32 m)
478{
479 unsigned i;
480 mask32 mm = FIX_MASK32(m);
481
482 for (i = 0; i < NPIECE; i++) CONDSWAP(x->P[i], y->P[i], mm);
483}
484
1bc00e2a
MW
485/* --- @fgoldi_condneg@ --- *
486 *
487 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
488 * @const fgoldi *x@ = an operand
489 * @uint32 m@ = a mask
490 *
491 * Returns: ---
492 *
493 * Use: If @m@ is zero, set @z = x@; if @m@ is all-bits-set, then set
494 * @z = -x@. If @m@ has some other value then scramble @z@ in
495 * an unhelpful way.
496 */
497
498void fgoldi_condneg(fgoldi *z, const fgoldi *x, uint32 m)
499{
500#ifdef NEG_TWOC
501 mask32 m_xor = FIX_MASK32(m);
502 piece m_add = m&1;
503# define CONDNEG(x) (((x) ^ m_xor) + m_add)
504#else
505 int s = PICK2(-1, +1, m);
506# define CONDNEG(x) (s*(x))
507#endif
508
509 unsigned i;
510 for (i = 0; i < NPIECE; i++) z->P[i] = CONDNEG(x->P[i]);
511
512#undef CONDNEG
513}
514
266efb73
MW
515/*----- Multiplication ----------------------------------------------------*/
516
517#if FGOLDI_IMPL == 28
518
519/* Let B = 2^63 - 1 be the largest value such that +B and -B can be
520 * represented in a double-precision piece. On entry, it must be the case
521 * that |X_i| <= M <= B - 2^27 for some M. If this is the case, then, on
522 * exit, we will have |Z_i| <= 2^27 + M/2^27.
523 */
524#define CARRY_REDUCE(z, x) do { \
525 dblpiece _t[NPIECE], _c; \
526 unsigned _i; \
527 \
528 /* Bias the input pieces. This keeps the carries and so on centred \
529 * around zero rather than biased positive. \
530 */ \
531 for (_i = 0; _i < NPIECE; _i++) _t[_i] = (x)[_i] + B27; \
532 \
533 /* Calculate the reduced pieces. Careful with the bithacking. */ \
534 _c = ASR(dblpiece, _t[15], 28); \
535 (z)[0] = (dblpiece)((udblpiece)_t[0]&M28) - B27 + _c; \
536 for (_i = 1; _i < NPIECE; _i++) { \
537 (z)[_i] = (dblpiece)((udblpiece)_t[_i]&M28) - B27 + \
538 ASR(dblpiece, _t[_i - 1], 28); \
539 } \
540 (z)[8] += _c; \
541} while (0)
542
543#elif FGOLDI_IMPL == 12
544
545static void carry_reduce(dblpiece x[NPIECE])
546{
547 /* Initial bounds: we assume |x_i| < 2^31 - 2^27. */
548
549 unsigned i, j;
550 dblpiece c;
551
552 /* The result is nearly canonical, because we do sequential carry
553 * propagation, because smaller processors are more likely to prefer the
554 * smaller working set than the instruction-level parallelism.
555 *
556 * Start at x_37; truncate it to 10 bits, and propagate the carry to x_38.
557 * Truncate x_38 to 10 bits, and add the carry onto x_39. Truncate x_39 to
558 * 10 bits, and add the carry onto x_0 and x_20. And so on.
559 *
560 * Once we reach x_37 for the second time, we start with |x_37| <= 2^10.
561 * The carry into x_37 is at most 2^21; so the carry out into x_38 has
562 * magnitude at most 2^10. In turn, |x_38| <= 2^10 before the carry, so is
563 * now no more than 2^11 in magnitude, and the carry out into x_39 is at
564 * most 1. This leaves |x_39| <= 2^10 + 1 after carry propagation.
565 *
566 * Be careful with the bit hacking because the quantities involved are
567 * signed.
568 */
569
570 /* For each piece, we bias it so that floor division (as done by an
571 * arithmetic right shift) and modulus (as done by bitwise-AND) does the
572 * right thing.
573 */
574#define CARRY(i, wd, b, m) do { \
575 x[i] += (b); \
576 c = ASR(dblpiece, x[i], (wd)); \
577 x[i] = (dblpiece)((udblpiece)x[i]&(m)) - (b); \
578} while (0)
579
580 { CARRY(37, 11, B10, M11); }
581 { x[38] += c; CARRY(38, 11, B10, M11); }
582 { x[39] += c; CARRY(39, 11, B10, M11); }
583 x[20] += c;
584 for (i = 0; i < 35; ) {
585 { x[i] += c; CARRY( i, 12, B11, M12); i++; }
586 for (j = i + 4; i < j; ) { x[i] += c; CARRY( i, 11, B10, M11); i++; }
587 }
588 { x[i] += c; CARRY( i, 12, B11, M12); i++; }
589 while (i < 39) { x[i] += c; CARRY( i, 11, B10, M11); i++; }
590 x[39] += c;
591}
592
593#endif
594
595/* --- @fgoldi_mulconst@ --- *
596 *
597 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
598 * @const fgoldi *x@ = an operand
599 * @long a@ = a small-ish constant; %$|a| < 2^{20}$%.
600 *
601 * Returns: ---
602 *
603 * Use: Set @z@ to the product %$a x$%.
604 */
605
606void fgoldi_mulconst(fgoldi *z, const fgoldi *x, long a)
607{
608 unsigned i;
609 dblpiece zz[NPIECE], aa = a;
610
611 for (i = 0; i < NPIECE; i++) zz[i] = aa*x->P[i];
612#if FGOLDI_IMPL == 28
613 CARRY_REDUCE(z->P, zz);
614#elif FGOLDI_IMPL == 12
615 carry_reduce(zz);
616 for (i = 0; i < NPIECE; i++) z->P[i] = zz[i];
617#endif
618}
619
620/* --- @fgoldi_mul@ --- *
621 *
622 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
623 * @const fgoldi *x, *y@ = two operands
624 *
625 * Returns: ---
626 *
627 * Use: Set @z@ to the product %$x y$%.
628 */
629
630void fgoldi_mul(fgoldi *z, const fgoldi *x, const fgoldi *y)
631{
632 dblpiece zz[NPIECE], u[NPIECE];
633 piece ab[NPIECE/2], cd[NPIECE/2];
634 const piece
635 *a = x->P + NPIECE/2, *b = x->P,
636 *c = y->P + NPIECE/2, *d = y->P;
637 unsigned i, j;
638
639#if FGOLDI_IMPL == 28
640
641# define M(x,i, y,j) ((dblpiece)(x)[i]*(y)[j])
642
643#elif FGOLDI_IMPL == 12
644
645 static const unsigned short off[39] = {
646 0, 12, 23, 34, 45, 56, 68, 79, 90, 101,
647 112, 124, 135, 146, 157, 168, 180, 191, 202, 213,
648 224, 236, 247, 258, 269, 280, 292, 303, 314, 325,
649 336, 348, 359, 370, 381, 392, 404, 415, 426
650 };
651
652#define M(x,i, y,j) \
653 (((dblpiece)(x)[i]*(y)[j]) << (off[i] + off[j] - off[(i) + (j)]))
654
655#endif
656
657 /* Behold the magic.
658 *
659 * Write x = a φ + b, and y = c φ + d. Then x y = a c φ^2 +
660 * (a d + b c) φ + b d. Karatsuba and Ofman observed that a d + b c =
661 * (a + b) (c + d) - a c - b d, saving a multiplication, and Hamburg chose
662 * the prime p so that φ^2 = φ + 1. So
663 *
664 * x y = ((a + b) (c + d) - b d) φ + a c + b d
665 */
666
667 for (i = 0; i < NPIECE; i++) zz[i] = 0;
668
669 /* Our first job will be to calculate (1 - φ) b d, and write the result
670 * into z. As we do this, an interesting thing will happen. Write
671 * b d = u φ + v; then (1 - φ) b d = u φ + v - u φ^2 - v φ = (1 - φ) v - u.
672 * So, what we do is to write the product end-swapped and negated, and then
673 * we'll subtract the (negated, remember) high half from the low half.
674 */
675 for (i = 0; i < NPIECE/2; i++) {
676 for (j = 0; j < NPIECE/2 - i; j++)
677 zz[i + j + NPIECE/2] -= M(b,i, d,j);
678 for (; j < NPIECE/2; j++)
679 zz[i + j - NPIECE/2] -= M(b,i, d,j);
680 }
681 for (i = 0; i < NPIECE/2; i++)
682 zz[i] -= zz[i + NPIECE/2];
683
684 /* Next, we add on a c. There are no surprises here. */
685 for (i = 0; i < NPIECE/2; i++)
686 for (j = 0; j < NPIECE/2; j++)
687 zz[i + j] += M(a,i, c,j);
688
689 /* Now, calculate a + b and c + d. */
690 for (i = 0; i < NPIECE/2; i++)
691 { ab[i] = a[i] + b[i]; cd[i] = c[i] + d[i]; }
692
693 /* Finally (for the multiplication) we must add on (a + b) (c + d) φ.
694 * Write (a + b) (c + d) as u φ + v; then we actually want u φ^2 + v φ =
695 * v φ + (1 + φ) u. We'll store u in a temporary place and add it on
696 * twice.
697 */
698 for (i = 0; i < NPIECE; i++) u[i] = 0;
699 for (i = 0; i < NPIECE/2; i++) {
700 for (j = 0; j < NPIECE/2 - i; j++)
701 zz[i + j + NPIECE/2] += M(ab,i, cd,j);
702 for (; j < NPIECE/2; j++)
703 u[i + j - NPIECE/2] += M(ab,i, cd,j);
704 }
705 for (i = 0; i < NPIECE/2; i++)
706 { zz[i] += u[i]; zz[i + NPIECE/2] += u[i]; }
707
708#undef M
709
710#if FGOLDI_IMPL == 28
711 /* That wraps it up for the multiplication. Let's figure out some bounds.
712 * Fortunately, Karatsuba is a polynomial identity, so all of the pieces
713 * end up the way they'd be if we'd done the thing the easy way, which
714 * simplifies the analysis. Suppose we started with |x_i|, |y_i| <= 9/5
715 * 2^28. The overheads in the result are given by the coefficients of
716 *
717 * ((u^16 - 1)/(u - 1))^2 mod u^16 - u^8 - 1
718 *
719 * the greatest of which is 38. So |z_i| <= 38*81/25*2^56 < 2^63.
720 *
721 * Anyway, a round of `CARRY_REDUCE' will leave us with |z_i| < 2^27 +
722 * 2^36; and a second round will leave us with |z_i| < 2^27 + 512.
723 */
724 for (i = 0; i < 2; i++) CARRY_REDUCE(zz, zz);
725 for (i = 0; i < NPIECE; i++) z->P[i] = zz[i];
726#elif FGOLDI_IMPL == 12
727 carry_reduce(zz);
728 for (i = 0; i < NPIECE; i++) z->P[i] = zz[i];
729#endif
730}
731
732/* --- @fgoldi_sqr@ --- *
733 *
734 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
735 * @const fgoldi *x@ = an operand
736 *
737 * Returns: ---
738 *
739 * Use: Set @z@ to the square %$x^2$%.
740 */
741
742void fgoldi_sqr(fgoldi *z, const fgoldi *x)
743{
744#if FGOLDI_IMPL == 28
745
746 dblpiece zz[NPIECE], u[NPIECE];
747 piece ab[NPIECE];
748 const piece *a = x->P + NPIECE/2, *b = x->P;
749 unsigned i, j;
750
751# define M(x,i, y,j) ((dblpiece)(x)[i]*(y)[j])
752
753 /* The magic is basically the same as `fgoldi_mul' above. We write
754 * x = a φ + b and use Karatsuba and the special prime shape. This time,
755 * we have
756 *
757 * x^2 = ((a + b)^2 - b^2) φ + a^2 + b^2
758 */
759
760 for (i = 0; i < NPIECE; i++) zz[i] = 0;
761
762 /* Our first job will be to calculate (1 - φ) b^2, and write the result
763 * into z. Again, this interacts pleasantly with the prime shape.
764 */
765 for (i = 0; i < NPIECE/4; i++) {
766 zz[2*i + NPIECE/2] -= M(b,i, b,i);
767 for (j = i + 1; j < NPIECE/2 - i; j++)
768 zz[i + j + NPIECE/2] -= 2*M(b,i, b,j);
769 for (; j < NPIECE/2; j++)
770 zz[i + j - NPIECE/2] -= 2*M(b,i, b,j);
771 }
772 for (; i < NPIECE/2; i++) {
773 zz[2*i - NPIECE/2] -= M(b,i, b,i);
774 for (j = i + 1; j < NPIECE/2; j++)
775 zz[i + j - NPIECE/2] -= 2*M(b,i, b,j);
776 }
777 for (i = 0; i < NPIECE/2; i++)
778 zz[i] -= zz[i + NPIECE/2];
779
780 /* Next, we add on a^2. There are no surprises here. */
781 for (i = 0; i < NPIECE/2; i++) {
782 zz[2*i] += M(a,i, a,i);
783 for (j = i + 1; j < NPIECE/2; j++)
784 zz[i + j] += 2*M(a,i, a,j);
785 }
786
787 /* Now, calculate a + b. */
788 for (i = 0; i < NPIECE/2; i++)
789 ab[i] = a[i] + b[i];
790
791 /* Finally (for the multiplication) we must add on (a + b)^2 φ.
792 * Write (a + b)^2 as u φ + v; then we actually want (u + v) φ + u. We'll
793 * store u in a temporary place and add it on twice.
794 */
795 for (i = 0; i < NPIECE; i++) u[i] = 0;
796 for (i = 0; i < NPIECE/4; i++) {
797 zz[2*i + NPIECE/2] += M(ab,i, ab,i);
798 for (j = i + 1; j < NPIECE/2 - i; j++)
799 zz[i + j + NPIECE/2] += 2*M(ab,i, ab,j);
800 for (; j < NPIECE/2; j++)
801 u[i + j - NPIECE/2] += 2*M(ab,i, ab,j);
802 }
803 for (; i < NPIECE/2; i++) {
804 u[2*i - NPIECE/2] += M(ab,i, ab,i);
805 for (j = i + 1; j < NPIECE/2; j++)
806 u[i + j - NPIECE/2] += 2*M(ab,i, ab,j);
807 }
808 for (i = 0; i < NPIECE/2; i++)
809 { zz[i] += u[i]; zz[i + NPIECE/2] += u[i]; }
810
811#undef M
812
813 /* Finally, carrying. */
814 for (i = 0; i < 2; i++) CARRY_REDUCE(zz, zz);
815 for (i = 0; i < NPIECE; i++) z->P[i] = zz[i];
816
817#elif FGOLDI_IMPL == 12
818 fgoldi_mul(z, x, x);
819#endif
820}
821
822/*----- More advanced operations ------------------------------------------*/
823
824/* --- @fgoldi_inv@ --- *
825 *
826 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
827 * @const fgoldi *x@ = an operand
828 *
829 * Returns: ---
830 *
831 * Use: Stores in @z@ the multiplicative inverse %$x^{-1}$%. If
832 * %$x = 0$% then @z@ is set to zero. This is considered a
833 * feature.
834 */
835
836void fgoldi_inv(fgoldi *z, const fgoldi *x)
837{
838 fgoldi t, u;
839 unsigned i;
840
841#define SQRN(z, x, n) do { \
842 fgoldi_sqr((z), (x)); \
843 for (i = 1; i < (n); i++) fgoldi_sqr((z), (z)); \
844} while (0)
845
846 /* Calculate x^-1 = x^(p - 2) = x^(2^448 - 2^224 - 3), which also handles
847 * x = 0 as intended. The addition chain is home-made.
848 */ /* step | value */
849 fgoldi_sqr(&u, x); /* 1 | 2 */
850 fgoldi_mul(&t, &u, x); /* 2 | 3 */
851 SQRN(&u, &t, 2); /* 4 | 12 */
852 fgoldi_mul(&t, &u, &t); /* 5 | 15 */
853 SQRN(&u, &t, 4); /* 9 | 240 */
854 fgoldi_mul(&u, &u, &t); /* 10 | 255 = 2^8 - 1 */
855 SQRN(&u, &u, 4); /* 14 | 2^12 - 16 */
856 fgoldi_mul(&t, &u, &t); /* 15 | 2^12 - 1 */
857 SQRN(&u, &t, 12); /* 27 | 2^24 - 2^12 */
858 fgoldi_mul(&u, &u, &t); /* 28 | 2^24 - 1 */
859 SQRN(&u, &u, 12); /* 40 | 2^36 - 2^12 */
860 fgoldi_mul(&t, &u, &t); /* 41 | 2^36 - 1 */
861 fgoldi_sqr(&t, &t); /* 42 | 2^37 - 2 */
862 fgoldi_mul(&t, &t, x); /* 43 | 2^37 - 1 */
863 SQRN(&u, &t, 37); /* 80 | 2^74 - 2^37 */
864 fgoldi_mul(&u, &u, &t); /* 81 | 2^74 - 1 */
865 SQRN(&u, &u, 37); /* 118 | 2^111 - 2^37 */
866 fgoldi_mul(&t, &u, &t); /* 119 | 2^111 - 1 */
867 SQRN(&u, &t, 111); /* 230 | 2^222 - 2^111 */
868 fgoldi_mul(&t, &u, &t); /* 231 | 2^222 - 1 */
869 fgoldi_sqr(&u, &t); /* 232 | 2^223 - 2 */
870 fgoldi_mul(&u, &u, x); /* 233 | 2^223 - 1 */
871 SQRN(&u, &u, 223); /* 456 | 2^446 - 2^223 */
872 fgoldi_mul(&t, &u, &t); /* 457 | 2^446 - 2^222 - 1 */
873 SQRN(&t, &t, 2); /* 459 | 2^448 - 2^224 - 4 */
874 fgoldi_mul(z, &t, x); /* 460 | 2^448 - 2^224 - 3 */
875
876#undef SQRN
877}
878
1bc00e2a
MW
879/* --- @fgoldi_quosqrt@ --- *
880 *
881 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
882 * @const fgoldi *x, *y@ = two operands
883 *
884 * Returns: Zero if successful, @-1@ if %$x/y$% is not a square.
885 *
886 * Use: Stores in @z@ the one of the square roots %$\pm\sqrt{x/y}$%.
887 * If %$x = y = 0% then the result is zero; if %$y = 0$% but %$x
888 * \ne 0$% then the operation fails. If you wanted a specific
889 * square root then you'll have to pick it yourself.
890 */
891
892int fgoldi_quosqrt(fgoldi *z, const fgoldi *x, const fgoldi *y)
893{
894 fgoldi t, u, v;
895 octet xb[56], b0[56];
896 int32 rc = -1;
897 mask32 m;
898 unsigned i;
899
900#define SQRN(z, x, n) do { \
901 fgoldi_sqr((z), (x)); \
902 for (i = 1; i < (n); i++) fgoldi_sqr((z), (z)); \
903} while (0)
904
905 /* This is, fortunately, significantly easier than the equivalent problem
906 * in GF(2^255 - 19), since p == 3 (mod 4).
907 *
908 * If x/y is square, then so is v = y^2 x/y = x y, and therefore u has
909 * order r = (p - 1)/2. Let w = v^{(p-3)/4}. Then w^2 = v^{(p-3)/2} =
910 * u^{r-1} = 1/v = 1/x y. Clearly, then, (x w)^2 = x^2/x y = x/y, so x w
911 * is one of the square roots we seek.
912 *
913 * The addition chain, then, is a prefix of the previous one.
914 */
915 fgoldi_mul(&v, x, y);
916
917 fgoldi_sqr(&u, &v); /* 1 | 2 */
918 fgoldi_mul(&t, &u, &v); /* 2 | 3 */
919 SQRN(&u, &t, 2); /* 4 | 12 */
920 fgoldi_mul(&t, &u, &t); /* 5 | 15 */
921 SQRN(&u, &t, 4); /* 9 | 240 */
922 fgoldi_mul(&u, &u, &t); /* 10 | 255 = 2^8 - 1 */
923 SQRN(&u, &u, 4); /* 14 | 2^12 - 16 */
924 fgoldi_mul(&t, &u, &t); /* 15 | 2^12 - 1 */
925 SQRN(&u, &t, 12); /* 27 | 2^24 - 2^12 */
926 fgoldi_mul(&u, &u, &t); /* 28 | 2^24 - 1 */
927 SQRN(&u, &u, 12); /* 40 | 2^36 - 2^12 */
928 fgoldi_mul(&t, &u, &t); /* 41 | 2^36 - 1 */
929 fgoldi_sqr(&t, &t); /* 42 | 2^37 - 2 */
930 fgoldi_mul(&t, &t, &v); /* 43 | 2^37 - 1 */
931 SQRN(&u, &t, 37); /* 80 | 2^74 - 2^37 */
932 fgoldi_mul(&u, &u, &t); /* 81 | 2^74 - 1 */
933 SQRN(&u, &u, 37); /* 118 | 2^111 - 2^37 */
934 fgoldi_mul(&t, &u, &t); /* 119 | 2^111 - 1 */
935 SQRN(&u, &t, 111); /* 230 | 2^222 - 2^111 */
936 fgoldi_mul(&t, &u, &t); /* 231 | 2^222 - 1 */
937 fgoldi_sqr(&u, &t); /* 232 | 2^223 - 2 */
938 fgoldi_mul(&u, &u, &v); /* 233 | 2^223 - 1 */
939 SQRN(&u, &u, 223); /* 456 | 2^446 - 2^223 */
940 fgoldi_mul(&t, &u, &t); /* 457 | 2^446 - 2^222 - 1 */
941
942#undef SQRN
943
944 /* Now we must decide whether the answer was right. We should have z^2 =
945 * x/y, so y z^2 = x.
946 *
947 * The easiest way to compare is to encode. This isn't as wasteful as it
948 * sounds: the hard part is normalizing the representations, which we have
949 * to do anyway.
950 */
951 fgoldi_mul(z, x, &t);
952 fgoldi_sqr(&t, z);
953 fgoldi_mul(&t, &t, y);
954 fgoldi_store(xb, x);
955 fgoldi_store(b0, &t);
956 m = -ct_memeq(xb, b0, 56);
957 rc = PICK2(0, rc, m);
958 return (rc);
959}
960
266efb73
MW
961/*----- Test rig ----------------------------------------------------------*/
962
963#ifdef TEST_RIG
964
141c1284 965#include <mLib/macros.h>
266efb73
MW
966#include <mLib/report.h>
967#include <mLib/str.h>
968#include <mLib/testrig.h>
969
970static void fixdstr(dstr *d)
971{
972 if (d->len > 56)
973 die(1, "invalid length for fgoldi");
974 else if (d->len < 56) {
975 dstr_ensure(d, 56);
976 memset(d->buf + d->len, 0, 56 - d->len);
977 d->len = 56;
978 }
979}
980
981static void cvt_fgoldi(const char *buf, dstr *d)
982{
983 dstr dd = DSTR_INIT;
984
985 type_hex.cvt(buf, &dd); fixdstr(&dd);
986 dstr_ensure(d, sizeof(fgoldi)); d->len = sizeof(fgoldi);
987 fgoldi_load((fgoldi *)d->buf, (const octet *)dd.buf);
988 dstr_destroy(&dd);
989}
990
991static void dump_fgoldi(dstr *d, FILE *fp)
992 { fdump(stderr, "???", (const piece *)d->buf); }
993
994static void cvt_fgoldi_ref(const char *buf, dstr *d)
995 { type_hex.cvt(buf, d); fixdstr(d); }
996
997static void dump_fgoldi_ref(dstr *d, FILE *fp)
998{
999 fgoldi x;
1000
1001 fgoldi_load(&x, (const octet *)d->buf);
1002 fdump(stderr, "???", x.P);
1003}
1004
1005static int eq(const fgoldi *x, dstr *d)
141c1284 1006 { octet b[56]; fgoldi_store(b, x); return (MEMCMP(b, ==, d->buf, 56)); }
266efb73
MW
1007
1008static const test_type
1009 type_fgoldi = { cvt_fgoldi, dump_fgoldi },
1010 type_fgoldi_ref = { cvt_fgoldi_ref, dump_fgoldi_ref };
1011
1012#define TEST_UNOP(op) \
1013 static int vrf_##op(dstr dv[]) \
1014 { \
1015 fgoldi *x = (fgoldi *)dv[0].buf; \
1016 fgoldi z, zz; \
1017 int ok = 1; \
1018 \
1019 fgoldi_##op(&z, x); \
1020 if (!eq(&z, &dv[1])) { \
1021 ok = 0; \
1022 fprintf(stderr, "failed!\n"); \
1023 fdump(stderr, "x", x->P); \
1024 fdump(stderr, "calc", z.P); \
1025 fgoldi_load(&zz, (const octet *)dv[1].buf); \
1026 fdump(stderr, "z", zz.P); \
1027 } \
1028 \
1029 return (ok); \
1030 }
1031
1032TEST_UNOP(sqr)
1033TEST_UNOP(inv)
1bc00e2a 1034TEST_UNOP(neg)
266efb73
MW
1035
1036#define TEST_BINOP(op) \
1037 static int vrf_##op(dstr dv[]) \
1038 { \
1039 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf; \
1040 fgoldi z, zz; \
1041 int ok = 1; \
1042 \
1043 fgoldi_##op(&z, x, y); \
1044 if (!eq(&z, &dv[2])) { \
1045 ok = 0; \
1046 fprintf(stderr, "failed!\n"); \
1047 fdump(stderr, "x", x->P); \
1048 fdump(stderr, "y", y->P); \
1049 fdump(stderr, "calc", z.P); \
1050 fgoldi_load(&zz, (const octet *)dv[2].buf); \
1051 fdump(stderr, "z", zz.P); \
1052 } \
1053 \
1054 return (ok); \
1055 }
1056
1057TEST_BINOP(add)
1058TEST_BINOP(sub)
1059TEST_BINOP(mul)
1060
1061static int vrf_mulc(dstr dv[])
1062{
1063 fgoldi *x = (fgoldi *)dv[0].buf;
1064 long a = *(const long *)dv[1].buf;
1065 fgoldi z, zz;
1066 int ok = 1;
1067
1068 fgoldi_mulconst(&z, x, a);
1069 if (!eq(&z, &dv[2])) {
1070 ok = 0;
1071 fprintf(stderr, "failed!\n");
1072 fdump(stderr, "x", x->P);
1073 fprintf(stderr, "a = %ld\n", a);
1074 fdump(stderr, "calc", z.P);
1075 fgoldi_load(&zz, (const octet *)dv[2].buf);
1076 fdump(stderr, "z", zz.P);
1077 }
1078
1079 return (ok);
1080}
1081
1bc00e2a
MW
1082static int vrf_condneg(dstr dv[])
1083{
1084 fgoldi *x = (fgoldi *)dv[0].buf;
1085 uint32 m = *(uint32 *)dv[1].buf;
1086 fgoldi z;
1087 int ok = 1;
1088
1089 fgoldi_condneg(&z, x, m);
1090 if (!eq(&z, &dv[2])) {
1091 ok = 0;
1092 fprintf(stderr, "failed!\n");
1093 fdump(stderr, "x", x->P);
1094 fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
1095 fdump(stderr, "calc z", z.P);
1096 fgoldi_load(&z, (const octet *)dv[1].buf);
1097 fdump(stderr, "want z", z.P);
1098 }
1099
1100 return (ok);
1101}
1102
1103static int vrf_pick2(dstr dv[])
1104{
1105 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
1106 uint32 m = *(uint32 *)dv[2].buf;
1107 fgoldi z;
1108 int ok = 1;
1109
1110 fgoldi_pick2(&z, x, y, m);
1111 if (!eq(&z, &dv[3])) {
1112 ok = 0;
1113 fprintf(stderr, "failed!\n");
1114 fdump(stderr, "x", x->P);
1115 fdump(stderr, "y", y->P);
1116 fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
1117 fdump(stderr, "calc z", z.P);
1118 fgoldi_load(&z, (const octet *)dv[3].buf);
1119 fdump(stderr, "want z", z.P);
1120 }
1121
1122 return (ok);
1123}
1124
1125static int vrf_pickn(dstr dv[])
1126{
1127 dstr d = DSTR_INIT;
1128 fgoldi v[32], z;
1129 size_t i = *(uint32 *)dv[1].buf, j, n;
1130 const char *p;
1131 char *q;
1132 int ok = 1;
1133
1134 for (q = dv[0].buf, n = 0; (p = str_qword(&q, 0)) != 0; n++)
1135 { cvt_fgoldi(p, &d); v[n] = *(fgoldi *)d.buf; }
1136
1137 fgoldi_pickn(&z, v, n, i);
1138 if (!eq(&z, &dv[2])) {
1139 ok = 0;
1140 fprintf(stderr, "failed!\n");
1141 for (j = 0; j < n; j++) {
1142 fprintf(stderr, "v[%2u]", (unsigned)j);
1143 fdump(stderr, "", v[j].P);
1144 }
1145 fprintf(stderr, "i = %u\n", (unsigned)i);
1146 fdump(stderr, "calc z", z.P);
1147 fgoldi_load(&z, (const octet *)dv[2].buf);
1148 fdump(stderr, "want z", z.P);
1149 }
1150
1151 dstr_destroy(&d);
1152 return (ok);
1153}
1154
266efb73
MW
1155static int vrf_condswap(dstr dv[])
1156{
1157 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
1158 fgoldi xx = *x, yy = *y;
1159 uint32 m = *(uint32 *)dv[2].buf;
1160 int ok = 1;
1161
1162 fgoldi_condswap(&xx, &yy, m);
1163 if (!eq(&xx, &dv[3]) || !eq(&yy, &dv[4])) {
1164 ok = 0;
1165 fprintf(stderr, "failed!\n");
1166 fdump(stderr, "x", x->P);
1167 fdump(stderr, "y", y->P);
1168 fprintf(stderr, "m = 0x%08lx\n", (unsigned long)m);
1169 fdump(stderr, "calc xx", xx.P);
1170 fdump(stderr, "calc yy", yy.P);
1171 fgoldi_load(&xx, (const octet *)dv[3].buf);
1172 fgoldi_load(&yy, (const octet *)dv[4].buf);
1173 fdump(stderr, "want xx", xx.P);
1174 fdump(stderr, "want yy", yy.P);
1175 }
1176
1177 return (ok);
1178}
1179
1bc00e2a
MW
1180static int vrf_quosqrt(dstr dv[])
1181{
1182 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf;
1183 fgoldi z, zz;
1184 int rc;
1185 int ok = 1;
1186
1187 if (dv[2].len) { fixdstr(&dv[2]); fixdstr(&dv[3]); }
1188 rc = fgoldi_quosqrt(&z, x, y);
1189 if (!dv[2].len ? !rc : (rc || (!eq(&z, &dv[2]) && !eq(&z, &dv[3])))) {
1190 ok = 0;
1191 fprintf(stderr, "failed!\n");
1192 fdump(stderr, "x", x->P);
1193 fdump(stderr, "y", y->P);
1194 if (rc) fprintf(stderr, "calc: FAIL\n");
1195 else fdump(stderr, "calc", z.P);
1196 if (!dv[2].len)
1197 fprintf(stderr, "exp: FAIL\n");
1198 else {
1199 fgoldi_load(&zz, (const octet *)dv[2].buf);
1200 fdump(stderr, "z", zz.P);
1201 fgoldi_load(&zz, (const octet *)dv[3].buf);
1202 fdump(stderr, "z'", zz.P);
1203 }
1204 }
1205
1206 return (ok);
1207}
1208
266efb73
MW
1209static int vrf_sub_mulc_add_sub_mul(dstr dv[])
1210{
1211 fgoldi *u = (fgoldi *)dv[0].buf, *v = (fgoldi *)dv[1].buf,
1212 *w = (fgoldi *)dv[3].buf, *x = (fgoldi *)dv[4].buf,
1213 *y = (fgoldi *)dv[5].buf;
1214 long a = *(const long *)dv[2].buf;
1215 fgoldi umv, aumv, wpaumv, xmy, z, zz;
1216 int ok = 1;
1217
1218 fgoldi_sub(&umv, u, v);
1219 fgoldi_mulconst(&aumv, &umv, a);
1220 fgoldi_add(&wpaumv, w, &aumv);
1221 fgoldi_sub(&xmy, x, y);
1222 fgoldi_mul(&z, &wpaumv, &xmy);
1223
1224 if (!eq(&z, &dv[6])) {
1225 ok = 0;
1226 fprintf(stderr, "failed!\n");
1227 fdump(stderr, "u", u->P);
1228 fdump(stderr, "v", v->P);
1229 fdump(stderr, "u - v", umv.P);
1230 fprintf(stderr, "a = %ld\n", a);
1231 fdump(stderr, "a (u - v)", aumv.P);
1232 fdump(stderr, "w + a (u - v)", wpaumv.P);
1233 fdump(stderr, "x", x->P);
1234 fdump(stderr, "y", y->P);
1235 fdump(stderr, "x - y", xmy.P);
1236 fdump(stderr, "(x - y) (w + a (u - v))", z.P);
1237 fgoldi_load(&zz, (const octet *)dv[6].buf); fdump(stderr, "z", zz.P);
1238 }
1239
1240 return (ok);
1241}
1242
1243static test_chunk tests[] = {
1244 { "add", vrf_add, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
1245 { "sub", vrf_sub, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
1bc00e2a
MW
1246 { "neg", vrf_neg, { &type_fgoldi, &type_fgoldi_ref } },
1247 { "condneg", vrf_condneg,
1248 { &type_fgoldi, &type_uint32, &type_fgoldi_ref } },
266efb73
MW
1249 { "mul", vrf_mul, { &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
1250 { "mulconst", vrf_mulc, { &type_fgoldi, &type_long, &type_fgoldi_ref } },
1bc00e2a
MW
1251 { "pick2", vrf_pick2,
1252 { &type_fgoldi, &type_fgoldi, &type_uint32, &type_fgoldi_ref } },
1253 { "pickn", vrf_pickn,
1254 { &type_string, &type_uint32, &type_fgoldi_ref } },
266efb73
MW
1255 { "condswap", vrf_condswap,
1256 { &type_fgoldi, &type_fgoldi, &type_uint32,
1257 &type_fgoldi_ref, &type_fgoldi_ref } },
1258 { "sqr", vrf_sqr, { &type_fgoldi, &type_fgoldi_ref } },
1259 { "inv", vrf_inv, { &type_fgoldi, &type_fgoldi_ref } },
1bc00e2a
MW
1260 { "quosqrt", vrf_quosqrt,
1261 { &type_fgoldi, &type_fgoldi, &type_hex, &type_hex } },
266efb73
MW
1262 { "sub-mulc-add-sub-mul", vrf_sub_mulc_add_sub_mul,
1263 { &type_fgoldi, &type_fgoldi, &type_long, &type_fgoldi,
1264 &type_fgoldi, &type_fgoldi, &type_fgoldi_ref } },
1265 { 0, 0, { 0 } }
1266};
1267
1268int main(int argc, char *argv[])
1269{
1270 test_run(argc, argv, tests, SRCDIR "/t/fgoldi");
1271 return (0);
1272}
1273
1274#endif
1275
1276/*----- That's all, folks -------------------------------------------------*/