3 * Arithmetic in the Goldilocks field GF(2^448 - 2^224 - 1)
5 * (c) 2017 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of Catacomb.
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
28 /*----- Header files ------------------------------------------------------*/
35 /*----- Basic setup -------------------------------------------------------*
37 * Let φ = 2^224; then p = φ^2 - φ - 1, and, in GF(p), we have φ^2 = φ + 1
41 typedef fgoldi_piece piece
;
44 /* We represent an element of GF(p) as 16 28-bit signed integer pieces x_i:
45 * x = SUM_{0<=i<16} x_i 2^(28i).
48 typedef int64 dblpiece
;
49 typedef uint32 upiece
; typedef uint64 udblpiece
;
54 #define B27 0x08000000u
55 #define M28 0x0fffffffu
56 #define M32 0xffffffffu
58 #elif FGOLDI_IMPL == 12
59 /* We represent an element of GF(p) as 40 signed integer pieces x_i: x =
60 * SUM_{0<=i<40} x_i 2^ceil(224i/20). Pieces i with i == 0 (mod 5) are 12
61 * bits wide; the others are 11 bits wide, so they form eight groups of 56
65 typedef int32 dblpiece
;
66 typedef uint16 upiece
; typedef uint32 udblpiece
;
67 #define PIECEWD(i) ((i)%5 ? 11 : 12)
79 /*----- Debugging machinery -----------------------------------------------*/
81 #if defined(FGOLDI_DEBUG) || defined(TEST_RIG)
88 static mp
*get_pgoldi(void)
90 mp
*p
= MP_NEW
, *t
= MP_NEW
;
92 p
= mp_setbit(p
, MP_ZERO
, 448);
93 t
= mp_setbit(t
, MP_ZERO
, 224);
95 p
= mp_sub(p
, p
, MP_ONE
);
100 DEF_FDUMP(fdump
, piece
, PIECEWD
, NPIECE
, 56, get_pgoldi())
104 /*----- Loading and storing -----------------------------------------------*/
106 /* --- @fgoldi_load@ --- *
108 * Arguments: @fgoldi *z@ = where to store the result
109 * @const octet xv[56]@ = source to read
113 * Use: Reads an element of %$\gf{2^{448} - 2^{224} - 19}$% in
114 * external representation from @xv@ and stores it in @z@.
116 * External representation is little-endian base-256. Some
117 * elements have multiple encodings, which are not produced by
118 * correct software; use of noncanonical encodings is not an
119 * error, and toleration of them is considered a performance
123 void fgoldi_load(fgoldi
*z
, const octet xv
[56])
125 #if FGOLDI_IMPL == 28
131 /* First, read the input value as words. */
132 for (i
= 0; i
< 14; i
++) xw
[i
] = LOAD32_L(xv
+ 4*i
);
134 /* Extract unsigned 28-bit pieces from the words. */
135 z
->P
[ 0] = (xw
[ 0] >> 0)&M28
;
136 z
->P
[ 7] = (xw
[ 6] >> 4)&M28
;
137 z
->P
[ 8] = (xw
[ 7] >> 0)&M28
;
138 z
->P
[15] = (xw
[13] >> 4)&M28
;
139 for (i
= 1; i
< 7; i
++) {
140 z
->P
[i
+ 0] = ((xw
[i
+ 0] << (4*i
)) | (xw
[i
- 1] >> (32 - 4*i
)))&M28
;
141 z
->P
[i
+ 8] = ((xw
[i
+ 7] << (4*i
)) | (xw
[i
+ 6] >> (32 - 4*i
)))&M28
;
144 /* Convert the nonnegative pieces into a balanced signed representation, so
145 * each piece ends up in the interval |z_i| <= 2^27. For each piece, if
146 * its top bit is set, lend a bit leftwards; in the case of z_15, reduce
147 * this bit by adding it onto z_0 and z_8, since this is the φ^2 bit, and
148 * φ^2 = φ + 1. We delay this carry until after all of the pieces have
149 * been balanced. If we don't do this, then we have to do a more expensive
150 * test for nonzeroness to decide whether to lend a bit leftwards rather
151 * than just testing a single bit.
153 * Note that we don't try for a canonical representation here: both upper
154 * and lower bounds are achievable.
156 b
= z
->P
[15]&B27
; z
->P
[15] -= b
<< 1; c
= b
>> 27;
157 for (i
= NPIECE
- 1; i
--; )
158 { b
= z
->P
[i
]&B27
; z
->P
[i
] -= b
<< 1; z
->P
[i
+ 1] += b
>> 27; }
159 z
->P
[0] += c
; z
->P
[8] += c
;
161 #elif FGOLDI_IMPL == 12
163 unsigned i
, j
, n
, w
, b
;
167 /* First, convert the bytes into nonnegative pieces. */
168 for (i
= j
= a
= n
= 0, w
= PIECEWD(0); i
< 56; i
++) {
169 a
|= (uint32
)xv
[i
] << n
; n
+= 8;
171 z
->P
[j
++] = a
&MASK(w
);
172 a
>>= w
; n
-= w
; w
= PIECEWD(j
);
176 /* Convert the nonnegative pieces into a balanced signed representation, so
177 * each piece ends up in the interval |z_i| <= 2^11 + 1.
179 b
= z
->P
[39]&B10
; z
->P
[39] -= b
<< 1; c
= b
>> 10;
180 for (i
= NPIECE
- 1; i
--; ) {
184 z
->P
[i
+ 1] += b
>> w
;
186 z
->P
[0] += c
; z
->P
[20] += c
;
191 /* --- @fgoldi_store@ --- *
193 * Arguments: @octet zv[56]@ = where to write the result
194 * @const fgoldi *x@ = the field element to write
198 * Use: Stores a field element in the given octet vector in external
199 * representation. A canonical encoding is always stored.
202 void fgoldi_store(octet zv
[56], const fgoldi
*x
)
204 #if FGOLDI_IMPL == 28
206 piece y
[NPIECE
], yy
[NPIECE
], c
, d
;
211 for (i
= 0; i
< NPIECE
; i
++) y
[i
] = x
->P
[i
];
213 /* First, propagate the carries. By the end of this, we'll have all of the
214 * the pieces canonically sized and positive, and maybe there'll be
215 * (signed) carry out. The carry c is in { -1, 0, +1 }, and the remaining
216 * value will be in the half-open interval [0, φ^2). The whole represented
217 * value is then y + φ^2 c.
219 * Assume that we start out with |y_i| <= 2^30. We start off by cutting
220 * off and reducing the carry c_15 from the topmost piece, y_15. This
221 * leaves 0 <= y_15 < 2^28; and we'll have |c_15| <= 4. We'll add this
222 * onto y_0 and y_8, and propagate the carries. It's very clear that we'll
223 * end up with |y + (φ + 1) c_15 - φ^2/2| << φ^2.
225 * Here, the y_i are signed, so we must be cautious about bithacking them.
227 c
= ASR(piece
, y
[15], 28); y
[15] = (upiece
)y
[15]&M28
; y
[8] += c
;
228 for (i
= 0; i
< NPIECE
; i
++)
229 { y
[i
] += c
; c
= ASR(piece
, y
[i
], 28); y
[i
] = (upiece
)y
[i
]&M28
; }
231 /* Now we have a slightly fiddly job to do. If c = +1, or if c = 0 and
232 * y >= p, then we should subtract p from the whole value; if c = -1 then
233 * we should add p; and otherwise we should do nothing.
235 * But conditional behaviour is bad, m'kay. So here's what we do instead.
237 * The first job is to sort out what we wanted to do. If c = -1 then we
238 * want to (a) invert the constant addend and (b) feed in a carry-in;
239 * otherwise, we don't.
244 /* Now do the addition/subtraction. Remember that all of the y_i are
245 * nonnegative, so shifting and masking are safe and easy.
247 d
+= y
[0] + (1 ^ m
); yy
[0] = d
&M28
; d
>>= 28;
248 for (i
= 1; i
< 8; i
++)
249 { d
+= y
[i
] + m
; yy
[i
] = d
&M28
; d
>>= 28; }
250 d
+= y
[8] + (1 ^ m
); yy
[8] = d
&M28
; d
>>= 28;
251 for (i
= 9; i
< 16; i
++)
252 { d
+= y
[i
] + m
; yy
[i
] = d
&M28
; d
>>= 28; }
254 /* The final carry-out is in d; since we only did addition, and the y_i are
255 * nonnegative, then d is in { 0, 1 }. We want to keep y', rather than y,
256 * if (a) c /= 0 (in which case we know that the old value was
257 * unsatisfactory), or (b) if d = 1 (in which case, if c = 0, we know that
258 * the subtraction didn't cause a borrow, so we must be in the case where
261 m
= NONZEROP(c
) | ~NONZEROP(d
- 1);
262 for (i
= 0; i
< NPIECE
; i
++) y
[i
] = (yy
[i
]&m
) | (y
[i
]&~m
);
264 /* Extract 32-bit words from the value. */
265 for (i
= 0; i
< 7; i
++) {
266 u
= ((y
[i
+ 0] >> (4*i
)) | ((uint32
)y
[i
+ 1] << (28 - 4*i
)))&M32
;
267 v
= ((y
[i
+ 8] >> (4*i
)) | ((uint32
)y
[i
+ 9] << (28 - 4*i
)))&M32
;
268 STORE32_L(zv
+ 4*i
, u
);
269 STORE32_L(zv
+ 4*i
+ 28, v
);
272 #elif FGOLDI_IMPL == 12
274 piece y
[NPIECE
], yy
[NPIECE
], c
, d
;
279 for (i
= 0; i
< NPIECE
; i
++) y
[i
] = x
->P
[i
];
281 /* First, propagate the carries. By the end of this, we'll have all of the
282 * the pieces canonically sized and positive, and maybe there'll be
283 * (signed) carry out. The carry c is in { -1, 0, +1 }, and the remaining
284 * value will be in the half-open interval [0, φ^2). The whole represented
285 * value is then y + φ^2 c.
287 * Assume that we start out with |y_i| <= 2^14. We start off by cutting
288 * off and reducing the carry c_39 from the topmost piece, y_39. This
289 * leaves 0 <= y_39 < 2^11; and we'll have |c_39| <= 16. We'll add this
290 * onto y_0 and y_20, and propagate the carries. It's very clear that
291 * we'll end up with |y + (φ + 1) c_39 - φ^2/2| << φ^2.
293 * Here, the y_i are signed, so we must be cautious about bithacking them.
295 c
= ASR(piece
, y
[39], 11); y
[39] = (piece
)y
[39]&M11
; y
[20] += c
;
296 for (i
= 0; i
< NPIECE
; i
++) {
297 w
= PIECEWD(i
); m
= (1 << w
) - 1;
298 y
[i
] += c
; c
= ASR(piece
, y
[i
], w
); y
[i
] = (upiece
)y
[i
]&m
;
301 /* Now we have a slightly fiddly job to do. If c = +1, or if c = 0 and
302 * y >= p, then we should subtract p from the whole value; if c = -1 then
303 * we should add p; and otherwise we should do nothing.
305 * But conditional behaviour is bad, m'kay. So here's what we do instead.
307 * The first job is to sort out what we wanted to do. If c = -1 then we
308 * want to (a) invert the constant addend and (b) feed in a carry-in;
309 * otherwise, we don't.
314 /* Now do the addition/subtraction. Remember that all of the y_i are
315 * nonnegative, so shifting and masking are safe and easy.
317 d
+= y
[ 0] + (1 ^ (mm
&M12
)); yy
[ 0] = d
&M12
; d
>>= 12;
318 for (i
= 1; i
< 20; i
++) {
319 w
= PIECEWD(i
); m
= MASK(w
);
320 d
+= y
[ i
] + (mm
&m
); yy
[ i
] = d
&m
; d
>>= w
;
322 d
+= y
[20] + (1 ^ (mm
&M12
)); yy
[20] = d
&M12
; d
>>= 12;
323 for (i
= 21; i
< 40; i
++) {
324 w
= PIECEWD(i
); m
= MASK(w
);
325 d
+= y
[ i
] + (mm
&m
); yy
[ i
] = d
&m
; d
>>= w
;
328 /* The final carry-out is in d; since we only did addition, and the y_i are
329 * nonnegative, then d is in { 0, 1 }. We want to keep y', rather than y,
330 * if (a) c /= 0 (in which case we know that the old value was
331 * unsatisfactory), or (b) if d = 1 (in which case, if c = 0, we know that
332 * the subtraction didn't cause a borrow, so we must be in the case where
335 m
= NONZEROP(c
) | ~NONZEROP(d
- 1);
336 for (i
= 0; i
< NPIECE
; i
++) y
[i
] = (yy
[i
]&m
) | (y
[i
]&~m
);
338 /* Convert that back into octets. */
339 for (i
= j
= a
= n
= 0; i
< NPIECE
; i
++) {
340 a
|= (uint32
)y
[i
] << n
; n
+= PIECEWD(i
);
341 while (n
>= 8) { zv
[j
++] = a
&M8
; a
>>= 8; n
-= 8; }
347 /* --- @fgoldi_set@ --- *
349 * Arguments: @fgoldi *z@ = where to write the result
350 * @int a@ = a small-ish constant
354 * Use: Sets @z@ to equal @a@.
357 void fgoldi_set(fgoldi
*x
, int a
)
362 for (i
= 1; i
< NPIECE
; i
++) x
->P
[i
] = 0;
365 /*----- Basic arithmetic --------------------------------------------------*/
367 /* --- @fgoldi_add@ --- *
369 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
370 * @const fgoldi *x, *y@ = two operands
374 * Use: Set @z@ to the sum %$x + y$%.
377 void fgoldi_add(fgoldi
*z
, const fgoldi
*x
, const fgoldi
*y
)
380 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = x
->P
[i
] + y
->P
[i
];
383 /* --- @fgoldi_sub@ --- *
385 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
386 * @const fgoldi *x, *y@ = two operands
390 * Use: Set @z@ to the difference %$x - y$%.
393 void fgoldi_sub(fgoldi
*z
, const fgoldi
*x
, const fgoldi
*y
)
396 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = x
->P
[i
] - y
->P
[i
];
399 /* --- @fgoldi_neg@ --- *
401 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
402 * @const fgoldi *x@ = an operand
409 void fgoldi_neg(fgoldi
*z
, const fgoldi
*x
)
412 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = -x
->P
[i
];
415 /*----- Constant-time utilities -------------------------------------------*/
417 /* --- @fgoldi_pick2@ --- *
419 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
420 * @const fgoldi *x, *y@ = two operands
421 * @uint32 m@ = a mask
425 * Use: If @m@ is zero, set @z = y@; if @m@ is all-bits-set, then set
426 * @z = x@. If @m@ has some other value, then scramble @z@ in
430 void fgoldi_pick2(fgoldi
*z
, const fgoldi
*x
, const fgoldi
*y
, uint32 m
)
432 mask32 mm
= FIX_MASK32(m
);
434 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = PICK2(x
->P
[i
], y
->P
[i
], mm
);
437 /* --- @fgoldi_pickn@ --- *
439 * Arguments: @fgoldi *z@ = where to put the result
440 * @const fgoldi *v@ = a table of entries
441 * @size_t n@ = the number of entries in @v@
442 * @size_t i@ = an index
446 * Use: If @0 <= i < n < 32@ then set @z = v[i]@. If @n >= 32@ then
447 * do something unhelpful; otherwise, if @i >= n@ then set @z@
451 void fgoldi_pickn(fgoldi
*z
, const fgoldi
*v
, size_t n
, size_t i
)
453 uint32 b
= (uint32
)1 << (31 - i
);
457 for (j
= 0; j
< NPIECE
; j
++) z
->P
[j
] = 0;
460 for (j
= 0; j
< NPIECE
; j
++) CONDPICK(z
->P
[j
], v
->P
[j
], m
);
465 /* --- @fgoldi_condswap@ --- *
467 * Arguments: @fgoldi *x, *y@ = two operands
468 * @uint32 m@ = a mask
472 * Use: If @m@ is zero, do nothing; if @m@ is all-bits-set, then
473 * exchange @x@ and @y@. If @m@ has some other value, then
474 * scramble @x@ and @y@ in an unhelpful way.
477 void fgoldi_condswap(fgoldi
*x
, fgoldi
*y
, uint32 m
)
480 mask32 mm
= FIX_MASK32(m
);
482 for (i
= 0; i
< NPIECE
; i
++) CONDSWAP(x
->P
[i
], y
->P
[i
], mm
);
485 /* --- @fgoldi_condneg@ --- *
487 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
488 * @const fgoldi *x@ = an operand
489 * @uint32 m@ = a mask
493 * Use: If @m@ is zero, set @z = x@; if @m@ is all-bits-set, then set
494 * @z = -x@. If @m@ has some other value then scramble @z@ in
498 void fgoldi_condneg(fgoldi
*z
, const fgoldi
*x
, uint32 m
)
501 mask32 m_xor
= FIX_MASK32(m
);
503 # define CONDNEG(x) (((x) ^ m_xor) + m_add)
505 int s
= PICK2(-1, +1, m
);
506 # define CONDNEG(x) (s*(x))
510 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = CONDNEG(x
->P
[i
]);
515 /*----- Multiplication ----------------------------------------------------*/
517 #if FGOLDI_IMPL == 28
519 /* Let B = 2^63 - 1 be the largest value such that +B and -B can be
520 * represented in a double-precision piece. On entry, it must be the case
521 * that |X_i| <= M <= B - 2^27 for some M. If this is the case, then, on
522 * exit, we will have |Z_i| <= 2^27 + M/2^27.
524 #define CARRY_REDUCE(z, x) do { \
525 dblpiece _t[NPIECE], _c; \
528 /* Bias the input pieces. This keeps the carries and so on centred \
529 * around zero rather than biased positive. \
531 for (_i = 0; _i < NPIECE; _i++) _t[_i] = (x)[_i] + B27; \
533 /* Calculate the reduced pieces. Careful with the bithacking. */ \
534 _c = ASR(dblpiece, _t[15], 28); \
535 (z)[0] = (dblpiece)((udblpiece)_t[0]&M28) - B27 + _c; \
536 for (_i = 1; _i < NPIECE; _i++) { \
537 (z)[_i] = (dblpiece)((udblpiece)_t[_i]&M28) - B27 + \
538 ASR(dblpiece, _t[_i - 1], 28); \
543 #elif FGOLDI_IMPL == 12
545 static void carry_reduce(dblpiece x
[NPIECE
])
547 /* Initial bounds: we assume |x_i| < 2^31 - 2^27. */
552 /* The result is nearly canonical, because we do sequential carry
553 * propagation, because smaller processors are more likely to prefer the
554 * smaller working set than the instruction-level parallelism.
556 * Start at x_37; truncate it to 10 bits, and propagate the carry to x_38.
557 * Truncate x_38 to 10 bits, and add the carry onto x_39. Truncate x_39 to
558 * 10 bits, and add the carry onto x_0 and x_20. And so on.
560 * Once we reach x_37 for the second time, we start with |x_37| <= 2^10.
561 * The carry into x_37 is at most 2^21; so the carry out into x_38 has
562 * magnitude at most 2^10. In turn, |x_38| <= 2^10 before the carry, so is
563 * now no more than 2^11 in magnitude, and the carry out into x_39 is at
564 * most 1. This leaves |x_39| <= 2^10 + 1 after carry propagation.
566 * Be careful with the bit hacking because the quantities involved are
570 /* For each piece, we bias it so that floor division (as done by an
571 * arithmetic right shift) and modulus (as done by bitwise-AND) does the
574 #define CARRY(i, wd, b, m) do { \
576 c = ASR(dblpiece, x[i], (wd)); \
577 x[i] = (dblpiece)((udblpiece)x[i]&(m)) - (b); \
580 { CARRY(37, 11, B10
, M11
); }
581 { x
[38] += c
; CARRY(38, 11, B10
, M11
); }
582 { x
[39] += c
; CARRY(39, 11, B10
, M11
); }
584 for (i
= 0; i
< 35; ) {
585 { x
[i
] += c
; CARRY( i
, 12, B11
, M12
); i
++; }
586 for (j
= i
+ 4; i
< j
; ) { x
[i
] += c
; CARRY( i
, 11, B10
, M11
); i
++; }
588 { x
[i
] += c
; CARRY( i
, 12, B11
, M12
); i
++; }
589 while (i
< 39) { x
[i
] += c
; CARRY( i
, 11, B10
, M11
); i
++; }
595 /* --- @fgoldi_mulconst@ --- *
597 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
598 * @const fgoldi *x@ = an operand
599 * @long a@ = a small-ish constant; %$|a| < 2^{20}$%.
603 * Use: Set @z@ to the product %$a x$%.
606 void fgoldi_mulconst(fgoldi
*z
, const fgoldi
*x
, long a
)
609 dblpiece zz
[NPIECE
], aa
= a
;
611 for (i
= 0; i
< NPIECE
; i
++) zz
[i
] = aa
*x
->P
[i
];
612 #if FGOLDI_IMPL == 28
613 CARRY_REDUCE(z
->P
, zz
);
614 #elif FGOLDI_IMPL == 12
616 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = zz
[i
];
620 /* --- @fgoldi_mul@ --- *
622 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
623 * @const fgoldi *x, *y@ = two operands
627 * Use: Set @z@ to the product %$x y$%.
630 void fgoldi_mul(fgoldi
*z
, const fgoldi
*x
, const fgoldi
*y
)
632 dblpiece zz
[NPIECE
], u
[NPIECE
];
633 piece ab
[NPIECE
/2], cd
[NPIECE
/2];
635 *a
= x
->P
+ NPIECE
/2, *b
= x
->P
,
636 *c
= y
->P
+ NPIECE
/2, *d
= y
->P
;
639 #if FGOLDI_IMPL == 28
641 # define M(x,i, y,j) ((dblpiece)(x)[i]*(y)[j])
643 #elif FGOLDI_IMPL == 12
645 static const unsigned short off
[39] = {
646 0, 12, 23, 34, 45, 56, 68, 79, 90, 101,
647 112, 124, 135, 146, 157, 168, 180, 191, 202, 213,
648 224, 236, 247, 258, 269, 280, 292, 303, 314, 325,
649 336, 348, 359, 370, 381, 392, 404, 415, 426
652 #define M(x,i, y,j) \
653 (((dblpiece)(x)[i]*(y)[j]) << (off[i] + off[j] - off[(i) + (j)]))
659 * Write x = a φ + b, and y = c φ + d. Then x y = a c φ^2 +
660 * (a d + b c) φ + b d. Karatsuba and Ofman observed that a d + b c =
661 * (a + b) (c + d) - a c - b d, saving a multiplication, and Hamburg chose
662 * the prime p so that φ^2 = φ + 1. So
664 * x y = ((a + b) (c + d) - b d) φ + a c + b d
667 for (i
= 0; i
< NPIECE
; i
++) zz
[i
] = 0;
669 /* Our first job will be to calculate (1 - φ) b d, and write the result
670 * into z. As we do this, an interesting thing will happen. Write
671 * b d = u φ + v; then (1 - φ) b d = u φ + v - u φ^2 - v φ = (1 - φ) v - u.
672 * So, what we do is to write the product end-swapped and negated, and then
673 * we'll subtract the (negated, remember) high half from the low half.
675 for (i
= 0; i
< NPIECE
/2; i
++) {
676 for (j
= 0; j
< NPIECE
/2 - i
; j
++)
677 zz
[i
+ j
+ NPIECE
/2] -= M(b
,i
, d
,j
);
678 for (; j
< NPIECE
/2; j
++)
679 zz
[i
+ j
- NPIECE
/2] -= M(b
,i
, d
,j
);
681 for (i
= 0; i
< NPIECE
/2; i
++)
682 zz
[i
] -= zz
[i
+ NPIECE
/2];
684 /* Next, we add on a c. There are no surprises here. */
685 for (i
= 0; i
< NPIECE
/2; i
++)
686 for (j
= 0; j
< NPIECE
/2; j
++)
687 zz
[i
+ j
] += M(a
,i
, c
,j
);
689 /* Now, calculate a + b and c + d. */
690 for (i
= 0; i
< NPIECE
/2; i
++)
691 { ab
[i
] = a
[i
] + b
[i
]; cd
[i
] = c
[i
] + d
[i
]; }
693 /* Finally (for the multiplication) we must add on (a + b) (c + d) φ.
694 * Write (a + b) (c + d) as u φ + v; then we actually want u φ^2 + v φ =
695 * v φ + (1 + φ) u. We'll store u in a temporary place and add it on
698 for (i
= 0; i
< NPIECE
; i
++) u
[i
] = 0;
699 for (i
= 0; i
< NPIECE
/2; i
++) {
700 for (j
= 0; j
< NPIECE
/2 - i
; j
++)
701 zz
[i
+ j
+ NPIECE
/2] += M(ab
,i
, cd
,j
);
702 for (; j
< NPIECE
/2; j
++)
703 u
[i
+ j
- NPIECE
/2] += M(ab
,i
, cd
,j
);
705 for (i
= 0; i
< NPIECE
/2; i
++)
706 { zz
[i
] += u
[i
]; zz
[i
+ NPIECE
/2] += u
[i
]; }
710 #if FGOLDI_IMPL == 28
711 /* That wraps it up for the multiplication. Let's figure out some bounds.
712 * Fortunately, Karatsuba is a polynomial identity, so all of the pieces
713 * end up the way they'd be if we'd done the thing the easy way, which
714 * simplifies the analysis. Suppose we started with |x_i|, |y_i| <= 9/5
715 * 2^28. The overheads in the result are given by the coefficients of
717 * ((u^16 - 1)/(u - 1))^2 mod u^16 - u^8 - 1
719 * the greatest of which is 38. So |z_i| <= 38*81/25*2^56 < 2^63.
721 * Anyway, a round of `CARRY_REDUCE' will leave us with |z_i| < 2^27 +
722 * 2^36; and a second round will leave us with |z_i| < 2^27 + 512.
724 for (i
= 0; i
< 2; i
++) CARRY_REDUCE(zz
, zz
);
725 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = zz
[i
];
726 #elif FGOLDI_IMPL == 12
728 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = zz
[i
];
732 /* --- @fgoldi_sqr@ --- *
734 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
735 * @const fgoldi *x@ = an operand
739 * Use: Set @z@ to the square %$x^2$%.
742 void fgoldi_sqr(fgoldi
*z
, const fgoldi
*x
)
744 #if FGOLDI_IMPL == 28
746 dblpiece zz
[NPIECE
], u
[NPIECE
];
748 const piece
*a
= x
->P
+ NPIECE
/2, *b
= x
->P
;
751 # define M(x,i, y,j) ((dblpiece)(x)[i]*(y)[j])
753 /* The magic is basically the same as `fgoldi_mul' above. We write
754 * x = a φ + b and use Karatsuba and the special prime shape. This time,
757 * x^2 = ((a + b)^2 - b^2) φ + a^2 + b^2
760 for (i
= 0; i
< NPIECE
; i
++) zz
[i
] = 0;
762 /* Our first job will be to calculate (1 - φ) b^2, and write the result
763 * into z. Again, this interacts pleasantly with the prime shape.
765 for (i
= 0; i
< NPIECE
/4; i
++) {
766 zz
[2*i
+ NPIECE
/2] -= M(b
,i
, b
,i
);
767 for (j
= i
+ 1; j
< NPIECE
/2 - i
; j
++)
768 zz
[i
+ j
+ NPIECE
/2] -= 2*M(b
,i
, b
,j
);
769 for (; j
< NPIECE
/2; j
++)
770 zz
[i
+ j
- NPIECE
/2] -= 2*M(b
,i
, b
,j
);
772 for (; i
< NPIECE
/2; i
++) {
773 zz
[2*i
- NPIECE
/2] -= M(b
,i
, b
,i
);
774 for (j
= i
+ 1; j
< NPIECE
/2; j
++)
775 zz
[i
+ j
- NPIECE
/2] -= 2*M(b
,i
, b
,j
);
777 for (i
= 0; i
< NPIECE
/2; i
++)
778 zz
[i
] -= zz
[i
+ NPIECE
/2];
780 /* Next, we add on a^2. There are no surprises here. */
781 for (i
= 0; i
< NPIECE
/2; i
++) {
782 zz
[2*i
] += M(a
,i
, a
,i
);
783 for (j
= i
+ 1; j
< NPIECE
/2; j
++)
784 zz
[i
+ j
] += 2*M(a
,i
, a
,j
);
787 /* Now, calculate a + b. */
788 for (i
= 0; i
< NPIECE
/2; i
++)
791 /* Finally (for the multiplication) we must add on (a + b)^2 φ.
792 * Write (a + b)^2 as u φ + v; then we actually want (u + v) φ + u. We'll
793 * store u in a temporary place and add it on twice.
795 for (i
= 0; i
< NPIECE
; i
++) u
[i
] = 0;
796 for (i
= 0; i
< NPIECE
/4; i
++) {
797 zz
[2*i
+ NPIECE
/2] += M(ab
,i
, ab
,i
);
798 for (j
= i
+ 1; j
< NPIECE
/2 - i
; j
++)
799 zz
[i
+ j
+ NPIECE
/2] += 2*M(ab
,i
, ab
,j
);
800 for (; j
< NPIECE
/2; j
++)
801 u
[i
+ j
- NPIECE
/2] += 2*M(ab
,i
, ab
,j
);
803 for (; i
< NPIECE
/2; i
++) {
804 u
[2*i
- NPIECE
/2] += M(ab
,i
, ab
,i
);
805 for (j
= i
+ 1; j
< NPIECE
/2; j
++)
806 u
[i
+ j
- NPIECE
/2] += 2*M(ab
,i
, ab
,j
);
808 for (i
= 0; i
< NPIECE
/2; i
++)
809 { zz
[i
] += u
[i
]; zz
[i
+ NPIECE
/2] += u
[i
]; }
813 /* Finally, carrying. */
814 for (i
= 0; i
< 2; i
++) CARRY_REDUCE(zz
, zz
);
815 for (i
= 0; i
< NPIECE
; i
++) z
->P
[i
] = zz
[i
];
817 #elif FGOLDI_IMPL == 12
822 /*----- More advanced operations ------------------------------------------*/
824 /* --- @fgoldi_inv@ --- *
826 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@)
827 * @const fgoldi *x@ = an operand
831 * Use: Stores in @z@ the multiplicative inverse %$x^{-1}$%. If
832 * %$x = 0$% then @z@ is set to zero. This is considered a
836 void fgoldi_inv(fgoldi
*z
, const fgoldi
*x
)
841 #define SQRN(z, x, n) do { \
842 fgoldi_sqr((z), (x)); \
843 for (i = 1; i < (n); i++) fgoldi_sqr((z), (z)); \
846 /* Calculate x^-1 = x^(p - 2) = x^(2^448 - 2^224 - 3), which also handles
847 * x = 0 as intended. The addition chain is home-made.
848 */ /* step | value */
849 fgoldi_sqr(&u
, x
); /* 1 | 2 */
850 fgoldi_mul(&t
, &u
, x
); /* 2 | 3 */
851 SQRN(&u
, &t
, 2); /* 4 | 12 */
852 fgoldi_mul(&t
, &u
, &t
); /* 5 | 15 */
853 SQRN(&u
, &t
, 4); /* 9 | 240 */
854 fgoldi_mul(&u
, &u
, &t
); /* 10 | 255 = 2^8 - 1 */
855 SQRN(&u
, &u
, 4); /* 14 | 2^12 - 16 */
856 fgoldi_mul(&t
, &u
, &t
); /* 15 | 2^12 - 1 */
857 SQRN(&u
, &t
, 12); /* 27 | 2^24 - 2^12 */
858 fgoldi_mul(&u
, &u
, &t
); /* 28 | 2^24 - 1 */
859 SQRN(&u
, &u
, 12); /* 40 | 2^36 - 2^12 */
860 fgoldi_mul(&t
, &u
, &t
); /* 41 | 2^36 - 1 */
861 fgoldi_sqr(&t
, &t
); /* 42 | 2^37 - 2 */
862 fgoldi_mul(&t
, &t
, x
); /* 43 | 2^37 - 1 */
863 SQRN(&u
, &t
, 37); /* 80 | 2^74 - 2^37 */
864 fgoldi_mul(&u
, &u
, &t
); /* 81 | 2^74 - 1 */
865 SQRN(&u
, &u
, 37); /* 118 | 2^111 - 2^37 */
866 fgoldi_mul(&t
, &u
, &t
); /* 119 | 2^111 - 1 */
867 SQRN(&u
, &t
, 111); /* 230 | 2^222 - 2^111 */
868 fgoldi_mul(&t
, &u
, &t
); /* 231 | 2^222 - 1 */
869 fgoldi_sqr(&u
, &t
); /* 232 | 2^223 - 2 */
870 fgoldi_mul(&u
, &u
, x
); /* 233 | 2^223 - 1 */
871 SQRN(&u
, &u
, 223); /* 456 | 2^446 - 2^223 */
872 fgoldi_mul(&t
, &u
, &t
); /* 457 | 2^446 - 2^222 - 1 */
873 SQRN(&t
, &t
, 2); /* 459 | 2^448 - 2^224 - 4 */
874 fgoldi_mul(z
, &t
, x
); /* 460 | 2^448 - 2^224 - 3 */
879 /* --- @fgoldi_quosqrt@ --- *
881 * Arguments: @fgoldi *z@ = where to put the result (may alias @x@ or @y@)
882 * @const fgoldi *x, *y@ = two operands
884 * Returns: Zero if successful, @-1@ if %$x/y$% is not a square.
886 * Use: Stores in @z@ the one of the square roots %$\pm\sqrt{x/y}$%.
887 * If %$x = y = 0% then the result is zero; if %$y = 0$% but %$x
888 * \ne 0$% then the operation fails. If you wanted a specific
889 * square root then you'll have to pick it yourself.
892 int fgoldi_quosqrt(fgoldi
*z
, const fgoldi
*x
, const fgoldi
*y
)
895 octet xb
[56], b0
[56];
900 #define SQRN(z, x, n) do { \
901 fgoldi_sqr((z), (x)); \
902 for (i = 1; i < (n); i++) fgoldi_sqr((z), (z)); \
905 /* This is, fortunately, significantly easier than the equivalent problem
906 * in GF(2^255 - 19), since p == 3 (mod 4).
908 * If x/y is square, then so is v = y^2 x/y = x y, and therefore u has
909 * order r = (p - 1)/2. Let w = v^{(p-3)/4}. Then w^2 = v^{(p-3)/2} =
910 * u^{r-1} = 1/v = 1/x y. Clearly, then, (x w)^2 = x^2/x y = x/y, so x w
911 * is one of the square roots we seek.
913 * The addition chain, then, is a prefix of the previous one.
915 fgoldi_mul(&v
, x
, y
);
917 fgoldi_sqr(&u
, &v
); /* 1 | 2 */
918 fgoldi_mul(&t
, &u
, &v
); /* 2 | 3 */
919 SQRN(&u
, &t
, 2); /* 4 | 12 */
920 fgoldi_mul(&t
, &u
, &t
); /* 5 | 15 */
921 SQRN(&u
, &t
, 4); /* 9 | 240 */
922 fgoldi_mul(&u
, &u
, &t
); /* 10 | 255 = 2^8 - 1 */
923 SQRN(&u
, &u
, 4); /* 14 | 2^12 - 16 */
924 fgoldi_mul(&t
, &u
, &t
); /* 15 | 2^12 - 1 */
925 SQRN(&u
, &t
, 12); /* 27 | 2^24 - 2^12 */
926 fgoldi_mul(&u
, &u
, &t
); /* 28 | 2^24 - 1 */
927 SQRN(&u
, &u
, 12); /* 40 | 2^36 - 2^12 */
928 fgoldi_mul(&t
, &u
, &t
); /* 41 | 2^36 - 1 */
929 fgoldi_sqr(&t
, &t
); /* 42 | 2^37 - 2 */
930 fgoldi_mul(&t
, &t
, &v
); /* 43 | 2^37 - 1 */
931 SQRN(&u
, &t
, 37); /* 80 | 2^74 - 2^37 */
932 fgoldi_mul(&u
, &u
, &t
); /* 81 | 2^74 - 1 */
933 SQRN(&u
, &u
, 37); /* 118 | 2^111 - 2^37 */
934 fgoldi_mul(&t
, &u
, &t
); /* 119 | 2^111 - 1 */
935 SQRN(&u
, &t
, 111); /* 230 | 2^222 - 2^111 */
936 fgoldi_mul(&t
, &u
, &t
); /* 231 | 2^222 - 1 */
937 fgoldi_sqr(&u
, &t
); /* 232 | 2^223 - 2 */
938 fgoldi_mul(&u
, &u
, &v
); /* 233 | 2^223 - 1 */
939 SQRN(&u
, &u
, 223); /* 456 | 2^446 - 2^223 */
940 fgoldi_mul(&t
, &u
, &t
); /* 457 | 2^446 - 2^222 - 1 */
944 /* Now we must decide whether the answer was right. We should have z^2 =
947 * The easiest way to compare is to encode. This isn't as wasteful as it
948 * sounds: the hard part is normalizing the representations, which we have
951 fgoldi_mul(z
, x
, &t
);
953 fgoldi_mul(&t
, &t
, y
);
955 fgoldi_store(b0
, &t
);
956 m
= -ct_memeq(xb
, b0
, 56);
957 rc
= PICK2(0, rc
, m
);
961 /*----- Test rig ----------------------------------------------------------*/
965 #include <mLib/macros.h>
966 #include <mLib/report.h>
967 #include <mLib/str.h>
968 #include <mLib/testrig.h>
970 static void fixdstr(dstr
*d
)
973 die(1, "invalid length for fgoldi");
974 else if (d
->len
< 56) {
976 memset(d
->buf
+ d
->len
, 0, 56 - d
->len
);
981 static void cvt_fgoldi(const char *buf
, dstr
*d
)
985 type_hex
.cvt(buf
, &dd
); fixdstr(&dd
);
986 dstr_ensure(d
, sizeof(fgoldi
)); d
->len
= sizeof(fgoldi
);
987 fgoldi_load((fgoldi
*)d
->buf
, (const octet
*)dd
.buf
);
991 static void dump_fgoldi(dstr
*d
, FILE *fp
)
992 { fdump(stderr
, "???", (const piece
*)d
->buf
); }
994 static void cvt_fgoldi_ref(const char *buf
, dstr
*d
)
995 { type_hex
.cvt(buf
, d
); fixdstr(d
); }
997 static void dump_fgoldi_ref(dstr
*d
, FILE *fp
)
1001 fgoldi_load(&x
, (const octet
*)d
->buf
);
1002 fdump(stderr
, "???", x
.P
);
1005 static int eq(const fgoldi
*x
, dstr
*d
)
1006 { octet b
[56]; fgoldi_store(b
, x
); return (MEMCMP(b
, ==, d
->buf
, 56)); }
1008 static const test_type
1009 type_fgoldi
= { cvt_fgoldi
, dump_fgoldi
},
1010 type_fgoldi_ref
= { cvt_fgoldi_ref
, dump_fgoldi_ref
};
1012 #define TEST_UNOP(op) \
1013 static int vrf_##op(dstr dv[]) \
1015 fgoldi *x = (fgoldi *)dv[0].buf; \
1019 fgoldi_##op(&z, x); \
1020 if (!eq(&z, &dv[1])) { \
1022 fprintf(stderr, "failed!\n"); \
1023 fdump(stderr, "x", x->P); \
1024 fdump(stderr, "calc", z.P); \
1025 fgoldi_load(&zz, (const octet *)dv[1].buf); \
1026 fdump(stderr, "z", zz.P); \
1036 #define TEST_BINOP(op) \
1037 static int vrf_##op(dstr dv[]) \
1039 fgoldi *x = (fgoldi *)dv[0].buf, *y = (fgoldi *)dv[1].buf; \
1043 fgoldi_##op(&z, x, y); \
1044 if (!eq(&z, &dv[2])) { \
1046 fprintf(stderr, "failed!\n"); \
1047 fdump(stderr, "x", x->P); \
1048 fdump(stderr, "y", y->P); \
1049 fdump(stderr, "calc", z.P); \
1050 fgoldi_load(&zz, (const octet *)dv[2].buf); \
1051 fdump(stderr, "z", zz.P); \
1061 static int vrf_mulc(dstr dv
[])
1063 fgoldi
*x
= (fgoldi
*)dv
[0].buf
;
1064 long a
= *(const long *)dv
[1].buf
;
1068 fgoldi_mulconst(&z
, x
, a
);
1069 if (!eq(&z
, &dv
[2])) {
1071 fprintf(stderr
, "failed!\n");
1072 fdump(stderr
, "x", x
->P
);
1073 fprintf(stderr
, "a = %ld\n", a
);
1074 fdump(stderr
, "calc", z
.P
);
1075 fgoldi_load(&zz
, (const octet
*)dv
[2].buf
);
1076 fdump(stderr
, "z", zz
.P
);
1082 static int vrf_condneg(dstr dv
[])
1084 fgoldi
*x
= (fgoldi
*)dv
[0].buf
;
1085 uint32 m
= *(uint32
*)dv
[1].buf
;
1089 fgoldi_condneg(&z
, x
, m
);
1090 if (!eq(&z
, &dv
[2])) {
1092 fprintf(stderr
, "failed!\n");
1093 fdump(stderr
, "x", x
->P
);
1094 fprintf(stderr
, "m = 0x%08lx\n", (unsigned long)m
);
1095 fdump(stderr
, "calc z", z
.P
);
1096 fgoldi_load(&z
, (const octet
*)dv
[1].buf
);
1097 fdump(stderr
, "want z", z
.P
);
1103 static int vrf_pick2(dstr dv
[])
1105 fgoldi
*x
= (fgoldi
*)dv
[0].buf
, *y
= (fgoldi
*)dv
[1].buf
;
1106 uint32 m
= *(uint32
*)dv
[2].buf
;
1110 fgoldi_pick2(&z
, x
, y
, m
);
1111 if (!eq(&z
, &dv
[3])) {
1113 fprintf(stderr
, "failed!\n");
1114 fdump(stderr
, "x", x
->P
);
1115 fdump(stderr
, "y", y
->P
);
1116 fprintf(stderr
, "m = 0x%08lx\n", (unsigned long)m
);
1117 fdump(stderr
, "calc z", z
.P
);
1118 fgoldi_load(&z
, (const octet
*)dv
[3].buf
);
1119 fdump(stderr
, "want z", z
.P
);
1125 static int vrf_pickn(dstr dv
[])
1129 size_t i
= *(uint32
*)dv
[1].buf
, j
, n
;
1134 for (q
= dv
[0].buf
, n
= 0; (p
= str_qword(&q
, 0)) != 0; n
++)
1135 { cvt_fgoldi(p
, &d
); v
[n
] = *(fgoldi
*)d
.buf
; }
1137 fgoldi_pickn(&z
, v
, n
, i
);
1138 if (!eq(&z
, &dv
[2])) {
1140 fprintf(stderr
, "failed!\n");
1141 for (j
= 0; j
< n
; j
++) {
1142 fprintf(stderr
, "v[%2u]", (unsigned)j
);
1143 fdump(stderr
, "", v
[j
].P
);
1145 fprintf(stderr
, "i = %u\n", (unsigned)i
);
1146 fdump(stderr
, "calc z", z
.P
);
1147 fgoldi_load(&z
, (const octet
*)dv
[2].buf
);
1148 fdump(stderr
, "want z", z
.P
);
1155 static int vrf_condswap(dstr dv
[])
1157 fgoldi
*x
= (fgoldi
*)dv
[0].buf
, *y
= (fgoldi
*)dv
[1].buf
;
1158 fgoldi xx
= *x
, yy
= *y
;
1159 uint32 m
= *(uint32
*)dv
[2].buf
;
1162 fgoldi_condswap(&xx
, &yy
, m
);
1163 if (!eq(&xx
, &dv
[3]) || !eq(&yy
, &dv
[4])) {
1165 fprintf(stderr
, "failed!\n");
1166 fdump(stderr
, "x", x
->P
);
1167 fdump(stderr
, "y", y
->P
);
1168 fprintf(stderr
, "m = 0x%08lx\n", (unsigned long)m
);
1169 fdump(stderr
, "calc xx", xx
.P
);
1170 fdump(stderr
, "calc yy", yy
.P
);
1171 fgoldi_load(&xx
, (const octet
*)dv
[3].buf
);
1172 fgoldi_load(&yy
, (const octet
*)dv
[4].buf
);
1173 fdump(stderr
, "want xx", xx
.P
);
1174 fdump(stderr
, "want yy", yy
.P
);
1180 static int vrf_quosqrt(dstr dv
[])
1182 fgoldi
*x
= (fgoldi
*)dv
[0].buf
, *y
= (fgoldi
*)dv
[1].buf
;
1187 if (dv
[2].len
) { fixdstr(&dv
[2]); fixdstr(&dv
[3]); }
1188 rc
= fgoldi_quosqrt(&z
, x
, y
);
1189 if (!dv
[2].len ?
!rc
: (rc
|| (!eq(&z
, &dv
[2]) && !eq(&z
, &dv
[3])))) {
1191 fprintf(stderr
, "failed!\n");
1192 fdump(stderr
, "x", x
->P
);
1193 fdump(stderr
, "y", y
->P
);
1194 if (rc
) fprintf(stderr
, "calc: FAIL\n");
1195 else fdump(stderr
, "calc", z
.P
);
1197 fprintf(stderr
, "exp: FAIL\n");
1199 fgoldi_load(&zz
, (const octet
*)dv
[2].buf
);
1200 fdump(stderr
, "z", zz
.P
);
1201 fgoldi_load(&zz
, (const octet
*)dv
[3].buf
);
1202 fdump(stderr
, "z'", zz
.P
);
1209 static int vrf_sub_mulc_add_sub_mul(dstr dv
[])
1211 fgoldi
*u
= (fgoldi
*)dv
[0].buf
, *v
= (fgoldi
*)dv
[1].buf
,
1212 *w
= (fgoldi
*)dv
[3].buf
, *x
= (fgoldi
*)dv
[4].buf
,
1213 *y
= (fgoldi
*)dv
[5].buf
;
1214 long a
= *(const long *)dv
[2].buf
;
1215 fgoldi umv
, aumv
, wpaumv
, xmy
, z
, zz
;
1218 fgoldi_sub(&umv
, u
, v
);
1219 fgoldi_mulconst(&aumv
, &umv
, a
);
1220 fgoldi_add(&wpaumv
, w
, &aumv
);
1221 fgoldi_sub(&xmy
, x
, y
);
1222 fgoldi_mul(&z
, &wpaumv
, &xmy
);
1224 if (!eq(&z
, &dv
[6])) {
1226 fprintf(stderr
, "failed!\n");
1227 fdump(stderr
, "u", u
->P
);
1228 fdump(stderr
, "v", v
->P
);
1229 fdump(stderr
, "u - v", umv
.P
);
1230 fprintf(stderr
, "a = %ld\n", a
);
1231 fdump(stderr
, "a (u - v)", aumv
.P
);
1232 fdump(stderr
, "w + a (u - v)", wpaumv
.P
);
1233 fdump(stderr
, "x", x
->P
);
1234 fdump(stderr
, "y", y
->P
);
1235 fdump(stderr
, "x - y", xmy
.P
);
1236 fdump(stderr
, "(x - y) (w + a (u - v))", z
.P
);
1237 fgoldi_load(&zz
, (const octet
*)dv
[6].buf
); fdump(stderr
, "z", zz
.P
);
1243 static test_chunk tests
[] = {
1244 { "add", vrf_add
, { &type_fgoldi
, &type_fgoldi
, &type_fgoldi_ref
} },
1245 { "sub", vrf_sub
, { &type_fgoldi
, &type_fgoldi
, &type_fgoldi_ref
} },
1246 { "neg", vrf_neg
, { &type_fgoldi
, &type_fgoldi_ref
} },
1247 { "condneg", vrf_condneg
,
1248 { &type_fgoldi
, &type_uint32
, &type_fgoldi_ref
} },
1249 { "mul", vrf_mul
, { &type_fgoldi
, &type_fgoldi
, &type_fgoldi_ref
} },
1250 { "mulconst", vrf_mulc
, { &type_fgoldi
, &type_long
, &type_fgoldi_ref
} },
1251 { "pick2", vrf_pick2
,
1252 { &type_fgoldi
, &type_fgoldi
, &type_uint32
, &type_fgoldi_ref
} },
1253 { "pickn", vrf_pickn
,
1254 { &type_string
, &type_uint32
, &type_fgoldi_ref
} },
1255 { "condswap", vrf_condswap
,
1256 { &type_fgoldi
, &type_fgoldi
, &type_uint32
,
1257 &type_fgoldi_ref
, &type_fgoldi_ref
} },
1258 { "sqr", vrf_sqr
, { &type_fgoldi
, &type_fgoldi_ref
} },
1259 { "inv", vrf_inv
, { &type_fgoldi
, &type_fgoldi_ref
} },
1260 { "quosqrt", vrf_quosqrt
,
1261 { &type_fgoldi
, &type_fgoldi
, &type_hex
, &type_hex
} },
1262 { "sub-mulc-add-sub-mul", vrf_sub_mulc_add_sub_mul
,
1263 { &type_fgoldi
, &type_fgoldi
, &type_long
, &type_fgoldi
,
1264 &type_fgoldi
, &type_fgoldi
, &type_fgoldi_ref
} },
1268 int main(int argc
, char *argv
[])
1270 test_run(argc
, argv
, tests
, SRCDIR
"/t/fgoldi");
1276 /*----- That's all, folks -------------------------------------------------*/