ec-field-test.c: Make the field-element type use internal format.
[secnet] / scaf.c
1 /* -*-c-*-
2 *
3 * Simple scalar fields
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8 /*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of secnet.
11 * See README for full list of copyright holders.
12 *
13 * secnet is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version d of the License, or
16 * (at your option) any later version.
17 *
18 * secnet is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * version 3 along with secnet; if not, see
25 * https://www.gnu.org/licenses/gpl.html.
26 *
27 * This file was originally part of Catacomb, but has been automatically
28 * modified for incorporation into secnet: see `import-catacomb-crypto'
29 * for details.
30 *
31 * Catacomb is free software; you can redistribute it and/or modify
32 * it under the terms of the GNU Library General Public License as
33 * published by the Free Software Foundation; either version 2 of the
34 * License, or (at your option) any later version.
35 *
36 * Catacomb is distributed in the hope that it will be useful,
37 * but WITHOUT ANY WARRANTY; without even the implied warranty of
38 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39 * GNU Library General Public License for more details.
40 *
41 * You should have received a copy of the GNU Library General Public
42 * License along with Catacomb; if not, write to the Free
43 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
44 * MA 02111-1307, USA.
45 */
46
47 /*----- Header files ------------------------------------------------------*/
48
49 #include <string.h>
50
51 #include "scaf.h"
52
53 /*----- Debugging utilties ------------------------------------------------*/
54
55 #ifdef SCAF_DEBUG
56
57 #include <stdio.h>
58
59 #include "mp.h"
60 #include "mpint.h"
61 #include "mptext.h"
62
63 static void scaf_dump(const char *what, const scaf_piece *x,
64 size_t npiece, size_t piecewd)
65 {
66 mp *y = MP_ZERO, *t = MP_NEW;
67 size_t i;
68 unsigned o = 0;
69
70 for (i = 0; i < npiece; i++) {
71 t = mp_fromuint64(t, x[i]);
72 t = mp_lsl(t, t, o);
73 y = mp_add(y, y, t);
74 o += piecewd;
75 }
76 printf(";; %s", what); MP_PRINT("", y); putchar('\n');
77 mp_drop(y); mp_drop(t);
78 }
79
80 static void scaf_dumpdbl(const char *what, const scaf_dblpiece *x,
81 size_t npiece, size_t piecewd)
82 {
83 mp *y = MP_ZERO, *t = MP_NEW;
84 size_t i;
85 unsigned o = 0;
86
87 for (i = 0; i < npiece; i++) {
88 t = mp_fromuint64(t, x[i]);
89 t = mp_lsl(t, t, o);
90 y = mp_add(y, y, t);
91 o += piecewd;
92 }
93 printf(";; %s", what); MP_PRINT("", y); putchar('\n');
94 mp_drop(y); mp_drop(t);
95 }
96
97 #endif
98
99 /*----- Main code ---------------------------------------------------------*/
100
101 /* --- @scaf_load@ --- *
102 *
103 * Arguments: @scaf_piece *z@ = where to write the result
104 * @const octet *b@ = source buffer to read
105 * @size_t sz@ = size of the source buffer
106 * @size_t npiece@ = number of pieces to read
107 * @unsigned piecewd@ = nominal width of pieces in bits
108 *
109 * Returns: ---
110 *
111 * Use: Loads a little-endian encoded scalar into a vector @z@ of
112 * single-precision pieces.
113 */
114
115 void scaf_load(scaf_piece *z, const octet *b, size_t sz,
116 size_t npiece, unsigned piecewd)
117 {
118 uint32 a, m = ((scaf_piece)1 << piecewd) - 1;
119 unsigned i, j, n;
120
121 for (i = j = n = 0, a = 0; i < sz; i++) {
122 a |= b[i] << n; n += 8;
123 if (n >= piecewd) {
124 z[j++] = a&m; a >>= piecewd; n -= piecewd;
125 if (j >= npiece) return;
126 }
127 }
128 z[j++] = a;
129 while (j < npiece) z[j++] = 0;
130 }
131
132 /* --- @scaf_loaddbl@ --- *
133 *
134 * Arguments: @scaf_dblpiece *z@ = where to write the result
135 * @const octet *b@ = source buffer to read
136 * @size_t sz@ = size of the source buffer
137 * @size_t npiece@ = number of pieces to read
138 * @unsigned piecewd@ = nominal width of pieces in bits
139 *
140 * Returns: ---
141 *
142 * Use: Loads a little-endian encoded scalar into a vector @z@ of
143 * double-precision pieces.
144 */
145
146 void scaf_loaddbl(scaf_dblpiece *z, const octet *b, size_t sz,
147 size_t npiece, unsigned piecewd)
148 {
149 uint32 a, m = ((scaf_piece)1 << piecewd) - 1;
150 unsigned i, j, n;
151
152 for (i = j = n = 0, a = 0; i < sz; i++) {
153 a |= b[i] << n; n += 8;
154 if (n >= piecewd) {
155 z[j++] = a&m; a >>= piecewd; n -= piecewd;
156 if (j >= npiece) return;
157 }
158 }
159 z[j++] = a;
160 while (j < npiece) z[j++] = 0;
161 }
162
163 /* --- @scaf_store@ --- *
164 *
165 * Arguments: @octet *b@ = buffer to fill in
166 * @size_t sz@ = size of the buffer
167 * @const scaf_piece *x@ = scalar to store
168 * @size_t npiece@ = number of pieces in @x@
169 * @unsigned piecewd@ = nominal width of pieces in bits
170 *
171 * Returns: ---
172 *
173 * Use: Stores a scalar in a vector of single-precison pieces as a
174 * little-endian vector of bytes.
175 */
176
177 void scaf_store(octet *b, size_t sz, const scaf_piece *x,
178 size_t npiece, unsigned piecewd)
179 {
180 uint32 a;
181 unsigned i, j, n;
182
183 for (i = j = n = 0, a = 0; i < npiece; i++) {
184 a |= x[i] << n; n += piecewd;
185 while (n >= 8) {
186 b[j++] = a&0xffu; a >>= 8; n -= 8;
187 if (j >= sz) return;
188 }
189 }
190 b[j++] = a;
191 memset(b + j, 0, sz - j);
192 }
193
194 /* --- @scaf_mul@ --- *
195 *
196 * Arguments: @scaf_dblpiece *z@ = where to put the answer
197 * @const scaf_piece *x, *y@ = the operands
198 * @size_t npiece@ = the length of the operands
199 *
200 * Returns: ---
201 *
202 * Use: Multiply two scalars. The destination must have space for
203 * @2*npiece@ pieces (though the last one will always be zero).
204 * The result is not reduced.
205 */
206
207 void scaf_mul(scaf_dblpiece *z, const scaf_piece *x, const scaf_piece *y,
208 size_t npiece)
209 {
210 unsigned i, j;
211
212 for (i = 0; i < 2*npiece; i++) z[i] = 0;
213
214 for (i = 0; i < npiece; i++)
215 for (j = 0; j < npiece; j++)
216 z[i + j] += (scaf_dblpiece)x[i]*y[j];
217 }
218
219 /* --- @scaf_reduce@ --- *
220 *
221 * Arguments: @scaf_piece *z@ = where to write the result
222 * @const scaf_dblpiece *x@ = the operand to reduce
223 * @const scaf_piece *l@ = the modulus, in internal format
224 * @const scaf_piece *mu@ = scaled approximation to @1/l@
225 * @size_t npiece@ = number of pieces in @l@
226 * @unsigned piecewd@ = nominal width of a piece in bits
227 * @scaf_piece *scratch@ = @3*npiece@ scratch pieces
228 *
229 * Returns: ---
230 *
231 * Use: Reduce @x@ (a vector of @2*npiece@ double-precision pieces)
232 * modulo @l@ (a vector of @npiece@ single-precision pieces),
233 * writing the result to @z@.
234 *
235 * Write @n = npiece@, @w = piecewd@, and %$B = 2^w$%. The
236 * operand @mu@ must contain %$\lfloor B^{2n}/l \rfloor$%, in
237 * @npiece + 1@ pieces. Furthermore, we must have
238 * %$3 l < B^n$%. (Fiddle with %$w$% if necessary.)
239 */
240
241 void scaf_reduce(scaf_piece *z, const scaf_dblpiece *x,
242 const scaf_piece *l, const scaf_piece *mu,
243 size_t npiece, unsigned piecewd, scaf_piece *scratch)
244 {
245 unsigned i, j;
246 scaf_piece *t = scratch, *q = scratch + 2*npiece;
247 scaf_piece u, m = ((scaf_piece)1 << piecewd) - 1;
248 scaf_dblpiece c;
249
250 /* This here is the hard part.
251 *
252 * Let w = PIECEWD, let n = NPIECE, and let B = 2^w. We must have
253 * B^(n-1) <= l < B^n.
254 *
255 * The argument MU contains pieces of the quantity µ = floor(B^2n/l), which
256 * is a scaled approximation to 1/l. We'll calculate
257 *
258 * q = floor(µ floor(x/B^(n-1))/B^(n+1))
259 *
260 * which is an underestimate of x/l.
261 *
262 * With a bit more precision: by definition, u - 1 < floor(u) <= u. Hence,
263 *
264 * B^2n/l - 1 < µ <= B^2/l
265 *
266 * and
267 *
268 * x/B^(n-1) - 1 < floor(x/B^(n-1)) <= x/B^(n-1)
269 *
270 * Multiplying these together, and dividing through by B^(n+1), gives
271 *
272 * floor(x/l - B^(n-1)/l - x/B^2n + 1/B^(n+1)) <=
273 * q <= µ floor(x/B^(n-1))/B^(n+1) <= floor(x/l)
274 *
275 * Now, noticing that x < B^2n and l > B^(n-1) shows that x/B^2n and
276 * B^(n-1)/l are each less than 1; hence
277 *
278 * floor(x/l) - 2 <= q <= floor(x/l) <= x/l
279 *
280 * Now we set r = x - q l. Certainly, r == x (mod l); and
281 *
282 * 0 <= r < x - l floor(x/l) + 2 l < 3 l < B^n
283 */
284
285 /* Before we start on the fancy stuff, we need to resolve the pending
286 * carries in x. We'll be doing the floor division by just ignoring some
287 * of the pieces, and it would be bad if we missed some significant bits.
288 * Of course, this means that we don't actually have to store the low
289 * NPIECE - 1 pieces of the result.
290 */
291 for (i = 0, c = 0; i < 2*npiece; i++)
292 { c += x[i]; t[i] = c&m; c >>= piecewd; }
293
294 /* Now we calculate q. If we calculate this in product-scanning order, we
295 * can avoid having to store the low NPIECE + 1 pieces of the product as
296 * long as we keep track of the carry out properly. Conveniently, NMU =
297 * NPIECE + 1, which keeps the loop bounds easy in the first pass.
298 *
299 * Furthermore, because we know that r fits in NPIECE pieces, we only need
300 * the low NPIECE pieces of q.
301 */
302 for (i = 0, c = 0; i < npiece + 1; i++) {
303 for (j = 0; j <= i; j++)
304 c += (scaf_dblpiece)t[j + npiece - 1]*mu[i - j];
305 c >>= piecewd;
306 }
307 for (i = 0; i < npiece; i++) {
308 for (j = i + 1; j < npiece + 1; j++)
309 c += (scaf_dblpiece)t[j + npiece - 1]*mu[npiece + 1 + i - j];
310 q[i] = c&m; c >>= piecewd;
311 }
312
313 /* Next, we calculate r - q l in z. Product-scanning seems to be working
314 * out for us, and this time it will save us needing a large temporary
315 * space for the product q l as we go. On the downside, we have to track
316 * the carries from the multiplication and subtraction separately.
317 *
318 * Notice that the result r is at most NPIECE pieces long, so we can stop
319 * once we have that many.
320 */
321 u = 1; c = 0;
322 for (i = 0; i < npiece; i++) {
323 for (j = 0; j <= i; j++) c += (scaf_dblpiece)q[j]*l[i - j];
324 u += t[i] + ((scaf_piece)(c&m) ^ m);
325 z[i] = u&m; u >>= piecewd; c >>= piecewd;
326 }
327
328 /* Finally, two passes of conditional subtraction. Calculate t = z - l; if
329 * there's no borrow out the top, then update z = t; otherwise leave t
330 * alone.
331 */
332 for (i = 0; i < 2; i++) {
333 for (j = 0, u = 1; j < npiece; j++) {
334 u += z[j] + (l[j] ^ m);
335 t[j] = u&m; u >>= piecewd;
336 }
337 for (j = 0, u = -u; j < npiece; j++) z[j] = (t[j]&u) | (z[j]&~u);
338 }
339 }
340
341 /*----- That's all, folks -------------------------------------------------*/