pub/, progs/: Implement Bernstein's Ed25519 signature scheme.
[catacomb] / math / scaf.c
CommitLineData
d56fd9d1
MW
1/* -*-c-*-
2 *
3 * Simple scalar fields
4 *
5 * (c) 2017 Straylight/Edgeware
6 */
7
8/*----- Licensing notice --------------------------------------------------*
9 *
10 * This file is part of Catacomb.
11 *
12 * Catacomb is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Library General Public License as
14 * published by the Free Software Foundation; either version 2 of the
15 * License, or (at your option) any later version.
16 *
17 * Catacomb is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public
23 * License along with Catacomb; if not, write to the Free
24 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 * MA 02111-1307, USA.
26 */
27
28/*----- Header files ------------------------------------------------------*/
29
30#include <string.h>
31
32#include "scaf.h"
33
34/*----- Main code ---------------------------------------------------------*/
35
36/* --- @scaf_load@ --- *
37 *
38 * Arguments: @scaf_piece *z@ = where to write the result
39 * @const octet *b@ = source buffer to read
40 * @size_t sz@ = size of the source buffer
41 * @size_t npiece@ = number of pieces to read
42 * @unsigned piecewd@ = nominal width of pieces in bits
43 *
44 * Returns: ---
45 *
46 * Use: Loads a little-endian encoded scalar into a vector @z@ of
47 * single-precision pieces.
48 */
49
50void scaf_load(scaf_piece *z, const octet *b, size_t sz,
51 size_t npiece, unsigned piecewd)
52{
53 uint32 a, m = ((scaf_piece)1 << piecewd) - 1;
54 unsigned i, j, n;
55
56 for (i = j = n = 0, a = 0; i < sz; i++) {
57 a |= b[i] << n; n += 8;
58 if (n >= piecewd) {
59 z[j++] = a&m; a >>= piecewd; n -= piecewd;
60 if (j >= npiece) return;
61 }
62 }
63 z[j++] = a;
64 while (j < npiece) z[j++] = 0;
65}
66
67/* --- @scaf_loaddbl@ --- *
68 *
69 * Arguments: @scaf_dblpiece *z@ = where to write the result
70 * @const octet *b@ = source buffer to read
71 * @size_t sz@ = size of the source buffer
72 * @size_t npiece@ = number of pieces to read
73 * @unsigned piecewd@ = nominal width of pieces in bits
74 *
75 * Returns: ---
76 *
77 * Use: Loads a little-endian encoded scalar into a vector @z@ of
78 * double-precision pieces.
79 */
80
81void scaf_loaddbl(scaf_dblpiece *z, const octet *b, size_t sz,
82 size_t npiece, unsigned piecewd)
83{
84 uint32 a, m = ((scaf_piece)1 << piecewd) - 1;
85 unsigned i, j, n;
86
87 for (i = j = n = 0, a = 0; i < sz; i++) {
88 a |= b[i] << n; n += 8;
89 if (n >= piecewd) {
90 z[j++] = a&m; a >>= piecewd; n -= piecewd;
91 if (j >= npiece) return;
92 }
93 }
94 z[j++] = a;
95 while (j < npiece) z[j++] = 0;
96}
97
98/* --- @scaf_store@ --- *
99 *
100 * Arguments: @octet *b@ = buffer to fill in
101 * @size_t sz@ = size of the buffer
102 * @const scaf_piece *x@ = scalar to store
103 * @size_t npiece@ = number of pieces in @x@
104 * @unsigned piecewd@ = nominal width of pieces in bits
105 *
106 * Returns: ---
107 *
108 * Use: Stores a scalar in a vector of single-precison pieces as a
109 * little-endian vector of bytes.
110 */
111
112void scaf_store(octet *b, size_t sz, const scaf_piece *x,
113 size_t npiece, unsigned piecewd)
114{
115 uint32 a;
116 unsigned i, j, n;
117
118 for (i = j = n = 0, a = 0; i < npiece; i++) {
119 a |= x[i] << n; n += piecewd;
120 while (n >= 8) {
121 b[j++] = a&0xffu; a >>= 8; n -= 8;
122 if (j >= sz) return;
123 }
124 }
125 b[j++] = a;
126 memset(b + j, 0, sz - j);
127}
128
129/* --- @scaf_mul@ --- *
130 *
131 * Arguments: @scaf_dblpiece *z@ = where to put the answer
132 * @const scaf_piece *x, *y@ = the operands
133 * @size_t npiece@ = the length of the operands
134 *
135 * Returns: ---
136 *
137 * Use: Multiply two scalars. The destination must have space for
138 * @2*npiece@ pieces (though the last one will always be zero).
139 * The result is not reduced.
140 */
141
142void scaf_mul(scaf_dblpiece *z, const scaf_piece *x, const scaf_piece *y,
143 size_t npiece)
144{
145 unsigned i, j;
146
147 for (i = 0; i < 2*npiece; i++) z[i] = 0;
148
149 for (i = 0; i < npiece; i++)
150 for (j = 0; j < npiece; j++)
151 z[i + j] += (scaf_dblpiece)x[i]*y[j];
152}
153
154/* --- @scaf_reduce@ --- *
155 *
156 * Arguments: @scaf_piece *z@ = where to write the result
157 * @const scaf_dblpiece *x@ = the operand to reduce
158 * @const scaf_piece *l@ = the modulus, in internal format
159 * @const scaf_piece *mu@ = scaled approximation to @1/l@
160 * @size_t npiece@ = number of pieces in @l@
161 * @unsigned piecewd@ = nominal width of a piece in bits
162 * @scaf_piece *scratch@ = @3*npiece + 1@ scratch pieces
163 *
164 * Returns: ---
165 *
166 * Use: Reduce @x@ (a vector of @2*npiece@ double-precision pieces)
167 * modulo @l@ (a vector of @npiece@ single-precision pieces),
168 * writing the result to @z@.
169 *
170 * Write @n = npiece@, @w = piecewd@, and %$B = 2^w$%. The
171 * operand @mu@ must contain %$\lfloor B^{2n}/l \rfloor$%, in
172 * @npiece + 1@ pieces. Furthermore, we must have
173 * %$3 l < B^n$%. (Fiddle with %$w$% if necessary.)
174 */
175
176void scaf_reduce(scaf_piece *z, const scaf_dblpiece *x,
177 const scaf_piece *l, const scaf_piece *mu,
178 size_t npiece, unsigned piecewd, scaf_piece *scratch)
179{
180 unsigned i, j;
181 scaf_piece *t = scratch, *q = scratch + 2*npiece;
182 scaf_piece u, m = ((scaf_piece)1 << piecewd) - 1;
183 scaf_dblpiece c;
184
185 /* This here is the hard part.
186 *
187 * Let w = PIECEWD, let n = NPIECE, and let B = 2^w. Wwe must have
188 * B^(n-1) <= l < B^n.
189 *
190 * The argument MU contains pieces of the quantity µ = floor(B^2n/l), which
191 * is a scaled approximation to 1/l. We'll calculate
192 *
193 * q = floor(µ floor(x/B^(n-1))/B^(n+1))
194 *
195 * which is an underestimate of x/l.
196 *
197 * With a bit more precision: by definition, u - 1 < floor(u) <= u. Hence,
198 *
199 * B^2n/l - 1 < µ <= B^2/l
200 *
201 * and
202 *
203 * x/B^(n-1) - 1 < floor(x/B^(n-1)) <= x/B^(n-1)
204 *
205 * Multiplying these together, and dividing through by B^(n+1), gives
206 *
207 * floor(x/l - B^(n-1)/l - x/B^2n + 1/B^(n+1)) <=
208 * q <= µ floor(x/B^(n-1))/B^(n+1) <= floor(x/l)
209 *
210 * Now, noticing that x < B^2n and l > B^(n-1) shows that x/B^2n and
211 * B^(n-1)/l are each less than 1; hence
212 *
213 * floor(x/l) - 2 <= q <= floor(x/l) <= x/l
214 *
215 * Now we set r = x - q l. Certainly, r == x (mod l); and
216 *
217 * 0 <= r < x - l floor(x/l) + 2 l < 3 l < B^n
218 */
219
220 /* Before we start on the fancy stuff, we need to resolve the pending
221 * carries in x. We'll be doing the floor division by just ignoring some
222 * of the pieces, and it would be bad if we missed some significant bits.
223 * Of course, this means that we don't actually have to store the low
224 * NPIECE - 1 pieces of the result.
225 */
226 for (i = 0, c = 0; i < 2*npiece; i++)
227 { c += x[i]; t[i] = c&m; c >>= piecewd; }
228
229 /* Now we calculate q. If we calculate this in product-scanning order, we
230 * can avoid having to store the low NPIECE + 1 pieces of the product as
231 * long as we keep track of the carry out properly. Conveniently, NMU =
232 * NPIECE + 1, which keeps the loop bounds easy in the first pass.
233 *
234 * Furthermore, because we know that r fits in NPIECE pieces, we only need
235 * the low NPIECE pieces of q.
236 */
237 for (i = 0, c = 0; i < npiece + 1; i++) {
238 for (j = 0; j <= i; j++)
239 c += (scaf_dblpiece)t[j + npiece - 1]*mu[i - j];
240 c >>= piecewd;
241 }
242 for (i = 0; i < npiece; i++) {
243 for (j = i + 1; j < npiece + 1; j++)
244 c += (scaf_dblpiece)t[j + npiece - 1]*mu[npiece + 1 + i - j];
245 q[i] = c&m; c >>= piecewd;
246 }
247
248 /* Next, we calculate r - q l in z. Product-scanning seems to be working
249 * out for us, and this time it will save us needing a large temporary
250 * space for the product q l as we go. On the downside, we have to track
251 * the carries from the multiplication and subtraction separately.
252 *
253 * Notice that the result r is at most NPIECE pieces long, so we can stop
254 * once we have that many.
255 */
256 u = 1; c = 0;
257 for (i = 0; i < npiece; i++) {
258 for (j = 0; j <= i; j++) c += (scaf_dblpiece)q[j]*l[i - j];
259 u += t[i] + ((scaf_piece)(c&m) ^ m);
260 z[i] = u&m; u >>= piecewd; c >>= piecewd;
261 }
262
263 /* Finally, two passes of conditional subtraction. Calculate t = z - l; if
264 * there's no borrow out the top, then update z = t; otherwise leave t
265 * alone.
266 */
267 for (i = 0; i < 2; i++) {
268 for (j = 0, u = 1; j < npiece; j++) {
269 u += z[j] + (l[j] ^ m);
270 t[j] = u&m; u >>= piecewd;
271 }
272 for (j = 0, u = -u; j < npiece; j++) z[i] = (t[i]&u) | (z[i]&~u);
273 }
274}
275
276/*----- That's all, folks -------------------------------------------------*/