5 * (c) 2017 Straylight/Edgeware
8 /*----- Licensing notice --------------------------------------------------*
10 * This file is part of secnet.
11 * See README for full list of copyright holders.
13 * secnet is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version d of the License, or
16 * (at your option) any later version.
18 * secnet is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * version 3 along with secnet; if not, see
25 * https://www.gnu.org/licenses/gpl.html.
27 * This file was originally part of Catacomb, but has been automatically
28 * modified for incorporation into secnet: see `import-catacomb-crypto'
31 * Catacomb is free software; you can redistribute it and/or modify
32 * it under the terms of the GNU Library General Public License as
33 * published by the Free Software Foundation; either version 2 of the
34 * License, or (at your option) any later version.
36 * Catacomb is distributed in the hope that it will be useful,
37 * but WITHOUT ANY WARRANTY; without even the implied warranty of
38 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39 * GNU Library General Public License for more details.
41 * You should have received a copy of the GNU Library General Public
42 * License along with Catacomb; if not, write to the Free
43 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
47 /*----- Header files ------------------------------------------------------*/
53 /*----- Debugging utilties ------------------------------------------------*/
63 static void scaf_dump(const char *what
, const scaf_piece
*x
,
64 size_t npiece
, size_t piecewd
)
66 mp
*y
= MP_ZERO
, *t
= MP_NEW
;
70 for (i
= 0; i
< npiece
; i
++) {
71 t
= mp_fromuint64(t
, x
[i
]);
76 printf(";; %s", what
); MP_PRINT("", y
); putchar('\n');
77 mp_drop(y
); mp_drop(t
);
80 static void scaf_dumpdbl(const char *what
, const scaf_dblpiece
*x
,
81 size_t npiece
, size_t piecewd
)
83 mp
*y
= MP_ZERO
, *t
= MP_NEW
;
87 for (i
= 0; i
< npiece
; i
++) {
88 t
= mp_fromuint64(t
, x
[i
]);
93 printf(";; %s", what
); MP_PRINT("", y
); putchar('\n');
94 mp_drop(y
); mp_drop(t
);
99 /*----- Main code ---------------------------------------------------------*/
101 /* --- @scaf_load@ --- *
103 * Arguments: @scaf_piece *z@ = where to write the result
104 * @const octet *b@ = source buffer to read
105 * @size_t sz@ = size of the source buffer
106 * @size_t npiece@ = number of pieces to read
107 * @unsigned piecewd@ = nominal width of pieces in bits
111 * Use: Loads a little-endian encoded scalar into a vector @z@ of
112 * single-precision pieces.
115 void scaf_load(scaf_piece
*z
, const octet
*b
, size_t sz
,
116 size_t npiece
, unsigned piecewd
)
118 uint32 a
, m
= ((scaf_piece
)1 << piecewd
) - 1;
121 for (i
= j
= n
= 0, a
= 0; i
< sz
; i
++) {
122 a
|= b
[i
] << n
; n
+= 8;
124 z
[j
++] = a
&m
; a
>>= piecewd
; n
-= piecewd
;
125 if (j
>= npiece
) return;
129 while (j
< npiece
) z
[j
++] = 0;
132 /* --- @scaf_loaddbl@ --- *
134 * Arguments: @scaf_dblpiece *z@ = where to write the result
135 * @const octet *b@ = source buffer to read
136 * @size_t sz@ = size of the source buffer
137 * @size_t npiece@ = number of pieces to read
138 * @unsigned piecewd@ = nominal width of pieces in bits
142 * Use: Loads a little-endian encoded scalar into a vector @z@ of
143 * double-precision pieces.
146 void scaf_loaddbl(scaf_dblpiece
*z
, const octet
*b
, size_t sz
,
147 size_t npiece
, unsigned piecewd
)
149 uint32 a
, m
= ((scaf_piece
)1 << piecewd
) - 1;
152 for (i
= j
= n
= 0, a
= 0; i
< sz
; i
++) {
153 a
|= b
[i
] << n
; n
+= 8;
155 z
[j
++] = a
&m
; a
>>= piecewd
; n
-= piecewd
;
156 if (j
>= npiece
) return;
160 while (j
< npiece
) z
[j
++] = 0;
163 /* --- @scaf_store@ --- *
165 * Arguments: @octet *b@ = buffer to fill in
166 * @size_t sz@ = size of the buffer
167 * @const scaf_piece *x@ = scalar to store
168 * @size_t npiece@ = number of pieces in @x@
169 * @unsigned piecewd@ = nominal width of pieces in bits
173 * Use: Stores a scalar in a vector of single-precison pieces as a
174 * little-endian vector of bytes.
177 void scaf_store(octet
*b
, size_t sz
, const scaf_piece
*x
,
178 size_t npiece
, unsigned piecewd
)
183 for (i
= j
= n
= 0, a
= 0; i
< npiece
; i
++) {
184 a
|= x
[i
] << n
; n
+= piecewd
;
186 b
[j
++] = a
&0xffu
; a
>>= 8; n
-= 8;
191 memset(b
+ j
, 0, sz
- j
);
194 /* --- @scaf_mul@ --- *
196 * Arguments: @scaf_dblpiece *z@ = where to put the answer
197 * @const scaf_piece *x, *y@ = the operands
198 * @size_t npiece@ = the length of the operands
202 * Use: Multiply two scalars. The destination must have space for
203 * @2*npiece@ pieces (though the last one will always be zero).
204 * The result is not reduced.
207 void scaf_mul(scaf_dblpiece
*z
, const scaf_piece
*x
, const scaf_piece
*y
,
212 for (i
= 0; i
< 2*npiece
; i
++) z
[i
] = 0;
214 for (i
= 0; i
< npiece
; i
++)
215 for (j
= 0; j
< npiece
; j
++)
216 z
[i
+ j
] += (scaf_dblpiece
)x
[i
]*y
[j
];
219 /* --- @scaf_reduce@ --- *
221 * Arguments: @scaf_piece *z@ = where to write the result
222 * @const scaf_dblpiece *x@ = the operand to reduce
223 * @const scaf_piece *l@ = the modulus, in internal format
224 * @const scaf_piece *mu@ = scaled approximation to @1/l@
225 * @size_t npiece@ = number of pieces in @l@
226 * @unsigned piecewd@ = nominal width of a piece in bits
227 * @scaf_piece *scratch@ = @3*npiece@ scratch pieces
231 * Use: Reduce @x@ (a vector of @2*npiece@ double-precision pieces)
232 * modulo @l@ (a vector of @npiece@ single-precision pieces),
233 * writing the result to @z@.
235 * Write @n = npiece@, @w = piecewd@, and %$B = 2^w$%. The
236 * operand @mu@ must contain %$\lfloor B^{2n}/l \rfloor$%, in
237 * @npiece + 1@ pieces. Furthermore, we must have
238 * %$3 l < B^n$%. (Fiddle with %$w$% if necessary.)
241 void scaf_reduce(scaf_piece
*z
, const scaf_dblpiece
*x
,
242 const scaf_piece
*l
, const scaf_piece
*mu
,
243 size_t npiece
, unsigned piecewd
, scaf_piece
*scratch
)
246 scaf_piece
*t
= scratch
, *q
= scratch
+ 2*npiece
;
247 scaf_piece u
, m
= ((scaf_piece
)1 << piecewd
) - 1;
250 /* This here is the hard part.
252 * Let w = PIECEWD, let n = NPIECE, and let B = 2^w. We must have
253 * B^(n-1) <= l < B^n.
255 * The argument MU contains pieces of the quantity µ = floor(B^2n/l), which
256 * is a scaled approximation to 1/l. We'll calculate
258 * q = floor(µ floor(x/B^(n-1))/B^(n+1))
260 * which is an underestimate of x/l.
262 * With a bit more precision: by definition, u - 1 < floor(u) <= u. Hence,
264 * B^2n/l - 1 < µ <= B^2/l
268 * x/B^(n-1) - 1 < floor(x/B^(n-1)) <= x/B^(n-1)
270 * Multiplying these together, and dividing through by B^(n+1), gives
272 * floor(x/l - B^(n-1)/l - x/B^2n + 1/B^(n+1)) <=
273 * q <= µ floor(x/B^(n-1))/B^(n+1) <= floor(x/l)
275 * Now, noticing that x < B^2n and l > B^(n-1) shows that x/B^2n and
276 * B^(n-1)/l are each less than 1; hence
278 * floor(x/l) - 2 <= q <= floor(x/l) <= x/l
280 * Now we set r = x - q l. Certainly, r == x (mod l); and
282 * 0 <= r < x - l floor(x/l) + 2 l < 3 l < B^n
285 /* Before we start on the fancy stuff, we need to resolve the pending
286 * carries in x. We'll be doing the floor division by just ignoring some
287 * of the pieces, and it would be bad if we missed some significant bits.
288 * Of course, this means that we don't actually have to store the low
289 * NPIECE - 1 pieces of the result.
291 for (i
= 0, c
= 0; i
< 2*npiece
; i
++)
292 { c
+= x
[i
]; t
[i
] = c
&m
; c
>>= piecewd
; }
294 /* Now we calculate q. If we calculate this in product-scanning order, we
295 * can avoid having to store the low NPIECE + 1 pieces of the product as
296 * long as we keep track of the carry out properly. Conveniently, NMU =
297 * NPIECE + 1, which keeps the loop bounds easy in the first pass.
299 * Furthermore, because we know that r fits in NPIECE pieces, we only need
300 * the low NPIECE pieces of q.
302 for (i
= 0, c
= 0; i
< npiece
+ 1; i
++) {
303 for (j
= 0; j
<= i
; j
++)
304 c
+= (scaf_dblpiece
)t
[j
+ npiece
- 1]*mu
[i
- j
];
307 for (i
= 0; i
< npiece
; i
++) {
308 for (j
= i
+ 1; j
< npiece
+ 1; j
++)
309 c
+= (scaf_dblpiece
)t
[j
+ npiece
- 1]*mu
[npiece
+ 1 + i
- j
];
310 q
[i
] = c
&m
; c
>>= piecewd
;
313 /* Next, we calculate r - q l in z. Product-scanning seems to be working
314 * out for us, and this time it will save us needing a large temporary
315 * space for the product q l as we go. On the downside, we have to track
316 * the carries from the multiplication and subtraction separately.
318 * Notice that the result r is at most NPIECE pieces long, so we can stop
319 * once we have that many.
322 for (i
= 0; i
< npiece
; i
++) {
323 for (j
= 0; j
<= i
; j
++) c
+= (scaf_dblpiece
)q
[j
]*l
[i
- j
];
324 u
+= t
[i
] + ((scaf_piece
)(c
&m
) ^ m
);
325 z
[i
] = u
&m
; u
>>= piecewd
; c
>>= piecewd
;
328 /* Finally, two passes of conditional subtraction. Calculate t = z - l; if
329 * there's no borrow out the top, then update z = t; otherwise leave t
332 for (i
= 0; i
< 2; i
++) {
333 for (j
= 0, u
= 1; j
< npiece
; j
++) {
334 u
+= z
[j
] + (l
[j
] ^ m
);
335 t
[j
] = u
&m
; u
>>= piecewd
;
337 for (j
= 0, u
= -u
; j
< npiece
; j
++) z
[j
] = (t
[j
]&u
) | (z
[j
]&~u
);
341 /*----- That's all, folks -------------------------------------------------*/