Commit | Line | Data |
---|---|---|
d03ab969 | 1 | /* -*-c-*- |
2 | * | |
d03ab969 | 3 | * Low-level multiprecision arithmetic |
4 | * | |
5 | * (c) 1999 Straylight/Edgeware | |
6 | */ | |
7 | ||
45c0fd36 | 8 | /*----- Licensing notice --------------------------------------------------* |
d03ab969 | 9 | * |
10 | * This file is part of Catacomb. | |
11 | * | |
12 | * Catacomb is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU Library General Public License as | |
14 | * published by the Free Software Foundation; either version 2 of the | |
15 | * License, or (at your option) any later version. | |
45c0fd36 | 16 | * |
d03ab969 | 17 | * Catacomb is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU Library General Public License for more details. | |
45c0fd36 | 21 | * |
d03ab969 | 22 | * You should have received a copy of the GNU Library General Public |
23 | * License along with Catacomb; if not, write to the Free | |
24 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
25 | * MA 02111-1307, USA. | |
26 | */ | |
27 | ||
d03ab969 | 28 | /*----- Header files ------------------------------------------------------*/ |
29 | ||
c8a2f9ef | 30 | #include <assert.h> |
d03ab969 | 31 | #include <stdio.h> |
32 | #include <stdlib.h> | |
33 | #include <string.h> | |
34 | ||
35 | #include <mLib/bits.h> | |
23bbea75 | 36 | #include <mLib/macros.h> |
d03ab969 | 37 | |
38 | #include "mptypes.h" | |
39 | #include "mpx.h" | |
75263f25 | 40 | #include "bitops.h" |
d03ab969 | 41 | |
42 | /*----- Loading and storing -----------------------------------------------*/ | |
43 | ||
44 | /* --- @mpx_storel@ --- * | |
45 | * | |
46 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
c8a2f9ef | 47 | * @void *pp@ = pointer to octet array |
d03ab969 | 48 | * @size_t sz@ = size of octet array |
49 | * | |
50 | * Returns: --- | |
51 | * | |
52 | * Use: Stores an MP in an octet array, least significant octet | |
53 | * first. High-end octets are silently discarded if there | |
54 | * isn't enough space for them. | |
55 | */ | |
56 | ||
c8a2f9ef | 57 | void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz) |
d03ab969 | 58 | { |
59 | mpw n, w = 0; | |
c8a2f9ef | 60 | octet *p = pp, *q = p + sz; |
d03ab969 | 61 | unsigned bits = 0; |
62 | ||
63 | while (p < q) { | |
64 | if (bits < 8) { | |
65 | if (v >= vl) { | |
66 | *p++ = U8(w); | |
67 | break; | |
68 | } | |
69 | n = *v++; | |
70 | *p++ = U8(w | n << bits); | |
71 | w = n >> (8 - bits); | |
72 | bits += MPW_BITS - 8; | |
73 | } else { | |
74 | *p++ = U8(w); | |
75 | w >>= 8; | |
76 | bits -= 8; | |
77 | } | |
78 | } | |
79 | memset(p, 0, q - p); | |
80 | } | |
81 | ||
82 | /* --- @mpx_loadl@ --- * | |
83 | * | |
84 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
c8a2f9ef | 85 | * @const void *pp@ = pointer to octet array |
d03ab969 | 86 | * @size_t sz@ = size of octet array |
87 | * | |
88 | * Returns: --- | |
89 | * | |
90 | * Use: Loads an MP in an octet array, least significant octet | |
91 | * first. High-end octets are ignored if there isn't enough | |
92 | * space for them. | |
93 | */ | |
94 | ||
c8a2f9ef | 95 | void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz) |
d03ab969 | 96 | { |
97 | unsigned n; | |
c8a2f9ef | 98 | mpw w = 0; |
99 | const octet *p = pp, *q = p + sz; | |
d03ab969 | 100 | unsigned bits = 0; |
101 | ||
102 | if (v >= vl) | |
103 | return; | |
104 | while (p < q) { | |
105 | n = U8(*p++); | |
106 | w |= n << bits; | |
107 | bits += 8; | |
108 | if (bits >= MPW_BITS) { | |
109 | *v++ = MPW(w); | |
110 | w = n >> (MPW_BITS - bits + 8); | |
111 | bits -= MPW_BITS; | |
112 | if (v >= vl) | |
113 | return; | |
114 | } | |
115 | } | |
116 | *v++ = w; | |
117 | MPX_ZERO(v, vl); | |
118 | } | |
119 | ||
120 | /* --- @mpx_storeb@ --- * | |
121 | * | |
122 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
c8a2f9ef | 123 | * @void *pp@ = pointer to octet array |
d03ab969 | 124 | * @size_t sz@ = size of octet array |
125 | * | |
126 | * Returns: --- | |
127 | * | |
128 | * Use: Stores an MP in an octet array, most significant octet | |
129 | * first. High-end octets are silently discarded if there | |
130 | * isn't enough space for them. | |
131 | */ | |
132 | ||
c8a2f9ef | 133 | void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz) |
d03ab969 | 134 | { |
135 | mpw n, w = 0; | |
c8a2f9ef | 136 | octet *p = pp, *q = p + sz; |
d03ab969 | 137 | unsigned bits = 0; |
138 | ||
139 | while (q > p) { | |
140 | if (bits < 8) { | |
141 | if (v >= vl) { | |
142 | *--q = U8(w); | |
143 | break; | |
144 | } | |
145 | n = *v++; | |
146 | *--q = U8(w | n << bits); | |
147 | w = n >> (8 - bits); | |
148 | bits += MPW_BITS - 8; | |
149 | } else { | |
150 | *--q = U8(w); | |
151 | w >>= 8; | |
152 | bits -= 8; | |
153 | } | |
154 | } | |
155 | memset(p, 0, q - p); | |
156 | } | |
157 | ||
158 | /* --- @mpx_loadb@ --- * | |
159 | * | |
160 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
c8a2f9ef | 161 | * @const void *pp@ = pointer to octet array |
d03ab969 | 162 | * @size_t sz@ = size of octet array |
163 | * | |
164 | * Returns: --- | |
165 | * | |
166 | * Use: Loads an MP in an octet array, most significant octet | |
167 | * first. High-end octets are ignored if there isn't enough | |
168 | * space for them. | |
169 | */ | |
170 | ||
c8a2f9ef | 171 | void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz) |
d03ab969 | 172 | { |
173 | unsigned n; | |
c8a2f9ef | 174 | mpw w = 0; |
175 | const octet *p = pp, *q = p + sz; | |
d03ab969 | 176 | unsigned bits = 0; |
177 | ||
178 | if (v >= vl) | |
179 | return; | |
180 | while (q > p) { | |
181 | n = U8(*--q); | |
182 | w |= n << bits; | |
183 | bits += 8; | |
184 | if (bits >= MPW_BITS) { | |
185 | *v++ = MPW(w); | |
186 | w = n >> (MPW_BITS - bits + 8); | |
187 | bits -= MPW_BITS; | |
188 | if (v >= vl) | |
189 | return; | |
190 | } | |
191 | } | |
192 | *v++ = w; | |
193 | MPX_ZERO(v, vl); | |
194 | } | |
195 | ||
f09e814a | 196 | /* --- @mpx_storel2cn@ --- * |
197 | * | |
198 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
199 | * @void *pp@ = pointer to octet array | |
200 | * @size_t sz@ = size of octet array | |
201 | * | |
202 | * Returns: --- | |
203 | * | |
204 | * Use: Stores a negative MP in an octet array, least significant | |
205 | * octet first, as two's complement. High-end octets are | |
206 | * silently discarded if there isn't enough space for them. | |
207 | * This obviously makes the output bad. | |
208 | */ | |
209 | ||
210 | void mpx_storel2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) | |
211 | { | |
212 | unsigned c = 1; | |
213 | unsigned b = 0; | |
214 | mpw n, w = 0; | |
215 | octet *p = pp, *q = p + sz; | |
216 | unsigned bits = 0; | |
217 | ||
218 | while (p < q) { | |
219 | if (bits < 8) { | |
220 | if (v >= vl) { | |
221 | b = w; | |
222 | break; | |
223 | } | |
224 | n = *v++; | |
225 | b = w | n << bits; | |
226 | w = n >> (8 - bits); | |
227 | bits += MPW_BITS - 8; | |
228 | } else { | |
229 | b = w; | |
230 | w >>= 8; | |
231 | bits -= 8; | |
232 | } | |
233 | b = U8(~b + c); | |
2bd53494 | 234 | c = c && !b; |
f09e814a | 235 | *p++ = b; |
236 | } | |
237 | while (p < q) { | |
238 | b = U8(~b + c); | |
2bd53494 | 239 | c = c && !b; |
f09e814a | 240 | *p++ = b; |
241 | b = 0; | |
242 | } | |
243 | } | |
244 | ||
245 | /* --- @mpx_loadl2cn@ --- * | |
246 | * | |
247 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
248 | * @const void *pp@ = pointer to octet array | |
249 | * @size_t sz@ = size of octet array | |
250 | * | |
251 | * Returns: --- | |
252 | * | |
253 | * Use: Loads a negative MP in an octet array, least significant | |
254 | * octet first, as two's complement. High-end octets are | |
255 | * ignored if there isn't enough space for them. This probably | |
256 | * means you made the wrong choice coming here. | |
257 | */ | |
258 | ||
259 | void mpx_loadl2cn(mpw *v, mpw *vl, const void *pp, size_t sz) | |
260 | { | |
261 | unsigned n; | |
262 | unsigned c = 1; | |
263 | mpw w = 0; | |
264 | const octet *p = pp, *q = p + sz; | |
265 | unsigned bits = 0; | |
266 | ||
267 | if (v >= vl) | |
268 | return; | |
269 | while (p < q) { | |
270 | n = U8(~(*p++) + c); | |
2bd53494 | 271 | c = c && !n; |
f09e814a | 272 | w |= n << bits; |
273 | bits += 8; | |
274 | if (bits >= MPW_BITS) { | |
275 | *v++ = MPW(w); | |
276 | w = n >> (MPW_BITS - bits + 8); | |
277 | bits -= MPW_BITS; | |
278 | if (v >= vl) | |
279 | return; | |
280 | } | |
281 | } | |
282 | *v++ = w; | |
283 | MPX_ZERO(v, vl); | |
284 | } | |
285 | ||
286 | /* --- @mpx_storeb2cn@ --- * | |
287 | * | |
288 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
289 | * @void *pp@ = pointer to octet array | |
290 | * @size_t sz@ = size of octet array | |
291 | * | |
292 | * Returns: --- | |
293 | * | |
294 | * Use: Stores a negative MP in an octet array, most significant | |
295 | * octet first, as two's complement. High-end octets are | |
296 | * silently discarded if there isn't enough space for them, | |
297 | * which probably isn't what you meant. | |
298 | */ | |
299 | ||
300 | void mpx_storeb2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) | |
301 | { | |
302 | mpw n, w = 0; | |
303 | unsigned b = 0; | |
304 | unsigned c = 1; | |
305 | octet *p = pp, *q = p + sz; | |
306 | unsigned bits = 0; | |
307 | ||
308 | while (q > p) { | |
309 | if (bits < 8) { | |
310 | if (v >= vl) { | |
311 | b = w; | |
312 | break; | |
313 | } | |
314 | n = *v++; | |
315 | b = w | n << bits; | |
316 | w = n >> (8 - bits); | |
317 | bits += MPW_BITS - 8; | |
318 | } else { | |
319 | b = w; | |
320 | w >>= 8; | |
321 | bits -= 8; | |
322 | } | |
323 | b = U8(~b + c); | |
2bd53494 | 324 | c = c && !b; |
f09e814a | 325 | *--q = b; |
326 | } | |
327 | while (q > p) { | |
328 | b = ~b + c; | |
2bd53494 | 329 | c = c && !(b & 0xff); |
f09e814a | 330 | *--q = b; |
331 | b = 0; | |
332 | } | |
333 | } | |
334 | ||
335 | /* --- @mpx_loadb2cn@ --- * | |
336 | * | |
337 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
338 | * @const void *pp@ = pointer to octet array | |
339 | * @size_t sz@ = size of octet array | |
340 | * | |
341 | * Returns: --- | |
342 | * | |
343 | * Use: Loads a negative MP in an octet array, most significant octet | |
344 | * first as two's complement. High-end octets are ignored if | |
345 | * there isn't enough space for them. This probably means you | |
346 | * chose this function wrongly. | |
347 | */ | |
348 | ||
349 | void mpx_loadb2cn(mpw *v, mpw *vl, const void *pp, size_t sz) | |
350 | { | |
351 | unsigned n; | |
352 | unsigned c = 1; | |
353 | mpw w = 0; | |
354 | const octet *p = pp, *q = p + sz; | |
355 | unsigned bits = 0; | |
356 | ||
357 | if (v >= vl) | |
358 | return; | |
359 | while (q > p) { | |
360 | n = U8(~(*--q) + c); | |
2bd53494 | 361 | c = c && !n; |
f09e814a | 362 | w |= n << bits; |
363 | bits += 8; | |
364 | if (bits >= MPW_BITS) { | |
365 | *v++ = MPW(w); | |
366 | w = n >> (MPW_BITS - bits + 8); | |
367 | bits -= MPW_BITS; | |
368 | if (v >= vl) | |
369 | return; | |
370 | } | |
371 | } | |
372 | *v++ = w; | |
373 | MPX_ZERO(v, vl); | |
374 | } | |
375 | ||
d03ab969 | 376 | /*----- Logical shifting --------------------------------------------------*/ |
377 | ||
5ee480b5 | 378 | /* --- @MPX_SHIFT1@ --- * |
d03ab969 | 379 | * |
5ee480b5 MW |
380 | * Arguments: @init@ = initial accumulator value |
381 | * @out@ = expression to store in each output word | |
382 | * @next@ = expression for next accumulator value | |
d03ab969 | 383 | * |
5ee480b5 MW |
384 | * Use: Performs a single-position shift. The input is scanned |
385 | * right-to-left. In the expressions @out@ and @next@, the | |
386 | * accumulator is available in @w@ and the current input word is | |
387 | * in @t@. | |
d03ab969 | 388 | * |
5ee480b5 MW |
389 | * This macro is intended to be used in the @shift1@ argument of |
390 | * @MPX_SHIFTOP@, and expects variables describing the operation | |
391 | * to be set up accordingly. | |
d03ab969 | 392 | */ |
393 | ||
#define MPX_SHIFT1(init, out, next) do {                                \
  mpw t;                        /* Current input word */                \
  mpw w = (init);               /* Accumulator carried between words */ \
                                                                        \
  /* Walk upwards until the input or the output is exhausted. */        \
  while (av < avl && dv < dvl) {                                        \
    t = MPW(*av++);                                                     \
    /* Reduce to word size: @out@ may have bits above @MPW_BITS@. */    \
    *dv++ = MPW(out);                                                   \
    w = (next);                                                         \
  }                                                                     \
                                                                        \
  /* Room left over: store the accumulator and clear the remainder. */  \
  if (dv < dvl) { *dv++ = MPW(w); MPX_ZERO(dv, dvl); }                  \
} while (0)
404 | ||
405 | /* --- @MPX_SHIFTW@ --- * | |
406 | * | |
407 | * Arguments: @max@ = the maximum shift (in words) which is nontrivial | |
408 | * @clear@ = function (or macro) to clear low-order output words | |
409 | * @copy@ = statement to copy words from input to output | |
410 | * | |
411 | * Use: Performs a shift by a whole number of words. If the shift | |
412 | * amount is @max@ or more words, then the destination is | |
413 | * @clear@ed entirely; otherwise, @copy@ is executed. | |
414 | * | |
415 | * This macro is intended to be used in the @shiftw@ argument of | |
416 | * @MPX_SHIFTOP@, and expects variables describing the operation | |
417 | * to be set up accordingly. | |
418 | */ | |
d03ab969 | 419 | |
#define MPX_SHIFTW(max, clear, copy) do {                               \
  if (nw >= (max)) {                                                    \
    /* The shift reaches @max@ words: clear the whole destination. */   \
    clear(dv, dvl);                                                     \
  } else {                                                              \
    copy                                                                \
  }                                                                     \
} while (0)
d03ab969 | 424 | |
5ee480b5 MW |
425 | /* --- @MPX_SHIFTOP@ --- * |
426 | * | |
427 | * Arguments: @name@ = name of function to define (without `@mpx_@' prefix) | |
428 | * @shift1@ = statement to shift by a single bit | |
429 | * @shiftw@ = statement to shift by a whole number of words | |
430 | * @shift@ = statement to perform a general shift | |
431 | * | |
432 | * Use: Emits a shift operation. The input is @av@..@avl@; the | |
433 | * output is @dv@..@dvl@; and the shift amount (in bits) is | |
434 | * @n@. In @shiftw@ and @shift@, @nw@ and @nb@ are set up such | |
435 | * that @n = nw*MPW_BITS + nb@ and @nb < MPW_BITS@. | |
436 | */ | |
d03ab969 | 437 | |
#define MPX_SHIFTOP(name, shift1, shiftw, shift)                        \
                                                                        \
void mpx_##name(mpw *dv, mpw *dvl,                                      \
                const mpw *av, const mpw *avl,                          \
                size_t n)                                               \
{                                                                       \
  size_t nw;                    /* Whole words in the shift amount */   \
  unsigned nb;                  /* Leftover bits in the shift amount */ \
                                                                        \
  /* A shift by nothing is just a copy. */                              \
  if (n == 0) {                                                         \
    MPX_COPY(dv, dvl, av, avl);                                         \
    return;                                                             \
  }                                                                     \
                                                                        \
  /* A single-bit shift has its own statement. */                       \
  if (n == 1) {                                                         \
    do shift1 while (0);                                                \
    return;                                                             \
  }                                                                     \
                                                                        \
  /* Otherwise split the amount so that n = nw*MPW_BITS + nb. */        \
  nw = n/MPW_BITS;                                                      \
  nb = n%MPW_BITS;                                                      \
  if (nb) do shift while (0);                                           \
  else do shiftw while (0);                                             \
}
d03ab969 | 456 | |
5ee480b5 MW |
457 | /* --- @MPX_SHIFT_LEFT@ --- * |
458 | * | |
459 | * Arguments: @name@ = name of function to define (without `@mpx_@' prefix) | |
460 | * @init1@ = initializer for single-bit shift accumulator | |
461 | * @clear@ = function (or macro) to clear low-order output words | |
462 | * @flush@ = expression for low-order nontrivial output word | |
463 | * | |
464 | * Use: Emits a left-shift operation. This expands to a call on | |
465 | * @MPX_SHIFTOP@, but implements the complicated @shift@ | |
466 | * statement. | |
467 | * | |
468 | * The @init1@ argument is as for @MPX_SHIFT1@, and @clear@ is | |
469 | * as for @MPX_SHIFTW@ (though is used elsewhere). In a general | |
470 | * shift, @nw@ whole low-order output words are set using | |
471 | * @clear@; high-order words are zeroed; and the remaining words | |
472 | * set with a left-to-right pass across the input; at the end of | |
473 | * the operation, the least significant output word above those | |
474 | * @clear@ed is set using @flush@, which may use the accumulator | |
475 | * @w@ = @av[0] << nb@. | |
476 | */ | |
d03ab969 | 477 | |
#define MPX_SHIFT_LEFT(name, init1, clear, flush)                       \
MPX_SHIFTOP(name, {                                                     \
  /* One bit: the top bit of each word carries into the next one up. */ \
  MPX_SHIFT1(init1,                                                     \
             w | (t << 1),                                              \
             t >> (MPW_BITS - 1));                                      \
}, {                                                                    \
  /* Whole words: slide the input up and fill in underneath it. */      \
  MPX_SHIFTW(dvl - dv, clear, {                                         \
    MPX_COPY(dv + nw, dvl, av, avl);                                    \
    clear(dv, dv + nw);                                                 \
  });                                                                   \
}, {                                                                    \
  size_t nr = MPW_BITS - nb;    /* Complementary right-shift count */   \
  size_t dlen = dvl - dv;                                               \
  size_t alen = avl - av;                                               \
  mpw w;                        /* Low part of the next output word */  \
                                                                        \
  /* The destination is no longer than the word shift: only fill. */    \
  if (dlen <= nw) {                                                     \
    clear(dv, dvl);                                                     \
    break;                                                              \
  }                                                                     \
                                                                        \
  if (dlen > alen + nw) {                                               \
    /* The whole result fits: zero the words above it and treat the */  \
    /* destination as ending just past the top result word.         */  \
    size_t top = alen + nw + 1;                                         \
    MPX_ZERO(dv + top, dvl);                                            \
    dvl = dv + top;                                                     \
    w = 0;                                                              \
  } else {                                                              \
    /* The result is truncated: drop the input words which cannot   */  \
    /* land in the destination.  The offset is computed first so    */  \
    /* that no pointer beyond the end of the input is ever formed.  */  \
    avl = av + (dlen - nw);                                             \
    w = *--avl << nb;                                                   \
  }                                                                     \
                                                                        \
  /* Main pass: from the most significant input word downwards. */      \
  while (avl > av) {                                                    \
    mpw t = *--avl;                                                     \
    *--dvl = MPW(w | (t >> nr));                                        \
    w = t << nb;                                                        \
  }                                                                     \
                                                                        \
  /* Lowest nontrivial word and then the words vacated below it. */     \
  *--dvl = MPW(flush);                                                  \
  clear(dv, dvl);                                                       \
})
c8a2f9ef | 518 | |
5ee480b5 MW |
519 | /* --- @mpx_lsl@ --- * |
520 | * | |
521 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
522 | * @const mpw *av, *avl@ = source vector base and limit | |
523 | * @size_t n@ = number of bit positions to shift by | |
524 | * | |
525 | * Returns: --- | |
526 | * | |
527 | * Use: Performs a logical shift left operation on an integer. | |
528 | */ | |
d03ab969 | 529 | |
5ee480b5 | 530 | MPX_SHIFT_LEFT(lsl, 0, MPX_ZERO, w) |
d03ab969 | 531 | |
81578196 | 532 | /* --- @mpx_lslc@ --- * |
533 | * | |
534 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
535 | * @const mpw *av, *avl@ = source vector base and limit | |
536 | * @size_t n@ = number of bit positions to shift by | |
537 | * | |
538 | * Returns: --- | |
539 | * | |
540 | * Use: Performs a logical shift left operation on an integer, only | |
541 | * it fills in the bits with ones instead of zeroes. | |
542 | */ | |
543 | ||
5ee480b5 | 544 | MPX_SHIFT_LEFT(lslc, 1, MPX_ONE, w | (MPW_MAX >> nr)) |
81578196 | 545 | |
d03ab969 | 546 | /* --- @mpx_lsr@ --- * |
547 | * | |
548 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
549 | * @const mpw *av, *avl@ = source vector base and limit | |
550 | * @size_t n@ = number of bit positions to shift by | |
551 | * | |
552 | * Returns: --- | |
553 | * | |
554 | * Use: Performs a logical shift right operation on an integer. | |
555 | */ | |
556 | ||
5ee480b5 MW |
557 | MPX_SHIFTOP(lsr, { |
558 | MPX_SHIFT1(av < avl ? *av++ >> 1 : 0, | |
559 | w | (t << (MPW_BITS - 1)), | |
560 | t >> 1); | |
561 | }, { | |
562 | MPX_SHIFTW(avl - av, MPX_ZERO, | |
563 | { MPX_COPY(dv, dvl, av + nw, avl); }); | |
564 | }, { | |
565 | size_t nr = MPW_BITS - nb; | |
566 | mpw w; | |
567 | ||
568 | av += nw; | |
569 | w = av < avl ? *av++ : 0; | |
570 | while (av < avl) { | |
571 | mpw t; | |
572 | if (dv >= dvl) goto done; | |
573 | t = *av++; | |
574 | *dv++ = MPW((w >> nb) | (t << nr)); | |
575 | w = t; | |
d03ab969 | 576 | } |
5ee480b5 MW |
577 | if (dv < dvl) { |
578 | *dv++ = MPW(w >> nb); | |
579 | MPX_ZERO(dv, dvl); | |
d03ab969 | 580 | } |
d03ab969 | 581 | done:; |
5ee480b5 | 582 | }) |
d03ab969 | 583 | |
0f32e0f8 | 584 | /*----- Bitwise operations ------------------------------------------------*/ |
585 | ||
f09e814a | 586 | /* --- @mpx_bitop@ --- * |
0f32e0f8 | 587 | * |
588 | * Arguments: @mpw *dv, *dvl@ = destination vector | |
589 | * @const mpw *av, *avl@ = first source vector | |
590 | * @const mpw *bv, *bvl@ = second source vector | |
591 | * | |
592 | * Returns: --- | |
593 | * | |
f09e814a | 594 | * Use: Provides the dyadic boolean functions. |
0f32e0f8 | 595 | */ |
596 | ||
f09e814a | 597 | #define MPX_BITBINOP(string) \ |
0f32e0f8 | 598 | \ |
f09e814a | 599 | void mpx_bit##string(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, \ |
600 | const mpw *bv, const mpw *bvl) \ | |
0f32e0f8 | 601 | { \ |
602 | MPX_SHRINK(av, avl); \ | |
603 | MPX_SHRINK(bv, bvl); \ | |
604 | \ | |
605 | while (dv < dvl) { \ | |
606 | mpw a, b; \ | |
607 | a = (av < avl) ? *av++ : 0; \ | |
608 | b = (bv < bvl) ? *bv++ : 0; \ | |
75263f25 | 609 | *dv++ = B##string(a, b); \ |
23bbea75 | 610 | IGNORE(a); IGNORE(b); \ |
0f32e0f8 | 611 | } \ |
612 | } | |
613 | ||
f09e814a | 614 | MPX_DOBIN(MPX_BITBINOP) |
0f32e0f8 | 615 | |
616 | void mpx_not(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) | |
617 | { | |
618 | MPX_SHRINK(av, avl); | |
619 | ||
620 | while (dv < dvl) { | |
621 | mpw a; | |
622 | a = (av < avl) ? *av++ : 0; | |
623 | *dv++ = ~a; | |
624 | } | |
625 | } | |
626 | ||
d03ab969 | 627 | /*----- Unsigned arithmetic -----------------------------------------------*/ |
628 | ||
f45a00c6 | 629 | /* --- @mpx_2c@ --- * |
630 | * | |
631 | * Arguments: @mpw *dv, *dvl@ = destination vector | |
632 | * @const mpw *v, *vl@ = source vector | |
633 | * | |
634 | * Returns: --- | |
635 | * | |
636 | * Use: Calculates the two's complement of @v@. | |
637 | */ | |
638 | ||
639 | void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl) | |
640 | { | |
641 | mpw c = 0; | |
642 | while (dv < dvl && v < vl) | |
643 | *dv++ = c = MPW(~*v++); | |
644 | if (dv < dvl) { | |
645 | if (c > MPW_MAX / 2) | |
646 | c = MPW(~0); | |
647 | while (dv < dvl) | |
648 | *dv++ = c; | |
649 | } | |
650 | MPX_UADDN(dv, dvl, 1); | |
651 | } | |
652 | ||
1a05a8ef | 653 | /* --- @mpx_ueq@ --- * |
654 | * | |
655 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit | |
656 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
657 | * | |
658 | * Returns: Nonzero if the two vectors are equal. | |
659 | * | |
660 | * Use: Performs an unsigned integer test for equality. | |
661 | */ | |
662 | ||
663 | int mpx_ueq(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) | |
664 | { | |
665 | MPX_SHRINK(av, avl); | |
666 | MPX_SHRINK(bv, bvl); | |
667 | if (avl - av != bvl - bv) | |
668 | return (0); | |
669 | while (av < avl) { | |
670 | if (*av++ != *bv++) | |
671 | return (0); | |
672 | } | |
673 | return (1); | |
674 | } | |
675 | ||
d03ab969 | 676 | /* --- @mpx_ucmp@ --- * |
677 | * | |
678 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit | |
679 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
680 | * | |
681 | * Returns: Less than, equal to, or greater than zero depending on | |
682 | * whether @a@ is less than, equal to or greater than @b@, | |
683 | * respectively. | |
684 | * | |
685 | * Use: Performs an unsigned integer comparison. | |
686 | */ | |
687 | ||
688 | int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) | |
689 | { | |
690 | MPX_SHRINK(av, avl); | |
691 | MPX_SHRINK(bv, bvl); | |
692 | ||
693 | if (avl - av > bvl - bv) | |
694 | return (+1); | |
695 | else if (avl - av < bvl - bv) | |
696 | return (-1); | |
697 | else while (avl > av) { | |
698 | mpw a = *--avl, b = *--bvl; | |
699 | if (a > b) | |
700 | return (+1); | |
701 | else if (a < b) | |
702 | return (-1); | |
703 | } | |
704 | return (0); | |
705 | } | |
1a05a8ef | 706 | |
d03ab969 | 707 | /* --- @mpx_uadd@ --- * |
708 | * | |
709 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
710 | * @const mpw *av, *avl@ = first addend vector base and limit | |
711 | * @const mpw *bv, *bvl@ = second addend vector base and limit | |
712 | * | |
713 | * Returns: --- | |
714 | * | |
715 | * Use: Performs unsigned integer addition. If the result overflows | |
716 | * the destination vector, high-order bits are discarded. This | |
717 | * means that two's complement addition happens more or less for | |
718 | * free, although that's more a side-effect than anything else. | |
719 | * The result vector may be equal to either or both source | |
720 | * vectors, but may not otherwise overlap them. | |
721 | */ | |
722 | ||
723 | void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
724 | const mpw *bv, const mpw *bvl) | |
725 | { | |
726 | mpw c = 0; | |
727 | ||
728 | while (av < avl || bv < bvl) { | |
729 | mpw a, b; | |
730 | mpd x; | |
731 | if (dv >= dvl) | |
732 | return; | |
733 | a = (av < avl) ? *av++ : 0; | |
734 | b = (bv < bvl) ? *bv++ : 0; | |
735 | x = (mpd)a + (mpd)b + c; | |
736 | *dv++ = MPW(x); | |
737 | c = x >> MPW_BITS; | |
738 | } | |
739 | if (dv < dvl) { | |
740 | *dv++ = c; | |
741 | MPX_ZERO(dv, dvl); | |
742 | } | |
743 | } | |
744 | ||
dd517851 | 745 | /* --- @mpx_uaddn@ --- * |
746 | * | |
747 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit | |
748 | * @mpw n@ = other addend | |
749 | * | |
750 | * Returns: --- | |
751 | * | |
752 | * Use: Adds a small integer to a multiprecision number. | |
753 | */ | |
754 | ||
755 | void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); } | |
756 | ||
f46efa79 | 757 | /* --- @mpx_uaddnlsl@ --- * |
758 | * | |
759 | * Arguments: @mpw *dv, *dvl@ = destination and first argument vector | |
760 | * @mpw a@ = second argument | |
761 | * @unsigned o@ = offset in bits | |
762 | * | |
763 | * Returns: --- | |
764 | * | |
765 | * Use: Computes %$d + 2^o a$%. If the result overflows then | |
766 | * high-order bits are discarded, as usual. We must have | |
767 | * @0 < o < MPW_BITS@. | |
768 | */ | |
769 | ||
770 | void mpx_uaddnlsl(mpw *dv, mpw *dvl, mpw a, unsigned o) | |
771 | { | |
772 | mpd x = (mpd)a << o; | |
773 | ||
774 | while (x && dv < dvl) { | |
775 | x += *dv; | |
776 | *dv++ = MPW(x); | |
777 | x >>= MPW_BITS; | |
778 | } | |
779 | } | |
780 | ||
d03ab969 | 781 | /* --- @mpx_usub@ --- * |
782 | * | |
783 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
784 | * @const mpw *av, *avl@ = first argument vector base and limit | |
785 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
786 | * | |
787 | * Returns: --- | |
788 | * | |
789 | * Use: Performs unsigned integer subtraction. If the result | |
790 | * overflows the destination vector, high-order bits are | |
791 | * discarded. This means that two's complement subtraction | |
792 | * happens more or less for free, although that's more a side- | |
793 | * effect than anything else. The result vector may be equal to | |
794 | * either or both source vectors, but may not otherwise overlap | |
795 | * them. | |
796 | */ | |
797 | ||
798 | void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
799 | const mpw *bv, const mpw *bvl) | |
800 | { | |
801 | mpw c = 0; | |
802 | ||
803 | while (av < avl || bv < bvl) { | |
804 | mpw a, b; | |
805 | mpd x; | |
806 | if (dv >= dvl) | |
807 | return; | |
808 | a = (av < avl) ? *av++ : 0; | |
809 | b = (bv < bvl) ? *bv++ : 0; | |
c8a2f9ef | 810 | x = (mpd)a - (mpd)b - c; |
d03ab969 | 811 | *dv++ = MPW(x); |
c8a2f9ef | 812 | if (x >> MPW_BITS) |
813 | c = 1; | |
814 | else | |
815 | c = 0; | |
d03ab969 | 816 | } |
c8a2f9ef | 817 | if (c) |
818 | c = MPW_MAX; | |
d03ab969 | 819 | while (dv < dvl) |
c8a2f9ef | 820 | *dv++ = c; |
d03ab969 | 821 | } |
822 | ||
dd517851 | 823 | /* --- @mpx_usubn@ --- * |
824 | * | |
825 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit | |
826 | * @n@ = subtrahend | |
827 | * | |
828 | * Returns: --- | |
829 | * | |
830 | * Use: Subtracts a small integer from a multiprecision number. | |
831 | */ | |
832 | ||
833 | void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); } | |
834 | ||
f46efa79 | 835 | /* --- @mpx_usubnlsl@ --- * |
836 | * | |
837 | * Arguments: @mpw *dv, *dvl@ = destination and first argument vector | |
838 | * @mpw a@ = second argument | |
839 | * @unsigned o@ = offset in bits | |
840 | * | |
841 | * Returns: --- | |
842 | * | |
843 | * Use: Computes %$d - 2^o a$%. If the result overflows then | |
844 | * high-order bits are discarded, as usual. We must have | |
845 | * @0 < o < MPW_BITS@. | |
846 | */ | |
847 | ||
848 | void mpx_usubnlsl(mpw *dv, mpw *dvl, mpw a, unsigned o) | |
849 | { | |
850 | mpw b = a >> (MPW_BITS - o); | |
851 | a <<= o; | |
852 | ||
853 | if (dv < dvl) { | |
c29970a7 | 854 | mpd x = (mpd)*dv - MPW(a); |
f46efa79 | 855 | *dv++ = MPW(x); |
856 | if (x >> MPW_BITS) | |
857 | b++; | |
858 | MPX_USUBN(dv, dvl, b); | |
859 | } | |
860 | } | |
861 | ||
d03ab969 | 862 | /* --- @mpx_umul@ --- * |
863 | * | |
864 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
865 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
866 | * @const mpw *bv, *bvl@ = multiplier vector base and limit | |
867 | * | |
868 | * Returns: --- | |
869 | * | |
870 | * Use: Performs unsigned integer multiplication. If the result | |
871 | * overflows the destination vector, high-order bits are | |
872 | * discarded. The result vector may not overlap the argument | |
873 | * vectors in any way. | |
874 | */ | |
875 | ||
876 | void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
877 | const mpw *bv, const mpw *bvl) | |
878 | { | |
879 | /* --- This is probably worthwhile on a multiply --- */ | |
880 | ||
881 | MPX_SHRINK(av, avl); | |
882 | MPX_SHRINK(bv, bvl); | |
883 | ||
884 | /* --- Deal with a multiply by zero --- */ | |
45c0fd36 | 885 | |
d03ab969 | 886 | if (bv == bvl) { |
c8a2f9ef | 887 | MPX_ZERO(dv, dvl); |
d03ab969 | 888 | return; |
889 | } | |
890 | ||
891 | /* --- Do the initial multiply and initialize the accumulator --- */ | |
892 | ||
893 | MPX_UMULN(dv, dvl, av, avl, *bv++); | |
894 | ||
895 | /* --- Do the remaining multiply/accumulates --- */ | |
896 | ||
c8a2f9ef | 897 | while (dv < dvl && bv < bvl) { |
d03ab969 | 898 | mpw m = *bv++; |
c8a2f9ef | 899 | mpw c = 0; |
d03ab969 | 900 | const mpw *avv = av; |
901 | mpw *dvv = ++dv; | |
902 | ||
903 | while (avv < avl) { | |
904 | mpd x; | |
905 | if (dvv >= dvl) | |
906 | goto next; | |
c8a2f9ef | 907 | x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c; |
908 | *dvv++ = MPW(x); | |
d03ab969 | 909 | c = x >> MPW_BITS; |
910 | } | |
c8a2f9ef | 911 | MPX_UADDN(dvv, dvl, c); |
d03ab969 | 912 | next:; |
913 | } | |
914 | } | |
915 | ||
dd517851 | 916 | /* --- @mpx_umuln@ --- * |
917 | * | |
918 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
919 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
920 | * @mpw m@ = multiplier | |
921 | * | |
922 | * Returns: --- | |
923 | * | |
924 | * Use: Multiplies a multiprecision integer by a single-word value. | |
925 | * The destination and source may be equal. The destination | |
926 | * is completely cleared after use. | |
927 | */ | |
928 | ||
929 | void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) | |
106b481c | 930 | { MPX_UMULN(dv, dvl, av, avl, m); } |
dd517851 | 931 | |
932 | /* --- @mpx_umlan@ --- * | |
933 | * | |
934 | * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit | |
935 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
936 | * @mpw m@ = multiplier | |
937 | * | |
938 | * Returns: --- | |
939 | * | |
940 | * Use: Multiplies a multiprecision integer by a single-word value | |
941 | * and adds the result to an accumulator. | |
942 | */ | |
943 | ||
944 | void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) | |
106b481c | 945 | { MPX_UMLAN(dv, dvl, av, avl, m); } |
dd517851 | 946 | |
c8a2f9ef | 947 | /* --- @mpx_usqr@ --- * |
948 | * | |
949 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
950 | * @const mpw *av, *avl@ = source vector base and limit | |
951 | * | |
952 | * Returns: --- | |
953 | * | |
954 | * Use: Performs unsigned integer squaring. The result vector must | |
955 | * not overlap the source vector in any way. | |
956 | */ | |
957 | ||
958 | void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) | |
959 | { | |
960 | MPX_ZERO(dv, dvl); | |
961 | ||
962 | /* --- Main loop --- */ | |
963 | ||
964 | while (av < avl) { | |
965 | const mpw *avv = av; | |
966 | mpw *dvv = dv; | |
967 | mpw a = *av; | |
968 | mpd c; | |
969 | ||
970 | /* --- Stop if I've run out of destination --- */ | |
971 | ||
972 | if (dvv >= dvl) | |
973 | break; | |
974 | ||
975 | /* --- Work out the square at this point in the proceedings --- */ | |
976 | ||
977 | { | |
c8a2f9ef | 978 | mpd x = (mpd)a * (mpd)a + *dvv; |
979 | *dvv++ = MPW(x); | |
980 | c = MPW(x >> MPW_BITS); | |
981 | } | |
982 | ||
983 | /* --- Now fix up the rest of the vector upwards --- */ | |
984 | ||
985 | avv++; | |
986 | while (dvv < dvl && avv < avl) { | |
c8a2f9ef | 987 | mpd x = (mpd)a * (mpd)*avv++; |
988 | mpd y = ((x << 1) & MPW_MAX) + c + *dvv; | |
989 | c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS); | |
990 | *dvv++ = MPW(y); | |
991 | } | |
992 | while (dvv < dvl && c) { | |
993 | mpd x = c + *dvv; | |
994 | *dvv++ = MPW(x); | |
995 | c = x >> MPW_BITS; | |
996 | } | |
997 | ||
998 | /* --- Get ready for the next round --- */ | |
999 | ||
1000 | av++; | |
1001 | dv += 2; | |
1002 | } | |
1003 | } | |
1004 | ||
/* --- @mpx_udiv@ --- *
 *
 * Arguments:   @mpw *qv, *qvl@ = quotient vector base and limit
 *              @mpw *rv, *rvl@ = dividend/remainder vector base and limit
 *              @const mpw *dv, *dvl@ = divisor vector base and limit
 *              @mpw *sv, *svl@ = scratch workspace
 *
 * Returns:     ---
 *
 * Use:         Performs unsigned integer division.  If the result overflows
 *              the quotient vector, high-order bits are discarded.  (Clearly
 *              the remainder vector can't overflow.)  The various vectors
 *              may not overlap in any way.  Yes, I know it's a bit odd
 *              requiring the dividend to be in the result position but it
 *              does make some sense really.  The remainder must have
 *              headroom for at least two extra words.  The scratch space
 *              must be at least one word larger than the divisor.
 *
 *              The dividend in @rv@ is overwritten with the remainder.  A
 *              zero divisor trips an assertion.
 */

void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl,
              const mpw *dv, const mpw *dvl,
              mpw *sv, mpw *svl)
{
  unsigned norm = 0;                    /* Left shift applied to normalize */
  size_t scale;                         /* Index of next quotient word */
  mpw d, dd;                            /* Top two normalized divisor words */

  /* --- Initialize the quotient --- */

  MPX_ZERO(qv, qvl);

  /* --- Perform some sanity checks --- */

  MPX_SHRINK(dv, dvl);
  assert(((void)"division by zero in mpx_udiv", dv < dvl));

  /* --- Normalize the divisor --- *
   *
   * The algorithm requires that the divisor be at least two digits long.
   * This is easy to fix.
   *
   * The loop finds how far the top divisor word can be shifted left while
   * still fitting in a word: it tries shift amounts @MPW_P2@,
   * @MPW_P2/2@, ..., 1 in turn, and takes each one that fits.  A
   * single-word divisor is shifted by a whole extra word so that it ends
   * up two words long.
   */

  {
    unsigned b;

    d = dvl[-1];
    for (b = MPW_P2; b; b >>= 1) {
      if (d <= (MPW_MAX >> b)) {
        d <<= b;
        norm += b;
      }
    }
    if (dv + 1 == dvl)
      norm += MPW_BITS;
  }

  /* --- Normalize the dividend/remainder to match --- *
   *
   * The dividend is shifted in place (hence the headroom requirement); the
   * shifted divisor is built in the scratch vector, and @dv@/@dvl@ are
   * redirected to it, since the caller's divisor is read-only.
   */

  if (norm) {
    mpx_lsl(rv, rvl, rv, rvl, norm);
    mpx_lsl(sv, svl, dv, dvl, norm);
    dv = sv;
    dvl = svl;
    MPX_SHRINK(dv, dvl);
  }

  MPX_SHRINK(rv, rvl);
  d = dvl[-1];
  dd = dvl[-2];

  /* --- Work out the relative scales --- */

  {
    size_t rvn = rvl - rv;
    size_t dvn = dvl - dv;

    /* --- If the divisor is clearly larger, notice this --- *
     *
     * The quotient is zero (already set), and the remainder is the
     * dividend, once the normalizing shift has been undone.
     */

    if (dvn > rvn) {
      mpx_lsr(rv, rvl, rv, rvl, norm);
      return;
    }

    scale = rvn - dvn;
  }

  /* --- Calculate the most significant quotient digit --- *
   *
   * Because the divisor has its top bit set, this can only happen once.  The
   * pointer arithmetic is a little contorted, to make sure that the
   * behaviour is defined.
   */

  if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
    mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
    if (qvl - qv > scale)
      qv[scale] = 1;
  }

  /* --- Now for the main loop --- *
   *
   * Each pass produces one quotient word, working downwards from the most
   * significant.  @rvv@ walks down the remainder one word per pass.
   */

  {
    mpw *rvv = rvl - 2;

    while (scale) {
      mpw q;
      mpd rh;

      /* --- Get an estimate for the next quotient digit --- *
       *
       * Take the top three words @r@, @rr@, @rrr@ of the current partial
       * remainder.  The first guess divides the top two by the top divisor
       * word, capped at @MPW_MAX@ if that would overflow a word.
       */

      mpw r = rvv[1];
      mpw rr = rvv[0];
      mpw rrr = *--rvv;

      scale--;
      rh = ((mpd)r << MPW_BITS) | rr;
      if (r == d)
        q = MPW_MAX;
      else
        q = MPW(rh / d);

      /* --- Refine the estimate --- *
       *
       * Compare @q@ times the top two divisor words (held as the pair
       * @yh@, @yl@) against the top three remainder words (@rh@, @rrr@),
       * and reduce @q@ until the product is no larger.
       */

      {
        mpd yh = (mpd)d * q;
        mpd yy = (mpd)dd * q;
        mpw yl;

        if (yy > MPW_MAX)
          yh += yy >> MPW_BITS;
        yl = MPW(yy);

        while (yh > rh || (yh == rh && yl > rrr)) {
          q--;
          yh -= d;
          if (yl < dd)                  /* Borrow from the high part */
            yh--;
          yl = MPW(yl - dd);
        }
      }

      /* --- Remove a chunk from the dividend --- */

      {
        mpw *svv;
        const mpw *dvv;
        mpw mc = 0, sc = 0;             /* Multiply carry, subtract borrow */

        /* --- Calculate the size of the chunk --- *
         *
         * This does the whole job of calculating @r >> scale - qd@.
         * The product @q d@ is formed a word at a time and subtracted from
         * the remainder on the fly, so no temporary is needed for it.
         */

        for (svv = rv + scale, dvv = dv;
             dvv < dvl && svv < rvl;
             svv++, dvv++) {
          mpd x = (mpd)*dvv * (mpd)q + mc;
          mc = x >> MPW_BITS;
          x = (mpd)*svv - MPW(x) - sc;
          *svv = MPW(x);
          if (x >> MPW_BITS)
            sc = 1;
          else
            sc = 0;
        }

        /* --- Deal with the leftover carry and borrow --- *
         *
         * Subtract them from the next remainder word, and then fill the
         * words above with the resulting sign (all-ones if the result went
         * negative, zero otherwise).
         */

        if (svv < rvl) {
          mpd x = (mpd)*svv - mc - sc;
          *svv++ = MPW(x);
          if (x >> MPW_BITS)
            sc = MPW_MAX;
          else
            sc = 0;
          while (svv < rvl)
            *svv++ = sc;
        }

        /* --- Fix if the quotient was too large --- *
         *
         * This doesn't seem to happen very often.  A set top bit in the
         * remainder means that the subtraction went negative: add the
         * divisor back once and lower the quotient digit to match.
         */

        if (rvl[-1] > MPW_MAX / 2) {
          mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
          q--;
        }
      }

      /* --- Done for another iteration --- *
       *
       * NOTE(review): the assignments to @r@ and @rr@ below have no effect:
       * both are local to this loop body and are reloaded from @rvv@ at the
       * top of the next pass.
       */

      if (qvl - qv > scale)
        qv[scale] = q;
      r = rr;
      rr = rrr;
    }
  }

  /* --- Now fiddle with unnormalizing and things --- */

  mpx_lsr(rv, rvl, rv, rvl, norm);
}
1206 | ||
698bd937 | 1207 | /* --- @mpx_udivn@ --- * |
1208 | * | |
1209 | * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap | |
1210 | * dividend) | |
1211 | * @const mpw *rv, *rvl@ = dividend | |
1212 | * @mpw d@ = single-precision divisor | |
1213 | * | |
1214 | * Returns: Remainder after divison. | |
1215 | * | |
1216 | * Use: Performs a single-precision division operation. | |
1217 | */ | |
1218 | ||
1219 | mpw mpx_udivn(mpw *qv, mpw *qvl, const mpw *rv, const mpw *rvl, mpw d) | |
1220 | { | |
1221 | size_t i; | |
1222 | size_t ql = qvl - qv; | |
1223 | mpd r = 0; | |
1224 | ||
1225 | i = rvl - rv; | |
1226 | while (i > 0) { | |
1227 | i--; | |
1228 | r = (r << MPW_BITS) | rv[i]; | |
1229 | if (i < ql) | |
1230 | qv[i] = r / d; | |
1231 | r %= d; | |
1232 | } | |
1233 | return (MPW(r)); | |
1234 | } | |
1235 | ||
42684bdb | 1236 | /*----- Test rig ----------------------------------------------------------*/ |
1237 | ||
1238 | #ifdef TEST_RIG | |
1239 | ||
1240 | #include <mLib/alloc.h> | |
1241 | #include <mLib/dstr.h> | |
1242 | #include <mLib/quis.h> | |
1243 | #include <mLib/testrig.h> | |
1244 | ||
1245 | #include "mpscan.h" | |
1246 | ||
/* --- @ALLOC@ --- *
 *
 * Allocate a vector of @sz@ words with @xmalloc@, storing its base in @v@
 * and its limit in @vl@.  The size expression is evaluated exactly once.
 */

#define ALLOC(v, vl, sz) do {                                           \
  size_t _n = (sz);                                                     \
  mpw *_base = xmalloc(MPWS(_n));                                       \
  (v) = _base;                                                          \
  (vl) = _base + _n;                                                    \
} while (0)
1254 | ||
/* --- @LOAD@ --- *
 *
 * Allocate a vector just big enough for the octets held in the dynamic
 * string @d@, and fill it from them using @mpx_loadb@.  The base and limit
 * of the new vector are stored in @v@ and @vl@.
 */

#define LOAD(v, vl, d) do {                                             \
  const dstr *_src = (d);                                               \
  mpw *_lo, *_hi;                                                       \
  ALLOC(_lo, _hi, MPW_RQ(_src->len));                                   \
  mpx_loadb(_lo, _hi, _src->buf, _src->len);                            \
  (v) = _lo;                                                            \
  (vl) = _hi;                                                           \
} while (0)
1263 | ||
/* Larger of two values.  Each argument may be evaluated twice, so don't
 * pass expressions with side effects.
 */
#define MAX(x, y) ((y) < (x) ? (x) : (y))
45c0fd36 | 1265 | |
42684bdb | 1266 | static void dumpbits(const char *msg, const void *pp, size_t sz) |
1267 | { | |
1268 | const octet *p = pp; | |
1269 | fputs(msg, stderr); | |
1270 | for (; sz; sz--) | |
1271 | fprintf(stderr, " %02x", *p++); | |
1272 | fputc('\n', stderr); | |
1273 | } | |
1274 | ||
1275 | static void dumpmp(const char *msg, const mpw *v, const mpw *vl) | |
1276 | { | |
1277 | fputs(msg, stderr); | |
1278 | MPX_SHRINK(v, vl); | |
1279 | while (v < vl) | |
1280 | fprintf(stderr, " %08lx", (unsigned long)*--vl); | |
1281 | fputc('\n', stderr); | |
1282 | } | |
1283 | ||
1284 | static int chkscan(const mpw *v, const mpw *vl, | |
1285 | const void *pp, size_t sz, int step) | |
1286 | { | |
1287 | mpscan mps; | |
1288 | const octet *p = pp; | |
1289 | unsigned bit = 0; | |
1290 | int ok = 1; | |
1291 | ||
1292 | mpscan_initx(&mps, v, vl); | |
1293 | while (sz) { | |
1294 | unsigned x = *p; | |
1295 | int i; | |
1296 | p += step; | |
1297 | for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) { | |
1298 | if (MPSCAN_BIT(&mps) != (x & 1)) { | |
1299 | fprintf(stderr, | |
1300 | "\n*** error, step %i, bit %u, expected %u, found %u\n", | |
1301 | step, bit, x & 1, MPSCAN_BIT(&mps)); | |
1302 | ok = 0; | |
1303 | } | |
1304 | x >>= 1; | |
1305 | bit++; | |
1306 | } | |
1307 | sz--; | |
1308 | } | |
1309 | ||
1310 | return (ok); | |
1311 | } | |
1312 | ||
1313 | static int loadstore(dstr *v) | |
1314 | { | |
1315 | dstr d = DSTR_INIT; | |
1316 | size_t sz = MPW_RQ(v->len) * 2, diff; | |
1317 | mpw *m, *ml; | |
1318 | int ok = 1; | |
1319 | ||
1320 | dstr_ensure(&d, v->len); | |
1321 | m = xmalloc(MPWS(sz)); | |
1322 | ||
1323 | for (diff = 0; diff < sz; diff += 5) { | |
1324 | size_t oct; | |
1325 | ||
1326 | ml = m + sz - diff; | |
1327 | ||
1328 | mpx_loadl(m, ml, v->buf, v->len); | |
1329 | if (!chkscan(m, ml, v->buf, v->len, +1)) | |
1330 | ok = 0; | |
1331 | MPX_OCTETS(oct, m, ml); | |
1332 | mpx_storel(m, ml, d.buf, d.sz); | |
1333 | if (memcmp(d.buf, v->buf, oct) != 0) { | |
1334 | dumpbits("\n*** storel failed", d.buf, d.sz); | |
1335 | ok = 0; | |
1336 | } | |
1337 | ||
1338 | mpx_loadb(m, ml, v->buf, v->len); | |
1339 | if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1)) | |
1340 | ok = 0; | |
1341 | MPX_OCTETS(oct, m, ml); | |
1342 | mpx_storeb(m, ml, d.buf, d.sz); | |
1343 | if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) { | |
1344 | dumpbits("\n*** storeb failed", d.buf, d.sz); | |
1345 | ok = 0; | |
1346 | } | |
1347 | } | |
1348 | ||
1349 | if (!ok) | |
1350 | dumpbits("input data", v->buf, v->len); | |
1351 | ||
12ed8a1f | 1352 | xfree(m); |
42684bdb | 1353 | dstr_destroy(&d); |
1354 | return (ok); | |
1355 | } | |
1356 | ||
f09e814a | 1357 | static int twocl(dstr *v) |
1358 | { | |
1359 | dstr d = DSTR_INIT; | |
1360 | mpw *m, *ml; | |
1361 | size_t sz; | |
1362 | int ok = 1; | |
1363 | ||
1364 | sz = v[0].len; if (v[1].len > sz) sz = v[1].len; | |
1365 | dstr_ensure(&d, sz); | |
1366 | ||
1367 | sz = MPW_RQ(sz); | |
1368 | m = xmalloc(MPWS(sz)); | |
1369 | ml = m + sz; | |
1370 | ||
1371 | mpx_loadl(m, ml, v[0].buf, v[0].len); | |
1372 | mpx_storel2cn(m, ml, d.buf, v[1].len); | |
1373 | if (memcmp(d.buf, v[1].buf, v[1].len)) { | |
1374 | dumpbits("\n*** storel2cn failed", d.buf, v[1].len); | |
1375 | ok = 0; | |
1376 | } | |
1377 | ||
1378 | mpx_loadl2cn(m, ml, v[1].buf, v[1].len); | |
1379 | mpx_storel(m, ml, d.buf, v[0].len); | |
1380 | if (memcmp(d.buf, v[0].buf, v[0].len)) { | |
1381 | dumpbits("\n*** loadl2cn failed", d.buf, v[0].len); | |
1382 | ok = 0; | |
1383 | } | |
1384 | ||
1385 | if (!ok) { | |
1386 | dumpbits("pos", v[0].buf, v[0].len); | |
1387 | dumpbits("neg", v[1].buf, v[1].len); | |
1388 | } | |
1389 | ||
12ed8a1f | 1390 | xfree(m); |
f09e814a | 1391 | dstr_destroy(&d); |
1392 | ||
1393 | return (ok); | |
1394 | } | |
1395 | ||
1396 | static int twocb(dstr *v) | |
1397 | { | |
1398 | dstr d = DSTR_INIT; | |
1399 | mpw *m, *ml; | |
1400 | size_t sz; | |
1401 | int ok = 1; | |
1402 | ||
1403 | sz = v[0].len; if (v[1].len > sz) sz = v[1].len; | |
1404 | dstr_ensure(&d, sz); | |
1405 | ||
1406 | sz = MPW_RQ(sz); | |
1407 | m = xmalloc(MPWS(sz)); | |
1408 | ml = m + sz; | |
1409 | ||
1410 | mpx_loadb(m, ml, v[0].buf, v[0].len); | |
1411 | mpx_storeb2cn(m, ml, d.buf, v[1].len); | |
1412 | if (memcmp(d.buf, v[1].buf, v[1].len)) { | |
1413 | dumpbits("\n*** storeb2cn failed", d.buf, v[1].len); | |
1414 | ok = 0; | |
1415 | } | |
1416 | ||
1417 | mpx_loadb2cn(m, ml, v[1].buf, v[1].len); | |
1418 | mpx_storeb(m, ml, d.buf, v[0].len); | |
1419 | if (memcmp(d.buf, v[0].buf, v[0].len)) { | |
1420 | dumpbits("\n*** loadb2cn failed", d.buf, v[0].len); | |
1421 | ok = 0; | |
1422 | } | |
1423 | ||
1424 | if (!ok) { | |
1425 | dumpbits("pos", v[0].buf, v[0].len); | |
1426 | dumpbits("neg", v[1].buf, v[1].len); | |
1427 | } | |
1428 | ||
12ed8a1f | 1429 | xfree(m); |
f09e814a | 1430 | dstr_destroy(&d); |
1431 | ||
1432 | return (ok); | |
1433 | } | |
1434 | ||
42684bdb | 1435 | static int lsl(dstr *v) |
1436 | { | |
1437 | mpw *a, *al; | |
1438 | int n = *(int *)v[1].buf; | |
1439 | mpw *c, *cl; | |
1440 | mpw *d, *dl; | |
1441 | int ok = 1; | |
1442 | ||
1443 | LOAD(a, al, &v[0]); | |
1444 | LOAD(c, cl, &v[2]); | |
1445 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); | |
1446 | ||
1447 | mpx_lsl(d, dl, a, al, n); | |
1a05a8ef | 1448 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1449 | fprintf(stderr, "\n*** lsl(%i) failed\n", n); |
45c0fd36 | 1450 | dumpmp(" a", a, al); |
42684bdb | 1451 | dumpmp("expected", c, cl); |
1452 | dumpmp(" result", d, dl); | |
1453 | ok = 0; | |
1454 | } | |
1455 | ||
12ed8a1f | 1456 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1457 | return (ok); |
1458 | } | |
1459 | ||
81578196 | 1460 | static int lslc(dstr *v) |
1461 | { | |
1462 | mpw *a, *al; | |
1463 | int n = *(int *)v[1].buf; | |
1464 | mpw *c, *cl; | |
1465 | mpw *d, *dl; | |
1466 | int ok = 1; | |
1467 | ||
1468 | LOAD(a, al, &v[0]); | |
1469 | LOAD(c, cl, &v[2]); | |
1470 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); | |
1471 | ||
1472 | mpx_lslc(d, dl, a, al, n); | |
1473 | if (!mpx_ueq(d, dl, c, cl)) { | |
1474 | fprintf(stderr, "\n*** lslc(%i) failed\n", n); | |
45c0fd36 | 1475 | dumpmp(" a", a, al); |
81578196 | 1476 | dumpmp("expected", c, cl); |
1477 | dumpmp(" result", d, dl); | |
1478 | ok = 0; | |
1479 | } | |
1480 | ||
12ed8a1f | 1481 | xfree(a); xfree(c); xfree(d); |
81578196 | 1482 | return (ok); |
1483 | } | |
1484 | ||
42684bdb | 1485 | static int lsr(dstr *v) |
1486 | { | |
1487 | mpw *a, *al; | |
1488 | int n = *(int *)v[1].buf; | |
1489 | mpw *c, *cl; | |
1490 | mpw *d, *dl; | |
1491 | int ok = 1; | |
1492 | ||
1493 | LOAD(a, al, &v[0]); | |
1494 | LOAD(c, cl, &v[2]); | |
1495 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1); | |
1496 | ||
1497 | mpx_lsr(d, dl, a, al, n); | |
1a05a8ef | 1498 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1499 | fprintf(stderr, "\n*** lsr(%i) failed\n", n); |
45c0fd36 | 1500 | dumpmp(" a", a, al); |
42684bdb | 1501 | dumpmp("expected", c, cl); |
1502 | dumpmp(" result", d, dl); | |
1503 | ok = 0; | |
1504 | } | |
1505 | ||
12ed8a1f | 1506 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1507 | return (ok); |
1508 | } | |
1509 | ||
1510 | static int uadd(dstr *v) | |
1511 | { | |
1512 | mpw *a, *al; | |
1513 | mpw *b, *bl; | |
1514 | mpw *c, *cl; | |
1515 | mpw *d, *dl; | |
1516 | int ok = 1; | |
1517 | ||
1518 | LOAD(a, al, &v[0]); | |
1519 | LOAD(b, bl, &v[1]); | |
1520 | LOAD(c, cl, &v[2]); | |
1521 | ALLOC(d, dl, MAX(al - a, bl - b) + 1); | |
1522 | ||
1523 | mpx_uadd(d, dl, a, al, b, bl); | |
1a05a8ef | 1524 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1525 | fprintf(stderr, "\n*** uadd failed\n"); |
45c0fd36 MW |
1526 | dumpmp(" a", a, al); |
1527 | dumpmp(" b", b, bl); | |
42684bdb | 1528 | dumpmp("expected", c, cl); |
1529 | dumpmp(" result", d, dl); | |
1530 | ok = 0; | |
1531 | } | |
1532 | ||
12ed8a1f | 1533 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1534 | return (ok); |
1535 | } | |
1536 | ||
1537 | static int usub(dstr *v) | |
1538 | { | |
1539 | mpw *a, *al; | |
1540 | mpw *b, *bl; | |
1541 | mpw *c, *cl; | |
1542 | mpw *d, *dl; | |
1543 | int ok = 1; | |
1544 | ||
1545 | LOAD(a, al, &v[0]); | |
1546 | LOAD(b, bl, &v[1]); | |
1547 | LOAD(c, cl, &v[2]); | |
1548 | ALLOC(d, dl, al - a); | |
1549 | ||
1550 | mpx_usub(d, dl, a, al, b, bl); | |
1a05a8ef | 1551 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1552 | fprintf(stderr, "\n*** usub failed\n"); |
45c0fd36 MW |
1553 | dumpmp(" a", a, al); |
1554 | dumpmp(" b", b, bl); | |
42684bdb | 1555 | dumpmp("expected", c, cl); |
1556 | dumpmp(" result", d, dl); | |
1557 | ok = 0; | |
1558 | } | |
1559 | ||
12ed8a1f | 1560 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1561 | return (ok); |
1562 | } | |
1563 | ||
1564 | static int umul(dstr *v) | |
1565 | { | |
1566 | mpw *a, *al; | |
1567 | mpw *b, *bl; | |
1568 | mpw *c, *cl; | |
1569 | mpw *d, *dl; | |
1570 | int ok = 1; | |
1571 | ||
1572 | LOAD(a, al, &v[0]); | |
1573 | LOAD(b, bl, &v[1]); | |
1574 | LOAD(c, cl, &v[2]); | |
1575 | ALLOC(d, dl, (al - a) + (bl - b)); | |
1576 | ||
1577 | mpx_umul(d, dl, a, al, b, bl); | |
1a05a8ef | 1578 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1579 | fprintf(stderr, "\n*** umul failed\n"); |
45c0fd36 MW |
1580 | dumpmp(" a", a, al); |
1581 | dumpmp(" b", b, bl); | |
42684bdb | 1582 | dumpmp("expected", c, cl); |
1583 | dumpmp(" result", d, dl); | |
1584 | ok = 0; | |
1585 | } | |
1586 | ||
12ed8a1f | 1587 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1588 | return (ok); |
1589 | } | |
1590 | ||
1591 | static int usqr(dstr *v) | |
1592 | { | |
1593 | mpw *a, *al; | |
1594 | mpw *c, *cl; | |
1595 | mpw *d, *dl; | |
1596 | int ok = 1; | |
1597 | ||
1598 | LOAD(a, al, &v[0]); | |
1599 | LOAD(c, cl, &v[1]); | |
1600 | ALLOC(d, dl, 2 * (al - a)); | |
1601 | ||
1602 | mpx_usqr(d, dl, a, al); | |
1a05a8ef | 1603 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1604 | fprintf(stderr, "\n*** usqr failed\n"); |
45c0fd36 | 1605 | dumpmp(" a", a, al); |
42684bdb | 1606 | dumpmp("expected", c, cl); |
1607 | dumpmp(" result", d, dl); | |
1608 | ok = 0; | |
1609 | } | |
1610 | ||
12ed8a1f | 1611 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1612 | return (ok); |
1613 | } | |
1614 | ||
1615 | static int udiv(dstr *v) | |
1616 | { | |
1617 | mpw *a, *al; | |
1618 | mpw *b, *bl; | |
1619 | mpw *q, *ql; | |
1620 | mpw *r, *rl; | |
1621 | mpw *qq, *qql; | |
1622 | mpw *s, *sl; | |
1623 | int ok = 1; | |
1624 | ||
1625 | ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len); | |
1626 | LOAD(b, bl, &v[1]); | |
1627 | LOAD(q, ql, &v[2]); | |
1628 | LOAD(r, rl, &v[3]); | |
1629 | ALLOC(qq, qql, al - a); | |
1630 | ALLOC(s, sl, (bl - b) + 1); | |
1631 | ||
1632 | mpx_udiv(qq, qql, a, al, b, bl, s, sl); | |
1a05a8ef | 1633 | if (!mpx_ueq(qq, qql, q, ql) || |
1634 | !mpx_ueq(a, al, r, rl)) { | |
42684bdb | 1635 | fprintf(stderr, "\n*** udiv failed\n"); |
1636 | dumpmp(" divisor", b, bl); | |
1637 | dumpmp("expect r", r, rl); | |
1638 | dumpmp("result r", a, al); | |
1639 | dumpmp("expect q", q, ql); | |
1640 | dumpmp("result q", qq, qql); | |
1641 | ok = 0; | |
1642 | } | |
1643 | ||
12ed8a1f | 1644 | xfree(a); xfree(b); xfree(r); xfree(q); xfree(s); xfree(qq); |
42684bdb | 1645 | return (ok); |
1646 | } | |
1647 | ||
1648 | static test_chunk defs[] = { | |
1649 | { "load-store", loadstore, { &type_hex, 0 } }, | |
f09e814a | 1650 | { "2cl", twocl, { &type_hex, &type_hex, } }, |
1651 | { "2cb", twocb, { &type_hex, &type_hex, } }, | |
42684bdb | 1652 | { "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } }, |
81578196 | 1653 | { "lslc", lslc, { &type_hex, &type_int, &type_hex, 0 } }, |
42684bdb | 1654 | { "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } }, |
1655 | { "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1656 | { "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1657 | { "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1658 | { "usqr", usqr, { &type_hex, &type_hex, 0 } }, | |
1659 | { "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } }, | |
1660 | { 0, 0, { 0 } } | |
1661 | }; | |
1662 | ||
1663 | int main(int argc, char *argv[]) | |
1664 | { | |
0f00dc4c | 1665 | test_run(argc, argv, defs, SRCDIR"/t/mpx"); |
42684bdb | 1666 | return (0); |
1667 | } | |
1668 | ||
42684bdb | 1669 | #endif |
1670 | ||
d03ab969 | 1671 | /*----- That's all, folks -------------------------------------------------*/ |