Commit | Line | Data |
---|---|---|
d03ab969 | 1 | /* -*-c-*- |
2 | * | |
12ed8a1f | 3 | * $Id$ |
d03ab969 | 4 | * |
5 | * Low-level multiprecision arithmetic | |
6 | * | |
7 | * (c) 1999 Straylight/Edgeware | |
8 | */ | |
9 | ||
45c0fd36 | 10 | /*----- Licensing notice --------------------------------------------------* |
d03ab969 | 11 | * |
12 | * This file is part of Catacomb. | |
13 | * | |
14 | * Catacomb is free software; you can redistribute it and/or modify | |
15 | * it under the terms of the GNU Library General Public License as | |
16 | * published by the Free Software Foundation; either version 2 of the | |
17 | * License, or (at your option) any later version. | |
45c0fd36 | 18 | * |
d03ab969 | 19 | * Catacomb is distributed in the hope that it will be useful, |
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | * GNU Library General Public License for more details. | |
45c0fd36 | 23 | * |
d03ab969 | 24 | * You should have received a copy of the GNU Library General Public |
25 | * License along with Catacomb; if not, write to the Free | |
26 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, | |
27 | * MA 02111-1307, USA. | |
28 | */ | |
29 | ||
d03ab969 | 30 | /*----- Header files ------------------------------------------------------*/ |
31 | ||
c8a2f9ef | 32 | #include <assert.h> |
d03ab969 | 33 | #include <stdio.h> |
34 | #include <stdlib.h> | |
35 | #include <string.h> | |
36 | ||
37 | #include <mLib/bits.h> | |
38 | ||
39 | #include "mptypes.h" | |
40 | #include "mpx.h" | |
75263f25 | 41 | #include "bitops.h" |
d03ab969 | 42 | |
43 | /*----- Loading and storing -----------------------------------------------*/ | |
44 | ||
45 | /* --- @mpx_storel@ --- * | |
46 | * | |
47 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
c8a2f9ef | 48 | * @void *pp@ = pointer to octet array |
d03ab969 | 49 | * @size_t sz@ = size of octet array |
50 | * | |
51 | * Returns: --- | |
52 | * | |
53 | * Use: Stores an MP in an octet array, least significant octet | |
54 | * first. High-end octets are silently discarded if there | |
55 | * isn't enough space for them. | |
56 | */ | |
57 | ||
c8a2f9ef | 58 | void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz) |
d03ab969 | 59 | { |
60 | mpw n, w = 0; | |
c8a2f9ef | 61 | octet *p = pp, *q = p + sz; |
d03ab969 | 62 | unsigned bits = 0; |
63 | ||
64 | while (p < q) { | |
65 | if (bits < 8) { | |
66 | if (v >= vl) { | |
67 | *p++ = U8(w); | |
68 | break; | |
69 | } | |
70 | n = *v++; | |
71 | *p++ = U8(w | n << bits); | |
72 | w = n >> (8 - bits); | |
73 | bits += MPW_BITS - 8; | |
74 | } else { | |
75 | *p++ = U8(w); | |
76 | w >>= 8; | |
77 | bits -= 8; | |
78 | } | |
79 | } | |
80 | memset(p, 0, q - p); | |
81 | } | |
82 | ||
83 | /* --- @mpx_loadl@ --- * | |
84 | * | |
85 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
c8a2f9ef | 86 | * @const void *pp@ = pointer to octet array |
d03ab969 | 87 | * @size_t sz@ = size of octet array |
88 | * | |
89 | * Returns: --- | |
90 | * | |
91 | * Use: Loads an MP in an octet array, least significant octet | |
92 | * first. High-end octets are ignored if there isn't enough | |
93 | * space for them. | |
94 | */ | |
95 | ||
c8a2f9ef | 96 | void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz) |
d03ab969 | 97 | { |
98 | unsigned n; | |
c8a2f9ef | 99 | mpw w = 0; |
100 | const octet *p = pp, *q = p + sz; | |
d03ab969 | 101 | unsigned bits = 0; |
102 | ||
103 | if (v >= vl) | |
104 | return; | |
105 | while (p < q) { | |
106 | n = U8(*p++); | |
107 | w |= n << bits; | |
108 | bits += 8; | |
109 | if (bits >= MPW_BITS) { | |
110 | *v++ = MPW(w); | |
111 | w = n >> (MPW_BITS - bits + 8); | |
112 | bits -= MPW_BITS; | |
113 | if (v >= vl) | |
114 | return; | |
115 | } | |
116 | } | |
117 | *v++ = w; | |
118 | MPX_ZERO(v, vl); | |
119 | } | |
120 | ||
121 | /* --- @mpx_storeb@ --- * | |
122 | * | |
123 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
c8a2f9ef | 124 | * @void *pp@ = pointer to octet array |
d03ab969 | 125 | * @size_t sz@ = size of octet array |
126 | * | |
127 | * Returns: --- | |
128 | * | |
129 | * Use: Stores an MP in an octet array, most significant octet | |
130 | * first. High-end octets are silently discarded if there | |
131 | * isn't enough space for them. | |
132 | */ | |
133 | ||
c8a2f9ef | 134 | void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz) |
d03ab969 | 135 | { |
136 | mpw n, w = 0; | |
c8a2f9ef | 137 | octet *p = pp, *q = p + sz; |
d03ab969 | 138 | unsigned bits = 0; |
139 | ||
140 | while (q > p) { | |
141 | if (bits < 8) { | |
142 | if (v >= vl) { | |
143 | *--q = U8(w); | |
144 | break; | |
145 | } | |
146 | n = *v++; | |
147 | *--q = U8(w | n << bits); | |
148 | w = n >> (8 - bits); | |
149 | bits += MPW_BITS - 8; | |
150 | } else { | |
151 | *--q = U8(w); | |
152 | w >>= 8; | |
153 | bits -= 8; | |
154 | } | |
155 | } | |
156 | memset(p, 0, q - p); | |
157 | } | |
158 | ||
159 | /* --- @mpx_loadb@ --- * | |
160 | * | |
161 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
c8a2f9ef | 162 | * @const void *pp@ = pointer to octet array |
d03ab969 | 163 | * @size_t sz@ = size of octet array |
164 | * | |
165 | * Returns: --- | |
166 | * | |
167 | * Use: Loads an MP in an octet array, most significant octet | |
168 | * first. High-end octets are ignored if there isn't enough | |
169 | * space for them. | |
170 | */ | |
171 | ||
c8a2f9ef | 172 | void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz) |
d03ab969 | 173 | { |
174 | unsigned n; | |
c8a2f9ef | 175 | mpw w = 0; |
176 | const octet *p = pp, *q = p + sz; | |
d03ab969 | 177 | unsigned bits = 0; |
178 | ||
179 | if (v >= vl) | |
180 | return; | |
181 | while (q > p) { | |
182 | n = U8(*--q); | |
183 | w |= n << bits; | |
184 | bits += 8; | |
185 | if (bits >= MPW_BITS) { | |
186 | *v++ = MPW(w); | |
187 | w = n >> (MPW_BITS - bits + 8); | |
188 | bits -= MPW_BITS; | |
189 | if (v >= vl) | |
190 | return; | |
191 | } | |
192 | } | |
193 | *v++ = w; | |
194 | MPX_ZERO(v, vl); | |
195 | } | |
196 | ||
f09e814a | 197 | /* --- @mpx_storel2cn@ --- * |
198 | * | |
199 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
200 | * @void *pp@ = pointer to octet array | |
201 | * @size_t sz@ = size of octet array | |
202 | * | |
203 | * Returns: --- | |
204 | * | |
205 | * Use: Stores a negative MP in an octet array, least significant | |
206 | * octet first, as two's complement. High-end octets are | |
207 | * silently discarded if there isn't enough space for them. | |
208 | * This obviously makes the output bad. | |
209 | */ | |
210 | ||
211 | void mpx_storel2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) | |
212 | { | |
213 | unsigned c = 1; | |
214 | unsigned b = 0; | |
215 | mpw n, w = 0; | |
216 | octet *p = pp, *q = p + sz; | |
217 | unsigned bits = 0; | |
218 | ||
219 | while (p < q) { | |
220 | if (bits < 8) { | |
221 | if (v >= vl) { | |
222 | b = w; | |
223 | break; | |
224 | } | |
225 | n = *v++; | |
226 | b = w | n << bits; | |
227 | w = n >> (8 - bits); | |
228 | bits += MPW_BITS - 8; | |
229 | } else { | |
230 | b = w; | |
231 | w >>= 8; | |
232 | bits -= 8; | |
233 | } | |
234 | b = U8(~b + c); | |
2bd53494 | 235 | c = c && !b; |
f09e814a | 236 | *p++ = b; |
237 | } | |
238 | while (p < q) { | |
239 | b = U8(~b + c); | |
2bd53494 | 240 | c = c && !b; |
f09e814a | 241 | *p++ = b; |
242 | b = 0; | |
243 | } | |
244 | } | |
245 | ||
246 | /* --- @mpx_loadl2cn@ --- * | |
247 | * | |
248 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
249 | * @const void *pp@ = pointer to octet array | |
250 | * @size_t sz@ = size of octet array | |
251 | * | |
252 | * Returns: --- | |
253 | * | |
254 | * Use: Loads a negative MP in an octet array, least significant | |
255 | * octet first, as two's complement. High-end octets are | |
256 | * ignored if there isn't enough space for them. This probably | |
257 | * means you made the wrong choice coming here. | |
258 | */ | |
259 | ||
260 | void mpx_loadl2cn(mpw *v, mpw *vl, const void *pp, size_t sz) | |
261 | { | |
262 | unsigned n; | |
263 | unsigned c = 1; | |
264 | mpw w = 0; | |
265 | const octet *p = pp, *q = p + sz; | |
266 | unsigned bits = 0; | |
267 | ||
268 | if (v >= vl) | |
269 | return; | |
270 | while (p < q) { | |
271 | n = U8(~(*p++) + c); | |
2bd53494 | 272 | c = c && !n; |
f09e814a | 273 | w |= n << bits; |
274 | bits += 8; | |
275 | if (bits >= MPW_BITS) { | |
276 | *v++ = MPW(w); | |
277 | w = n >> (MPW_BITS - bits + 8); | |
278 | bits -= MPW_BITS; | |
279 | if (v >= vl) | |
280 | return; | |
281 | } | |
282 | } | |
283 | *v++ = w; | |
284 | MPX_ZERO(v, vl); | |
285 | } | |
286 | ||
287 | /* --- @mpx_storeb2cn@ --- * | |
288 | * | |
289 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector | |
290 | * @void *pp@ = pointer to octet array | |
291 | * @size_t sz@ = size of octet array | |
292 | * | |
293 | * Returns: --- | |
294 | * | |
295 | * Use: Stores a negative MP in an octet array, most significant | |
296 | * octet first, as two's complement. High-end octets are | |
297 | * silently discarded if there isn't enough space for them, | |
298 | * which probably isn't what you meant. | |
299 | */ | |
300 | ||
301 | void mpx_storeb2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) | |
302 | { | |
303 | mpw n, w = 0; | |
304 | unsigned b = 0; | |
305 | unsigned c = 1; | |
306 | octet *p = pp, *q = p + sz; | |
307 | unsigned bits = 0; | |
308 | ||
309 | while (q > p) { | |
310 | if (bits < 8) { | |
311 | if (v >= vl) { | |
312 | b = w; | |
313 | break; | |
314 | } | |
315 | n = *v++; | |
316 | b = w | n << bits; | |
317 | w = n >> (8 - bits); | |
318 | bits += MPW_BITS - 8; | |
319 | } else { | |
320 | b = w; | |
321 | w >>= 8; | |
322 | bits -= 8; | |
323 | } | |
324 | b = U8(~b + c); | |
2bd53494 | 325 | c = c && !b; |
f09e814a | 326 | *--q = b; |
327 | } | |
328 | while (q > p) { | |
329 | b = ~b + c; | |
2bd53494 | 330 | c = c && !(b & 0xff); |
f09e814a | 331 | *--q = b; |
332 | b = 0; | |
333 | } | |
334 | } | |
335 | ||
336 | /* --- @mpx_loadb2cn@ --- * | |
337 | * | |
338 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector | |
339 | * @const void *pp@ = pointer to octet array | |
340 | * @size_t sz@ = size of octet array | |
341 | * | |
342 | * Returns: --- | |
343 | * | |
344 | * Use: Loads a negative MP in an octet array, most significant octet | |
345 | * first as two's complement. High-end octets are ignored if | |
346 | * there isn't enough space for them. This probably means you | |
347 | * chose this function wrongly. | |
348 | */ | |
349 | ||
350 | void mpx_loadb2cn(mpw *v, mpw *vl, const void *pp, size_t sz) | |
351 | { | |
352 | unsigned n; | |
353 | unsigned c = 1; | |
354 | mpw w = 0; | |
355 | const octet *p = pp, *q = p + sz; | |
356 | unsigned bits = 0; | |
357 | ||
358 | if (v >= vl) | |
359 | return; | |
360 | while (q > p) { | |
361 | n = U8(~(*--q) + c); | |
2bd53494 | 362 | c = c && !n; |
f09e814a | 363 | w |= n << bits; |
364 | bits += 8; | |
365 | if (bits >= MPW_BITS) { | |
366 | *v++ = MPW(w); | |
367 | w = n >> (MPW_BITS - bits + 8); | |
368 | bits -= MPW_BITS; | |
369 | if (v >= vl) | |
370 | return; | |
371 | } | |
372 | } | |
373 | *v++ = w; | |
374 | MPX_ZERO(v, vl); | |
375 | } | |
376 | ||
d03ab969 | 377 | /*----- Logical shifting --------------------------------------------------*/ |
378 | ||
379 | /* --- @mpx_lsl@ --- * | |
380 | * | |
381 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
382 | * @const mpw *av, *avl@ = source vector base and limit | |
383 | * @size_t n@ = number of bit positions to shift by | |
384 | * | |
385 | * Returns: --- | |
386 | * | |
387 | * Use: Performs a logical shift left operation on an integer. | |
388 | */ | |
389 | ||
390 | void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n) | |
391 | { | |
392 | size_t nw; | |
393 | unsigned nb; | |
394 | ||
395 | /* --- Trivial special case --- */ | |
396 | ||
397 | if (n == 0) | |
398 | MPX_COPY(dv, dvl, av, avl); | |
399 | ||
400 | /* --- Single bit shifting --- */ | |
401 | ||
402 | else if (n == 1) { | |
403 | mpw w = 0; | |
404 | while (av < avl) { | |
405 | mpw t; | |
406 | if (dv >= dvl) | |
407 | goto done; | |
408 | t = *av++; | |
409 | *dv++ = MPW((t << 1) | w); | |
410 | w = t >> (MPW_BITS - 1); | |
411 | } | |
412 | if (dv >= dvl) | |
413 | goto done; | |
414 | *dv++ = MPW(w); | |
415 | MPX_ZERO(dv, dvl); | |
c8a2f9ef | 416 | goto done; |
d03ab969 | 417 | } |
418 | ||
419 | /* --- Break out word and bit shifts for more sophisticated work --- */ | |
45c0fd36 | 420 | |
d03ab969 | 421 | nw = n / MPW_BITS; |
422 | nb = n % MPW_BITS; | |
423 | ||
424 | /* --- Handle a shift by a multiple of the word size --- */ | |
425 | ||
426 | if (nb == 0) { | |
4f29a732 | 427 | if (nw >= dvl - dv) |
428 | MPX_ZERO(dv, dvl); | |
429 | else { | |
430 | MPX_COPY(dv + nw, dvl, av, avl); | |
431 | memset(dv, 0, MPWS(nw)); | |
432 | } | |
d03ab969 | 433 | } |
434 | ||
c8a2f9ef | 435 | /* --- And finally the difficult case --- * |
436 | * | |
437 | * This is a little convoluted, because I have to start from the end and | |
438 | * work backwards to avoid overwriting the source, if they're both the same | |
439 | * block of memory. | |
440 | */ | |
d03ab969 | 441 | |
442 | else { | |
443 | mpw w; | |
444 | size_t nr = MPW_BITS - nb; | |
c8a2f9ef | 445 | size_t dvn = dvl - dv; |
446 | size_t avn = avl - av; | |
d03ab969 | 447 | |
c8a2f9ef | 448 | if (dvn <= nw) { |
d03ab969 | 449 | MPX_ZERO(dv, dvl); |
450 | goto done; | |
451 | } | |
d03ab969 | 452 | |
c8a2f9ef | 453 | if (dvn > avn + nw) { |
454 | size_t off = avn + nw + 1; | |
455 | MPX_ZERO(dv + off, dvl); | |
456 | dvl = dv + off; | |
457 | w = 0; | |
458 | } else { | |
459 | avl = av + dvn - nw; | |
460 | w = *--avl << nb; | |
d03ab969 | 461 | } |
462 | ||
c8a2f9ef | 463 | while (avl > av) { |
464 | mpw t = *--avl; | |
c29970a7 | 465 | *--dvl = MPW((t >> nr) | w); |
c8a2f9ef | 466 | w = t << nb; |
d03ab969 | 467 | } |
c8a2f9ef | 468 | |
c29970a7 | 469 | *--dvl = MPW(w); |
c8a2f9ef | 470 | MPX_ZERO(dv, dvl); |
d03ab969 | 471 | } |
472 | ||
473 | done:; | |
474 | } | |
475 | ||
81578196 | 476 | /* --- @mpx_lslc@ --- * |
477 | * | |
478 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
479 | * @const mpw *av, *avl@ = source vector base and limit | |
480 | * @size_t n@ = number of bit positions to shift by | |
481 | * | |
482 | * Returns: --- | |
483 | * | |
484 | * Use: Performs a logical shift left operation on an integer, only | |
485 | * it fills in the bits with ones instead of zeroes. | |
486 | */ | |
487 | ||
488 | void mpx_lslc(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n) | |
489 | { | |
490 | size_t nw; | |
491 | unsigned nb; | |
492 | ||
493 | /* --- Trivial special case --- */ | |
494 | ||
495 | if (n == 0) | |
496 | MPX_COPY(dv, dvl, av, avl); | |
497 | ||
498 | /* --- Single bit shifting --- */ | |
499 | ||
500 | else if (n == 1) { | |
501 | mpw w = 1; | |
502 | while (av < avl) { | |
503 | mpw t; | |
504 | if (dv >= dvl) | |
505 | goto done; | |
506 | t = *av++; | |
507 | *dv++ = MPW((t << 1) | w); | |
508 | w = t >> (MPW_BITS - 1); | |
509 | } | |
510 | if (dv >= dvl) | |
511 | goto done; | |
512 | *dv++ = MPW(w); | |
513 | MPX_ZERO(dv, dvl); | |
514 | goto done; | |
515 | } | |
516 | ||
517 | /* --- Break out word and bit shifts for more sophisticated work --- */ | |
45c0fd36 | 518 | |
81578196 | 519 | nw = n / MPW_BITS; |
520 | nb = n % MPW_BITS; | |
521 | ||
522 | /* --- Handle a shift by a multiple of the word size --- */ | |
523 | ||
524 | if (nb == 0) { | |
525 | if (nw >= dvl - dv) | |
526 | MPX_ONE(dv, dvl); | |
527 | else { | |
528 | MPX_COPY(dv + nw, dvl, av, avl); | |
529 | MPX_ONE(dv, dv + nw); | |
530 | } | |
531 | } | |
532 | ||
533 | /* --- And finally the difficult case --- * | |
534 | * | |
535 | * This is a little convoluted, because I have to start from the end and | |
536 | * work backwards to avoid overwriting the source, if they're both the same | |
537 | * block of memory. | |
538 | */ | |
539 | ||
540 | else { | |
541 | mpw w; | |
542 | size_t nr = MPW_BITS - nb; | |
543 | size_t dvn = dvl - dv; | |
544 | size_t avn = avl - av; | |
545 | ||
546 | if (dvn <= nw) { | |
547 | MPX_ONE(dv, dvl); | |
548 | goto done; | |
549 | } | |
550 | ||
551 | if (dvn > avn + nw) { | |
552 | size_t off = avn + nw + 1; | |
553 | MPX_ZERO(dv + off, dvl); | |
554 | dvl = dv + off; | |
555 | w = 0; | |
556 | } else { | |
557 | avl = av + dvn - nw; | |
558 | w = *--avl << nb; | |
559 | } | |
560 | ||
561 | while (avl > av) { | |
562 | mpw t = *--avl; | |
c29970a7 | 563 | *--dvl = MPW((t >> nr) | w); |
81578196 | 564 | w = t << nb; |
565 | } | |
566 | ||
c29970a7 | 567 | *--dvl = MPW((MPW_MAX >> nr) | w); |
81578196 | 568 | MPX_ONE(dv, dvl); |
569 | } | |
570 | ||
571 | done:; | |
572 | } | |
573 | ||
d03ab969 | 574 | /* --- @mpx_lsr@ --- * |
575 | * | |
576 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
577 | * @const mpw *av, *avl@ = source vector base and limit | |
578 | * @size_t n@ = number of bit positions to shift by | |
579 | * | |
580 | * Returns: --- | |
581 | * | |
582 | * Use: Performs a logical shift right operation on an integer. | |
583 | */ | |
584 | ||
585 | void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n) | |
586 | { | |
587 | size_t nw; | |
588 | unsigned nb; | |
589 | ||
590 | /* --- Trivial special case --- */ | |
591 | ||
592 | if (n == 0) | |
593 | MPX_COPY(dv, dvl, av, avl); | |
594 | ||
595 | /* --- Single bit shifting --- */ | |
596 | ||
597 | else if (n == 1) { | |
f40868de | 598 | mpw w = av < avl ? *av++ >> 1 : 0; |
d03ab969 | 599 | while (av < avl) { |
600 | mpw t; | |
601 | if (dv >= dvl) | |
602 | goto done; | |
603 | t = *av++; | |
604 | *dv++ = MPW((t << (MPW_BITS - 1)) | w); | |
605 | w = t >> 1; | |
606 | } | |
607 | if (dv >= dvl) | |
608 | goto done; | |
609 | *dv++ = MPW(w); | |
610 | MPX_ZERO(dv, dvl); | |
c8a2f9ef | 611 | goto done; |
d03ab969 | 612 | } |
613 | ||
614 | /* --- Break out word and bit shifts for more sophisticated work --- */ | |
615 | ||
616 | nw = n / MPW_BITS; | |
617 | nb = n % MPW_BITS; | |
618 | ||
619 | /* --- Handle a shift by a multiple of the word size --- */ | |
620 | ||
4f29a732 | 621 | if (nb == 0) { |
622 | if (nw >= avl - av) | |
623 | MPX_ZERO(dv, dvl); | |
624 | else | |
625 | MPX_COPY(dv, dvl, av + nw, avl); | |
626 | } | |
d03ab969 | 627 | |
628 | /* --- And finally the difficult case --- */ | |
629 | ||
630 | else { | |
631 | mpw w; | |
632 | size_t nr = MPW_BITS - nb; | |
633 | ||
634 | av += nw; | |
4f29a732 | 635 | w = av < avl ? *av++ : 0; |
d03ab969 | 636 | while (av < avl) { |
637 | mpw t; | |
638 | if (dv >= dvl) | |
639 | goto done; | |
640 | t = *av++; | |
641 | *dv++ = MPW((w >> nb) | (t << nr)); | |
642 | w = t; | |
643 | } | |
644 | if (dv < dvl) { | |
645 | *dv++ = MPW(w >> nb); | |
646 | MPX_ZERO(dv, dvl); | |
647 | } | |
648 | } | |
649 | ||
650 | done:; | |
651 | } | |
652 | ||
0f32e0f8 | 653 | /*----- Bitwise operations ------------------------------------------------*/ |
654 | ||
f09e814a | 655 | /* --- @mpx_bitop@ --- * |
0f32e0f8 | 656 | * |
657 | * Arguments: @mpw *dv, *dvl@ = destination vector | |
658 | * @const mpw *av, *avl@ = first source vector | |
659 | * @const mpw *bv, *bvl@ = second source vector | |
660 | * | |
661 | * Returns: --- | |
662 | * | |
f09e814a | 663 | * Use; Provides the dyadic boolean functions. |
0f32e0f8 | 664 | */ |
665 | ||
f09e814a | 666 | #define MPX_BITBINOP(string) \ |
0f32e0f8 | 667 | \ |
f09e814a | 668 | void mpx_bit##string(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, \ |
669 | const mpw *bv, const mpw *bvl) \ | |
0f32e0f8 | 670 | { \ |
671 | MPX_SHRINK(av, avl); \ | |
672 | MPX_SHRINK(bv, bvl); \ | |
673 | \ | |
674 | while (dv < dvl) { \ | |
675 | mpw a, b; \ | |
676 | a = (av < avl) ? *av++ : 0; \ | |
677 | b = (bv < bvl) ? *bv++ : 0; \ | |
75263f25 | 678 | *dv++ = B##string(a, b); \ |
0f32e0f8 | 679 | } \ |
680 | } | |
681 | ||
f09e814a | 682 | MPX_DOBIN(MPX_BITBINOP) |
0f32e0f8 | 683 | |
684 | void mpx_not(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) | |
685 | { | |
686 | MPX_SHRINK(av, avl); | |
687 | ||
688 | while (dv < dvl) { | |
689 | mpw a; | |
690 | a = (av < avl) ? *av++ : 0; | |
691 | *dv++ = ~a; | |
692 | } | |
693 | } | |
694 | ||
d03ab969 | 695 | /*----- Unsigned arithmetic -----------------------------------------------*/ |
696 | ||
f45a00c6 | 697 | /* --- @mpx_2c@ --- * |
698 | * | |
699 | * Arguments: @mpw *dv, *dvl@ = destination vector | |
700 | * @const mpw *v, *vl@ = source vector | |
701 | * | |
702 | * Returns: --- | |
703 | * | |
704 | * Use: Calculates the two's complement of @v@. | |
705 | */ | |
706 | ||
707 | void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl) | |
708 | { | |
709 | mpw c = 0; | |
710 | while (dv < dvl && v < vl) | |
711 | *dv++ = c = MPW(~*v++); | |
712 | if (dv < dvl) { | |
713 | if (c > MPW_MAX / 2) | |
714 | c = MPW(~0); | |
715 | while (dv < dvl) | |
716 | *dv++ = c; | |
717 | } | |
718 | MPX_UADDN(dv, dvl, 1); | |
719 | } | |
720 | ||
1a05a8ef | 721 | /* --- @mpx_ueq@ --- * |
722 | * | |
723 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit | |
724 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
725 | * | |
726 | * Returns: Nonzero if the two vectors are equal. | |
727 | * | |
728 | * Use: Performs an unsigned integer test for equality. | |
729 | */ | |
730 | ||
731 | int mpx_ueq(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) | |
732 | { | |
733 | MPX_SHRINK(av, avl); | |
734 | MPX_SHRINK(bv, bvl); | |
735 | if (avl - av != bvl - bv) | |
736 | return (0); | |
737 | while (av < avl) { | |
738 | if (*av++ != *bv++) | |
739 | return (0); | |
740 | } | |
741 | return (1); | |
742 | } | |
743 | ||
d03ab969 | 744 | /* --- @mpx_ucmp@ --- * |
745 | * | |
746 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit | |
747 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
748 | * | |
749 | * Returns: Less than, equal to, or greater than zero depending on | |
750 | * whether @a@ is less than, equal to or greater than @b@, | |
751 | * respectively. | |
752 | * | |
753 | * Use: Performs an unsigned integer comparison. | |
754 | */ | |
755 | ||
756 | int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) | |
757 | { | |
758 | MPX_SHRINK(av, avl); | |
759 | MPX_SHRINK(bv, bvl); | |
760 | ||
761 | if (avl - av > bvl - bv) | |
762 | return (+1); | |
763 | else if (avl - av < bvl - bv) | |
764 | return (-1); | |
765 | else while (avl > av) { | |
766 | mpw a = *--avl, b = *--bvl; | |
767 | if (a > b) | |
768 | return (+1); | |
769 | else if (a < b) | |
770 | return (-1); | |
771 | } | |
772 | return (0); | |
773 | } | |
1a05a8ef | 774 | |
d03ab969 | 775 | /* --- @mpx_uadd@ --- * |
776 | * | |
777 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
778 | * @const mpw *av, *avl@ = first addend vector base and limit | |
779 | * @const mpw *bv, *bvl@ = second addend vector base and limit | |
780 | * | |
781 | * Returns: --- | |
782 | * | |
783 | * Use: Performs unsigned integer addition. If the result overflows | |
784 | * the destination vector, high-order bits are discarded. This | |
785 | * means that two's complement addition happens more or less for | |
786 | * free, although that's more a side-effect than anything else. | |
787 | * The result vector may be equal to either or both source | |
788 | * vectors, but may not otherwise overlap them. | |
789 | */ | |
790 | ||
791 | void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
792 | const mpw *bv, const mpw *bvl) | |
793 | { | |
794 | mpw c = 0; | |
795 | ||
796 | while (av < avl || bv < bvl) { | |
797 | mpw a, b; | |
798 | mpd x; | |
799 | if (dv >= dvl) | |
800 | return; | |
801 | a = (av < avl) ? *av++ : 0; | |
802 | b = (bv < bvl) ? *bv++ : 0; | |
803 | x = (mpd)a + (mpd)b + c; | |
804 | *dv++ = MPW(x); | |
805 | c = x >> MPW_BITS; | |
806 | } | |
807 | if (dv < dvl) { | |
808 | *dv++ = c; | |
809 | MPX_ZERO(dv, dvl); | |
810 | } | |
811 | } | |
812 | ||
dd517851 | 813 | /* --- @mpx_uaddn@ --- * |
814 | * | |
815 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit | |
816 | * @mpw n@ = other addend | |
817 | * | |
818 | * Returns: --- | |
819 | * | |
820 | * Use: Adds a small integer to a multiprecision number. | |
821 | */ | |
822 | ||
823 | void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); } | |
824 | ||
f46efa79 | 825 | /* --- @mpx_uaddnlsl@ --- * |
826 | * | |
827 | * Arguments: @mpw *dv, *dvl@ = destination and first argument vector | |
828 | * @mpw a@ = second argument | |
829 | * @unsigned o@ = offset in bits | |
830 | * | |
831 | * Returns: --- | |
832 | * | |
833 | * Use: Computes %$d + 2^o a$%. If the result overflows then | |
834 | * high-order bits are discarded, as usual. We must have | |
835 | * @0 < o < MPW_BITS@. | |
836 | */ | |
837 | ||
838 | void mpx_uaddnlsl(mpw *dv, mpw *dvl, mpw a, unsigned o) | |
839 | { | |
840 | mpd x = (mpd)a << o; | |
841 | ||
842 | while (x && dv < dvl) { | |
843 | x += *dv; | |
844 | *dv++ = MPW(x); | |
845 | x >>= MPW_BITS; | |
846 | } | |
847 | } | |
848 | ||
d03ab969 | 849 | /* --- @mpx_usub@ --- * |
850 | * | |
851 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
852 | * @const mpw *av, *avl@ = first argument vector base and limit | |
853 | * @const mpw *bv, *bvl@ = second argument vector base and limit | |
854 | * | |
855 | * Returns: --- | |
856 | * | |
857 | * Use: Performs unsigned integer subtraction. If the result | |
858 | * overflows the destination vector, high-order bits are | |
859 | * discarded. This means that two's complement subtraction | |
860 | * happens more or less for free, althuogh that's more a side- | |
861 | * effect than anything else. The result vector may be equal to | |
862 | * either or both source vectors, but may not otherwise overlap | |
863 | * them. | |
864 | */ | |
865 | ||
866 | void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
867 | const mpw *bv, const mpw *bvl) | |
868 | { | |
869 | mpw c = 0; | |
870 | ||
871 | while (av < avl || bv < bvl) { | |
872 | mpw a, b; | |
873 | mpd x; | |
874 | if (dv >= dvl) | |
875 | return; | |
876 | a = (av < avl) ? *av++ : 0; | |
877 | b = (bv < bvl) ? *bv++ : 0; | |
c8a2f9ef | 878 | x = (mpd)a - (mpd)b - c; |
d03ab969 | 879 | *dv++ = MPW(x); |
c8a2f9ef | 880 | if (x >> MPW_BITS) |
881 | c = 1; | |
882 | else | |
883 | c = 0; | |
d03ab969 | 884 | } |
c8a2f9ef | 885 | if (c) |
886 | c = MPW_MAX; | |
d03ab969 | 887 | while (dv < dvl) |
c8a2f9ef | 888 | *dv++ = c; |
d03ab969 | 889 | } |
890 | ||
dd517851 | 891 | /* --- @mpx_usubn@ --- * |
892 | * | |
893 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit | |
894 | * @n@ = subtrahend | |
895 | * | |
896 | * Returns: --- | |
897 | * | |
898 | * Use: Subtracts a small integer from a multiprecision number. | |
899 | */ | |
900 | ||
901 | void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); } | |
902 | ||
f46efa79 | 903 | /* --- @mpx_uaddnlsl@ --- * |
904 | * | |
905 | * Arguments: @mpw *dv, *dvl@ = destination and first argument vector | |
906 | * @mpw a@ = second argument | |
907 | * @unsigned o@ = offset in bits | |
908 | * | |
909 | * Returns: --- | |
910 | * | |
911 | * Use: Computes %$d + 2^o a$%. If the result overflows then | |
912 | * high-order bits are discarded, as usual. We must have | |
913 | * @0 < o < MPW_BITS@. | |
914 | */ | |
915 | ||
916 | void mpx_usubnlsl(mpw *dv, mpw *dvl, mpw a, unsigned o) | |
917 | { | |
918 | mpw b = a >> (MPW_BITS - o); | |
919 | a <<= o; | |
920 | ||
921 | if (dv < dvl) { | |
c29970a7 | 922 | mpd x = (mpd)*dv - MPW(a); |
f46efa79 | 923 | *dv++ = MPW(x); |
924 | if (x >> MPW_BITS) | |
925 | b++; | |
926 | MPX_USUBN(dv, dvl, b); | |
927 | } | |
928 | } | |
929 | ||
d03ab969 | 930 | /* --- @mpx_umul@ --- * |
931 | * | |
932 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
933 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
934 | * @const mpw *bv, *bvl@ = multiplier vector base and limit | |
935 | * | |
936 | * Returns: --- | |
937 | * | |
938 | * Use: Performs unsigned integer multiplication. If the result | |
939 | * overflows the desination vector, high-order bits are | |
940 | * discarded. The result vector may not overlap the argument | |
941 | * vectors in any way. | |
942 | */ | |
943 | ||
944 | void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, | |
945 | const mpw *bv, const mpw *bvl) | |
946 | { | |
947 | /* --- This is probably worthwhile on a multiply --- */ | |
948 | ||
949 | MPX_SHRINK(av, avl); | |
950 | MPX_SHRINK(bv, bvl); | |
951 | ||
952 | /* --- Deal with a multiply by zero --- */ | |
45c0fd36 | 953 | |
d03ab969 | 954 | if (bv == bvl) { |
c8a2f9ef | 955 | MPX_ZERO(dv, dvl); |
d03ab969 | 956 | return; |
957 | } | |
958 | ||
959 | /* --- Do the initial multiply and initialize the accumulator --- */ | |
960 | ||
961 | MPX_UMULN(dv, dvl, av, avl, *bv++); | |
962 | ||
963 | /* --- Do the remaining multiply/accumulates --- */ | |
964 | ||
c8a2f9ef | 965 | while (dv < dvl && bv < bvl) { |
d03ab969 | 966 | mpw m = *bv++; |
c8a2f9ef | 967 | mpw c = 0; |
d03ab969 | 968 | const mpw *avv = av; |
969 | mpw *dvv = ++dv; | |
970 | ||
971 | while (avv < avl) { | |
972 | mpd x; | |
973 | if (dvv >= dvl) | |
974 | goto next; | |
c8a2f9ef | 975 | x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c; |
976 | *dvv++ = MPW(x); | |
d03ab969 | 977 | c = x >> MPW_BITS; |
978 | } | |
c8a2f9ef | 979 | MPX_UADDN(dvv, dvl, c); |
d03ab969 | 980 | next:; |
981 | } | |
982 | } | |
983 | ||
dd517851 | 984 | /* --- @mpx_umuln@ --- * |
985 | * | |
986 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
987 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
988 | * @mpw m@ = multiplier | |
989 | * | |
990 | * Returns: --- | |
991 | * | |
992 | * Use: Multiplies a multiprecision integer by a single-word value. | |
993 | * The destination and source may be equal. The destination | |
994 | * is completely cleared after use. | |
995 | */ | |
996 | ||
997 | void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) | |
106b481c | 998 | { MPX_UMULN(dv, dvl, av, avl, m); } |
dd517851 | 999 | |
1000 | /* --- @mpx_umlan@ --- * | |
1001 | * | |
1002 | * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit | |
1003 | * @const mpw *av, *avl@ = multiplicand vector base and limit | |
1004 | * @mpw m@ = multiplier | |
1005 | * | |
1006 | * Returns: --- | |
1007 | * | |
1008 | * Use: Multiplies a multiprecision integer by a single-word value | |
1009 | * and adds the result to an accumulator. | |
1010 | */ | |
1011 | ||
1012 | void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) | |
106b481c | 1013 | { MPX_UMLAN(dv, dvl, av, avl, m); } |
dd517851 | 1014 | |
c8a2f9ef | 1015 | /* --- @mpx_usqr@ --- * |
1016 | * | |
1017 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit | |
1018 | * @const mpw *av, *av@ = source vector base and limit | |
1019 | * | |
1020 | * Returns: --- | |
1021 | * | |
1022 | * Use: Performs unsigned integer squaring. The result vector must | |
1023 | * not overlap the source vector in any way. | |
1024 | */ | |
1025 | ||
1026 | void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) | |
1027 | { | |
1028 | MPX_ZERO(dv, dvl); | |
1029 | ||
1030 | /* --- Main loop --- */ | |
1031 | ||
1032 | while (av < avl) { | |
1033 | const mpw *avv = av; | |
1034 | mpw *dvv = dv; | |
1035 | mpw a = *av; | |
1036 | mpd c; | |
1037 | ||
1038 | /* --- Stop if I've run out of destination --- */ | |
1039 | ||
1040 | if (dvv >= dvl) | |
1041 | break; | |
1042 | ||
1043 | /* --- Work out the square at this point in the proceedings --- */ | |
1044 | ||
1045 | { | |
c8a2f9ef | 1046 | mpd x = (mpd)a * (mpd)a + *dvv; |
1047 | *dvv++ = MPW(x); | |
1048 | c = MPW(x >> MPW_BITS); | |
1049 | } | |
1050 | ||
1051 | /* --- Now fix up the rest of the vector upwards --- */ | |
1052 | ||
1053 | avv++; | |
1054 | while (dvv < dvl && avv < avl) { | |
c8a2f9ef | 1055 | mpd x = (mpd)a * (mpd)*avv++; |
1056 | mpd y = ((x << 1) & MPW_MAX) + c + *dvv; | |
1057 | c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS); | |
1058 | *dvv++ = MPW(y); | |
1059 | } | |
1060 | while (dvv < dvl && c) { | |
1061 | mpd x = c + *dvv; | |
1062 | *dvv++ = MPW(x); | |
1063 | c = x >> MPW_BITS; | |
1064 | } | |
1065 | ||
1066 | /* --- Get ready for the next round --- */ | |
1067 | ||
1068 | av++; | |
1069 | dv += 2; | |
1070 | } | |
1071 | } | |
1072 | ||
d03ab969 | 1073 | /* --- @mpx_udiv@ --- * |
1074 | * | |
1075 | * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit | |
1076 | * @mpw *rv, *rvl@ = dividend/remainder vector base and limit | |
1077 | * @const mpw *dv, *dvl@ = divisor vector base and limit | |
c8a2f9ef | 1078 | * @mpw *sv, *svl@ = scratch workspace |
d03ab969 | 1079 | * |
1080 | * Returns: --- | |
1081 | * | |
1082 | * Use: Performs unsigned integer division. If the result overflows | |
1083 | * the quotient vector, high-order bits are discarded. (Clearly | |
1084 | * the remainder vector can't overflow.) The various vectors | |
1085 | * may not overlap in any way. Yes, I know it's a bit odd | |
1086 | * requiring the dividend to be in the result position but it | |
1087 | * does make some sense really. The remainder must have | |
c8a2f9ef | 1088 | * headroom for at least two extra words. The scratch space |
f45a00c6 | 1089 | * must be at least one word larger than the divisor. |
d03ab969 | 1090 | */ |
1091 | ||
1092 | void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl, | |
c8a2f9ef | 1093 | const mpw *dv, const mpw *dvl, |
1094 | mpw *sv, mpw *svl) | |
d03ab969 | 1095 | { |
d03ab969 | 1096 | unsigned norm = 0; |
1097 | size_t scale; | |
1098 | mpw d, dd; | |
1099 | ||
1100 | /* --- Initialize the quotient --- */ | |
1101 | ||
1102 | MPX_ZERO(qv, qvl); | |
1103 | ||
c8a2f9ef | 1104 | /* --- Perform some sanity checks --- */ |
1105 | ||
1106 | MPX_SHRINK(dv, dvl); | |
1107 | assert(((void)"division by zero in mpx_udiv", dv < dvl)); | |
1108 | ||
d03ab969 | 1109 | /* --- Normalize the divisor --- * |
1110 | * | |
1111 | * The algorithm requires that the divisor be at least two digits long. | |
1112 | * This is easy to fix. | |
1113 | */ | |
1114 | ||
c8a2f9ef | 1115 | { |
1116 | unsigned b; | |
d03ab969 | 1117 | |
c8a2f9ef | 1118 | d = dvl[-1]; |
c29970a7 | 1119 | for (b = MPW_P2; b; b >>= 1) { |
34e4f738 | 1120 | if (d <= (MPW_MAX >> b)) { |
c8a2f9ef | 1121 | d <<= b; |
1122 | norm += b; | |
1123 | } | |
1124 | } | |
1125 | if (dv + 1 == dvl) | |
1126 | norm += MPW_BITS; | |
d03ab969 | 1127 | } |
d03ab969 | 1128 | |
1129 | /* --- Normalize the dividend/remainder to match --- */ | |
1130 | ||
c8a2f9ef | 1131 | if (norm) { |
c8a2f9ef | 1132 | mpx_lsl(rv, rvl, rv, rvl, norm); |
f45a00c6 | 1133 | mpx_lsl(sv, svl, dv, dvl, norm); |
c8a2f9ef | 1134 | dv = sv; |
f45a00c6 | 1135 | dvl = svl; |
c8a2f9ef | 1136 | MPX_SHRINK(dv, dvl); |
1137 | } | |
1138 | ||
d03ab969 | 1139 | MPX_SHRINK(rv, rvl); |
c8a2f9ef | 1140 | d = dvl[-1]; |
1141 | dd = dvl[-2]; | |
d03ab969 | 1142 | |
1143 | /* --- Work out the relative scales --- */ | |
1144 | ||
1145 | { | |
1146 | size_t rvn = rvl - rv; | |
c8a2f9ef | 1147 | size_t dvn = dvl - dv; |
d03ab969 | 1148 | |
1149 | /* --- If the divisor is clearly larger, notice this --- */ | |
1150 | ||
1151 | if (dvn > rvn) { | |
1152 | mpx_lsr(rv, rvl, rv, rvl, norm); | |
1153 | return; | |
1154 | } | |
1155 | ||
1156 | scale = rvn - dvn; | |
1157 | } | |
1158 | ||
1159 | /* --- Calculate the most significant quotient digit --- * | |
1160 | * | |
1161 | * Because the divisor has its top bit set, this can only happen once. The | |
1162 | * pointer arithmetic is a little contorted, to make sure that the | |
1163 | * behaviour is defined. | |
1164 | */ | |
1165 | ||
1166 | if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) { | |
1167 | mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl); | |
1168 | if (qvl - qv > scale) | |
1169 | qv[scale] = 1; | |
1170 | } | |
1171 | ||
1172 | /* --- Now for the main loop --- */ | |
1173 | ||
1174 | { | |
c8a2f9ef | 1175 | mpw *rvv = rvl - 2; |
d03ab969 | 1176 | |
1177 | while (scale) { | |
c8a2f9ef | 1178 | mpw q; |
1179 | mpd rh; | |
d03ab969 | 1180 | |
1181 | /* --- Get an estimate for the next quotient digit --- */ | |
1182 | ||
c8a2f9ef | 1183 | mpw r = rvv[1]; |
1184 | mpw rr = rvv[0]; | |
1185 | mpw rrr = *--rvv; | |
1186 | ||
1187 | scale--; | |
1188 | rh = ((mpd)r << MPW_BITS) | rr; | |
d03ab969 | 1189 | if (r == d) |
1190 | q = MPW_MAX; | |
c8a2f9ef | 1191 | else |
1192 | q = MPW(rh / d); | |
d03ab969 | 1193 | |
1194 | /* --- Refine the estimate --- */ | |
1195 | ||
1196 | { | |
1197 | mpd yh = (mpd)d * q; | |
ce76ff16 | 1198 | mpd yy = (mpd)dd * q; |
1199 | mpw yl; | |
c8a2f9ef | 1200 | |
ce76ff16 | 1201 | if (yy > MPW_MAX) |
1202 | yh += yy >> MPW_BITS; | |
1203 | yl = MPW(yy); | |
c8a2f9ef | 1204 | |
1205 | while (yh > rh || (yh == rh && yl > rrr)) { | |
1206 | q--; | |
1207 | yh -= d; | |
ce76ff16 | 1208 | if (yl < dd) |
1209 | yh--; | |
99b30c23 | 1210 | yl = MPW(yl - dd); |
c8a2f9ef | 1211 | } |
1212 | } | |
1213 | ||
1214 | /* --- Remove a chunk from the dividend --- */ | |
1215 | ||
1216 | { | |
1217 | mpw *svv; | |
1218 | const mpw *dvv; | |
f45a00c6 | 1219 | mpw mc = 0, sc = 0; |
c8a2f9ef | 1220 | |
f45a00c6 | 1221 | /* --- Calculate the size of the chunk --- * |
1222 | * | |
1223 | * This does the whole job of calculating @r >> scale - qd@. | |
1224 | */ | |
c8a2f9ef | 1225 | |
f45a00c6 | 1226 | for (svv = rv + scale, dvv = dv; |
1227 | dvv < dvl && svv < rvl; | |
1228 | svv++, dvv++) { | |
1229 | mpd x = (mpd)*dvv * (mpd)q + mc; | |
1230 | mc = x >> MPW_BITS; | |
1231 | x = (mpd)*svv - MPW(x) - sc; | |
c8a2f9ef | 1232 | *svv = MPW(x); |
f45a00c6 | 1233 | if (x >> MPW_BITS) |
1234 | sc = 1; | |
1235 | else | |
1236 | sc = 0; | |
1237 | } | |
1238 | ||
1239 | if (svv < rvl) { | |
1240 | mpd x = (mpd)*svv - mc - sc; | |
1241 | *svv++ = MPW(x); | |
1242 | if (x >> MPW_BITS) | |
1243 | sc = MPW_MAX; | |
1244 | else | |
1245 | sc = 0; | |
1246 | while (svv < rvl) | |
1247 | *svv++ = sc; | |
c8a2f9ef | 1248 | } |
c8a2f9ef | 1249 | |
f45a00c6 | 1250 | /* --- Fix if the quotient was too large --- * |
c8a2f9ef | 1251 | * |
f45a00c6 | 1252 | * This doesn't seem to happen very often. |
c8a2f9ef | 1253 | */ |
1254 | ||
c8a2f9ef | 1255 | if (rvl[-1] > MPW_MAX / 2) { |
1256 | mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl); | |
1257 | q--; | |
1258 | } | |
1259 | } | |
1260 | ||
1261 | /* --- Done for another iteration --- */ | |
1262 | ||
1263 | if (qvl - qv > scale) | |
1264 | qv[scale] = q; | |
1265 | r = rr; | |
1266 | rr = rrr; | |
1267 | } | |
1268 | } | |
1269 | ||
1270 | /* --- Now fiddle with unnormalizing and things --- */ | |
1271 | ||
1272 | mpx_lsr(rv, rvl, rv, rvl, norm); | |
d03ab969 | 1273 | } |
1274 | ||
698bd937 | 1275 | /* --- @mpx_udivn@ --- * |
1276 | * | |
1277 | * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap | |
1278 | * dividend) | |
1279 | * @const mpw *rv, *rvl@ = dividend | |
1280 | * @mpw d@ = single-precision divisor | |
1281 | * | |
1282 | * Returns: Remainder after divison. | |
1283 | * | |
1284 | * Use: Performs a single-precision division operation. | |
1285 | */ | |
1286 | ||
1287 | mpw mpx_udivn(mpw *qv, mpw *qvl, const mpw *rv, const mpw *rvl, mpw d) | |
1288 | { | |
1289 | size_t i; | |
1290 | size_t ql = qvl - qv; | |
1291 | mpd r = 0; | |
1292 | ||
1293 | i = rvl - rv; | |
1294 | while (i > 0) { | |
1295 | i--; | |
1296 | r = (r << MPW_BITS) | rv[i]; | |
1297 | if (i < ql) | |
1298 | qv[i] = r / d; | |
1299 | r %= d; | |
1300 | } | |
1301 | return (MPW(r)); | |
1302 | } | |
1303 | ||
42684bdb | 1304 | /*----- Test rig ----------------------------------------------------------*/ |
1305 | ||
1306 | #ifdef TEST_RIG | |
1307 | ||
1308 | #include <mLib/alloc.h> | |
1309 | #include <mLib/dstr.h> | |
1310 | #include <mLib/quis.h> | |
1311 | #include <mLib/testrig.h> | |
1312 | ||
1313 | #include "mpscan.h" | |
1314 | ||
1315 | #define ALLOC(v, vl, sz) do { \ | |
1316 | size_t _sz = (sz); \ | |
1317 | mpw *_vv = xmalloc(MPWS(_sz)); \ | |
1318 | mpw *_vvl = _vv + _sz; \ | |
1319 | (v) = _vv; \ | |
1320 | (vl) = _vvl; \ | |
1321 | } while (0) | |
1322 | ||
1323 | #define LOAD(v, vl, d) do { \ | |
1324 | const dstr *_d = (d); \ | |
1325 | mpw *_v, *_vl; \ | |
1326 | ALLOC(_v, _vl, MPW_RQ(_d->len)); \ | |
1327 | mpx_loadb(_v, _vl, _d->buf, _d->len); \ | |
1328 | (v) = _v; \ | |
1329 | (vl) = _vl; \ | |
1330 | } while (0) | |
1331 | ||
1332 | #define MAX(x, y) ((x) > (y) ? (x) : (y)) | |
45c0fd36 | 1333 | |
42684bdb | 1334 | static void dumpbits(const char *msg, const void *pp, size_t sz) |
1335 | { | |
1336 | const octet *p = pp; | |
1337 | fputs(msg, stderr); | |
1338 | for (; sz; sz--) | |
1339 | fprintf(stderr, " %02x", *p++); | |
1340 | fputc('\n', stderr); | |
1341 | } | |
1342 | ||
1343 | static void dumpmp(const char *msg, const mpw *v, const mpw *vl) | |
1344 | { | |
1345 | fputs(msg, stderr); | |
1346 | MPX_SHRINK(v, vl); | |
1347 | while (v < vl) | |
1348 | fprintf(stderr, " %08lx", (unsigned long)*--vl); | |
1349 | fputc('\n', stderr); | |
1350 | } | |
1351 | ||
1352 | static int chkscan(const mpw *v, const mpw *vl, | |
1353 | const void *pp, size_t sz, int step) | |
1354 | { | |
1355 | mpscan mps; | |
1356 | const octet *p = pp; | |
1357 | unsigned bit = 0; | |
1358 | int ok = 1; | |
1359 | ||
1360 | mpscan_initx(&mps, v, vl); | |
1361 | while (sz) { | |
1362 | unsigned x = *p; | |
1363 | int i; | |
1364 | p += step; | |
1365 | for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) { | |
1366 | if (MPSCAN_BIT(&mps) != (x & 1)) { | |
1367 | fprintf(stderr, | |
1368 | "\n*** error, step %i, bit %u, expected %u, found %u\n", | |
1369 | step, bit, x & 1, MPSCAN_BIT(&mps)); | |
1370 | ok = 0; | |
1371 | } | |
1372 | x >>= 1; | |
1373 | bit++; | |
1374 | } | |
1375 | sz--; | |
1376 | } | |
1377 | ||
1378 | return (ok); | |
1379 | } | |
1380 | ||
1381 | static int loadstore(dstr *v) | |
1382 | { | |
1383 | dstr d = DSTR_INIT; | |
1384 | size_t sz = MPW_RQ(v->len) * 2, diff; | |
1385 | mpw *m, *ml; | |
1386 | int ok = 1; | |
1387 | ||
1388 | dstr_ensure(&d, v->len); | |
1389 | m = xmalloc(MPWS(sz)); | |
1390 | ||
1391 | for (diff = 0; diff < sz; diff += 5) { | |
1392 | size_t oct; | |
1393 | ||
1394 | ml = m + sz - diff; | |
1395 | ||
1396 | mpx_loadl(m, ml, v->buf, v->len); | |
1397 | if (!chkscan(m, ml, v->buf, v->len, +1)) | |
1398 | ok = 0; | |
1399 | MPX_OCTETS(oct, m, ml); | |
1400 | mpx_storel(m, ml, d.buf, d.sz); | |
1401 | if (memcmp(d.buf, v->buf, oct) != 0) { | |
1402 | dumpbits("\n*** storel failed", d.buf, d.sz); | |
1403 | ok = 0; | |
1404 | } | |
1405 | ||
1406 | mpx_loadb(m, ml, v->buf, v->len); | |
1407 | if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1)) | |
1408 | ok = 0; | |
1409 | MPX_OCTETS(oct, m, ml); | |
1410 | mpx_storeb(m, ml, d.buf, d.sz); | |
1411 | if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) { | |
1412 | dumpbits("\n*** storeb failed", d.buf, d.sz); | |
1413 | ok = 0; | |
1414 | } | |
1415 | } | |
1416 | ||
1417 | if (!ok) | |
1418 | dumpbits("input data", v->buf, v->len); | |
1419 | ||
12ed8a1f | 1420 | xfree(m); |
42684bdb | 1421 | dstr_destroy(&d); |
1422 | return (ok); | |
1423 | } | |
1424 | ||
f09e814a | 1425 | static int twocl(dstr *v) |
1426 | { | |
1427 | dstr d = DSTR_INIT; | |
1428 | mpw *m, *ml; | |
1429 | size_t sz; | |
1430 | int ok = 1; | |
1431 | ||
1432 | sz = v[0].len; if (v[1].len > sz) sz = v[1].len; | |
1433 | dstr_ensure(&d, sz); | |
1434 | ||
1435 | sz = MPW_RQ(sz); | |
1436 | m = xmalloc(MPWS(sz)); | |
1437 | ml = m + sz; | |
1438 | ||
1439 | mpx_loadl(m, ml, v[0].buf, v[0].len); | |
1440 | mpx_storel2cn(m, ml, d.buf, v[1].len); | |
1441 | if (memcmp(d.buf, v[1].buf, v[1].len)) { | |
1442 | dumpbits("\n*** storel2cn failed", d.buf, v[1].len); | |
1443 | ok = 0; | |
1444 | } | |
1445 | ||
1446 | mpx_loadl2cn(m, ml, v[1].buf, v[1].len); | |
1447 | mpx_storel(m, ml, d.buf, v[0].len); | |
1448 | if (memcmp(d.buf, v[0].buf, v[0].len)) { | |
1449 | dumpbits("\n*** loadl2cn failed", d.buf, v[0].len); | |
1450 | ok = 0; | |
1451 | } | |
1452 | ||
1453 | if (!ok) { | |
1454 | dumpbits("pos", v[0].buf, v[0].len); | |
1455 | dumpbits("neg", v[1].buf, v[1].len); | |
1456 | } | |
1457 | ||
12ed8a1f | 1458 | xfree(m); |
f09e814a | 1459 | dstr_destroy(&d); |
1460 | ||
1461 | return (ok); | |
1462 | } | |
1463 | ||
1464 | static int twocb(dstr *v) | |
1465 | { | |
1466 | dstr d = DSTR_INIT; | |
1467 | mpw *m, *ml; | |
1468 | size_t sz; | |
1469 | int ok = 1; | |
1470 | ||
1471 | sz = v[0].len; if (v[1].len > sz) sz = v[1].len; | |
1472 | dstr_ensure(&d, sz); | |
1473 | ||
1474 | sz = MPW_RQ(sz); | |
1475 | m = xmalloc(MPWS(sz)); | |
1476 | ml = m + sz; | |
1477 | ||
1478 | mpx_loadb(m, ml, v[0].buf, v[0].len); | |
1479 | mpx_storeb2cn(m, ml, d.buf, v[1].len); | |
1480 | if (memcmp(d.buf, v[1].buf, v[1].len)) { | |
1481 | dumpbits("\n*** storeb2cn failed", d.buf, v[1].len); | |
1482 | ok = 0; | |
1483 | } | |
1484 | ||
1485 | mpx_loadb2cn(m, ml, v[1].buf, v[1].len); | |
1486 | mpx_storeb(m, ml, d.buf, v[0].len); | |
1487 | if (memcmp(d.buf, v[0].buf, v[0].len)) { | |
1488 | dumpbits("\n*** loadb2cn failed", d.buf, v[0].len); | |
1489 | ok = 0; | |
1490 | } | |
1491 | ||
1492 | if (!ok) { | |
1493 | dumpbits("pos", v[0].buf, v[0].len); | |
1494 | dumpbits("neg", v[1].buf, v[1].len); | |
1495 | } | |
1496 | ||
12ed8a1f | 1497 | xfree(m); |
f09e814a | 1498 | dstr_destroy(&d); |
1499 | ||
1500 | return (ok); | |
1501 | } | |
1502 | ||
42684bdb | 1503 | static int lsl(dstr *v) |
1504 | { | |
1505 | mpw *a, *al; | |
1506 | int n = *(int *)v[1].buf; | |
1507 | mpw *c, *cl; | |
1508 | mpw *d, *dl; | |
1509 | int ok = 1; | |
1510 | ||
1511 | LOAD(a, al, &v[0]); | |
1512 | LOAD(c, cl, &v[2]); | |
1513 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); | |
1514 | ||
1515 | mpx_lsl(d, dl, a, al, n); | |
1a05a8ef | 1516 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1517 | fprintf(stderr, "\n*** lsl(%i) failed\n", n); |
45c0fd36 | 1518 | dumpmp(" a", a, al); |
42684bdb | 1519 | dumpmp("expected", c, cl); |
1520 | dumpmp(" result", d, dl); | |
1521 | ok = 0; | |
1522 | } | |
1523 | ||
12ed8a1f | 1524 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1525 | return (ok); |
1526 | } | |
1527 | ||
81578196 | 1528 | static int lslc(dstr *v) |
1529 | { | |
1530 | mpw *a, *al; | |
1531 | int n = *(int *)v[1].buf; | |
1532 | mpw *c, *cl; | |
1533 | mpw *d, *dl; | |
1534 | int ok = 1; | |
1535 | ||
1536 | LOAD(a, al, &v[0]); | |
1537 | LOAD(c, cl, &v[2]); | |
1538 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); | |
1539 | ||
1540 | mpx_lslc(d, dl, a, al, n); | |
1541 | if (!mpx_ueq(d, dl, c, cl)) { | |
1542 | fprintf(stderr, "\n*** lslc(%i) failed\n", n); | |
45c0fd36 | 1543 | dumpmp(" a", a, al); |
81578196 | 1544 | dumpmp("expected", c, cl); |
1545 | dumpmp(" result", d, dl); | |
1546 | ok = 0; | |
1547 | } | |
1548 | ||
12ed8a1f | 1549 | xfree(a); xfree(c); xfree(d); |
81578196 | 1550 | return (ok); |
1551 | } | |
1552 | ||
42684bdb | 1553 | static int lsr(dstr *v) |
1554 | { | |
1555 | mpw *a, *al; | |
1556 | int n = *(int *)v[1].buf; | |
1557 | mpw *c, *cl; | |
1558 | mpw *d, *dl; | |
1559 | int ok = 1; | |
1560 | ||
1561 | LOAD(a, al, &v[0]); | |
1562 | LOAD(c, cl, &v[2]); | |
1563 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1); | |
1564 | ||
1565 | mpx_lsr(d, dl, a, al, n); | |
1a05a8ef | 1566 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1567 | fprintf(stderr, "\n*** lsr(%i) failed\n", n); |
45c0fd36 | 1568 | dumpmp(" a", a, al); |
42684bdb | 1569 | dumpmp("expected", c, cl); |
1570 | dumpmp(" result", d, dl); | |
1571 | ok = 0; | |
1572 | } | |
1573 | ||
12ed8a1f | 1574 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1575 | return (ok); |
1576 | } | |
1577 | ||
1578 | static int uadd(dstr *v) | |
1579 | { | |
1580 | mpw *a, *al; | |
1581 | mpw *b, *bl; | |
1582 | mpw *c, *cl; | |
1583 | mpw *d, *dl; | |
1584 | int ok = 1; | |
1585 | ||
1586 | LOAD(a, al, &v[0]); | |
1587 | LOAD(b, bl, &v[1]); | |
1588 | LOAD(c, cl, &v[2]); | |
1589 | ALLOC(d, dl, MAX(al - a, bl - b) + 1); | |
1590 | ||
1591 | mpx_uadd(d, dl, a, al, b, bl); | |
1a05a8ef | 1592 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1593 | fprintf(stderr, "\n*** uadd failed\n"); |
45c0fd36 MW |
1594 | dumpmp(" a", a, al); |
1595 | dumpmp(" b", b, bl); | |
42684bdb | 1596 | dumpmp("expected", c, cl); |
1597 | dumpmp(" result", d, dl); | |
1598 | ok = 0; | |
1599 | } | |
1600 | ||
12ed8a1f | 1601 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1602 | return (ok); |
1603 | } | |
1604 | ||
1605 | static int usub(dstr *v) | |
1606 | { | |
1607 | mpw *a, *al; | |
1608 | mpw *b, *bl; | |
1609 | mpw *c, *cl; | |
1610 | mpw *d, *dl; | |
1611 | int ok = 1; | |
1612 | ||
1613 | LOAD(a, al, &v[0]); | |
1614 | LOAD(b, bl, &v[1]); | |
1615 | LOAD(c, cl, &v[2]); | |
1616 | ALLOC(d, dl, al - a); | |
1617 | ||
1618 | mpx_usub(d, dl, a, al, b, bl); | |
1a05a8ef | 1619 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1620 | fprintf(stderr, "\n*** usub failed\n"); |
45c0fd36 MW |
1621 | dumpmp(" a", a, al); |
1622 | dumpmp(" b", b, bl); | |
42684bdb | 1623 | dumpmp("expected", c, cl); |
1624 | dumpmp(" result", d, dl); | |
1625 | ok = 0; | |
1626 | } | |
1627 | ||
12ed8a1f | 1628 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1629 | return (ok); |
1630 | } | |
1631 | ||
1632 | static int umul(dstr *v) | |
1633 | { | |
1634 | mpw *a, *al; | |
1635 | mpw *b, *bl; | |
1636 | mpw *c, *cl; | |
1637 | mpw *d, *dl; | |
1638 | int ok = 1; | |
1639 | ||
1640 | LOAD(a, al, &v[0]); | |
1641 | LOAD(b, bl, &v[1]); | |
1642 | LOAD(c, cl, &v[2]); | |
1643 | ALLOC(d, dl, (al - a) + (bl - b)); | |
1644 | ||
1645 | mpx_umul(d, dl, a, al, b, bl); | |
1a05a8ef | 1646 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1647 | fprintf(stderr, "\n*** umul failed\n"); |
45c0fd36 MW |
1648 | dumpmp(" a", a, al); |
1649 | dumpmp(" b", b, bl); | |
42684bdb | 1650 | dumpmp("expected", c, cl); |
1651 | dumpmp(" result", d, dl); | |
1652 | ok = 0; | |
1653 | } | |
1654 | ||
12ed8a1f | 1655 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb | 1656 | return (ok); |
1657 | } | |
1658 | ||
1659 | static int usqr(dstr *v) | |
1660 | { | |
1661 | mpw *a, *al; | |
1662 | mpw *c, *cl; | |
1663 | mpw *d, *dl; | |
1664 | int ok = 1; | |
1665 | ||
1666 | LOAD(a, al, &v[0]); | |
1667 | LOAD(c, cl, &v[1]); | |
1668 | ALLOC(d, dl, 2 * (al - a)); | |
1669 | ||
1670 | mpx_usqr(d, dl, a, al); | |
1a05a8ef | 1671 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb | 1672 | fprintf(stderr, "\n*** usqr failed\n"); |
45c0fd36 | 1673 | dumpmp(" a", a, al); |
42684bdb | 1674 | dumpmp("expected", c, cl); |
1675 | dumpmp(" result", d, dl); | |
1676 | ok = 0; | |
1677 | } | |
1678 | ||
12ed8a1f | 1679 | xfree(a); xfree(c); xfree(d); |
42684bdb | 1680 | return (ok); |
1681 | } | |
1682 | ||
1683 | static int udiv(dstr *v) | |
1684 | { | |
1685 | mpw *a, *al; | |
1686 | mpw *b, *bl; | |
1687 | mpw *q, *ql; | |
1688 | mpw *r, *rl; | |
1689 | mpw *qq, *qql; | |
1690 | mpw *s, *sl; | |
1691 | int ok = 1; | |
1692 | ||
1693 | ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len); | |
1694 | LOAD(b, bl, &v[1]); | |
1695 | LOAD(q, ql, &v[2]); | |
1696 | LOAD(r, rl, &v[3]); | |
1697 | ALLOC(qq, qql, al - a); | |
1698 | ALLOC(s, sl, (bl - b) + 1); | |
1699 | ||
1700 | mpx_udiv(qq, qql, a, al, b, bl, s, sl); | |
1a05a8ef | 1701 | if (!mpx_ueq(qq, qql, q, ql) || |
1702 | !mpx_ueq(a, al, r, rl)) { | |
42684bdb | 1703 | fprintf(stderr, "\n*** udiv failed\n"); |
1704 | dumpmp(" divisor", b, bl); | |
1705 | dumpmp("expect r", r, rl); | |
1706 | dumpmp("result r", a, al); | |
1707 | dumpmp("expect q", q, ql); | |
1708 | dumpmp("result q", qq, qql); | |
1709 | ok = 0; | |
1710 | } | |
1711 | ||
12ed8a1f | 1712 | xfree(a); xfree(b); xfree(r); xfree(q); xfree(s); xfree(qq); |
42684bdb | 1713 | return (ok); |
1714 | } | |
1715 | ||
1716 | static test_chunk defs[] = { | |
1717 | { "load-store", loadstore, { &type_hex, 0 } }, | |
f09e814a | 1718 | { "2cl", twocl, { &type_hex, &type_hex, } }, |
1719 | { "2cb", twocb, { &type_hex, &type_hex, } }, | |
42684bdb | 1720 | { "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } }, |
81578196 | 1721 | { "lslc", lslc, { &type_hex, &type_int, &type_hex, 0 } }, |
42684bdb | 1722 | { "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } }, |
1723 | { "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1724 | { "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1725 | { "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } }, | |
1726 | { "usqr", usqr, { &type_hex, &type_hex, 0 } }, | |
1727 | { "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } }, | |
1728 | { 0, 0, { 0 } } | |
1729 | }; | |
1730 | ||
1731 | int main(int argc, char *argv[]) | |
1732 | { | |
1733 | test_run(argc, argv, defs, SRCDIR"/tests/mpx"); | |
1734 | return (0); | |
1735 | } | |
1736 | ||
42684bdb | 1737 | #endif |
1738 | ||
d03ab969 | 1739 | /*----- That's all, folks -------------------------------------------------*/ |