d03ab969 |
1 | /* -*-c-*- |
2 | * |
12ed8a1f |
3 | * $Id$ |
d03ab969 |
4 | * |
5 | * Low-level multiprecision arithmetic |
6 | * |
7 | * (c) 1999 Straylight/Edgeware |
8 | */ |
9 | |
10 | /*----- Licensing notice --------------------------------------------------* |
11 | * |
12 | * This file is part of Catacomb. |
13 | * |
14 | * Catacomb is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU Library General Public License as |
16 | * published by the Free Software Foundation; either version 2 of the |
17 | * License, or (at your option) any later version. |
18 | * |
19 | * Catacomb is distributed in the hope that it will be useful, |
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22 | * GNU Library General Public License for more details. |
23 | * |
24 | * You should have received a copy of the GNU Library General Public |
25 | * License along with Catacomb; if not, write to the Free |
26 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, |
27 | * MA 02111-1307, USA. |
28 | */ |
29 | |
d03ab969 |
30 | /*----- Header files ------------------------------------------------------*/ |
31 | |
c8a2f9ef |
32 | #include <assert.h> |
d03ab969 |
33 | #include <stdio.h> |
34 | #include <stdlib.h> |
35 | #include <string.h> |
36 | |
37 | #include <mLib/bits.h> |
38 | |
39 | #include "mptypes.h" |
40 | #include "mpx.h" |
75263f25 |
41 | #include "bitops.h" |
d03ab969 |
42 | |
43 | /*----- Loading and storing -----------------------------------------------*/ |
44 | |
45 | /* --- @mpx_storel@ --- * |
46 | * |
47 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector |
c8a2f9ef |
48 | * @void *pp@ = pointer to octet array |
d03ab969 |
49 | * @size_t sz@ = size of octet array |
50 | * |
51 | * Returns: --- |
52 | * |
53 | * Use: Stores an MP in an octet array, least significant octet |
54 | * first. High-end octets are silently discarded if there |
55 | * isn't enough space for them; unused octets are zeroed. |
56 | */ |
57 | |
c8a2f9ef |
58 | void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz) |
d03ab969 |
59 | { |
60 | mpw n, w = 0; /* n: word just fetched; w: bits not yet written out */ |
c8a2f9ef |
61 | octet *p = pp, *q = p + sz; /* p: write cursor; q: end of the buffer */ |
d03ab969 |
62 | unsigned bits = 0; /* count of pending bits held in w */ |
63 | |
64 | while (p < q) { |
65 | if (bits < 8) { /* fewer than a whole octet pending: need another word */ |
66 | if (v >= vl) { /* source exhausted: flush the leftover bits and stop */ |
67 | *p++ = U8(w); |
68 | break; |
69 | } |
70 | n = *v++; |
71 | *p++ = U8(w | n << bits); /* leftover bits joined with the low bits of the new word */ |
72 | w = n >> (8 - bits); /* keep the part of the new word not yet emitted */ |
73 | bits += MPW_BITS - 8; |
74 | } else { /* a whole octet is already pending in w */ |
75 | *p++ = U8(w); |
76 | w >>= 8; |
77 | bits -= 8; |
78 | } |
79 | } |
80 | memset(p, 0, q - p); /* zero-fill whatever remains of the buffer */ |
81 | } |
82 | |
83 | /* --- @mpx_loadl@ --- * |
84 | * |
85 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector |
c8a2f9ef |
86 | * @const void *pp@ = pointer to octet array |
d03ab969 |
87 | * @size_t sz@ = size of octet array |
88 | * |
89 | * Returns: --- |
90 | * |
91 | * Use: Loads an MP from an octet array, least significant octet |
92 | * first. High-end octets are ignored if there isn't enough |
93 | * space for them. |
94 | */ |
95 | |
c8a2f9ef |
96 | void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz) |
d03ab969 |
97 | { |
98 | unsigned n; /* octet just read */ |
c8a2f9ef |
99 | mpw w = 0; /* word being assembled */ |
100 | const octet *p = pp, *q = p + sz; /* p: read cursor; q: end of the buffer */ |
d03ab969 |
101 | unsigned bits = 0; /* number of bits collected in w so far */ |
102 | |
103 | if (v >= vl) /* empty destination: nothing can be stored */ |
104 | return; |
105 | while (p < q) { |
106 | n = U8(*p++); |
107 | w |= n << bits; |
108 | bits += 8; |
109 | if (bits >= MPW_BITS) { /* a full word has been collected */ |
110 | *v++ = MPW(w); |
111 | w = n >> (MPW_BITS - bits + 8); /* carry over the bits of this octet that did not fit */ |
112 | bits -= MPW_BITS; |
113 | if (v >= vl) /* destination full: remaining octets are ignored */ |
114 | return; |
115 | } |
116 | } |
117 | *v++ = w; /* store the final, possibly partial, word */ |
118 | MPX_ZERO(v, vl); /* presumably clears the remaining words; macro defined elsewhere -- TODO confirm */ |
119 | } |
120 | |
121 | /* --- @mpx_storeb@ --- * |
122 | * |
123 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector |
c8a2f9ef |
124 | * @void *pp@ = pointer to octet array |
d03ab969 |
125 | * @size_t sz@ = size of octet array |
126 | * |
127 | * Returns: --- |
128 | * |
129 | * Use: Stores an MP in an octet array, most significant octet |
130 | * first. High-end octets are silently discarded if there |
131 | * isn't enough space for them; unused octets are zeroed. |
132 | */ |
133 | |
c8a2f9ef |
134 | void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz) |
d03ab969 |
135 | { |
136 | mpw n, w = 0; /* n: word just fetched; w: bits not yet written out */ |
c8a2f9ef |
137 | octet *p = pp, *q = p + sz; /* p: start of the buffer; q: write cursor, moving down from the end */ |
d03ab969 |
138 | unsigned bits = 0; /* count of pending bits held in w */ |
139 | |
140 | while (q > p) { |
141 | if (bits < 8) { /* fewer than a whole octet pending: need another word */ |
142 | if (v >= vl) { /* source exhausted: flush the leftover bits and stop */ |
143 | *--q = U8(w); |
144 | break; |
145 | } |
146 | n = *v++; |
147 | *--q = U8(w | n << bits); /* leftover bits joined with the low bits of the new word */ |
148 | w = n >> (8 - bits); /* keep the part of the new word not yet emitted */ |
149 | bits += MPW_BITS - 8; |
150 | } else { /* a whole octet is already pending in w */ |
151 | *--q = U8(w); |
152 | w >>= 8; |
153 | bits -= 8; |
154 | } |
155 | } |
156 | memset(p, 0, q - p); /* zero-fill the octets at the front that were not written */ |
157 | } |
158 | |
159 | /* --- @mpx_loadb@ --- * |
160 | * |
161 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector |
c8a2f9ef |
162 | * @const void *pp@ = pointer to octet array |
d03ab969 |
163 | * @size_t sz@ = size of octet array |
164 | * |
165 | * Returns: --- |
166 | * |
167 | * Use: Loads an MP from an octet array, most significant octet |
168 | * first. High-end octets are ignored if there isn't enough |
169 | * space for them. |
170 | */ |
171 | |
c8a2f9ef |
172 | void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz) |
d03ab969 |
173 | { |
174 | unsigned n; /* octet just read */ |
c8a2f9ef |
175 | mpw w = 0; /* word being assembled */ |
176 | const octet *p = pp, *q = p + sz; /* p: start of the buffer; q: read cursor, moving down from the end */ |
d03ab969 |
177 | unsigned bits = 0; /* number of bits collected in w so far */ |
178 | |
179 | if (v >= vl) /* empty destination: nothing can be stored */ |
180 | return; |
181 | while (q > p) { |
182 | n = U8(*--q); |
183 | w |= n << bits; |
184 | bits += 8; |
185 | if (bits >= MPW_BITS) { /* a full word has been collected */ |
186 | *v++ = MPW(w); |
187 | w = n >> (MPW_BITS - bits + 8); /* carry over the bits of this octet that did not fit */ |
188 | bits -= MPW_BITS; |
189 | if (v >= vl) /* destination full: remaining octets are ignored */ |
190 | return; |
191 | } |
192 | } |
193 | *v++ = w; /* store the final, possibly partial, word */ |
194 | MPX_ZERO(v, vl); /* presumably clears the remaining words; macro defined elsewhere -- TODO confirm */ |
195 | } |
196 | |
f09e814a |
197 | /* --- @mpx_storel2cn@ --- * |
198 | * |
199 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector |
200 | * @void *pp@ = pointer to octet array |
201 | * @size_t sz@ = size of octet array |
202 | * |
203 | * Returns: --- |
204 | * |
205 | * Use: Stores a negative MP in an octet array, least significant |
206 | * octet first, as two's complement. High-end octets are |
207 | * silently discarded if there isn't enough space for them. |
208 | * This obviously makes the output bad. |
209 | */ |
210 | |
211 | void mpx_storel2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) |
212 | { |
213 | unsigned c = 1; /* carry for the `complement, then add one' step; starts at 1 */ |
214 | unsigned b = 0; /* octet currently being produced */ |
215 | mpw n, w = 0; /* n: word just fetched; w: bits not yet written out */ |
216 | octet *p = pp, *q = p + sz; /* p: write cursor; q: end of the buffer */ |
217 | unsigned bits = 0; /* count of pending bits held in w */ |
218 | |
219 | while (p < q) { |
220 | if (bits < 8) { /* fewer than a whole octet pending: need another word */ |
221 | if (v >= vl) { /* source exhausted: leave the leftover bits in b for the loop below */ |
222 | b = w; |
223 | break; |
224 | } |
225 | n = *v++; |
226 | b = w | n << bits; |
227 | w = n >> (8 - bits); |
228 | bits += MPW_BITS - 8; |
229 | } else { |
230 | b = w; |
231 | w >>= 8; |
232 | bits -= 8; |
233 | } |
234 | b = U8(~b + c); /* complement the octet and add the incoming carry */ |
2bd53494 |
235 | c = c && !b; /* the carry survives only while the resulting octet is zero */ |
f09e814a |
236 | *p++ = b; |
237 | } |
238 | while (p < q) { /* fill the rest: first pass uses the leftover bits, later passes use zero */ |
239 | b = U8(~b + c); |
2bd53494 |
240 | c = c && !b; |
f09e814a |
241 | *p++ = b; |
242 | b = 0; |
243 | } |
244 | } |
245 | |
246 | /* --- @mpx_loadl2cn@ --- * |
247 | * |
248 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector |
249 | * @const void *pp@ = pointer to octet array |
250 | * @size_t sz@ = size of octet array |
251 | * |
252 | * Returns: --- |
253 | * |
254 | * Use: Loads a negative MP from an octet array, least significant |
255 | * octet first, as two's complement. High-end octets are |
256 | * ignored if there isn't enough space for them. This probably |
257 | * means you made the wrong choice coming here. |
258 | */ |
259 | |
260 | void mpx_loadl2cn(mpw *v, mpw *vl, const void *pp, size_t sz) |
261 | { |
262 | unsigned n; /* octet after complementing and adding the carry */ |
263 | unsigned c = 1; /* carry for the `complement, then add one' step; starts at 1 */ |
264 | mpw w = 0; /* word being assembled */ |
265 | const octet *p = pp, *q = p + sz; /* p: read cursor; q: end of the buffer */ |
266 | unsigned bits = 0; /* number of bits collected in w so far */ |
267 | |
268 | if (v >= vl) /* empty destination: nothing can be stored */ |
269 | return; |
270 | while (p < q) { |
271 | n = U8(~(*p++) + c); /* complement the octet and add the incoming carry */ |
2bd53494 |
272 | c = c && !n; /* the carry survives only while the resulting octet is zero */ |
f09e814a |
273 | w |= n << bits; |
274 | bits += 8; |
275 | if (bits >= MPW_BITS) { /* a full word has been collected */ |
276 | *v++ = MPW(w); |
277 | w = n >> (MPW_BITS - bits + 8); /* carry over the bits of this octet that did not fit */ |
278 | bits -= MPW_BITS; |
279 | if (v >= vl) /* destination full: remaining octets are ignored */ |
280 | return; |
281 | } |
282 | } |
283 | *v++ = w; /* store the final, possibly partial, word */ |
284 | MPX_ZERO(v, vl); /* presumably clears the remaining words; macro defined elsewhere -- TODO confirm */ |
285 | } |
286 | |
287 | /* --- @mpx_storeb2cn@ --- * |
288 | * |
289 | * Arguments: @const mpw *v, *vl@ = base and limit of source vector |
290 | * @void *pp@ = pointer to octet array |
291 | * @size_t sz@ = size of octet array |
292 | * |
293 | * Returns: --- |
294 | * |
295 | * Use: Stores a negative MP in an octet array, most significant |
296 | * octet first, as two's complement. High-end octets are |
297 | * silently discarded if there isn't enough space for them, |
298 | * which probably isn't what you meant. |
299 | */ |
300 | |
301 | void mpx_storeb2cn(const mpw *v, const mpw *vl, void *pp, size_t sz) |
302 | { |
303 | mpw n, w = 0; /* n: word just fetched; w: bits not yet written out */ |
304 | unsigned b = 0; /* octet currently being produced */ |
305 | unsigned c = 1; /* carry for the `complement, then add one' step; starts at 1 */ |
306 | octet *p = pp, *q = p + sz; /* p: start of the buffer; q: write cursor, moving down from the end */ |
307 | unsigned bits = 0; /* count of pending bits held in w */ |
308 | |
309 | while (q > p) { |
310 | if (bits < 8) { /* fewer than a whole octet pending: need another word */ |
311 | if (v >= vl) { /* source exhausted: leave the leftover bits in b for the loop below */ |
312 | b = w; |
313 | break; |
314 | } |
315 | n = *v++; |
316 | b = w | n << bits; |
317 | w = n >> (8 - bits); |
318 | bits += MPW_BITS - 8; |
319 | } else { |
320 | b = w; |
321 | w >>= 8; |
322 | bits -= 8; |
323 | } |
324 | b = U8(~b + c); /* complement the octet and add the incoming carry */ |
2bd53494 |
325 | c = c && !b; /* the carry survives only while the resulting octet is zero */ |
f09e814a |
326 | *--q = b; |
327 | } |
328 | while (q > p) { /* fill the rest: first pass uses the leftover bits, later passes use zero */ |
329 | b = ~b + c; /* not reduced with U8 here, unlike the loop above; the carry test masks with 0xff instead */ |
2bd53494 |
330 | c = c && !(b & 0xff); |
f09e814a |
331 | *--q = b; |
332 | b = 0; |
333 | } |
334 | } |
335 | |
336 | /* --- @mpx_loadb2cn@ --- * |
337 | * |
338 | * Arguments: @mpw *v, *vl@ = base and limit of destination vector |
339 | * @const void *pp@ = pointer to octet array |
340 | * @size_t sz@ = size of octet array |
341 | * |
342 | * Returns: --- |
343 | * |
344 | * Use: Loads a negative MP from an octet array, most significant |
345 | * octet first, as two's complement. High-end octets are ignored |
346 | * if there isn't enough space for them. This probably means you |
347 | * chose this function wrongly. |
348 | */ |
349 | |
350 | void mpx_loadb2cn(mpw *v, mpw *vl, const void *pp, size_t sz) |
351 | { |
352 | unsigned n; /* octet after complementing and adding the carry */ |
353 | unsigned c = 1; /* carry for the `complement, then add one' step; starts at 1 */ |
354 | mpw w = 0; /* word being assembled */ |
355 | const octet *p = pp, *q = p + sz; /* p: start of the buffer; q: read cursor, moving down from the end */ |
356 | unsigned bits = 0; /* number of bits collected in w so far */ |
357 | |
358 | if (v >= vl) /* empty destination: nothing can be stored */ |
359 | return; |
360 | while (q > p) { |
361 | n = U8(~(*--q) + c); /* complement the octet and add the incoming carry */ |
2bd53494 |
362 | c = c && !n; /* the carry survives only while the resulting octet is zero */ |
f09e814a |
363 | w |= n << bits; |
364 | bits += 8; |
365 | if (bits >= MPW_BITS) { /* a full word has been collected */ |
366 | *v++ = MPW(w); |
367 | w = n >> (MPW_BITS - bits + 8); /* carry over the bits of this octet that did not fit */ |
368 | bits -= MPW_BITS; |
369 | if (v >= vl) /* destination full: remaining octets are ignored */ |
370 | return; |
371 | } |
372 | } |
373 | *v++ = w; /* store the final, possibly partial, word */ |
374 | MPX_ZERO(v, vl); /* presumably clears the remaining words; macro defined elsewhere -- TODO confirm */ |
375 | } |
376 | |
d03ab969 |
377 | /*----- Logical shifting --------------------------------------------------*/ |
378 | |
379 | /* --- @mpx_lsl@ --- * |
380 | * |
381 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
382 | * @const mpw *av, *avl@ = source vector base and limit |
383 | * @size_t n@ = number of bit positions to shift by |
384 | * |
385 | * Returns: --- |
386 | * |
387 | * Use: Performs a logical shift left operation on an integer. |
388 | */ |
389 | |
390 | void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n) |
391 | { |
392 | size_t nw; /* whole words to shift by */ |
393 | unsigned nb; /* remaining bits to shift by */ |
394 | |
395 | /* --- Trivial special case --- */ |
396 | |
397 | if (n == 0) |
398 | MPX_COPY(dv, dvl, av, avl); /* presumably copies the source into the destination; macro defined elsewhere -- TODO confirm */ |
399 | |
400 | /* --- Single bit shifting --- */ |
401 | |
402 | else if (n == 1) { |
403 | mpw w = 0; /* bit carried in from the word below */ |
404 | while (av < avl) { |
405 | mpw t; |
406 | if (dv >= dvl) /* destination full: higher words are dropped */ |
407 | goto done; |
408 | t = *av++; |
409 | *dv++ = MPW((t << 1) | w); |
410 | w = t >> (MPW_BITS - 1); /* top bit becomes the carry into the next word */ |
411 | } |
412 | if (dv >= dvl) |
413 | goto done; |
414 | *dv++ = MPW(w); /* final carry gets a word of its own */ |
415 | MPX_ZERO(dv, dvl); |
c8a2f9ef |
416 | goto done; |
d03ab969 |
417 | } |
418 | |
419 | /* --- Break out word and bit shifts for more sophisticated work --- */ |
420 | |
421 | nw = n / MPW_BITS; /* NOTE(review): also reached after the n == 0 copy above, with nw == nb == 0 */ |
422 | nb = n % MPW_BITS; |
423 | |
424 | /* --- Handle a shift by a multiple of the word size --- */ |
425 | |
426 | if (nb == 0) { |
4f29a732 |
427 | if (nw >= dvl - dv) /* shift is at least as long as the whole destination */ |
428 | MPX_ZERO(dv, dvl); |
429 | else { |
430 | MPX_COPY(dv + nw, dvl, av, avl); |
431 | memset(dv, 0, MPWS(nw)); /* clear the nw low words; MPWS presumably converts a word count to bytes -- TODO confirm */ |
432 | } |
d03ab969 |
433 | } |
434 | |
c8a2f9ef |
435 | /* --- And finally the difficult case --- * |
436 | * |
437 | * This is a little convoluted, because I have to start from the end and |
438 | * work backwards to avoid overwriting the source, if they're both the same |
439 | * block of memory. |
440 | */ |
d03ab969 |
441 | |
442 | else { |
443 | mpw w; /* bits already placed in the destination word under construction */ |
444 | size_t nr = MPW_BITS - nb; /* right-shift that extracts the bits spilling into the next word up */ |
c8a2f9ef |
445 | size_t dvn = dvl - dv; /* destination length in words */ |
446 | size_t avn = avl - av; /* source length in words */ |
d03ab969 |
447 | |
c8a2f9ef |
448 | if (dvn <= nw) { /* the word shift alone covers the whole destination */ |
d03ab969 |
449 | MPX_ZERO(dv, dvl); |
450 | goto done; |
451 | } |
d03ab969 |
452 | |
c8a2f9ef |
453 | if (dvn > avn + nw) { /* destination has room for every shifted word plus the spill word */ |
454 | size_t off = avn + nw + 1; |
455 | MPX_ZERO(dv + off, dvl); /* clear the words above the result */ |
456 | dvl = dv + off; |
457 | w = 0; |
458 | } else { /* destination too short: only the source words that fit are used */ |
459 | avl = av + dvn - nw; |
460 | w = *--avl << nb; /* highest surviving word supplies the top destination word's upper bits */ |
d03ab969 |
461 | } |
462 | |
c8a2f9ef |
463 | while (avl > av) { /* walk downwards from the most significant end */ |
464 | mpw t = *--avl; |
465 | *--dvl = (t >> nr) | w; |
466 | w = t << nb; |
d03ab969 |
467 | } |
c8a2f9ef |
468 | |
469 | *--dvl = w; /* lowest word that receives source bits */ |
470 | MPX_ZERO(dv, dvl); /* everything below it is presumably cleared here -- TODO confirm macro */ |
d03ab969 |
471 | } |
472 | |
473 | done:; |
474 | } |
475 | |
81578196 |
476 | /* --- @mpx_lslc@ --- * |
477 | * |
478 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
479 | * @const mpw *av, *avl@ = source vector base and limit |
480 | * @size_t n@ = number of bit positions to shift by |
481 | * |
482 | * Returns: --- |
483 | * |
484 | * Use: Performs a logical shift left operation on an integer, only |
485 | * it fills in the vacated bits with ones instead of zeroes. |
486 | */ |
487 | |
488 | void mpx_lslc(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n) |
489 | { |
490 | size_t nw; /* whole words to shift by */ |
491 | unsigned nb; /* remaining bits to shift by */ |
492 | |
493 | /* --- Trivial special case --- */ |
494 | |
495 | if (n == 0) |
496 | MPX_COPY(dv, dvl, av, avl); /* presumably copies the source into the destination; macro defined elsewhere -- TODO confirm */ |
497 | |
498 | /* --- Single bit shifting --- */ |
499 | |
500 | else if (n == 1) { |
501 | mpw w = 1; /* carry into the lowest word starts as 1, so the vacated bit is set */ |
502 | while (av < avl) { |
503 | mpw t; |
504 | if (dv >= dvl) /* destination full: higher words are dropped */ |
505 | goto done; |
506 | t = *av++; |
507 | *dv++ = MPW((t << 1) | w); |
508 | w = t >> (MPW_BITS - 1); /* top bit becomes the carry into the next word */ |
509 | } |
510 | if (dv >= dvl) |
511 | goto done; |
512 | *dv++ = MPW(w); /* final carry gets a word of its own */ |
513 | MPX_ZERO(dv, dvl); |
514 | goto done; |
515 | } |
516 | |
517 | /* --- Break out word and bit shifts for more sophisticated work --- */ |
518 | |
519 | nw = n / MPW_BITS; /* NOTE(review): also reached after the n == 0 copy above, with nw == nb == 0 */ |
520 | nb = n % MPW_BITS; |
521 | |
522 | /* --- Handle a shift by a multiple of the word size --- */ |
523 | |
524 | if (nb == 0) { |
525 | if (nw >= dvl - dv) /* shift is at least as long as the whole destination */ |
526 | MPX_ONE(dv, dvl); /* presumably fills the range with all-ones words; macro defined elsewhere -- TODO confirm */ |
527 | else { |
528 | MPX_COPY(dv + nw, dvl, av, avl); |
529 | MPX_ONE(dv, dv + nw); /* the nw vacated low words */ |
530 | } |
531 | } |
532 | |
533 | /* --- And finally the difficult case --- * |
534 | * |
535 | * This is a little convoluted, because I have to start from the end and |
536 | * work backwards to avoid overwriting the source, if they're both the same |
537 | * block of memory. |
538 | */ |
539 | |
540 | else { |
541 | mpw w; /* bits already placed in the destination word under construction */ |
542 | size_t nr = MPW_BITS - nb; /* right-shift that extracts the bits spilling into the next word up */ |
543 | size_t dvn = dvl - dv; /* destination length in words */ |
544 | size_t avn = avl - av; /* source length in words */ |
545 | |
546 | if (dvn <= nw) { /* the word shift alone covers the whole destination */ |
547 | MPX_ONE(dv, dvl); |
548 | goto done; |
549 | } |
550 | |
551 | if (dvn > avn + nw) { /* destination has room for every shifted word plus the spill word */ |
552 | size_t off = avn + nw + 1; |
553 | MPX_ZERO(dv + off, dvl); /* words above the result are zeroed, not filled with ones */ |
554 | dvl = dv + off; |
555 | w = 0; |
556 | } else { /* destination too short: only the source words that fit are used */ |
557 | avl = av + dvn - nw; |
558 | w = *--avl << nb; /* highest surviving word supplies the top destination word's upper bits */ |
559 | } |
560 | |
561 | while (avl > av) { /* walk downwards from the most significant end */ |
562 | mpw t = *--avl; |
563 | *--dvl = (t >> nr) | w; |
564 | w = t << nb; |
565 | } |
566 | |
567 | *--dvl = (MPW_MAX >> nr) | w; /* lowest word with source bits; its vacated low bits are filled from MPW_MAX */ |
568 | MPX_ONE(dv, dvl); /* everything below it */ |
569 | } |
570 | |
571 | done:; |
572 | } |
573 | |
d03ab969 |
574 | /* --- @mpx_lsr@ --- * |
575 | * |
576 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
577 | * @const mpw *av, *avl@ = source vector base and limit |
578 | * @size_t n@ = number of bit positions to shift by |
579 | * |
580 | * Returns: --- |
581 | * |
582 | * Use: Performs a logical shift right operation on an integer. |
583 | */ |
584 | |
585 | void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n) |
586 | { |
587 | size_t nw; /* whole words to shift by */ |
588 | unsigned nb; /* remaining bits to shift by */ |
589 | |
590 | /* --- Trivial special case --- */ |
591 | |
592 | if (n == 0) |
593 | MPX_COPY(dv, dvl, av, avl); /* presumably copies the source into the destination; macro defined elsewhere -- TODO confirm */ |
594 | |
595 | /* --- Single bit shifting --- */ |
596 | |
597 | else if (n == 1) { |
f40868de |
598 | mpw w = av < avl ? *av++ >> 1 : 0; /* lowest word, already shifted; zero if the source is empty */ |
d03ab969 |
599 | while (av < avl) { |
600 | mpw t; |
601 | if (dv >= dvl) /* destination full: higher words are dropped */ |
602 | goto done; |
603 | t = *av++; |
604 | *dv++ = MPW((t << (MPW_BITS - 1)) | w); /* low bit of the next word becomes the top bit of this one */ |
605 | w = t >> 1; |
606 | } |
607 | if (dv >= dvl) |
608 | goto done; |
609 | *dv++ = MPW(w); /* most significant word has nothing above it to pull in */ |
610 | MPX_ZERO(dv, dvl); |
c8a2f9ef |
611 | goto done; |
d03ab969 |
612 | } |
613 | |
614 | /* --- Break out word and bit shifts for more sophisticated work --- */ |
615 | |
616 | nw = n / MPW_BITS; /* NOTE(review): also reached after the n == 0 copy above, with nw == nb == 0 */ |
617 | nb = n % MPW_BITS; |
618 | |
619 | /* --- Handle a shift by a multiple of the word size --- */ |
620 | |
4f29a732 |
621 | if (nb == 0) { |
622 | if (nw >= avl - av) /* every source word is shifted out */ |
623 | MPX_ZERO(dv, dvl); |
624 | else |
625 | MPX_COPY(dv, dvl, av + nw, avl); /* drop the nw low source words */ |
626 | } |
d03ab969 |
627 | |
628 | /* --- And finally the difficult case --- */ |
629 | |
630 | else { |
631 | mpw w; /* source word whose upper bits are still to be emitted */ |
632 | size_t nr = MPW_BITS - nb; /* left-shift that moves the next word's low bits to the top */ |
633 | |
634 | av += nw; /* NOTE(review): not checked against avl; when nw exceeds the source length av ends up beyond avl -- confirm callers */ |
4f29a732 |
635 | w = av < avl ? *av++ : 0; /* treat an exhausted source as zero */ |
d03ab969 |
636 | while (av < avl) { |
637 | mpw t; |
638 | if (dv >= dvl) /* destination full: higher words are dropped */ |
639 | goto done; |
640 | t = *av++; |
641 | *dv++ = MPW((w >> nb) | (t << nr)); |
642 | w = t; |
643 | } |
644 | if (dv < dvl) { |
645 | *dv++ = MPW(w >> nb); /* most significant word has nothing above it to pull in */ |
646 | MPX_ZERO(dv, dvl); |
647 | } |
648 | } |
649 | |
650 | done:; |
651 | } |
652 | |
0f32e0f8 |
653 | /*----- Bitwise operations ------------------------------------------------*/ |
654 | |
f09e814a |
655 | /* --- @mpx_bitop@ --- * |
0f32e0f8 |
656 | * |
657 | * Arguments: @mpw *dv, *dvl@ = destination vector |
658 | * @const mpw *av, *avl@ = first source vector |
659 | * @const mpw *bv, *bvl@ = second source vector |
660 | * |
661 | * Returns: --- |
662 | * |
f09e814a |
663 | * Use: Provides the dyadic boolean functions. A source shorter than the destination is read as zero words. |
0f32e0f8 |
664 | */ |
665 | |
f09e814a |
666 | #define MPX_BITBINOP(string) \ |
0f32e0f8 |
667 | \ |
f09e814a |
668 | void mpx_bit##string(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, \ |
669 | const mpw *bv, const mpw *bvl) \ |
0f32e0f8 |
670 | { \ |
671 | MPX_SHRINK(av, avl); \ |
672 | MPX_SHRINK(bv, bvl); \ |
673 | \ |
674 | while (dv < dvl) { \ |
675 | mpw a, b; \ |
676 | a = (av < avl) ? *av++ : 0; \ |
677 | b = (bv < bvl) ? *bv++ : 0; \ |
75263f25 |
678 | *dv++ = B##string(a, b); \ |
0f32e0f8 |
679 | } \ |
680 | } |
681 | |
f09e814a |
682 | MPX_DOBIN(MPX_BITBINOP) /* MPX_DOBIN is defined elsewhere; presumably expands the template once per boolean operator -- TODO confirm */ |
0f32e0f8 |
683 | /* --- @mpx_not@: writes the bitwise complement of the source into every destination word; source words beyond @avl@ are read as zero --- */ |
684 | void mpx_not(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) |
685 | { |
686 | MPX_SHRINK(av, avl); /* presumably trims high-order zero words from the source; macro defined elsewhere -- TODO confirm */ |
687 | |
688 | while (dv < dvl) { |
689 | mpw a; |
690 | a = (av < avl) ? *av++ : 0; /* pad a short source with zero */ |
691 | *dv++ = ~a; /* stored as-is, without an MPW() reduction */ |
692 | } |
693 | } |
694 | |
d03ab969 |
695 | /*----- Unsigned arithmetic -----------------------------------------------*/ |
696 | |
f45a00c6 |
697 | /* --- @mpx_2c@ --- * |
698 | * |
699 | * Arguments: @mpw *dv, *dvl@ = destination vector |
700 | * @const mpw *v, *vl@ = source vector |
701 | * |
702 | * Returns: --- |
703 | * |
704 | * Use: Calculates the two's complement of @v@. |
705 | */ |
706 | |
707 | void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl) |
708 | { |
709 | mpw c = 0; /* last complemented word written; later reused as the padding word */ |
710 | while (dv < dvl && v < vl) |
711 | *dv++ = c = MPW(~*v++); /* store the complement of each source word */ |
712 | if (dv < dvl) { /* destination is longer than the source: pad the rest */ |
713 | if (c > MPW_MAX / 2) /* last complemented word is in the upper half of the word range */ |
714 | c = MPW(~0); |
715 | while (dv < dvl) |
716 | *dv++ = c; /* NOTE(review): when the test above fails this pads with the last complemented word itself, not zero -- confirm intended */ |
717 | } |
718 | MPX_UADDN(dv, dvl, 1); /* NOTE(review): dv equals dvl on every path by this point, so the range handed to the macro is empty -- confirm the +1 is really applied */ |
719 | } |
720 | |
1a05a8ef |
721 | /* --- @mpx_ueq@ --- * |
722 | * |
723 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit |
724 | * @const mpw *bv, *bvl@ = second argument vector base and limit |
725 | * |
726 | * Returns: Nonzero if the two vectors are equal, zero otherwise. |
727 | * |
728 | * Use: Performs an unsigned integer test for equality. |
729 | */ |
730 | |
731 | int mpx_ueq(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) |
732 | { |
733 | MPX_SHRINK(av, avl); /* presumably trims high-order zero words so lengths are comparable; macro defined elsewhere -- TODO confirm */ |
734 | MPX_SHRINK(bv, bvl); |
735 | if (avl - av != bvl - bv) /* different lengths after shrinking: treated as unequal */ |
736 | return (0); |
737 | while (av < avl) { /* same length: compare word by word from the base */ |
738 | if (*av++ != *bv++) |
739 | return (0); |
740 | } |
741 | return (1); |
742 | } |
743 | |
d03ab969 |
744 | /* --- @mpx_ucmp@ --- * |
745 | * |
746 | * Arguments: @const mpw *av, *avl@ = first argument vector base and limit |
747 | * @const mpw *bv, *bvl@ = second argument vector base and limit |
748 | * |
749 | * Returns: Less than, equal to, or greater than zero depending on |
750 | * whether @a@ is less than, equal to or greater than @b@, |
751 | * respectively. |
752 | * |
753 | * Use: Performs an unsigned integer comparison. |
754 | */ |
755 | |
756 | int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl) |
757 | { |
758 | MPX_SHRINK(av, avl); /* presumably trims high-order zero words so lengths are comparable; macro defined elsewhere -- TODO confirm */ |
759 | MPX_SHRINK(bv, bvl); |
760 | |
761 | if (avl - av > bvl - bv) /* the longer vector is reported as the larger */ |
762 | return (+1); |
763 | else if (avl - av < bvl - bv) |
764 | return (-1); |
765 | else while (avl > av) { /* equal lengths: compare from the limit end downwards */ |
766 | mpw a = *--avl, b = *--bvl; |
767 | if (a > b) |
768 | return (+1); |
769 | else if (a < b) |
770 | return (-1); |
771 | } |
772 | return (0); /* every word matched */ |
773 | } |
1a05a8ef |
774 | |
d03ab969 |
775 | /* --- @mpx_uadd@ --- * |
776 | * |
777 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
778 | * @const mpw *av, *avl@ = first addend vector base and limit |
779 | * @const mpw *bv, *bvl@ = second addend vector base and limit |
780 | * |
781 | * Returns: --- |
782 | * |
783 | * Use: Performs unsigned integer addition. If the result overflows |
784 | * the destination vector, high-order bits are discarded. This |
785 | * means that two's complement addition happens more or less for |
786 | * free, although that's more a side-effect than anything else. |
787 | * The result vector may be equal to either or both source |
788 | * vectors, but may not otherwise overlap them. |
789 | */ |
790 | |
791 | void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, |
792 | const mpw *bv, const mpw *bvl) |
793 | { |
794 | mpw c = 0; /* carry between word positions */ |
795 | |
796 | while (av < avl || bv < bvl) { /* continue until both addends are used up */ |
797 | mpw a, b; |
798 | mpd x; /* wider type so the sum keeps its carry */ |
799 | if (dv >= dvl) /* destination full: the rest of the sum is discarded */ |
800 | return; |
801 | a = (av < avl) ? *av++ : 0; /* a shorter addend is padded with zero */ |
802 | b = (bv < bvl) ? *bv++ : 0; |
803 | x = (mpd)a + (mpd)b + c; |
804 | *dv++ = MPW(x); |
805 | c = x >> MPW_BITS; /* bits above the word become the next carry */ |
806 | } |
807 | if (dv < dvl) { |
808 | *dv++ = c; /* final carry gets a word of its own */ |
809 | MPX_ZERO(dv, dvl); /* presumably clears the remaining words; macro defined elsewhere -- TODO confirm */ |
810 | } |
811 | } |
812 | |
dd517851 |
813 | /* --- @mpx_uaddn@ --- * |
814 | * |
815 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit |
816 | * @mpw n@ = other addend |
817 | * |
818 | * Returns: --- |
819 | * |
820 | * Use: Adds a small integer to a multiprecision number. |
821 | */ |
822 | |
823 | void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); } /* function form of the MPX_UADDN macro, which is defined elsewhere */ |
824 | |
f46efa79 |
825 | /* --- @mpx_uaddnlsl@ --- * |
826 | * |
827 | * Arguments: @mpw *dv, *dvl@ = destination and first argument vector |
828 | * @mpw a@ = second argument |
829 | * @unsigned o@ = offset in bits |
830 | * |
831 | * Returns: --- |
832 | * |
833 | * Use: Computes %$d + 2^o a$%. If the result overflows then |
834 | * high-order bits are discarded, as usual. We must have |
835 | * @0 < o < MPW_BITS@. |
836 | */ |
837 | |
838 | void mpx_uaddnlsl(mpw *dv, mpw *dvl, mpw a, unsigned o) |
839 | { |
840 | mpd x = (mpd)a << o; /* shifted addend held in the wider type */ |
841 | |
842 | while (x && dv < dvl) { /* stop once nothing is left to add or the vector ends */ |
843 | x += *dv; |
844 | *dv++ = MPW(x); |
845 | x >>= MPW_BITS; /* what is left carries into the next word */ |
846 | } |
847 | } |
848 | |
d03ab969 |
849 | /* --- @mpx_usub@ --- * |
850 | * |
851 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
852 | * @const mpw *av, *avl@ = first argument vector base and limit |
853 | * @const mpw *bv, *bvl@ = second argument vector base and limit |
854 | * |
855 | * Returns: --- |
856 | * |
857 | * Use: Performs unsigned integer subtraction. If the result |
858 | * overflows the destination vector, high-order bits are |
859 | * discarded. This means that two's complement subtraction |
860 | * happens more or less for free, although that's more a side- |
861 | * effect than anything else. The result vector may be equal to |
862 | * either or both source vectors, but may not otherwise overlap |
863 | * them. |
864 | */ |
865 | |
866 | void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, |
867 | const mpw *bv, const mpw *bvl) |
868 | { |
869 | mpw c = 0; /* borrow between word positions: 0 or 1 inside the loop */ |
870 | |
871 | while (av < avl || bv < bvl) { /* continue until both operands are used up */ |
872 | mpw a, b; |
873 | mpd x; /* wider type so a borrow shows up above the word */ |
874 | if (dv >= dvl) /* destination full: the rest of the result is discarded */ |
875 | return; |
876 | a = (av < avl) ? *av++ : 0; /* a shorter operand is padded with zero */ |
877 | b = (bv < bvl) ? *bv++ : 0; |
c8a2f9ef |
878 | x = (mpd)a - (mpd)b - c; |
d03ab969 |
879 | *dv++ = MPW(x); |
c8a2f9ef |
880 | if (x >> MPW_BITS) /* anything above the word means the subtraction borrowed */ |
881 | c = 1; |
882 | else |
883 | c = 0; |
d03ab969 |
884 | } |
c8a2f9ef |
885 | if (c) /* outstanding borrow: pad the rest with MPW_MAX instead of zero */ |
886 | c = MPW_MAX; |
d03ab969 |
887 | while (dv < dvl) |
c8a2f9ef |
888 | *dv++ = c; |
d03ab969 |
889 | } |
890 | |
dd517851 |
891 | /* --- @mpx_usubn@ --- * |
892 | * |
893 | * Arguments: @mpw *dv, *dvl@ = source and destination base and limit |
894 | * @mpw n@ = subtrahend |
895 | * |
896 | * Returns: --- |
897 | * |
898 | * Use: Subtracts a small integer from a multiprecision number. |
899 | */ |
900 | |
901 | void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); } /* function form of the MPX_USUBN macro, which is defined elsewhere */ |
902 | |
f46efa79 |
903 | /* --- @mpx_usubnlsl@ --- * |
904 | * |
905 | * Arguments: @mpw *dv, *dvl@ = destination and first argument vector |
906 | * @mpw a@ = second argument |
907 | * @unsigned o@ = offset in bits |
908 | * |
909 | * Returns: --- |
910 | * |
911 | * Use: Computes %$d - 2^o a$%. If the result overflows then |
912 | * high-order bits are discarded, as usual. We must have |
913 | * @0 < o < MPW_BITS@. |
914 | */ |
915 | |
916 | void mpx_usubnlsl(mpw *dv, mpw *dvl, mpw a, unsigned o) |
917 | { |
918 | mpw b = a >> (MPW_BITS - o); /* bits of a that the shift pushes into the next word up */ |
919 | a <<= o; /* part of the shifted value that applies to the lowest word */ |
920 | |
921 | if (dv < dvl) { |
922 | mpd x = (mpd)*dv - (mpd)a; /* wider type so a borrow shows up above the word */ |
923 | *dv++ = MPW(x); |
924 | if (x >> MPW_BITS) /* the low-word subtraction borrowed */ |
925 | b++; |
926 | MPX_USUBN(dv, dvl, b); /* take the upper part, plus any borrow, from the remaining words */ |
927 | } |
928 | } |
929 | |
d03ab969 |
930 | /* --- @mpx_umul@ --- * |
931 | * |
932 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
933 | * @const mpw *av, *avl@ = multiplicand vector base and limit |
934 | * @const mpw *bv, *bvl@ = multiplier vector base and limit |
935 | * |
936 | * Returns: --- |
937 | * |
938 | * Use: Performs unsigned integer multiplication. If the result |
939 | * overflows the destination vector, high-order bits are |
940 | * discarded. The result vector may not overlap the argument |
941 | * vectors in any way. |
942 | */ |
943 | |
944 | void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, |
945 | const mpw *bv, const mpw *bvl) |
946 | { |
947 | /* --- This is probably worthwhile on a multiply --- */ |
948 | |
949 | MPX_SHRINK(av, avl); /* presumably trims high-order zero words; macro defined elsewhere -- TODO confirm */ |
950 | MPX_SHRINK(bv, bvl); |
951 | |
952 | /* --- Deal with a multiply by zero --- */ |
953 | |
954 | if (bv == bvl) { /* multiplier is empty after shrinking */ |
c8a2f9ef |
955 | MPX_ZERO(dv, dvl); |
d03ab969 |
956 | return; |
957 | } |
958 | |
959 | /* --- Do the initial multiply and initialize the accumulator --- */ |
960 | |
961 | MPX_UMULN(dv, dvl, av, avl, *bv++); /* first multiplier word; macro defined elsewhere */ |
962 | |
963 | /* --- Do the remaining multiply/accumulates --- */ |
964 | |
c8a2f9ef |
965 | while (dv < dvl && bv < bvl) { |
d03ab969 |
966 | mpw m = *bv++; /* current multiplier word */ |
c8a2f9ef |
967 | mpw c = 0; /* carry within this row */ |
d03ab969 |
968 | const mpw *avv = av; |
969 | mpw *dvv = ++dv; /* each row starts one word higher than the previous one */ |
970 | |
971 | while (avv < avl) { |
972 | mpd x; |
973 | if (dvv >= dvl) /* destination full: drop the rest of this row */ |
974 | goto next; |
c8a2f9ef |
975 | x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c; /* accumulate product, existing word and carry */ |
976 | *dvv++ = MPW(x); |
d03ab969 |
977 | c = x >> MPW_BITS; |
978 | } |
c8a2f9ef |
979 | MPX_UADDN(dvv, dvl, c); /* add the row's final carry into the words above */ |
d03ab969 |
980 | next:; |
981 | } |
982 | } |
983 | |
dd517851 |
984 | /* --- @mpx_umuln@ --- * |
985 | * |
986 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
987 | * @const mpw *av, *avl@ = multiplicand vector base and limit |
988 | * @mpw m@ = multiplier |
989 | * |
990 | * Returns: --- |
991 | * |
992 | * Use: Multiplies a multiprecision integer by a single-word value. |
993 | * The destination and source may be equal. The destination |
994 | * is completely cleared after use. |
995 | */ |
996 | |
997 | void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) |
998 | { |
999 | MPX_UMULN(dv, dvl, av, avl, m); /* function form of the MPX_UMULN macro, which is defined elsewhere */ |
1000 | } |
1001 | |
1002 | /* --- @mpx_umlan@ --- * |
1003 | * |
1004 | * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit |
1005 | * @const mpw *av, *avl@ = multiplicand vector base and limit |
1006 | * @mpw m@ = multiplier |
1007 | * |
1008 | * Returns: --- |
1009 | * |
1010 | * Use: Multiplies a multiprecision integer by a single-word value |
1011 | * and adds the result to an accumulator. |
1012 | */ |
1013 | |
1014 | void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m) |
1015 | { |
1016 | MPX_UMLAN(dv, dvl, av, avl, m); /* function form of the MPX_UMLAN macro, which is defined elsewhere */ |
1017 | } |
1018 | |
c8a2f9ef |
1019 | /* --- @mpx_usqr@ --- * |
1020 | * |
1021 | * Arguments: @mpw *dv, *dvl@ = destination vector base and limit |
1022 | * @const mpw *av, *avl@ = source vector base and limit |
1023 | * |
1024 | * Returns: --- |
1025 | * |
1026 | * Use: Performs unsigned integer squaring. The result vector must |
1027 | * not overlap the source vector in any way. |
1028 | */ |
1029 | |
1030 | void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl) |
1031 | { |
1032 | MPX_ZERO(dv, dvl); /* the loop below accumulates into the destination, so it is cleared first (macro defined elsewhere) */ |
1033 | |
1034 | /* --- Main loop --- */ |
1035 | |
1036 | while (av < avl) { |
1037 | const mpw *avv = av; /* walks the source words above the current one */ |
1038 | mpw *dvv = dv; /* write cursor for this round */ |
1039 | mpw a = *av; /* current source word */ |
1040 | mpd c; /* carry; may need more than one word, hence the wider type */ |
1041 | |
1042 | /* --- Stop if I've run out of destination --- */ |
1043 | |
1044 | if (dvv >= dvl) |
1045 | break; |
1046 | |
1047 | /* --- Work out the square at this point in the proceedings --- */ |
1048 | |
1049 | { |
c8a2f9ef |
1050 | mpd x = (mpd)a * (mpd)a + *dvv; /* square of the current word plus what is already there */ |
1051 | *dvv++ = MPW(x); |
1052 | c = MPW(x >> MPW_BITS); |
1053 | } |
1054 | |
1055 | /* --- Now fix up the rest of the vector upwards --- */ |
1056 | |
1057 | avv++; |
1058 | while (dvv < dvl && avv < avl) { /* cross terms with every higher source word, each counted twice */ |
c8a2f9ef |
1059 | mpd x = (mpd)a * (mpd)*avv++; |
1060 | mpd y = ((x << 1) & MPW_MAX) + c + *dvv; /* low word of the doubled product, plus carry and existing word */ |
1061 | c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS); /* upper part of the doubled product plus overflow from y */ |
1062 | *dvv++ = MPW(y); |
1063 | } |
1064 | while (dvv < dvl && c) { /* ripple the remaining carry upwards */ |
1065 | mpd x = c + *dvv; |
1066 | *dvv++ = MPW(x); |
1067 | c = x >> MPW_BITS; |
1068 | } |
1069 | |
1070 | /* --- Get ready for the next round --- */ |
1071 | |
1072 | av++; |
1073 | dv += 2; /* the next word's square sits two word positions higher */ |
1074 | } |
1075 | } |
1076 | |
d03ab969 |
1077 | /* --- @mpx_udiv@ --- * |
1078 | * |
1079 | * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit |
1080 | * @mpw *rv, *rvl@ = dividend/remainder vector base and limit |
1081 | * @const mpw *dv, *dvl@ = divisor vector base and limit |
c8a2f9ef |
1082 | * @mpw *sv, *svl@ = scratch workspace |
d03ab969 |
1083 | * |
1084 | * Returns: --- |
1085 | * |
1086 | * Use: Performs unsigned integer division. If the result overflows |
1087 | * the quotient vector, high-order bits are discarded. (Clearly |
1088 | * the remainder vector can't overflow.) The various vectors |
1089 | * may not overlap in any way. Yes, I know it's a bit odd |
1090 | * requiring the dividend to be in the result position but it |
1091 | * does make some sense really. The remainder must have |
c8a2f9ef |
1092 | * headroom for at least two extra words. The scratch space |
f45a00c6 |
1093 | * must be at least one word larger than the divisor. |
d03ab969 |
1094 | */ |
1095 | |
1096 | void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl, |
c8a2f9ef |
1097 | const mpw *dv, const mpw *dvl, |
1098 | mpw *sv, mpw *svl) |
d03ab969 |
1099 | { |
d03ab969 |
1100 | unsigned norm = 0; |
1101 | size_t scale; |
1102 | mpw d, dd; |
1103 | |
1104 | /* --- Initialize the quotient --- */ |
1105 | |
1106 | MPX_ZERO(qv, qvl); |
1107 | |
c8a2f9ef |
1108 | /* --- Perform some sanity checks --- */ |
1109 | |
1110 | MPX_SHRINK(dv, dvl); |
1111 | assert(((void)"division by zero in mpx_udiv", dv < dvl)); |
1112 | |
d03ab969 |
1113 | /* --- Normalize the divisor --- * |
1114 | * |
1115 | * The algorithm requires that the divisor be at least two digits long. |
1116 | * This is easy to fix. |
1117 | */ |
1118 | |
c8a2f9ef |
1119 | { |
1120 | unsigned b; |
d03ab969 |
1121 | |
c8a2f9ef |
1122 | d = dvl[-1]; |
1123 | for (b = MPW_BITS / 2; b; b >>= 1) { |
34e4f738 |
1124 | if (d <= (MPW_MAX >> b)) { |
c8a2f9ef |
1125 | d <<= b; |
1126 | norm += b; |
1127 | } |
1128 | } |
1129 | if (dv + 1 == dvl) |
1130 | norm += MPW_BITS; |
d03ab969 |
1131 | } |
d03ab969 |
1132 | |
1133 | /* --- Normalize the dividend/remainder to match --- */ |
1134 | |
c8a2f9ef |
1135 | if (norm) { |
c8a2f9ef |
1136 | mpx_lsl(rv, rvl, rv, rvl, norm); |
f45a00c6 |
1137 | mpx_lsl(sv, svl, dv, dvl, norm); |
c8a2f9ef |
1138 | dv = sv; |
f45a00c6 |
1139 | dvl = svl; |
c8a2f9ef |
1140 | MPX_SHRINK(dv, dvl); |
1141 | } |
1142 | |
d03ab969 |
1143 | MPX_SHRINK(rv, rvl); |
c8a2f9ef |
1144 | d = dvl[-1]; |
1145 | dd = dvl[-2]; |
d03ab969 |
1146 | |
1147 | /* --- Work out the relative scales --- */ |
1148 | |
1149 | { |
1150 | size_t rvn = rvl - rv; |
c8a2f9ef |
1151 | size_t dvn = dvl - dv; |
d03ab969 |
1152 | |
1153 | /* --- If the divisor is clearly larger, notice this --- */ |
1154 | |
1155 | if (dvn > rvn) { |
1156 | mpx_lsr(rv, rvl, rv, rvl, norm); |
1157 | return; |
1158 | } |
1159 | |
1160 | scale = rvn - dvn; |
1161 | } |
1162 | |
1163 | /* --- Calculate the most significant quotient digit --- * |
1164 | * |
1165 | * Because the divisor has its top bit set, this can only happen once. The |
1166 | * pointer arithmetic is a little contorted, to make sure that the |
1167 | * behaviour is defined. |
1168 | */ |
1169 | |
1170 | if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) { |
1171 | mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl); |
1172 | if (qvl - qv > scale) |
1173 | qv[scale] = 1; |
1174 | } |
1175 | |
1176 | /* --- Now for the main loop --- */ |
1177 | |
1178 | { |
c8a2f9ef |
1179 | mpw *rvv = rvl - 2; |
d03ab969 |
1180 | |
1181 | while (scale) { |
c8a2f9ef |
1182 | mpw q; |
1183 | mpd rh; |
d03ab969 |
1184 | |
1185 | /* --- Get an estimate for the next quotient digit --- */ |
1186 | |
c8a2f9ef |
1187 | mpw r = rvv[1]; |
1188 | mpw rr = rvv[0]; |
1189 | mpw rrr = *--rvv; |
1190 | |
1191 | scale--; |
1192 | rh = ((mpd)r << MPW_BITS) | rr; |
d03ab969 |
1193 | if (r == d) |
1194 | q = MPW_MAX; |
c8a2f9ef |
1195 | else |
1196 | q = MPW(rh / d); |
d03ab969 |
1197 | |
1198 | /* --- Refine the estimate --- */ |
1199 | |
1200 | { |
1201 | mpd yh = (mpd)d * q; |
ce76ff16 |
1202 | mpd yy = (mpd)dd * q; |
1203 | mpw yl; |
c8a2f9ef |
1204 | |
ce76ff16 |
1205 | if (yy > MPW_MAX) |
1206 | yh += yy >> MPW_BITS; |
1207 | yl = MPW(yy); |
c8a2f9ef |
1208 | |
1209 | while (yh > rh || (yh == rh && yl > rrr)) { |
1210 | q--; |
1211 | yh -= d; |
ce76ff16 |
1212 | if (yl < dd) |
1213 | yh--; |
99b30c23 |
1214 | yl = MPW(yl - dd); |
c8a2f9ef |
1215 | } |
1216 | } |
1217 | |
1218 | /* --- Remove a chunk from the dividend --- */ |
1219 | |
1220 | { |
1221 | mpw *svv; |
1222 | const mpw *dvv; |
f45a00c6 |
1223 | mpw mc = 0, sc = 0; |
c8a2f9ef |
1224 | |
f45a00c6 |
1225 | /* --- Calculate the size of the chunk --- * |
1226 | * |
1227 | * This does the whole job of calculating @r >> scale - qd@. |
1228 | */ |
c8a2f9ef |
1229 | |
f45a00c6 |
1230 | for (svv = rv + scale, dvv = dv; |
1231 | dvv < dvl && svv < rvl; |
1232 | svv++, dvv++) { |
1233 | mpd x = (mpd)*dvv * (mpd)q + mc; |
1234 | mc = x >> MPW_BITS; |
1235 | x = (mpd)*svv - MPW(x) - sc; |
c8a2f9ef |
1236 | *svv = MPW(x); |
f45a00c6 |
1237 | if (x >> MPW_BITS) |
1238 | sc = 1; |
1239 | else |
1240 | sc = 0; |
1241 | } |
1242 | |
1243 | if (svv < rvl) { |
1244 | mpd x = (mpd)*svv - mc - sc; |
1245 | *svv++ = MPW(x); |
1246 | if (x >> MPW_BITS) |
1247 | sc = MPW_MAX; |
1248 | else |
1249 | sc = 0; |
1250 | while (svv < rvl) |
1251 | *svv++ = sc; |
c8a2f9ef |
1252 | } |
c8a2f9ef |
1253 | |
f45a00c6 |
1254 | /* --- Fix if the quotient was too large --- * |
c8a2f9ef |
1255 | * |
f45a00c6 |
1256 | * This doesn't seem to happen very often. |
c8a2f9ef |
1257 | */ |
1258 | |
c8a2f9ef |
1259 | if (rvl[-1] > MPW_MAX / 2) { |
1260 | mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl); |
1261 | q--; |
1262 | } |
1263 | } |
1264 | |
1265 | /* --- Done for another iteration --- */ |
1266 | |
1267 | if (qvl - qv > scale) |
1268 | qv[scale] = q; |
1269 | r = rr; |
1270 | rr = rrr; |
1271 | } |
1272 | } |
1273 | |
1274 | /* --- Now fiddle with unnormalizing and things --- */ |
1275 | |
1276 | mpx_lsr(rv, rvl, rv, rvl, norm); |
d03ab969 |
1277 | } |
1278 | |
698bd937 |
1279 | /* --- @mpx_udivn@ --- * |
1280 | * |
1281 | * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap |
1282 | * dividend) |
1283 | * @const mpw *rv, *rvl@ = dividend |
1284 | * @mpw d@ = single-precision divisor |
1285 | * |
1286 | * Returns: Remainder after divison. |
1287 | * |
1288 | * Use: Performs a single-precision division operation. |
1289 | */ |
1290 | |
1291 | mpw mpx_udivn(mpw *qv, mpw *qvl, const mpw *rv, const mpw *rvl, mpw d) |
1292 | { |
1293 | size_t i; |
1294 | size_t ql = qvl - qv; |
1295 | mpd r = 0; |
1296 | |
1297 | i = rvl - rv; |
1298 | while (i > 0) { |
1299 | i--; |
1300 | r = (r << MPW_BITS) | rv[i]; |
1301 | if (i < ql) |
1302 | qv[i] = r / d; |
1303 | r %= d; |
1304 | } |
1305 | return (MPW(r)); |
1306 | } |
1307 | |
42684bdb |
1308 | /*----- Test rig ----------------------------------------------------------*/ |
1309 | |
1310 | #ifdef TEST_RIG |
1311 | |
1312 | #include <mLib/alloc.h> |
1313 | #include <mLib/dstr.h> |
1314 | #include <mLib/quis.h> |
1315 | #include <mLib/testrig.h> |
1316 | |
1317 | #include "mpscan.h" |
1318 | |
/* --- @ALLOC@ --- *
 *
 * Arguments:   @v, vl@ = lvalues to receive the vector base and limit
 *              @sz@ = number of words wanted (evaluated once)
 *
 * Use:         Allocates a vector of @sz@ words with @xmalloc@ and stores
 *              its base and limit.  The contents are not initialized.
 */

#define ALLOC(v, vl, sz) do {                                           \
  size_t _n = (sz);                                                     \
  mpw *_base = xmalloc(MPWS(_n));                                       \
  (v) = _base;                                                          \
  (vl) = _base + _n;                                                    \
} while (0)
1326 | |
/* --- @LOAD@ --- *
 *
 * Arguments:   @v, vl@ = lvalues to receive the vector base and limit
 *              @d@ = pointer to a @dstr@ holding the octets to load
 *
 * Use:         Allocates a vector of @MPW_RQ(len)@ words using @ALLOC@ and
 *              fills it from the string's contents with @mpx_loadb@.
 */

#define LOAD(v, vl, d) do {                                             \
  const dstr *_src = (d);                                               \
  mpw *_lo, *_hi;                                                       \
  ALLOC(_lo, _hi, MPW_RQ(_src->len));                                   \
  mpx_loadb(_lo, _hi, _src->buf, _src->len);                            \
  (v) = _lo;                                                            \
  (vl) = _hi;                                                           \
} while (0)
1335 | |
/* Larger of two values.  One of the arguments is evaluated twice. */
#define MAX(x, y) ((y) < (x) ? (x) : (y))
1337 | |
1338 | static void dumpbits(const char *msg, const void *pp, size_t sz) |
1339 | { |
1340 | const octet *p = pp; |
1341 | fputs(msg, stderr); |
1342 | for (; sz; sz--) |
1343 | fprintf(stderr, " %02x", *p++); |
1344 | fputc('\n', stderr); |
1345 | } |
1346 | |
1347 | static void dumpmp(const char *msg, const mpw *v, const mpw *vl) |
1348 | { |
1349 | fputs(msg, stderr); |
1350 | MPX_SHRINK(v, vl); |
1351 | while (v < vl) |
1352 | fprintf(stderr, " %08lx", (unsigned long)*--vl); |
1353 | fputc('\n', stderr); |
1354 | } |
1355 | |
1356 | static int chkscan(const mpw *v, const mpw *vl, |
1357 | const void *pp, size_t sz, int step) |
1358 | { |
1359 | mpscan mps; |
1360 | const octet *p = pp; |
1361 | unsigned bit = 0; |
1362 | int ok = 1; |
1363 | |
1364 | mpscan_initx(&mps, v, vl); |
1365 | while (sz) { |
1366 | unsigned x = *p; |
1367 | int i; |
1368 | p += step; |
1369 | for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) { |
1370 | if (MPSCAN_BIT(&mps) != (x & 1)) { |
1371 | fprintf(stderr, |
1372 | "\n*** error, step %i, bit %u, expected %u, found %u\n", |
1373 | step, bit, x & 1, MPSCAN_BIT(&mps)); |
1374 | ok = 0; |
1375 | } |
1376 | x >>= 1; |
1377 | bit++; |
1378 | } |
1379 | sz--; |
1380 | } |
1381 | |
1382 | return (ok); |
1383 | } |
1384 | |
1385 | static int loadstore(dstr *v) |
1386 | { |
1387 | dstr d = DSTR_INIT; |
1388 | size_t sz = MPW_RQ(v->len) * 2, diff; |
1389 | mpw *m, *ml; |
1390 | int ok = 1; |
1391 | |
1392 | dstr_ensure(&d, v->len); |
1393 | m = xmalloc(MPWS(sz)); |
1394 | |
1395 | for (diff = 0; diff < sz; diff += 5) { |
1396 | size_t oct; |
1397 | |
1398 | ml = m + sz - diff; |
1399 | |
1400 | mpx_loadl(m, ml, v->buf, v->len); |
1401 | if (!chkscan(m, ml, v->buf, v->len, +1)) |
1402 | ok = 0; |
1403 | MPX_OCTETS(oct, m, ml); |
1404 | mpx_storel(m, ml, d.buf, d.sz); |
1405 | if (memcmp(d.buf, v->buf, oct) != 0) { |
1406 | dumpbits("\n*** storel failed", d.buf, d.sz); |
1407 | ok = 0; |
1408 | } |
1409 | |
1410 | mpx_loadb(m, ml, v->buf, v->len); |
1411 | if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1)) |
1412 | ok = 0; |
1413 | MPX_OCTETS(oct, m, ml); |
1414 | mpx_storeb(m, ml, d.buf, d.sz); |
1415 | if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) { |
1416 | dumpbits("\n*** storeb failed", d.buf, d.sz); |
1417 | ok = 0; |
1418 | } |
1419 | } |
1420 | |
1421 | if (!ok) |
1422 | dumpbits("input data", v->buf, v->len); |
1423 | |
12ed8a1f |
1424 | xfree(m); |
42684bdb |
1425 | dstr_destroy(&d); |
1426 | return (ok); |
1427 | } |
1428 | |
f09e814a |
1429 | static int twocl(dstr *v) |
1430 | { |
1431 | dstr d = DSTR_INIT; |
1432 | mpw *m, *ml; |
1433 | size_t sz; |
1434 | int ok = 1; |
1435 | |
1436 | sz = v[0].len; if (v[1].len > sz) sz = v[1].len; |
1437 | dstr_ensure(&d, sz); |
1438 | |
1439 | sz = MPW_RQ(sz); |
1440 | m = xmalloc(MPWS(sz)); |
1441 | ml = m + sz; |
1442 | |
1443 | mpx_loadl(m, ml, v[0].buf, v[0].len); |
1444 | mpx_storel2cn(m, ml, d.buf, v[1].len); |
1445 | if (memcmp(d.buf, v[1].buf, v[1].len)) { |
1446 | dumpbits("\n*** storel2cn failed", d.buf, v[1].len); |
1447 | ok = 0; |
1448 | } |
1449 | |
1450 | mpx_loadl2cn(m, ml, v[1].buf, v[1].len); |
1451 | mpx_storel(m, ml, d.buf, v[0].len); |
1452 | if (memcmp(d.buf, v[0].buf, v[0].len)) { |
1453 | dumpbits("\n*** loadl2cn failed", d.buf, v[0].len); |
1454 | ok = 0; |
1455 | } |
1456 | |
1457 | if (!ok) { |
1458 | dumpbits("pos", v[0].buf, v[0].len); |
1459 | dumpbits("neg", v[1].buf, v[1].len); |
1460 | } |
1461 | |
12ed8a1f |
1462 | xfree(m); |
f09e814a |
1463 | dstr_destroy(&d); |
1464 | |
1465 | return (ok); |
1466 | } |
1467 | |
1468 | static int twocb(dstr *v) |
1469 | { |
1470 | dstr d = DSTR_INIT; |
1471 | mpw *m, *ml; |
1472 | size_t sz; |
1473 | int ok = 1; |
1474 | |
1475 | sz = v[0].len; if (v[1].len > sz) sz = v[1].len; |
1476 | dstr_ensure(&d, sz); |
1477 | |
1478 | sz = MPW_RQ(sz); |
1479 | m = xmalloc(MPWS(sz)); |
1480 | ml = m + sz; |
1481 | |
1482 | mpx_loadb(m, ml, v[0].buf, v[0].len); |
1483 | mpx_storeb2cn(m, ml, d.buf, v[1].len); |
1484 | if (memcmp(d.buf, v[1].buf, v[1].len)) { |
1485 | dumpbits("\n*** storeb2cn failed", d.buf, v[1].len); |
1486 | ok = 0; |
1487 | } |
1488 | |
1489 | mpx_loadb2cn(m, ml, v[1].buf, v[1].len); |
1490 | mpx_storeb(m, ml, d.buf, v[0].len); |
1491 | if (memcmp(d.buf, v[0].buf, v[0].len)) { |
1492 | dumpbits("\n*** loadb2cn failed", d.buf, v[0].len); |
1493 | ok = 0; |
1494 | } |
1495 | |
1496 | if (!ok) { |
1497 | dumpbits("pos", v[0].buf, v[0].len); |
1498 | dumpbits("neg", v[1].buf, v[1].len); |
1499 | } |
1500 | |
12ed8a1f |
1501 | xfree(m); |
f09e814a |
1502 | dstr_destroy(&d); |
1503 | |
1504 | return (ok); |
1505 | } |
1506 | |
42684bdb |
1507 | static int lsl(dstr *v) |
1508 | { |
1509 | mpw *a, *al; |
1510 | int n = *(int *)v[1].buf; |
1511 | mpw *c, *cl; |
1512 | mpw *d, *dl; |
1513 | int ok = 1; |
1514 | |
1515 | LOAD(a, al, &v[0]); |
1516 | LOAD(c, cl, &v[2]); |
1517 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); |
1518 | |
1519 | mpx_lsl(d, dl, a, al, n); |
1a05a8ef |
1520 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1521 | fprintf(stderr, "\n*** lsl(%i) failed\n", n); |
1522 | dumpmp(" a", a, al); |
1523 | dumpmp("expected", c, cl); |
1524 | dumpmp(" result", d, dl); |
1525 | ok = 0; |
1526 | } |
1527 | |
12ed8a1f |
1528 | xfree(a); xfree(c); xfree(d); |
42684bdb |
1529 | return (ok); |
1530 | } |
1531 | |
81578196 |
1532 | static int lslc(dstr *v) |
1533 | { |
1534 | mpw *a, *al; |
1535 | int n = *(int *)v[1].buf; |
1536 | mpw *c, *cl; |
1537 | mpw *d, *dl; |
1538 | int ok = 1; |
1539 | |
1540 | LOAD(a, al, &v[0]); |
1541 | LOAD(c, cl, &v[2]); |
1542 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS); |
1543 | |
1544 | mpx_lslc(d, dl, a, al, n); |
1545 | if (!mpx_ueq(d, dl, c, cl)) { |
1546 | fprintf(stderr, "\n*** lslc(%i) failed\n", n); |
1547 | dumpmp(" a", a, al); |
1548 | dumpmp("expected", c, cl); |
1549 | dumpmp(" result", d, dl); |
1550 | ok = 0; |
1551 | } |
1552 | |
12ed8a1f |
1553 | xfree(a); xfree(c); xfree(d); |
81578196 |
1554 | return (ok); |
1555 | } |
1556 | |
42684bdb |
1557 | static int lsr(dstr *v) |
1558 | { |
1559 | mpw *a, *al; |
1560 | int n = *(int *)v[1].buf; |
1561 | mpw *c, *cl; |
1562 | mpw *d, *dl; |
1563 | int ok = 1; |
1564 | |
1565 | LOAD(a, al, &v[0]); |
1566 | LOAD(c, cl, &v[2]); |
1567 | ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1); |
1568 | |
1569 | mpx_lsr(d, dl, a, al, n); |
1a05a8ef |
1570 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1571 | fprintf(stderr, "\n*** lsr(%i) failed\n", n); |
1572 | dumpmp(" a", a, al); |
1573 | dumpmp("expected", c, cl); |
1574 | dumpmp(" result", d, dl); |
1575 | ok = 0; |
1576 | } |
1577 | |
12ed8a1f |
1578 | xfree(a); xfree(c); xfree(d); |
42684bdb |
1579 | return (ok); |
1580 | } |
1581 | |
1582 | static int uadd(dstr *v) |
1583 | { |
1584 | mpw *a, *al; |
1585 | mpw *b, *bl; |
1586 | mpw *c, *cl; |
1587 | mpw *d, *dl; |
1588 | int ok = 1; |
1589 | |
1590 | LOAD(a, al, &v[0]); |
1591 | LOAD(b, bl, &v[1]); |
1592 | LOAD(c, cl, &v[2]); |
1593 | ALLOC(d, dl, MAX(al - a, bl - b) + 1); |
1594 | |
1595 | mpx_uadd(d, dl, a, al, b, bl); |
1a05a8ef |
1596 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1597 | fprintf(stderr, "\n*** uadd failed\n"); |
1598 | dumpmp(" a", a, al); |
1599 | dumpmp(" b", b, bl); |
1600 | dumpmp("expected", c, cl); |
1601 | dumpmp(" result", d, dl); |
1602 | ok = 0; |
1603 | } |
1604 | |
12ed8a1f |
1605 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb |
1606 | return (ok); |
1607 | } |
1608 | |
1609 | static int usub(dstr *v) |
1610 | { |
1611 | mpw *a, *al; |
1612 | mpw *b, *bl; |
1613 | mpw *c, *cl; |
1614 | mpw *d, *dl; |
1615 | int ok = 1; |
1616 | |
1617 | LOAD(a, al, &v[0]); |
1618 | LOAD(b, bl, &v[1]); |
1619 | LOAD(c, cl, &v[2]); |
1620 | ALLOC(d, dl, al - a); |
1621 | |
1622 | mpx_usub(d, dl, a, al, b, bl); |
1a05a8ef |
1623 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1624 | fprintf(stderr, "\n*** usub failed\n"); |
1625 | dumpmp(" a", a, al); |
1626 | dumpmp(" b", b, bl); |
1627 | dumpmp("expected", c, cl); |
1628 | dumpmp(" result", d, dl); |
1629 | ok = 0; |
1630 | } |
1631 | |
12ed8a1f |
1632 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb |
1633 | return (ok); |
1634 | } |
1635 | |
1636 | static int umul(dstr *v) |
1637 | { |
1638 | mpw *a, *al; |
1639 | mpw *b, *bl; |
1640 | mpw *c, *cl; |
1641 | mpw *d, *dl; |
1642 | int ok = 1; |
1643 | |
1644 | LOAD(a, al, &v[0]); |
1645 | LOAD(b, bl, &v[1]); |
1646 | LOAD(c, cl, &v[2]); |
1647 | ALLOC(d, dl, (al - a) + (bl - b)); |
1648 | |
1649 | mpx_umul(d, dl, a, al, b, bl); |
1a05a8ef |
1650 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1651 | fprintf(stderr, "\n*** umul failed\n"); |
1652 | dumpmp(" a", a, al); |
1653 | dumpmp(" b", b, bl); |
1654 | dumpmp("expected", c, cl); |
1655 | dumpmp(" result", d, dl); |
1656 | ok = 0; |
1657 | } |
1658 | |
12ed8a1f |
1659 | xfree(a); xfree(b); xfree(c); xfree(d); |
42684bdb |
1660 | return (ok); |
1661 | } |
1662 | |
1663 | static int usqr(dstr *v) |
1664 | { |
1665 | mpw *a, *al; |
1666 | mpw *c, *cl; |
1667 | mpw *d, *dl; |
1668 | int ok = 1; |
1669 | |
1670 | LOAD(a, al, &v[0]); |
1671 | LOAD(c, cl, &v[1]); |
1672 | ALLOC(d, dl, 2 * (al - a)); |
1673 | |
1674 | mpx_usqr(d, dl, a, al); |
1a05a8ef |
1675 | if (!mpx_ueq(d, dl, c, cl)) { |
42684bdb |
1676 | fprintf(stderr, "\n*** usqr failed\n"); |
1677 | dumpmp(" a", a, al); |
1678 | dumpmp("expected", c, cl); |
1679 | dumpmp(" result", d, dl); |
1680 | ok = 0; |
1681 | } |
1682 | |
12ed8a1f |
1683 | xfree(a); xfree(c); xfree(d); |
42684bdb |
1684 | return (ok); |
1685 | } |
1686 | |
1687 | static int udiv(dstr *v) |
1688 | { |
1689 | mpw *a, *al; |
1690 | mpw *b, *bl; |
1691 | mpw *q, *ql; |
1692 | mpw *r, *rl; |
1693 | mpw *qq, *qql; |
1694 | mpw *s, *sl; |
1695 | int ok = 1; |
1696 | |
1697 | ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len); |
1698 | LOAD(b, bl, &v[1]); |
1699 | LOAD(q, ql, &v[2]); |
1700 | LOAD(r, rl, &v[3]); |
1701 | ALLOC(qq, qql, al - a); |
1702 | ALLOC(s, sl, (bl - b) + 1); |
1703 | |
1704 | mpx_udiv(qq, qql, a, al, b, bl, s, sl); |
1a05a8ef |
1705 | if (!mpx_ueq(qq, qql, q, ql) || |
1706 | !mpx_ueq(a, al, r, rl)) { |
42684bdb |
1707 | fprintf(stderr, "\n*** udiv failed\n"); |
1708 | dumpmp(" divisor", b, bl); |
1709 | dumpmp("expect r", r, rl); |
1710 | dumpmp("result r", a, al); |
1711 | dumpmp("expect q", q, ql); |
1712 | dumpmp("result q", qq, qql); |
1713 | ok = 0; |
1714 | } |
1715 | |
12ed8a1f |
1716 | xfree(a); xfree(b); xfree(r); xfree(q); xfree(s); xfree(qq); |
42684bdb |
1717 | return (ok); |
1718 | } |
1719 | |
1720 | static test_chunk defs[] = { |
1721 | { "load-store", loadstore, { &type_hex, 0 } }, |
f09e814a |
1722 | { "2cl", twocl, { &type_hex, &type_hex, } }, |
1723 | { "2cb", twocb, { &type_hex, &type_hex, } }, |
42684bdb |
1724 | { "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } }, |
81578196 |
1725 | { "lslc", lslc, { &type_hex, &type_int, &type_hex, 0 } }, |
42684bdb |
1726 | { "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } }, |
1727 | { "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } }, |
1728 | { "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } }, |
1729 | { "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } }, |
1730 | { "usqr", usqr, { &type_hex, &type_hex, 0 } }, |
1731 | { "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } }, |
1732 | { 0, 0, { 0 } } |
1733 | }; |
1734 | |
1735 | int main(int argc, char *argv[]) |
1736 | { |
1737 | test_run(argc, argv, defs, SRCDIR"/tests/mpx"); |
1738 | return (0); |
1739 | } |
1740 | |
42684bdb |
1741 | #endif |
1742 | |
d03ab969 |
1743 | /*----- That's all, folks -------------------------------------------------*/ |