Integrate testing for MPX routines.
[u/mdw/catacomb] / mpx.c
1 /* -*-c-*-
2 *
3 * $Id: mpx.c,v 1.6 1999/11/20 22:43:44 mdw Exp $
4 *
5 * Low-level multiprecision arithmetic
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mpx.c,v $
33 * Revision 1.6 1999/11/20 22:43:44 mdw
34 * Integrate testing for MPX routines.
35 *
36 * Revision 1.5 1999/11/20 22:23:27 mdw
37 * Add function versions of some low-level macros with wider use.
38 *
39 * Revision 1.4 1999/11/17 18:04:09 mdw
40 * Add two's-complement functionality. Improve mpx_udiv a little by
41 * performing the multiplication of the divisor by q with the subtraction
42 * from r.
43 *
44 * Revision 1.3 1999/11/13 01:57:31 mdw
45 * Remove stray debugging code.
46 *
47 * Revision 1.2 1999/11/13 01:50:59 mdw
48 * Multiprecision routines finished and tested.
49 *
50 * Revision 1.1 1999/09/03 08:41:12 mdw
51 * Initial import.
52 *
53 */
54
55 /*----- Header files ------------------------------------------------------*/
56
57 #include <assert.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61
62 #include <mLib/bits.h>
63
64 #include "mptypes.h"
65 #include "mpx.h"
66
67 /*----- Loading and storing -----------------------------------------------*/
68
69 /* --- @mpx_storel@ --- *
70 *
71 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
72 * @void *pp@ = pointer to octet array
73 * @size_t sz@ = size of octet array
74 *
75 * Returns: ---
76 *
77 * Use: Stores an MP in an octet array, least significant octet
78 * first. High-end octets are silently discarded if there
79 * isn't enough space for them.
80 */
81
82 void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz)
83 {
84 mpw n, w = 0;
85 octet *p = pp, *q = p + sz;
86 unsigned bits = 0;
87
88 while (p < q) {
89 if (bits < 8) {
90 if (v >= vl) {
91 *p++ = U8(w);
92 break;
93 }
94 n = *v++;
95 *p++ = U8(w | n << bits);
96 w = n >> (8 - bits);
97 bits += MPW_BITS - 8;
98 } else {
99 *p++ = U8(w);
100 w >>= 8;
101 bits -= 8;
102 }
103 }
104 memset(p, 0, q - p);
105 }
106
107 /* --- @mpx_loadl@ --- *
108 *
109 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
110 * @const void *pp@ = pointer to octet array
111 * @size_t sz@ = size of octet array
112 *
113 * Returns: ---
114 *
115 * Use: Loads an MP in an octet array, least significant octet
116 * first. High-end octets are ignored if there isn't enough
117 * space for them.
118 */
119
120 void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz)
121 {
122 unsigned n;
123 mpw w = 0;
124 const octet *p = pp, *q = p + sz;
125 unsigned bits = 0;
126
127 if (v >= vl)
128 return;
129 while (p < q) {
130 n = U8(*p++);
131 w |= n << bits;
132 bits += 8;
133 if (bits >= MPW_BITS) {
134 *v++ = MPW(w);
135 w = n >> (MPW_BITS - bits + 8);
136 bits -= MPW_BITS;
137 if (v >= vl)
138 return;
139 }
140 }
141 *v++ = w;
142 MPX_ZERO(v, vl);
143 }
144
145 /* --- @mpx_storeb@ --- *
146 *
147 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
148 * @void *pp@ = pointer to octet array
149 * @size_t sz@ = size of octet array
150 *
151 * Returns: ---
152 *
153 * Use: Stores an MP in an octet array, most significant octet
154 * first. High-end octets are silently discarded if there
155 * isn't enough space for them.
156 */
157
158 void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz)
159 {
160 mpw n, w = 0;
161 octet *p = pp, *q = p + sz;
162 unsigned bits = 0;
163
164 while (q > p) {
165 if (bits < 8) {
166 if (v >= vl) {
167 *--q = U8(w);
168 break;
169 }
170 n = *v++;
171 *--q = U8(w | n << bits);
172 w = n >> (8 - bits);
173 bits += MPW_BITS - 8;
174 } else {
175 *--q = U8(w);
176 w >>= 8;
177 bits -= 8;
178 }
179 }
180 memset(p, 0, q - p);
181 }
182
183 /* --- @mpx_loadb@ --- *
184 *
185 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
186 * @const void *pp@ = pointer to octet array
187 * @size_t sz@ = size of octet array
188 *
189 * Returns: ---
190 *
191 * Use: Loads an MP in an octet array, most significant octet
192 * first. High-end octets are ignored if there isn't enough
193 * space for them.
194 */
195
196 void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz)
197 {
198 unsigned n;
199 mpw w = 0;
200 const octet *p = pp, *q = p + sz;
201 unsigned bits = 0;
202
203 if (v >= vl)
204 return;
205 while (q > p) {
206 n = U8(*--q);
207 w |= n << bits;
208 bits += 8;
209 if (bits >= MPW_BITS) {
210 *v++ = MPW(w);
211 w = n >> (MPW_BITS - bits + 8);
212 bits -= MPW_BITS;
213 if (v >= vl)
214 return;
215 }
216 }
217 *v++ = w;
218 MPX_ZERO(v, vl);
219 }
220
221 /*----- Logical shifting --------------------------------------------------*/
222
223 /* --- @mpx_lsl@ --- *
224 *
225 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
226 * @const mpw *av, *avl@ = source vector base and limit
227 * @size_t n@ = number of bit positions to shift by
228 *
229 * Returns: ---
230 *
231 * Use: Performs a logical shift left operation on an integer.
232 */
233
234 void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
235 {
236 size_t nw;
237 unsigned nb;
238
239 /* --- Trivial special case --- */
240
241 if (n == 0)
242 MPX_COPY(dv, dvl, av, avl);
243
244 /* --- Single bit shifting --- */
245
246 else if (n == 1) {
247 mpw w = 0;
248 while (av < avl) {
249 mpw t;
250 if (dv >= dvl)
251 goto done;
252 t = *av++;
253 *dv++ = MPW((t << 1) | w);
254 w = t >> (MPW_BITS - 1);
255 }
256 if (dv >= dvl)
257 goto done;
258 *dv++ = MPW(w);
259 MPX_ZERO(dv, dvl);
260 goto done;
261 }
262
263 /* --- Break out word and bit shifts for more sophisticated work --- */
264
265 nw = n / MPW_BITS;
266 nb = n % MPW_BITS;
267
268 /* --- Handle a shift by a multiple of the word size --- */
269
270 if (nb == 0) {
271 MPX_COPY(dv + nw, dvl, av, avl);
272 memset(dv, 0, MPWS(nw));
273 }
274
275 /* --- And finally the difficult case --- *
276 *
277 * This is a little convoluted, because I have to start from the end and
278 * work backwards to avoid overwriting the source, if they're both the same
279 * block of memory.
280 */
281
282 else {
283 mpw w;
284 size_t nr = MPW_BITS - nb;
285 size_t dvn = dvl - dv;
286 size_t avn = avl - av;
287
288 if (dvn <= nw) {
289 MPX_ZERO(dv, dvl);
290 goto done;
291 }
292
293 if (dvn > avn + nw) {
294 size_t off = avn + nw + 1;
295 MPX_ZERO(dv + off, dvl);
296 dvl = dv + off;
297 w = 0;
298 } else {
299 avl = av + dvn - nw;
300 w = *--avl << nb;
301 }
302
303 while (avl > av) {
304 mpw t = *--avl;
305 *--dvl = (t >> nr) | w;
306 w = t << nb;
307 }
308
309 *--dvl = w;
310 MPX_ZERO(dv, dvl);
311 }
312
313 done:;
314 }
315
316 /* --- @mpx_lsr@ --- *
317 *
318 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
319 * @const mpw *av, *avl@ = source vector base and limit
320 * @size_t n@ = number of bit positions to shift by
321 *
322 * Returns: ---
323 *
324 * Use: Performs a logical shift right operation on an integer.
325 */
326
327 void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
328 {
329 size_t nw;
330 unsigned nb;
331
332 /* --- Trivial special case --- */
333
334 if (n == 0)
335 MPX_COPY(dv, dvl, av, avl);
336
337 /* --- Single bit shifting --- */
338
339 else if (n == 1) {
340 mpw w = *av++ >> 1;
341 while (av < avl) {
342 mpw t;
343 if (dv >= dvl)
344 goto done;
345 t = *av++;
346 *dv++ = MPW((t << (MPW_BITS - 1)) | w);
347 w = t >> 1;
348 }
349 if (dv >= dvl)
350 goto done;
351 *dv++ = MPW(w);
352 MPX_ZERO(dv, dvl);
353 goto done;
354 }
355
356 /* --- Break out word and bit shifts for more sophisticated work --- */
357
358 nw = n / MPW_BITS;
359 nb = n % MPW_BITS;
360
361 /* --- Handle a shift by a multiple of the word size --- */
362
363 if (nb == 0)
364 MPX_COPY(dv, dvl, av + nw, avl);
365
366 /* --- And finally the difficult case --- */
367
368 else {
369 mpw w;
370 size_t nr = MPW_BITS - nb;
371
372 av += nw;
373 w = *av++;
374 while (av < avl) {
375 mpw t;
376 if (dv >= dvl)
377 goto done;
378 t = *av++;
379 *dv++ = MPW((w >> nb) | (t << nr));
380 w = t;
381 }
382 if (dv < dvl) {
383 *dv++ = MPW(w >> nb);
384 MPX_ZERO(dv, dvl);
385 }
386 }
387
388 done:;
389 }
390
391 /*----- Unsigned arithmetic -----------------------------------------------*/
392
393 /* --- @mpx_2c@ --- *
394 *
395 * Arguments: @mpw *dv, *dvl@ = destination vector
396 * @const mpw *v, *vl@ = source vector
397 *
398 * Returns: ---
399 *
400 * Use: Calculates the two's complement of @v@.
401 */
402
403 void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl)
404 {
405 mpw c = 0;
406 while (dv < dvl && v < vl)
407 *dv++ = c = MPW(~*v++);
408 if (dv < dvl) {
409 if (c > MPW_MAX / 2)
410 c = MPW(~0);
411 while (dv < dvl)
412 *dv++ = c;
413 }
414 MPX_UADDN(dv, dvl, 1);
415 }
416
417 /* --- @mpx_ucmp@ --- *
418 *
419 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
420 * @const mpw *bv, *bvl@ = second argument vector base and limit
421 *
422 * Returns: Less than, equal to, or greater than zero depending on
423 * whether @a@ is less than, equal to or greater than @b@,
424 * respectively.
425 *
426 * Use: Performs an unsigned integer comparison.
427 */
428
429 int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl)
430 {
431 MPX_SHRINK(av, avl);
432 MPX_SHRINK(bv, bvl);
433
434 if (avl - av > bvl - bv)
435 return (+1);
436 else if (avl - av < bvl - bv)
437 return (-1);
438 else while (avl > av) {
439 mpw a = *--avl, b = *--bvl;
440 if (a > b)
441 return (+1);
442 else if (a < b)
443 return (-1);
444 }
445 return (0);
446 }
447
448 /* --- @mpx_uadd@ --- *
449 *
450 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
451 * @const mpw *av, *avl@ = first addend vector base and limit
452 * @const mpw *bv, *bvl@ = second addend vector base and limit
453 *
454 * Returns: ---
455 *
456 * Use: Performs unsigned integer addition. If the result overflows
457 * the destination vector, high-order bits are discarded. This
458 * means that two's complement addition happens more or less for
459 * free, although that's more a side-effect than anything else.
460 * The result vector may be equal to either or both source
461 * vectors, but may not otherwise overlap them.
462 */
463
464 void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
465 const mpw *bv, const mpw *bvl)
466 {
467 mpw c = 0;
468
469 while (av < avl || bv < bvl) {
470 mpw a, b;
471 mpd x;
472 if (dv >= dvl)
473 return;
474 a = (av < avl) ? *av++ : 0;
475 b = (bv < bvl) ? *bv++ : 0;
476 x = (mpd)a + (mpd)b + c;
477 *dv++ = MPW(x);
478 c = x >> MPW_BITS;
479 }
480 if (dv < dvl) {
481 *dv++ = c;
482 MPX_ZERO(dv, dvl);
483 }
484 }
485
486 /* --- @mpx_uaddn@ --- *
487 *
488 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
489 * @mpw n@ = other addend
490 *
491 * Returns: ---
492 *
493 * Use: Adds a small integer to a multiprecision number.
494 */
495
496 void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); }
497
498 /* --- @mpx_usub@ --- *
499 *
500 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
501 * @const mpw *av, *avl@ = first argument vector base and limit
502 * @const mpw *bv, *bvl@ = second argument vector base and limit
503 *
504 * Returns: ---
505 *
506 * Use: Performs unsigned integer subtraction. If the result
507 * overflows the destination vector, high-order bits are
508 * discarded. This means that two's complement subtraction
509 * happens more or less for free, althuogh that's more a side-
510 * effect than anything else. The result vector may be equal to
511 * either or both source vectors, but may not otherwise overlap
512 * them.
513 */
514
515 void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
516 const mpw *bv, const mpw *bvl)
517 {
518 mpw c = 0;
519
520 while (av < avl || bv < bvl) {
521 mpw a, b;
522 mpd x;
523 if (dv >= dvl)
524 return;
525 a = (av < avl) ? *av++ : 0;
526 b = (bv < bvl) ? *bv++ : 0;
527 x = (mpd)a - (mpd)b - c;
528 *dv++ = MPW(x);
529 if (x >> MPW_BITS)
530 c = 1;
531 else
532 c = 0;
533 }
534 if (c)
535 c = MPW_MAX;
536 while (dv < dvl)
537 *dv++ = c;
538 }
539
540 /* --- @mpx_usubn@ --- *
541 *
542 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
543 * @n@ = subtrahend
544 *
545 * Returns: ---
546 *
547 * Use: Subtracts a small integer from a multiprecision number.
548 */
549
550 void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); }
551
552 /* --- @mpx_umul@ --- *
553 *
554 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
555 * @const mpw *av, *avl@ = multiplicand vector base and limit
556 * @const mpw *bv, *bvl@ = multiplier vector base and limit
557 *
558 * Returns: ---
559 *
560 * Use: Performs unsigned integer multiplication. If the result
561 * overflows the desination vector, high-order bits are
562 * discarded. The result vector may not overlap the argument
563 * vectors in any way.
564 */
565
566 void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
567 const mpw *bv, const mpw *bvl)
568 {
569 /* --- This is probably worthwhile on a multiply --- */
570
571 MPX_SHRINK(av, avl);
572 MPX_SHRINK(bv, bvl);
573
574 /* --- Deal with a multiply by zero --- */
575
576 if (bv == bvl) {
577 MPX_ZERO(dv, dvl);
578 return;
579 }
580
581 /* --- Do the initial multiply and initialize the accumulator --- */
582
583 MPX_UMULN(dv, dvl, av, avl, *bv++);
584
585 /* --- Do the remaining multiply/accumulates --- */
586
587 while (dv < dvl && bv < bvl) {
588 mpw m = *bv++;
589 mpw c = 0;
590 const mpw *avv = av;
591 mpw *dvv = ++dv;
592
593 while (avv < avl) {
594 mpd x;
595 if (dvv >= dvl)
596 goto next;
597 x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c;
598 *dvv++ = MPW(x);
599 c = x >> MPW_BITS;
600 }
601 MPX_UADDN(dvv, dvl, c);
602 next:;
603 }
604 }
605
606 /* --- @mpx_umuln@ --- *
607 *
608 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
609 * @const mpw *av, *avl@ = multiplicand vector base and limit
610 * @mpw m@ = multiplier
611 *
612 * Returns: ---
613 *
614 * Use: Multiplies a multiprecision integer by a single-word value.
615 * The destination and source may be equal. The destination
616 * is completely cleared after use.
617 */
618
619 void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m)
620 {
621 MPX_UMULN(dv, dvl, av, avl, m);
622 }
623
624 /* --- @mpx_umlan@ --- *
625 *
626 * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit
627 * @const mpw *av, *avl@ = multiplicand vector base and limit
628 * @mpw m@ = multiplier
629 *
630 * Returns: ---
631 *
632 * Use: Multiplies a multiprecision integer by a single-word value
633 * and adds the result to an accumulator.
634 */
635
636 void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m)
637 {
638 MPX_UMLAN(dv, dvl, av, avl, m);
639 }
640
641 /* --- @mpx_usqr@ --- *
642 *
643 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
644 * @const mpw *av, *av@ = source vector base and limit
645 *
646 * Returns: ---
647 *
648 * Use: Performs unsigned integer squaring. The result vector must
649 * not overlap the source vector in any way.
650 */
651
652 void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl)
653 {
654 MPX_ZERO(dv, dvl);
655
656 /* --- Main loop --- */
657
658 while (av < avl) {
659 const mpw *avv = av;
660 mpw *dvv = dv;
661 mpw a = *av;
662 mpd c;
663
664 /* --- Stop if I've run out of destination --- */
665
666 if (dvv >= dvl)
667 break;
668
669 /* --- Work out the square at this point in the proceedings --- */
670
671 {
672 mpd x = (mpd)a * (mpd)a + *dvv;
673 *dvv++ = MPW(x);
674 c = MPW(x >> MPW_BITS);
675 }
676
677 /* --- Now fix up the rest of the vector upwards --- */
678
679 avv++;
680 while (dvv < dvl && avv < avl) {
681 mpd x = (mpd)a * (mpd)*avv++;
682 mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
683 c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
684 *dvv++ = MPW(y);
685 }
686 while (dvv < dvl && c) {
687 mpd x = c + *dvv;
688 *dvv++ = MPW(x);
689 c = x >> MPW_BITS;
690 }
691
692 /* --- Get ready for the next round --- */
693
694 av++;
695 dv += 2;
696 }
697 }
698
699 /* --- @mpx_udiv@ --- *
700 *
701 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
702 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
703 * @const mpw *dv, *dvl@ = divisor vector base and limit
704 * @mpw *sv, *svl@ = scratch workspace
705 *
706 * Returns: ---
707 *
708 * Use: Performs unsigned integer division. If the result overflows
709 * the quotient vector, high-order bits are discarded. (Clearly
710 * the remainder vector can't overflow.) The various vectors
711 * may not overlap in any way. Yes, I know it's a bit odd
712 * requiring the dividend to be in the result position but it
713 * does make some sense really. The remainder must have
714 * headroom for at least two extra words. The scratch space
715 * must be at least one word larger than the divisor.
716 */
717
718 void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl,
719 const mpw *dv, const mpw *dvl,
720 mpw *sv, mpw *svl)
721 {
722 unsigned norm = 0;
723 size_t scale;
724 mpw d, dd;
725
726 /* --- Initialize the quotient --- */
727
728 MPX_ZERO(qv, qvl);
729
730 /* --- Perform some sanity checks --- */
731
732 MPX_SHRINK(dv, dvl);
733 assert(((void)"division by zero in mpx_udiv", dv < dvl));
734
735 /* --- Normalize the divisor --- *
736 *
737 * The algorithm requires that the divisor be at least two digits long.
738 * This is easy to fix.
739 */
740
741 {
742 unsigned b;
743
744 d = dvl[-1];
745 for (b = MPW_BITS / 2; b; b >>= 1) {
746 if (d < (MPW_MAX >> b)) {
747 d <<= b;
748 norm += b;
749 }
750 }
751 if (dv + 1 == dvl)
752 norm += MPW_BITS;
753 }
754
755 /* --- Normalize the dividend/remainder to match --- */
756
757 if (norm) {
758 mpx_lsl(rv, rvl, rv, rvl, norm);
759 mpx_lsl(sv, svl, dv, dvl, norm);
760 dv = sv;
761 dvl = svl;
762 MPX_SHRINK(dv, dvl);
763 }
764
765 MPX_SHRINK(rv, rvl);
766 d = dvl[-1];
767 dd = dvl[-2];
768
769 /* --- Work out the relative scales --- */
770
771 {
772 size_t rvn = rvl - rv;
773 size_t dvn = dvl - dv;
774
775 /* --- If the divisor is clearly larger, notice this --- */
776
777 if (dvn > rvn) {
778 mpx_lsr(rv, rvl, rv, rvl, norm);
779 return;
780 }
781
782 scale = rvn - dvn;
783 }
784
785 /* --- Calculate the most significant quotient digit --- *
786 *
787 * Because the divisor has its top bit set, this can only happen once. The
788 * pointer arithmetic is a little contorted, to make sure that the
789 * behaviour is defined.
790 */
791
792 if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
793 mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
794 if (qvl - qv > scale)
795 qv[scale] = 1;
796 }
797
798 /* --- Now for the main loop --- */
799
800 {
801 mpw *rvv = rvl - 2;
802
803 while (scale) {
804 mpw q;
805 mpd rh;
806
807 /* --- Get an estimate for the next quotient digit --- */
808
809 mpw r = rvv[1];
810 mpw rr = rvv[0];
811 mpw rrr = *--rvv;
812
813 scale--;
814 rh = ((mpd)r << MPW_BITS) | rr;
815 if (r == d)
816 q = MPW_MAX;
817 else
818 q = MPW(rh / d);
819
820 /* --- Refine the estimate --- */
821
822 {
823 mpd yh = (mpd)d * q;
824 mpd yl = (mpd)dd * q;
825
826 if (yl > MPW_MAX) {
827 yh += yl >> MPW_BITS;
828 yl &= MPW_MAX;
829 }
830
831 while (yh > rh || (yh == rh && yl > rrr)) {
832 q--;
833 yh -= d;
834 if (yl < dd) {
835 yh++;
836 yl += MPW_MAX;
837 }
838 yl -= dd;
839 }
840 }
841
842 /* --- Remove a chunk from the dividend --- */
843
844 {
845 mpw *svv;
846 const mpw *dvv;
847 mpw mc = 0, sc = 0;
848
849 /* --- Calculate the size of the chunk --- *
850 *
851 * This does the whole job of calculating @r >> scale - qd@.
852 */
853
854 for (svv = rv + scale, dvv = dv;
855 dvv < dvl && svv < rvl;
856 svv++, dvv++) {
857 mpd x = (mpd)*dvv * (mpd)q + mc;
858 mc = x >> MPW_BITS;
859 x = (mpd)*svv - MPW(x) - sc;
860 *svv = MPW(x);
861 if (x >> MPW_BITS)
862 sc = 1;
863 else
864 sc = 0;
865 }
866
867 if (svv < rvl) {
868 mpd x = (mpd)*svv - mc - sc;
869 *svv++ = MPW(x);
870 if (x >> MPW_BITS)
871 sc = MPW_MAX;
872 else
873 sc = 0;
874 while (svv < rvl)
875 *svv++ = sc;
876 }
877
878 /* --- Fix if the quotient was too large --- *
879 *
880 * This doesn't seem to happen very often.
881 */
882
883 if (rvl[-1] > MPW_MAX / 2) {
884 mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
885 q--;
886 }
887 }
888
889 /* --- Done for another iteration --- */
890
891 if (qvl - qv > scale)
892 qv[scale] = q;
893 r = rr;
894 rr = rrr;
895 }
896 }
897
898 /* --- Now fiddle with unnormalizing and things --- */
899
900 mpx_lsr(rv, rvl, rv, rvl, norm);
901 }
902
903 /*----- Test rig ----------------------------------------------------------*/
904
905 #ifdef TEST_RIG
906
907 #include <mLib/alloc.h>
908 #include <mLib/dstr.h>
909 #include <mLib/quis.h>
910 #include <mLib/testrig.h>
911
912 #include "mpscan.h"
913
914 #define ALLOC(v, vl, sz) do { \
915 size_t _sz = (sz); \
916 mpw *_vv = xmalloc(MPWS(_sz)); \
917 mpw *_vvl = _vv + _sz; \
918 (v) = _vv; \
919 (vl) = _vvl; \
920 } while (0)
921
922 #define LOAD(v, vl, d) do { \
923 const dstr *_d = (d); \
924 mpw *_v, *_vl; \
925 ALLOC(_v, _vl, MPW_RQ(_d->len)); \
926 mpx_loadb(_v, _vl, _d->buf, _d->len); \
927 (v) = _v; \
928 (vl) = _vl; \
929 } while (0)
930
931 #define MAX(x, y) ((x) > (y) ? (x) : (y))
932
933 static void dumpbits(const char *msg, const void *pp, size_t sz)
934 {
935 const octet *p = pp;
936 fputs(msg, stderr);
937 for (; sz; sz--)
938 fprintf(stderr, " %02x", *p++);
939 fputc('\n', stderr);
940 }
941
942 static void dumpmp(const char *msg, const mpw *v, const mpw *vl)
943 {
944 fputs(msg, stderr);
945 MPX_SHRINK(v, vl);
946 while (v < vl)
947 fprintf(stderr, " %08lx", (unsigned long)*--vl);
948 fputc('\n', stderr);
949 }
950
951 static int chkscan(const mpw *v, const mpw *vl,
952 const void *pp, size_t sz, int step)
953 {
954 mpscan mps;
955 const octet *p = pp;
956 unsigned bit = 0;
957 int ok = 1;
958
959 mpscan_initx(&mps, v, vl);
960 while (sz) {
961 unsigned x = *p;
962 int i;
963 p += step;
964 for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) {
965 if (MPSCAN_BIT(&mps) != (x & 1)) {
966 fprintf(stderr,
967 "\n*** error, step %i, bit %u, expected %u, found %u\n",
968 step, bit, x & 1, MPSCAN_BIT(&mps));
969 ok = 0;
970 }
971 x >>= 1;
972 bit++;
973 }
974 sz--;
975 }
976
977 return (ok);
978 }
979
980 static int loadstore(dstr *v)
981 {
982 dstr d = DSTR_INIT;
983 size_t sz = MPW_RQ(v->len) * 2, diff;
984 mpw *m, *ml;
985 int ok = 1;
986
987 dstr_ensure(&d, v->len);
988 m = xmalloc(MPWS(sz));
989
990 for (diff = 0; diff < sz; diff += 5) {
991 size_t oct;
992
993 ml = m + sz - diff;
994
995 mpx_loadl(m, ml, v->buf, v->len);
996 if (!chkscan(m, ml, v->buf, v->len, +1))
997 ok = 0;
998 MPX_OCTETS(oct, m, ml);
999 mpx_storel(m, ml, d.buf, d.sz);
1000 if (memcmp(d.buf, v->buf, oct) != 0) {
1001 dumpbits("\n*** storel failed", d.buf, d.sz);
1002 ok = 0;
1003 }
1004
1005 mpx_loadb(m, ml, v->buf, v->len);
1006 if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1))
1007 ok = 0;
1008 MPX_OCTETS(oct, m, ml);
1009 mpx_storeb(m, ml, d.buf, d.sz);
1010 if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) {
1011 dumpbits("\n*** storeb failed", d.buf, d.sz);
1012 ok = 0;
1013 }
1014 }
1015
1016 if (!ok)
1017 dumpbits("input data", v->buf, v->len);
1018
1019 free(m);
1020 dstr_destroy(&d);
1021 return (ok);
1022 }
1023
1024 static int lsl(dstr *v)
1025 {
1026 mpw *a, *al;
1027 int n = *(int *)v[1].buf;
1028 mpw *c, *cl;
1029 mpw *d, *dl;
1030 int ok = 1;
1031
1032 LOAD(a, al, &v[0]);
1033 LOAD(c, cl, &v[2]);
1034 ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS);
1035
1036 mpx_lsl(d, dl, a, al, n);
1037 if (MPX_UCMP(d, dl, !=, c, cl)) {
1038 fprintf(stderr, "\n*** lsl(%i) failed\n", n);
1039 dumpmp(" a", a, al);
1040 dumpmp("expected", c, cl);
1041 dumpmp(" result", d, dl);
1042 ok = 0;
1043 }
1044
1045 free(a); free(c); free(d);
1046 return (ok);
1047 }
1048
1049 static int lsr(dstr *v)
1050 {
1051 mpw *a, *al;
1052 int n = *(int *)v[1].buf;
1053 mpw *c, *cl;
1054 mpw *d, *dl;
1055 int ok = 1;
1056
1057 LOAD(a, al, &v[0]);
1058 LOAD(c, cl, &v[2]);
1059 ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1);
1060
1061 mpx_lsr(d, dl, a, al, n);
1062 if (MPX_UCMP(d, dl, !=, c, cl)) {
1063 fprintf(stderr, "\n*** lsr(%i) failed\n", n);
1064 dumpmp(" a", a, al);
1065 dumpmp("expected", c, cl);
1066 dumpmp(" result", d, dl);
1067 ok = 0;
1068 }
1069
1070 free(a); free(c); free(d);
1071 return (ok);
1072 }
1073
1074 static int uadd(dstr *v)
1075 {
1076 mpw *a, *al;
1077 mpw *b, *bl;
1078 mpw *c, *cl;
1079 mpw *d, *dl;
1080 int ok = 1;
1081
1082 LOAD(a, al, &v[0]);
1083 LOAD(b, bl, &v[1]);
1084 LOAD(c, cl, &v[2]);
1085 ALLOC(d, dl, MAX(al - a, bl - b) + 1);
1086
1087 mpx_uadd(d, dl, a, al, b, bl);
1088 if (MPX_UCMP(d, dl, !=, c, cl)) {
1089 fprintf(stderr, "\n*** uadd failed\n");
1090 dumpmp(" a", a, al);
1091 dumpmp(" b", b, bl);
1092 dumpmp("expected", c, cl);
1093 dumpmp(" result", d, dl);
1094 ok = 0;
1095 }
1096
1097 free(a); free(b); free(c); free(d);
1098 return (ok);
1099 }
1100
1101 static int usub(dstr *v)
1102 {
1103 mpw *a, *al;
1104 mpw *b, *bl;
1105 mpw *c, *cl;
1106 mpw *d, *dl;
1107 int ok = 1;
1108
1109 LOAD(a, al, &v[0]);
1110 LOAD(b, bl, &v[1]);
1111 LOAD(c, cl, &v[2]);
1112 ALLOC(d, dl, al - a);
1113
1114 mpx_usub(d, dl, a, al, b, bl);
1115 if (MPX_UCMP(d, dl, !=, c, cl)) {
1116 fprintf(stderr, "\n*** usub failed\n");
1117 dumpmp(" a", a, al);
1118 dumpmp(" b", b, bl);
1119 dumpmp("expected", c, cl);
1120 dumpmp(" result", d, dl);
1121 ok = 0;
1122 }
1123
1124 free(a); free(b); free(c); free(d);
1125 return (ok);
1126 }
1127
1128 static int umul(dstr *v)
1129 {
1130 mpw *a, *al;
1131 mpw *b, *bl;
1132 mpw *c, *cl;
1133 mpw *d, *dl;
1134 int ok = 1;
1135
1136 LOAD(a, al, &v[0]);
1137 LOAD(b, bl, &v[1]);
1138 LOAD(c, cl, &v[2]);
1139 ALLOC(d, dl, (al - a) + (bl - b));
1140
1141 mpx_umul(d, dl, a, al, b, bl);
1142 if (MPX_UCMP(d, dl, !=, c, cl)) {
1143 fprintf(stderr, "\n*** umul failed\n");
1144 dumpmp(" a", a, al);
1145 dumpmp(" b", b, bl);
1146 dumpmp("expected", c, cl);
1147 dumpmp(" result", d, dl);
1148 ok = 0;
1149 }
1150
1151 free(a); free(b); free(c); free(d);
1152 return (ok);
1153 }
1154
1155 static int usqr(dstr *v)
1156 {
1157 mpw *a, *al;
1158 mpw *c, *cl;
1159 mpw *d, *dl;
1160 int ok = 1;
1161
1162 LOAD(a, al, &v[0]);
1163 LOAD(c, cl, &v[1]);
1164 ALLOC(d, dl, 2 * (al - a));
1165
1166 mpx_usqr(d, dl, a, al);
1167 if (MPX_UCMP(d, dl, !=, c, cl)) {
1168 fprintf(stderr, "\n*** usqr failed\n");
1169 dumpmp(" a", a, al);
1170 dumpmp("expected", c, cl);
1171 dumpmp(" result", d, dl);
1172 ok = 0;
1173 }
1174
1175 free(a); free(c); free(d);
1176 return (ok);
1177 }
1178
1179 static int udiv(dstr *v)
1180 {
1181 mpw *a, *al;
1182 mpw *b, *bl;
1183 mpw *q, *ql;
1184 mpw *r, *rl;
1185 mpw *qq, *qql;
1186 mpw *s, *sl;
1187 int ok = 1;
1188
1189 ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len);
1190 LOAD(b, bl, &v[1]);
1191 LOAD(q, ql, &v[2]);
1192 LOAD(r, rl, &v[3]);
1193 ALLOC(qq, qql, al - a);
1194 ALLOC(s, sl, (bl - b) + 1);
1195
1196 mpx_udiv(qq, qql, a, al, b, bl, s, sl);
1197 if (MPX_UCMP(qq, qql, !=, q, ql) ||
1198 MPX_UCMP(a, al, !=, r, rl)) {
1199 fprintf(stderr, "\n*** udiv failed\n");
1200 dumpmp(" divisor", b, bl);
1201 dumpmp("expect r", r, rl);
1202 dumpmp("result r", a, al);
1203 dumpmp("expect q", q, ql);
1204 dumpmp("result q", qq, qql);
1205 ok = 0;
1206 }
1207
1208 free(a); free(b); free(r); free(q); free(s); free(qq);
1209 return (ok);
1210 }
1211
1212 static test_chunk defs[] = {
1213 { "load-store", loadstore, { &type_hex, 0 } },
1214 { "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } },
1215 { "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } },
1216 { "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } },
1217 { "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } },
1218 { "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } },
1219 { "usqr", usqr, { &type_hex, &type_hex, 0 } },
1220 { "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } },
1221 { 0, 0, { 0 } }
1222 };
1223
1224 int main(int argc, char *argv[])
1225 {
1226 test_run(argc, argv, defs, SRCDIR"/tests/mpx");
1227 return (0);
1228 }
1229
1230
1231 #endif
1232
1233 /*----- That's all, folks -------------------------------------------------*/