Renamed from `rsa-decrypt', since the name was no longer appropriate.
[u/mdw/catacomb] / mpx.c
CommitLineData
d03ab969 1/* -*-c-*-
2 *
99b30c23 3 * $Id: mpx.c,v 1.9 2000/06/26 07:52:50 mdw Exp $
d03ab969 4 *
5 * Low-level multiprecision arithmetic
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mpx.c,v $
99b30c23 33 * Revision 1.9 2000/06/26 07:52:50 mdw
34 * Portability fix for the bug fix.
35 *
ce76ff16 36 * Revision 1.8 2000/06/25 12:59:02 mdw
37 * (mpx_udiv): Fix bug in quotient digit estimation.
38 *
698bd937 39 * Revision 1.7 1999/12/22 15:49:07 mdw
40 * New function for division by a small integer.
41 *
42684bdb 42 * Revision 1.6 1999/11/20 22:43:44 mdw
43 * Integrate testing for MPX routines.
44 *
dd517851 45 * Revision 1.5 1999/11/20 22:23:27 mdw
46 * Add function versions of some low-level macros with wider use.
47 *
f45a00c6 48 * Revision 1.4 1999/11/17 18:04:09 mdw
49 * Add two's-complement functionality. Improve mpx_udiv a little by
50 * performing the multiplication of the divisor by q with the subtraction
51 * from r.
52 *
501da53c 53 * Revision 1.3 1999/11/13 01:57:31 mdw
54 * Remove stray debugging code.
55 *
c8a2f9ef 56 * Revision 1.2 1999/11/13 01:50:59 mdw
57 * Multiprecision routines finished and tested.
58 *
d03ab969 59 * Revision 1.1 1999/09/03 08:41:12 mdw
60 * Initial import.
61 *
62 */
63
64/*----- Header files ------------------------------------------------------*/
65
c8a2f9ef 66#include <assert.h>
d03ab969 67#include <stdio.h>
68#include <stdlib.h>
69#include <string.h>
70
71#include <mLib/bits.h>
72
73#include "mptypes.h"
74#include "mpx.h"
75
76/*----- Loading and storing -----------------------------------------------*/
77
78/* --- @mpx_storel@ --- *
79 *
80 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
c8a2f9ef 81 * @void *pp@ = pointer to octet array
d03ab969 82 * @size_t sz@ = size of octet array
83 *
84 * Returns: ---
85 *
86 * Use: Stores an MP in an octet array, least significant octet
87 * first. High-end octets are silently discarded if there
88 * isn't enough space for them.
89 */
90
c8a2f9ef 91void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz)
d03ab969 92{
93 mpw n, w = 0;
c8a2f9ef 94 octet *p = pp, *q = p + sz;
d03ab969 95 unsigned bits = 0;
96
97 while (p < q) {
98 if (bits < 8) {
99 if (v >= vl) {
100 *p++ = U8(w);
101 break;
102 }
103 n = *v++;
104 *p++ = U8(w | n << bits);
105 w = n >> (8 - bits);
106 bits += MPW_BITS - 8;
107 } else {
108 *p++ = U8(w);
109 w >>= 8;
110 bits -= 8;
111 }
112 }
113 memset(p, 0, q - p);
114}
115
116/* --- @mpx_loadl@ --- *
117 *
118 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
c8a2f9ef 119 * @const void *pp@ = pointer to octet array
d03ab969 120 * @size_t sz@ = size of octet array
121 *
122 * Returns: ---
123 *
124 * Use: Loads an MP in an octet array, least significant octet
125 * first. High-end octets are ignored if there isn't enough
126 * space for them.
127 */
128
c8a2f9ef 129void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz)
d03ab969 130{
131 unsigned n;
c8a2f9ef 132 mpw w = 0;
133 const octet *p = pp, *q = p + sz;
d03ab969 134 unsigned bits = 0;
135
136 if (v >= vl)
137 return;
138 while (p < q) {
139 n = U8(*p++);
140 w |= n << bits;
141 bits += 8;
142 if (bits >= MPW_BITS) {
143 *v++ = MPW(w);
144 w = n >> (MPW_BITS - bits + 8);
145 bits -= MPW_BITS;
146 if (v >= vl)
147 return;
148 }
149 }
150 *v++ = w;
151 MPX_ZERO(v, vl);
152}
153
154/* --- @mpx_storeb@ --- *
155 *
156 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
c8a2f9ef 157 * @void *pp@ = pointer to octet array
d03ab969 158 * @size_t sz@ = size of octet array
159 *
160 * Returns: ---
161 *
162 * Use: Stores an MP in an octet array, most significant octet
163 * first. High-end octets are silently discarded if there
164 * isn't enough space for them.
165 */
166
c8a2f9ef 167void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz)
d03ab969 168{
169 mpw n, w = 0;
c8a2f9ef 170 octet *p = pp, *q = p + sz;
d03ab969 171 unsigned bits = 0;
172
173 while (q > p) {
174 if (bits < 8) {
175 if (v >= vl) {
176 *--q = U8(w);
177 break;
178 }
179 n = *v++;
180 *--q = U8(w | n << bits);
181 w = n >> (8 - bits);
182 bits += MPW_BITS - 8;
183 } else {
184 *--q = U8(w);
185 w >>= 8;
186 bits -= 8;
187 }
188 }
189 memset(p, 0, q - p);
190}
191
192/* --- @mpx_loadb@ --- *
193 *
194 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
c8a2f9ef 195 * @const void *pp@ = pointer to octet array
d03ab969 196 * @size_t sz@ = size of octet array
197 *
198 * Returns: ---
199 *
200 * Use: Loads an MP in an octet array, most significant octet
201 * first. High-end octets are ignored if there isn't enough
202 * space for them.
203 */
204
c8a2f9ef 205void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz)
d03ab969 206{
207 unsigned n;
c8a2f9ef 208 mpw w = 0;
209 const octet *p = pp, *q = p + sz;
d03ab969 210 unsigned bits = 0;
211
212 if (v >= vl)
213 return;
214 while (q > p) {
215 n = U8(*--q);
216 w |= n << bits;
217 bits += 8;
218 if (bits >= MPW_BITS) {
219 *v++ = MPW(w);
220 w = n >> (MPW_BITS - bits + 8);
221 bits -= MPW_BITS;
222 if (v >= vl)
223 return;
224 }
225 }
226 *v++ = w;
227 MPX_ZERO(v, vl);
228}
229
230/*----- Logical shifting --------------------------------------------------*/
231
232/* --- @mpx_lsl@ --- *
233 *
234 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
235 * @const mpw *av, *avl@ = source vector base and limit
236 * @size_t n@ = number of bit positions to shift by
237 *
238 * Returns: ---
239 *
240 * Use: Performs a logical shift left operation on an integer.
241 */
242
243void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
244{
245 size_t nw;
246 unsigned nb;
247
248 /* --- Trivial special case --- */
249
250 if (n == 0)
251 MPX_COPY(dv, dvl, av, avl);
252
253 /* --- Single bit shifting --- */
254
255 else if (n == 1) {
256 mpw w = 0;
257 while (av < avl) {
258 mpw t;
259 if (dv >= dvl)
260 goto done;
261 t = *av++;
262 *dv++ = MPW((t << 1) | w);
263 w = t >> (MPW_BITS - 1);
264 }
265 if (dv >= dvl)
266 goto done;
267 *dv++ = MPW(w);
268 MPX_ZERO(dv, dvl);
c8a2f9ef 269 goto done;
d03ab969 270 }
271
272 /* --- Break out word and bit shifts for more sophisticated work --- */
273
274 nw = n / MPW_BITS;
275 nb = n % MPW_BITS;
276
277 /* --- Handle a shift by a multiple of the word size --- */
278
279 if (nb == 0) {
280 MPX_COPY(dv + nw, dvl, av, avl);
281 memset(dv, 0, MPWS(nw));
282 }
283
c8a2f9ef 284 /* --- And finally the difficult case --- *
285 *
286 * This is a little convoluted, because I have to start from the end and
287 * work backwards to avoid overwriting the source, if they're both the same
288 * block of memory.
289 */
d03ab969 290
291 else {
292 mpw w;
293 size_t nr = MPW_BITS - nb;
c8a2f9ef 294 size_t dvn = dvl - dv;
295 size_t avn = avl - av;
d03ab969 296
c8a2f9ef 297 if (dvn <= nw) {
d03ab969 298 MPX_ZERO(dv, dvl);
299 goto done;
300 }
d03ab969 301
c8a2f9ef 302 if (dvn > avn + nw) {
303 size_t off = avn + nw + 1;
304 MPX_ZERO(dv + off, dvl);
305 dvl = dv + off;
306 w = 0;
307 } else {
308 avl = av + dvn - nw;
309 w = *--avl << nb;
d03ab969 310 }
311
c8a2f9ef 312 while (avl > av) {
313 mpw t = *--avl;
314 *--dvl = (t >> nr) | w;
315 w = t << nb;
d03ab969 316 }
c8a2f9ef 317
318 *--dvl = w;
319 MPX_ZERO(dv, dvl);
d03ab969 320 }
321
322done:;
323}
324
325/* --- @mpx_lsr@ --- *
326 *
327 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
328 * @const mpw *av, *avl@ = source vector base and limit
329 * @size_t n@ = number of bit positions to shift by
330 *
331 * Returns: ---
332 *
333 * Use: Performs a logical shift right operation on an integer.
334 */
335
336void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
337{
338 size_t nw;
339 unsigned nb;
340
341 /* --- Trivial special case --- */
342
343 if (n == 0)
344 MPX_COPY(dv, dvl, av, avl);
345
346 /* --- Single bit shifting --- */
347
348 else if (n == 1) {
349 mpw w = *av++ >> 1;
350 while (av < avl) {
351 mpw t;
352 if (dv >= dvl)
353 goto done;
354 t = *av++;
355 *dv++ = MPW((t << (MPW_BITS - 1)) | w);
356 w = t >> 1;
357 }
358 if (dv >= dvl)
359 goto done;
360 *dv++ = MPW(w);
361 MPX_ZERO(dv, dvl);
c8a2f9ef 362 goto done;
d03ab969 363 }
364
365 /* --- Break out word and bit shifts for more sophisticated work --- */
366
367 nw = n / MPW_BITS;
368 nb = n % MPW_BITS;
369
370 /* --- Handle a shift by a multiple of the word size --- */
371
372 if (nb == 0)
373 MPX_COPY(dv, dvl, av + nw, avl);
374
375 /* --- And finally the difficult case --- */
376
377 else {
378 mpw w;
379 size_t nr = MPW_BITS - nb;
380
381 av += nw;
382 w = *av++;
383 while (av < avl) {
384 mpw t;
385 if (dv >= dvl)
386 goto done;
387 t = *av++;
388 *dv++ = MPW((w >> nb) | (t << nr));
389 w = t;
390 }
391 if (dv < dvl) {
392 *dv++ = MPW(w >> nb);
393 MPX_ZERO(dv, dvl);
394 }
395 }
396
397done:;
398}
399
400/*----- Unsigned arithmetic -----------------------------------------------*/
401
f45a00c6 402/* --- @mpx_2c@ --- *
403 *
404 * Arguments: @mpw *dv, *dvl@ = destination vector
405 * @const mpw *v, *vl@ = source vector
406 *
407 * Returns: ---
408 *
409 * Use: Calculates the two's complement of @v@.
410 */
411
412void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl)
413{
414 mpw c = 0;
415 while (dv < dvl && v < vl)
416 *dv++ = c = MPW(~*v++);
417 if (dv < dvl) {
418 if (c > MPW_MAX / 2)
419 c = MPW(~0);
420 while (dv < dvl)
421 *dv++ = c;
422 }
423 MPX_UADDN(dv, dvl, 1);
424}
425
d03ab969 426/* --- @mpx_ucmp@ --- *
427 *
428 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
429 * @const mpw *bv, *bvl@ = second argument vector base and limit
430 *
431 * Returns: Less than, equal to, or greater than zero depending on
432 * whether @a@ is less than, equal to or greater than @b@,
433 * respectively.
434 *
435 * Use: Performs an unsigned integer comparison.
436 */
437
438int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl)
439{
440 MPX_SHRINK(av, avl);
441 MPX_SHRINK(bv, bvl);
442
443 if (avl - av > bvl - bv)
444 return (+1);
445 else if (avl - av < bvl - bv)
446 return (-1);
447 else while (avl > av) {
448 mpw a = *--avl, b = *--bvl;
449 if (a > b)
450 return (+1);
451 else if (a < b)
452 return (-1);
453 }
454 return (0);
455}
456
457/* --- @mpx_uadd@ --- *
458 *
459 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
460 * @const mpw *av, *avl@ = first addend vector base and limit
461 * @const mpw *bv, *bvl@ = second addend vector base and limit
462 *
463 * Returns: ---
464 *
465 * Use: Performs unsigned integer addition. If the result overflows
466 * the destination vector, high-order bits are discarded. This
467 * means that two's complement addition happens more or less for
468 * free, although that's more a side-effect than anything else.
469 * The result vector may be equal to either or both source
470 * vectors, but may not otherwise overlap them.
471 */
472
473void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
474 const mpw *bv, const mpw *bvl)
475{
476 mpw c = 0;
477
478 while (av < avl || bv < bvl) {
479 mpw a, b;
480 mpd x;
481 if (dv >= dvl)
482 return;
483 a = (av < avl) ? *av++ : 0;
484 b = (bv < bvl) ? *bv++ : 0;
485 x = (mpd)a + (mpd)b + c;
486 *dv++ = MPW(x);
487 c = x >> MPW_BITS;
488 }
489 if (dv < dvl) {
490 *dv++ = c;
491 MPX_ZERO(dv, dvl);
492 }
493}
494
dd517851 495/* --- @mpx_uaddn@ --- *
496 *
497 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
498 * @mpw n@ = other addend
499 *
500 * Returns: ---
501 *
502 * Use: Adds a small integer to a multiprecision number.
503 */
504
505void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); }
506
d03ab969 507/* --- @mpx_usub@ --- *
508 *
509 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
510 * @const mpw *av, *avl@ = first argument vector base and limit
511 * @const mpw *bv, *bvl@ = second argument vector base and limit
512 *
513 * Returns: ---
514 *
515 * Use: Performs unsigned integer subtraction. If the result
516 * overflows the destination vector, high-order bits are
517 * discarded. This means that two's complement subtraction
518 * happens more or less for free, althuogh that's more a side-
519 * effect than anything else. The result vector may be equal to
520 * either or both source vectors, but may not otherwise overlap
521 * them.
522 */
523
524void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
525 const mpw *bv, const mpw *bvl)
526{
527 mpw c = 0;
528
529 while (av < avl || bv < bvl) {
530 mpw a, b;
531 mpd x;
532 if (dv >= dvl)
533 return;
534 a = (av < avl) ? *av++ : 0;
535 b = (bv < bvl) ? *bv++ : 0;
c8a2f9ef 536 x = (mpd)a - (mpd)b - c;
d03ab969 537 *dv++ = MPW(x);
c8a2f9ef 538 if (x >> MPW_BITS)
539 c = 1;
540 else
541 c = 0;
d03ab969 542 }
c8a2f9ef 543 if (c)
544 c = MPW_MAX;
d03ab969 545 while (dv < dvl)
c8a2f9ef 546 *dv++ = c;
d03ab969 547}
548
dd517851 549/* --- @mpx_usubn@ --- *
550 *
551 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
552 * @n@ = subtrahend
553 *
554 * Returns: ---
555 *
556 * Use: Subtracts a small integer from a multiprecision number.
557 */
558
559void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); }
560
d03ab969 561/* --- @mpx_umul@ --- *
562 *
563 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
564 * @const mpw *av, *avl@ = multiplicand vector base and limit
565 * @const mpw *bv, *bvl@ = multiplier vector base and limit
566 *
567 * Returns: ---
568 *
569 * Use: Performs unsigned integer multiplication. If the result
570 * overflows the desination vector, high-order bits are
571 * discarded. The result vector may not overlap the argument
572 * vectors in any way.
573 */
574
575void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
576 const mpw *bv, const mpw *bvl)
577{
578 /* --- This is probably worthwhile on a multiply --- */
579
580 MPX_SHRINK(av, avl);
581 MPX_SHRINK(bv, bvl);
582
583 /* --- Deal with a multiply by zero --- */
584
585 if (bv == bvl) {
c8a2f9ef 586 MPX_ZERO(dv, dvl);
d03ab969 587 return;
588 }
589
590 /* --- Do the initial multiply and initialize the accumulator --- */
591
592 MPX_UMULN(dv, dvl, av, avl, *bv++);
593
594 /* --- Do the remaining multiply/accumulates --- */
595
c8a2f9ef 596 while (dv < dvl && bv < bvl) {
d03ab969 597 mpw m = *bv++;
c8a2f9ef 598 mpw c = 0;
d03ab969 599 const mpw *avv = av;
600 mpw *dvv = ++dv;
601
602 while (avv < avl) {
603 mpd x;
604 if (dvv >= dvl)
605 goto next;
c8a2f9ef 606 x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c;
607 *dvv++ = MPW(x);
d03ab969 608 c = x >> MPW_BITS;
609 }
c8a2f9ef 610 MPX_UADDN(dvv, dvl, c);
d03ab969 611 next:;
612 }
613}
614
dd517851 615/* --- @mpx_umuln@ --- *
616 *
617 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
618 * @const mpw *av, *avl@ = multiplicand vector base and limit
619 * @mpw m@ = multiplier
620 *
621 * Returns: ---
622 *
623 * Use: Multiplies a multiprecision integer by a single-word value.
624 * The destination and source may be equal. The destination
625 * is completely cleared after use.
626 */
627
628void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m)
629{
630 MPX_UMULN(dv, dvl, av, avl, m);
631}
632
633/* --- @mpx_umlan@ --- *
634 *
635 * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit
636 * @const mpw *av, *avl@ = multiplicand vector base and limit
637 * @mpw m@ = multiplier
638 *
639 * Returns: ---
640 *
641 * Use: Multiplies a multiprecision integer by a single-word value
642 * and adds the result to an accumulator.
643 */
644
645void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m)
646{
647 MPX_UMLAN(dv, dvl, av, avl, m);
648}
649
c8a2f9ef 650/* --- @mpx_usqr@ --- *
651 *
652 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
653 * @const mpw *av, *av@ = source vector base and limit
654 *
655 * Returns: ---
656 *
657 * Use: Performs unsigned integer squaring. The result vector must
658 * not overlap the source vector in any way.
659 */
660
661void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl)
662{
663 MPX_ZERO(dv, dvl);
664
665 /* --- Main loop --- */
666
667 while (av < avl) {
668 const mpw *avv = av;
669 mpw *dvv = dv;
670 mpw a = *av;
671 mpd c;
672
673 /* --- Stop if I've run out of destination --- */
674
675 if (dvv >= dvl)
676 break;
677
678 /* --- Work out the square at this point in the proceedings --- */
679
680 {
c8a2f9ef 681 mpd x = (mpd)a * (mpd)a + *dvv;
682 *dvv++ = MPW(x);
683 c = MPW(x >> MPW_BITS);
684 }
685
686 /* --- Now fix up the rest of the vector upwards --- */
687
688 avv++;
689 while (dvv < dvl && avv < avl) {
c8a2f9ef 690 mpd x = (mpd)a * (mpd)*avv++;
691 mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
692 c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
693 *dvv++ = MPW(y);
694 }
695 while (dvv < dvl && c) {
696 mpd x = c + *dvv;
697 *dvv++ = MPW(x);
698 c = x >> MPW_BITS;
699 }
700
701 /* --- Get ready for the next round --- */
702
703 av++;
704 dv += 2;
705 }
706}
707
d03ab969 708/* --- @mpx_udiv@ --- *
709 *
710 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
711 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
712 * @const mpw *dv, *dvl@ = divisor vector base and limit
c8a2f9ef 713 * @mpw *sv, *svl@ = scratch workspace
d03ab969 714 *
715 * Returns: ---
716 *
717 * Use: Performs unsigned integer division. If the result overflows
718 * the quotient vector, high-order bits are discarded. (Clearly
719 * the remainder vector can't overflow.) The various vectors
720 * may not overlap in any way. Yes, I know it's a bit odd
721 * requiring the dividend to be in the result position but it
722 * does make some sense really. The remainder must have
c8a2f9ef 723 * headroom for at least two extra words. The scratch space
f45a00c6 724 * must be at least one word larger than the divisor.
d03ab969 725 */
726
727void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl,
c8a2f9ef 728 const mpw *dv, const mpw *dvl,
729 mpw *sv, mpw *svl)
d03ab969 730{
d03ab969 731 unsigned norm = 0;
732 size_t scale;
733 mpw d, dd;
734
735 /* --- Initialize the quotient --- */
736
737 MPX_ZERO(qv, qvl);
738
c8a2f9ef 739 /* --- Perform some sanity checks --- */
740
741 MPX_SHRINK(dv, dvl);
742 assert(((void)"division by zero in mpx_udiv", dv < dvl));
743
d03ab969 744 /* --- Normalize the divisor --- *
745 *
746 * The algorithm requires that the divisor be at least two digits long.
747 * This is easy to fix.
748 */
749
c8a2f9ef 750 {
751 unsigned b;
d03ab969 752
c8a2f9ef 753 d = dvl[-1];
754 for (b = MPW_BITS / 2; b; b >>= 1) {
755 if (d < (MPW_MAX >> b)) {
756 d <<= b;
757 norm += b;
758 }
759 }
760 if (dv + 1 == dvl)
761 norm += MPW_BITS;
d03ab969 762 }
d03ab969 763
764 /* --- Normalize the dividend/remainder to match --- */
765
c8a2f9ef 766 if (norm) {
c8a2f9ef 767 mpx_lsl(rv, rvl, rv, rvl, norm);
f45a00c6 768 mpx_lsl(sv, svl, dv, dvl, norm);
c8a2f9ef 769 dv = sv;
f45a00c6 770 dvl = svl;
c8a2f9ef 771 MPX_SHRINK(dv, dvl);
772 }
773
d03ab969 774 MPX_SHRINK(rv, rvl);
c8a2f9ef 775 d = dvl[-1];
776 dd = dvl[-2];
d03ab969 777
778 /* --- Work out the relative scales --- */
779
780 {
781 size_t rvn = rvl - rv;
c8a2f9ef 782 size_t dvn = dvl - dv;
d03ab969 783
784 /* --- If the divisor is clearly larger, notice this --- */
785
786 if (dvn > rvn) {
787 mpx_lsr(rv, rvl, rv, rvl, norm);
788 return;
789 }
790
791 scale = rvn - dvn;
792 }
793
794 /* --- Calculate the most significant quotient digit --- *
795 *
796 * Because the divisor has its top bit set, this can only happen once. The
797 * pointer arithmetic is a little contorted, to make sure that the
798 * behaviour is defined.
799 */
800
801 if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
802 mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
803 if (qvl - qv > scale)
804 qv[scale] = 1;
805 }
806
807 /* --- Now for the main loop --- */
808
809 {
c8a2f9ef 810 mpw *rvv = rvl - 2;
d03ab969 811
812 while (scale) {
c8a2f9ef 813 mpw q;
814 mpd rh;
d03ab969 815
816 /* --- Get an estimate for the next quotient digit --- */
817
c8a2f9ef 818 mpw r = rvv[1];
819 mpw rr = rvv[0];
820 mpw rrr = *--rvv;
821
822 scale--;
823 rh = ((mpd)r << MPW_BITS) | rr;
d03ab969 824 if (r == d)
825 q = MPW_MAX;
c8a2f9ef 826 else
827 q = MPW(rh / d);
d03ab969 828
829 /* --- Refine the estimate --- */
830
831 {
832 mpd yh = (mpd)d * q;
ce76ff16 833 mpd yy = (mpd)dd * q;
834 mpw yl;
c8a2f9ef 835
ce76ff16 836 if (yy > MPW_MAX)
837 yh += yy >> MPW_BITS;
838 yl = MPW(yy);
c8a2f9ef 839
840 while (yh > rh || (yh == rh && yl > rrr)) {
841 q--;
842 yh -= d;
ce76ff16 843 if (yl < dd)
844 yh--;
99b30c23 845 yl = MPW(yl - dd);
c8a2f9ef 846 }
847 }
848
849 /* --- Remove a chunk from the dividend --- */
850
851 {
852 mpw *svv;
853 const mpw *dvv;
f45a00c6 854 mpw mc = 0, sc = 0;
c8a2f9ef 855
f45a00c6 856 /* --- Calculate the size of the chunk --- *
857 *
858 * This does the whole job of calculating @r >> scale - qd@.
859 */
c8a2f9ef 860
f45a00c6 861 for (svv = rv + scale, dvv = dv;
862 dvv < dvl && svv < rvl;
863 svv++, dvv++) {
864 mpd x = (mpd)*dvv * (mpd)q + mc;
865 mc = x >> MPW_BITS;
866 x = (mpd)*svv - MPW(x) - sc;
c8a2f9ef 867 *svv = MPW(x);
f45a00c6 868 if (x >> MPW_BITS)
869 sc = 1;
870 else
871 sc = 0;
872 }
873
874 if (svv < rvl) {
875 mpd x = (mpd)*svv - mc - sc;
876 *svv++ = MPW(x);
877 if (x >> MPW_BITS)
878 sc = MPW_MAX;
879 else
880 sc = 0;
881 while (svv < rvl)
882 *svv++ = sc;
c8a2f9ef 883 }
c8a2f9ef 884
f45a00c6 885 /* --- Fix if the quotient was too large --- *
c8a2f9ef 886 *
f45a00c6 887 * This doesn't seem to happen very often.
c8a2f9ef 888 */
889
c8a2f9ef 890 if (rvl[-1] > MPW_MAX / 2) {
891 mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
892 q--;
893 }
894 }
895
896 /* --- Done for another iteration --- */
897
898 if (qvl - qv > scale)
899 qv[scale] = q;
900 r = rr;
901 rr = rrr;
902 }
903 }
904
905 /* --- Now fiddle with unnormalizing and things --- */
906
907 mpx_lsr(rv, rvl, rv, rvl, norm);
d03ab969 908}
909
698bd937 910/* --- @mpx_udivn@ --- *
911 *
912 * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap
913 * dividend)
914 * @const mpw *rv, *rvl@ = dividend
915 * @mpw d@ = single-precision divisor
916 *
917 * Returns: Remainder after divison.
918 *
919 * Use: Performs a single-precision division operation.
920 */
921
922mpw mpx_udivn(mpw *qv, mpw *qvl, const mpw *rv, const mpw *rvl, mpw d)
923{
924 size_t i;
925 size_t ql = qvl - qv;
926 mpd r = 0;
927
928 i = rvl - rv;
929 while (i > 0) {
930 i--;
931 r = (r << MPW_BITS) | rv[i];
932 if (i < ql)
933 qv[i] = r / d;
934 r %= d;
935 }
936 return (MPW(r));
937}
938
42684bdb 939/*----- Test rig ----------------------------------------------------------*/
940
941#ifdef TEST_RIG
942
943#include <mLib/alloc.h>
944#include <mLib/dstr.h>
945#include <mLib/quis.h>
946#include <mLib/testrig.h>
947
948#include "mpscan.h"
949
950#define ALLOC(v, vl, sz) do { \
951 size_t _sz = (sz); \
952 mpw *_vv = xmalloc(MPWS(_sz)); \
953 mpw *_vvl = _vv + _sz; \
954 (v) = _vv; \
955 (vl) = _vvl; \
956} while (0)
957
958#define LOAD(v, vl, d) do { \
959 const dstr *_d = (d); \
960 mpw *_v, *_vl; \
961 ALLOC(_v, _vl, MPW_RQ(_d->len)); \
962 mpx_loadb(_v, _vl, _d->buf, _d->len); \
963 (v) = _v; \
964 (vl) = _vl; \
965} while (0)
966
967#define MAX(x, y) ((x) > (y) ? (x) : (y))
968
969static void dumpbits(const char *msg, const void *pp, size_t sz)
970{
971 const octet *p = pp;
972 fputs(msg, stderr);
973 for (; sz; sz--)
974 fprintf(stderr, " %02x", *p++);
975 fputc('\n', stderr);
976}
977
978static void dumpmp(const char *msg, const mpw *v, const mpw *vl)
979{
980 fputs(msg, stderr);
981 MPX_SHRINK(v, vl);
982 while (v < vl)
983 fprintf(stderr, " %08lx", (unsigned long)*--vl);
984 fputc('\n', stderr);
985}
986
987static int chkscan(const mpw *v, const mpw *vl,
988 const void *pp, size_t sz, int step)
989{
990 mpscan mps;
991 const octet *p = pp;
992 unsigned bit = 0;
993 int ok = 1;
994
995 mpscan_initx(&mps, v, vl);
996 while (sz) {
997 unsigned x = *p;
998 int i;
999 p += step;
1000 for (i = 0; i < 8 && MPSCAN_STEP(&mps); i++) {
1001 if (MPSCAN_BIT(&mps) != (x & 1)) {
1002 fprintf(stderr,
1003 "\n*** error, step %i, bit %u, expected %u, found %u\n",
1004 step, bit, x & 1, MPSCAN_BIT(&mps));
1005 ok = 0;
1006 }
1007 x >>= 1;
1008 bit++;
1009 }
1010 sz--;
1011 }
1012
1013 return (ok);
1014}
1015
1016static int loadstore(dstr *v)
1017{
1018 dstr d = DSTR_INIT;
1019 size_t sz = MPW_RQ(v->len) * 2, diff;
1020 mpw *m, *ml;
1021 int ok = 1;
1022
1023 dstr_ensure(&d, v->len);
1024 m = xmalloc(MPWS(sz));
1025
1026 for (diff = 0; diff < sz; diff += 5) {
1027 size_t oct;
1028
1029 ml = m + sz - diff;
1030
1031 mpx_loadl(m, ml, v->buf, v->len);
1032 if (!chkscan(m, ml, v->buf, v->len, +1))
1033 ok = 0;
1034 MPX_OCTETS(oct, m, ml);
1035 mpx_storel(m, ml, d.buf, d.sz);
1036 if (memcmp(d.buf, v->buf, oct) != 0) {
1037 dumpbits("\n*** storel failed", d.buf, d.sz);
1038 ok = 0;
1039 }
1040
1041 mpx_loadb(m, ml, v->buf, v->len);
1042 if (!chkscan(m, ml, v->buf + v->len - 1, v->len, -1))
1043 ok = 0;
1044 MPX_OCTETS(oct, m, ml);
1045 mpx_storeb(m, ml, d.buf, d.sz);
1046 if (memcmp(d.buf + d.sz - oct, v->buf + v->len - oct, oct) != 0) {
1047 dumpbits("\n*** storeb failed", d.buf, d.sz);
1048 ok = 0;
1049 }
1050 }
1051
1052 if (!ok)
1053 dumpbits("input data", v->buf, v->len);
1054
1055 free(m);
1056 dstr_destroy(&d);
1057 return (ok);
1058}
1059
1060static int lsl(dstr *v)
1061{
1062 mpw *a, *al;
1063 int n = *(int *)v[1].buf;
1064 mpw *c, *cl;
1065 mpw *d, *dl;
1066 int ok = 1;
1067
1068 LOAD(a, al, &v[0]);
1069 LOAD(c, cl, &v[2]);
1070 ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS);
1071
1072 mpx_lsl(d, dl, a, al, n);
1073 if (MPX_UCMP(d, dl, !=, c, cl)) {
1074 fprintf(stderr, "\n*** lsl(%i) failed\n", n);
1075 dumpmp(" a", a, al);
1076 dumpmp("expected", c, cl);
1077 dumpmp(" result", d, dl);
1078 ok = 0;
1079 }
1080
1081 free(a); free(c); free(d);
1082 return (ok);
1083}
1084
1085static int lsr(dstr *v)
1086{
1087 mpw *a, *al;
1088 int n = *(int *)v[1].buf;
1089 mpw *c, *cl;
1090 mpw *d, *dl;
1091 int ok = 1;
1092
1093 LOAD(a, al, &v[0]);
1094 LOAD(c, cl, &v[2]);
1095 ALLOC(d, dl, al - a + (n + MPW_BITS - 1) / MPW_BITS + 1);
1096
1097 mpx_lsr(d, dl, a, al, n);
1098 if (MPX_UCMP(d, dl, !=, c, cl)) {
1099 fprintf(stderr, "\n*** lsr(%i) failed\n", n);
1100 dumpmp(" a", a, al);
1101 dumpmp("expected", c, cl);
1102 dumpmp(" result", d, dl);
1103 ok = 0;
1104 }
1105
1106 free(a); free(c); free(d);
1107 return (ok);
1108}
1109
1110static int uadd(dstr *v)
1111{
1112 mpw *a, *al;
1113 mpw *b, *bl;
1114 mpw *c, *cl;
1115 mpw *d, *dl;
1116 int ok = 1;
1117
1118 LOAD(a, al, &v[0]);
1119 LOAD(b, bl, &v[1]);
1120 LOAD(c, cl, &v[2]);
1121 ALLOC(d, dl, MAX(al - a, bl - b) + 1);
1122
1123 mpx_uadd(d, dl, a, al, b, bl);
1124 if (MPX_UCMP(d, dl, !=, c, cl)) {
1125 fprintf(stderr, "\n*** uadd failed\n");
1126 dumpmp(" a", a, al);
1127 dumpmp(" b", b, bl);
1128 dumpmp("expected", c, cl);
1129 dumpmp(" result", d, dl);
1130 ok = 0;
1131 }
1132
1133 free(a); free(b); free(c); free(d);
1134 return (ok);
1135}
1136
1137static int usub(dstr *v)
1138{
1139 mpw *a, *al;
1140 mpw *b, *bl;
1141 mpw *c, *cl;
1142 mpw *d, *dl;
1143 int ok = 1;
1144
1145 LOAD(a, al, &v[0]);
1146 LOAD(b, bl, &v[1]);
1147 LOAD(c, cl, &v[2]);
1148 ALLOC(d, dl, al - a);
1149
1150 mpx_usub(d, dl, a, al, b, bl);
1151 if (MPX_UCMP(d, dl, !=, c, cl)) {
1152 fprintf(stderr, "\n*** usub failed\n");
1153 dumpmp(" a", a, al);
1154 dumpmp(" b", b, bl);
1155 dumpmp("expected", c, cl);
1156 dumpmp(" result", d, dl);
1157 ok = 0;
1158 }
1159
1160 free(a); free(b); free(c); free(d);
1161 return (ok);
1162}
1163
1164static int umul(dstr *v)
1165{
1166 mpw *a, *al;
1167 mpw *b, *bl;
1168 mpw *c, *cl;
1169 mpw *d, *dl;
1170 int ok = 1;
1171
1172 LOAD(a, al, &v[0]);
1173 LOAD(b, bl, &v[1]);
1174 LOAD(c, cl, &v[2]);
1175 ALLOC(d, dl, (al - a) + (bl - b));
1176
1177 mpx_umul(d, dl, a, al, b, bl);
1178 if (MPX_UCMP(d, dl, !=, c, cl)) {
1179 fprintf(stderr, "\n*** umul failed\n");
1180 dumpmp(" a", a, al);
1181 dumpmp(" b", b, bl);
1182 dumpmp("expected", c, cl);
1183 dumpmp(" result", d, dl);
1184 ok = 0;
1185 }
1186
1187 free(a); free(b); free(c); free(d);
1188 return (ok);
1189}
1190
1191static int usqr(dstr *v)
1192{
1193 mpw *a, *al;
1194 mpw *c, *cl;
1195 mpw *d, *dl;
1196 int ok = 1;
1197
1198 LOAD(a, al, &v[0]);
1199 LOAD(c, cl, &v[1]);
1200 ALLOC(d, dl, 2 * (al - a));
1201
1202 mpx_usqr(d, dl, a, al);
1203 if (MPX_UCMP(d, dl, !=, c, cl)) {
1204 fprintf(stderr, "\n*** usqr failed\n");
1205 dumpmp(" a", a, al);
1206 dumpmp("expected", c, cl);
1207 dumpmp(" result", d, dl);
1208 ok = 0;
1209 }
1210
1211 free(a); free(c); free(d);
1212 return (ok);
1213}
1214
1215static int udiv(dstr *v)
1216{
1217 mpw *a, *al;
1218 mpw *b, *bl;
1219 mpw *q, *ql;
1220 mpw *r, *rl;
1221 mpw *qq, *qql;
1222 mpw *s, *sl;
1223 int ok = 1;
1224
1225 ALLOC(a, al, MPW_RQ(v[0].len) + 2); mpx_loadb(a, al, v[0].buf, v[0].len);
1226 LOAD(b, bl, &v[1]);
1227 LOAD(q, ql, &v[2]);
1228 LOAD(r, rl, &v[3]);
1229 ALLOC(qq, qql, al - a);
1230 ALLOC(s, sl, (bl - b) + 1);
1231
1232 mpx_udiv(qq, qql, a, al, b, bl, s, sl);
1233 if (MPX_UCMP(qq, qql, !=, q, ql) ||
1234 MPX_UCMP(a, al, !=, r, rl)) {
1235 fprintf(stderr, "\n*** udiv failed\n");
1236 dumpmp(" divisor", b, bl);
1237 dumpmp("expect r", r, rl);
1238 dumpmp("result r", a, al);
1239 dumpmp("expect q", q, ql);
1240 dumpmp("result q", qq, qql);
1241 ok = 0;
1242 }
1243
1244 free(a); free(b); free(r); free(q); free(s); free(qq);
1245 return (ok);
1246}
1247
1248static test_chunk defs[] = {
1249 { "load-store", loadstore, { &type_hex, 0 } },
1250 { "lsl", lsl, { &type_hex, &type_int, &type_hex, 0 } },
1251 { "lsr", lsr, { &type_hex, &type_int, &type_hex, 0 } },
1252 { "uadd", uadd, { &type_hex, &type_hex, &type_hex, 0 } },
1253 { "usub", usub, { &type_hex, &type_hex, &type_hex, 0 } },
1254 { "umul", umul, { &type_hex, &type_hex, &type_hex, 0 } },
1255 { "usqr", usqr, { &type_hex, &type_hex, 0 } },
1256 { "udiv", udiv, { &type_hex, &type_hex, &type_hex, &type_hex, 0 } },
1257 { 0, 0, { 0 } }
1258};
1259
1260int main(int argc, char *argv[])
1261{
1262 test_run(argc, argv, defs, SRCDIR"/tests/mpx");
1263 return (0);
1264}
1265
1266
1267#endif
1268
d03ab969 1269/*----- That's all, folks -------------------------------------------------*/