Add function versions of some low-level macros with wider use.
[u/mdw/catacomb] / mpx.c
1 /* -*-c-*-
2 *
3 * $Id: mpx.c,v 1.5 1999/11/20 22:23:27 mdw Exp $
4 *
5 * Low-level multiprecision arithmetic
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mpx.c,v $
33 * Revision 1.5 1999/11/20 22:23:27 mdw
34 * Add function versions of some low-level macros with wider use.
35 *
36 * Revision 1.4 1999/11/17 18:04:09 mdw
37 * Add two's-complement functionality. Improve mpx_udiv a little by
38 * performing the multiplication of the divisor by q with the subtraction
39 * from r.
40 *
41 * Revision 1.3 1999/11/13 01:57:31 mdw
42 * Remove stray debugging code.
43 *
44 * Revision 1.2 1999/11/13 01:50:59 mdw
45 * Multiprecision routines finished and tested.
46 *
47 * Revision 1.1 1999/09/03 08:41:12 mdw
48 * Initial import.
49 *
50 */
51
52 /*----- Header files ------------------------------------------------------*/
53
54 #include <assert.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58
59 #include <mLib/bits.h>
60
61 #include "mptypes.h"
62 #include "mpx.h"
63
64 /*----- Loading and storing -----------------------------------------------*/
65
66 /* --- @mpx_storel@ --- *
67 *
68 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
69 * @void *pp@ = pointer to octet array
70 * @size_t sz@ = size of octet array
71 *
72 * Returns: ---
73 *
74 * Use: Stores an MP in an octet array, least significant octet
75 * first. High-end octets are silently discarded if there
76 * isn't enough space for them.
77 */
78
79 void mpx_storel(const mpw *v, const mpw *vl, void *pp, size_t sz)
80 {
81 mpw n, w = 0;
82 octet *p = pp, *q = p + sz;
83 unsigned bits = 0;
84
85 while (p < q) {
86 if (bits < 8) {
87 if (v >= vl) {
88 *p++ = U8(w);
89 break;
90 }
91 n = *v++;
92 *p++ = U8(w | n << bits);
93 w = n >> (8 - bits);
94 bits += MPW_BITS - 8;
95 } else {
96 *p++ = U8(w);
97 w >>= 8;
98 bits -= 8;
99 }
100 }
101 memset(p, 0, q - p);
102 }
103
104 /* --- @mpx_loadl@ --- *
105 *
106 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
107 * @const void *pp@ = pointer to octet array
108 * @size_t sz@ = size of octet array
109 *
110 * Returns: ---
111 *
112 * Use: Loads an MP in an octet array, least significant octet
113 * first. High-end octets are ignored if there isn't enough
114 * space for them.
115 */
116
117 void mpx_loadl(mpw *v, mpw *vl, const void *pp, size_t sz)
118 {
119 unsigned n;
120 mpw w = 0;
121 const octet *p = pp, *q = p + sz;
122 unsigned bits = 0;
123
124 if (v >= vl)
125 return;
126 while (p < q) {
127 n = U8(*p++);
128 w |= n << bits;
129 bits += 8;
130 if (bits >= MPW_BITS) {
131 *v++ = MPW(w);
132 w = n >> (MPW_BITS - bits + 8);
133 bits -= MPW_BITS;
134 if (v >= vl)
135 return;
136 }
137 }
138 *v++ = w;
139 MPX_ZERO(v, vl);
140 }
141
142 /* --- @mpx_storeb@ --- *
143 *
144 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
145 * @void *pp@ = pointer to octet array
146 * @size_t sz@ = size of octet array
147 *
148 * Returns: ---
149 *
150 * Use: Stores an MP in an octet array, most significant octet
151 * first. High-end octets are silently discarded if there
152 * isn't enough space for them.
153 */
154
155 void mpx_storeb(const mpw *v, const mpw *vl, void *pp, size_t sz)
156 {
157 mpw n, w = 0;
158 octet *p = pp, *q = p + sz;
159 unsigned bits = 0;
160
161 while (q > p) {
162 if (bits < 8) {
163 if (v >= vl) {
164 *--q = U8(w);
165 break;
166 }
167 n = *v++;
168 *--q = U8(w | n << bits);
169 w = n >> (8 - bits);
170 bits += MPW_BITS - 8;
171 } else {
172 *--q = U8(w);
173 w >>= 8;
174 bits -= 8;
175 }
176 }
177 memset(p, 0, q - p);
178 }
179
180 /* --- @mpx_loadb@ --- *
181 *
182 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
183 * @const void *pp@ = pointer to octet array
184 * @size_t sz@ = size of octet array
185 *
186 * Returns: ---
187 *
188 * Use: Loads an MP in an octet array, most significant octet
189 * first. High-end octets are ignored if there isn't enough
190 * space for them.
191 */
192
193 void mpx_loadb(mpw *v, mpw *vl, const void *pp, size_t sz)
194 {
195 unsigned n;
196 mpw w = 0;
197 const octet *p = pp, *q = p + sz;
198 unsigned bits = 0;
199
200 if (v >= vl)
201 return;
202 while (q > p) {
203 n = U8(*--q);
204 w |= n << bits;
205 bits += 8;
206 if (bits >= MPW_BITS) {
207 *v++ = MPW(w);
208 w = n >> (MPW_BITS - bits + 8);
209 bits -= MPW_BITS;
210 if (v >= vl)
211 return;
212 }
213 }
214 *v++ = w;
215 MPX_ZERO(v, vl);
216 }
217
218 /*----- Logical shifting --------------------------------------------------*/
219
220 /* --- @mpx_lsl@ --- *
221 *
222 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
223 * @const mpw *av, *avl@ = source vector base and limit
224 * @size_t n@ = number of bit positions to shift by
225 *
226 * Returns: ---
227 *
228 * Use: Performs a logical shift left operation on an integer.
229 */
230
231 void mpx_lsl(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
232 {
233 size_t nw;
234 unsigned nb;
235
236 /* --- Trivial special case --- */
237
238 if (n == 0)
239 MPX_COPY(dv, dvl, av, avl);
240
241 /* --- Single bit shifting --- */
242
243 else if (n == 1) {
244 mpw w = 0;
245 while (av < avl) {
246 mpw t;
247 if (dv >= dvl)
248 goto done;
249 t = *av++;
250 *dv++ = MPW((t << 1) | w);
251 w = t >> (MPW_BITS - 1);
252 }
253 if (dv >= dvl)
254 goto done;
255 *dv++ = MPW(w);
256 MPX_ZERO(dv, dvl);
257 goto done;
258 }
259
260 /* --- Break out word and bit shifts for more sophisticated work --- */
261
262 nw = n / MPW_BITS;
263 nb = n % MPW_BITS;
264
265 /* --- Handle a shift by a multiple of the word size --- */
266
267 if (nb == 0) {
268 MPX_COPY(dv + nw, dvl, av, avl);
269 memset(dv, 0, MPWS(nw));
270 }
271
272 /* --- And finally the difficult case --- *
273 *
274 * This is a little convoluted, because I have to start from the end and
275 * work backwards to avoid overwriting the source, if they're both the same
276 * block of memory.
277 */
278
279 else {
280 mpw w;
281 size_t nr = MPW_BITS - nb;
282 size_t dvn = dvl - dv;
283 size_t avn = avl - av;
284
285 if (dvn <= nw) {
286 MPX_ZERO(dv, dvl);
287 goto done;
288 }
289
290 if (dvn > avn + nw) {
291 size_t off = avn + nw + 1;
292 MPX_ZERO(dv + off, dvl);
293 dvl = dv + off;
294 w = 0;
295 } else {
296 avl = av + dvn - nw;
297 w = *--avl << nb;
298 }
299
300 while (avl > av) {
301 mpw t = *--avl;
302 *--dvl = (t >> nr) | w;
303 w = t << nb;
304 }
305
306 *--dvl = w;
307 MPX_ZERO(dv, dvl);
308 }
309
310 done:;
311 }
312
313 /* --- @mpx_lsr@ --- *
314 *
315 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
316 * @const mpw *av, *avl@ = source vector base and limit
317 * @size_t n@ = number of bit positions to shift by
318 *
319 * Returns: ---
320 *
321 * Use: Performs a logical shift right operation on an integer.
322 */
323
324 void mpx_lsr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, size_t n)
325 {
326 size_t nw;
327 unsigned nb;
328
329 /* --- Trivial special case --- */
330
331 if (n == 0)
332 MPX_COPY(dv, dvl, av, avl);
333
334 /* --- Single bit shifting --- */
335
336 else if (n == 1) {
337 mpw w = *av++ >> 1;
338 while (av < avl) {
339 mpw t;
340 if (dv >= dvl)
341 goto done;
342 t = *av++;
343 *dv++ = MPW((t << (MPW_BITS - 1)) | w);
344 w = t >> 1;
345 }
346 if (dv >= dvl)
347 goto done;
348 *dv++ = MPW(w);
349 MPX_ZERO(dv, dvl);
350 goto done;
351 }
352
353 /* --- Break out word and bit shifts for more sophisticated work --- */
354
355 nw = n / MPW_BITS;
356 nb = n % MPW_BITS;
357
358 /* --- Handle a shift by a multiple of the word size --- */
359
360 if (nb == 0)
361 MPX_COPY(dv, dvl, av + nw, avl);
362
363 /* --- And finally the difficult case --- */
364
365 else {
366 mpw w;
367 size_t nr = MPW_BITS - nb;
368
369 av += nw;
370 w = *av++;
371 while (av < avl) {
372 mpw t;
373 if (dv >= dvl)
374 goto done;
375 t = *av++;
376 *dv++ = MPW((w >> nb) | (t << nr));
377 w = t;
378 }
379 if (dv < dvl) {
380 *dv++ = MPW(w >> nb);
381 MPX_ZERO(dv, dvl);
382 }
383 }
384
385 done:;
386 }
387
388 /*----- Unsigned arithmetic -----------------------------------------------*/
389
390 /* --- @mpx_2c@ --- *
391 *
392 * Arguments: @mpw *dv, *dvl@ = destination vector
393 * @const mpw *v, *vl@ = source vector
394 *
395 * Returns: ---
396 *
397 * Use: Calculates the two's complement of @v@.
398 */
399
400 void mpx_2c(mpw *dv, mpw *dvl, const mpw *v, const mpw *vl)
401 {
402 mpw c = 0;
403 while (dv < dvl && v < vl)
404 *dv++ = c = MPW(~*v++);
405 if (dv < dvl) {
406 if (c > MPW_MAX / 2)
407 c = MPW(~0);
408 while (dv < dvl)
409 *dv++ = c;
410 }
411 MPX_UADDN(dv, dvl, 1);
412 }
413
414 /* --- @mpx_ucmp@ --- *
415 *
416 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
417 * @const mpw *bv, *bvl@ = second argument vector base and limit
418 *
419 * Returns: Less than, equal to, or greater than zero depending on
420 * whether @a@ is less than, equal to or greater than @b@,
421 * respectively.
422 *
423 * Use: Performs an unsigned integer comparison.
424 */
425
426 int mpx_ucmp(const mpw *av, const mpw *avl, const mpw *bv, const mpw *bvl)
427 {
428 MPX_SHRINK(av, avl);
429 MPX_SHRINK(bv, bvl);
430
431 if (avl - av > bvl - bv)
432 return (+1);
433 else if (avl - av < bvl - bv)
434 return (-1);
435 else while (avl > av) {
436 mpw a = *--avl, b = *--bvl;
437 if (a > b)
438 return (+1);
439 else if (a < b)
440 return (-1);
441 }
442 return (0);
443 }
444
445 /* --- @mpx_uadd@ --- *
446 *
447 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
448 * @const mpw *av, *avl@ = first addend vector base and limit
449 * @const mpw *bv, *bvl@ = second addend vector base and limit
450 *
451 * Returns: ---
452 *
453 * Use: Performs unsigned integer addition. If the result overflows
454 * the destination vector, high-order bits are discarded. This
455 * means that two's complement addition happens more or less for
456 * free, although that's more a side-effect than anything else.
457 * The result vector may be equal to either or both source
458 * vectors, but may not otherwise overlap them.
459 */
460
461 void mpx_uadd(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
462 const mpw *bv, const mpw *bvl)
463 {
464 mpw c = 0;
465
466 while (av < avl || bv < bvl) {
467 mpw a, b;
468 mpd x;
469 if (dv >= dvl)
470 return;
471 a = (av < avl) ? *av++ : 0;
472 b = (bv < bvl) ? *bv++ : 0;
473 x = (mpd)a + (mpd)b + c;
474 *dv++ = MPW(x);
475 c = x >> MPW_BITS;
476 }
477 if (dv < dvl) {
478 *dv++ = c;
479 MPX_ZERO(dv, dvl);
480 }
481 }
482
483 /* --- @mpx_uaddn@ --- *
484 *
485 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
486 * @mpw n@ = other addend
487 *
488 * Returns: ---
489 *
490 * Use: Adds a small integer to a multiprecision number.
491 */
492
493 void mpx_uaddn(mpw *dv, mpw *dvl, mpw n) { MPX_UADDN(dv, dvl, n); }
494
495 /* --- @mpx_usub@ --- *
496 *
497 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
498 * @const mpw *av, *avl@ = first argument vector base and limit
499 * @const mpw *bv, *bvl@ = second argument vector base and limit
500 *
501 * Returns: ---
502 *
503 * Use: Performs unsigned integer subtraction. If the result
504 * overflows the destination vector, high-order bits are
505 * discarded. This means that two's complement subtraction
506 * happens more or less for free, althuogh that's more a side-
507 * effect than anything else. The result vector may be equal to
508 * either or both source vectors, but may not otherwise overlap
509 * them.
510 */
511
512 void mpx_usub(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
513 const mpw *bv, const mpw *bvl)
514 {
515 mpw c = 0;
516
517 while (av < avl || bv < bvl) {
518 mpw a, b;
519 mpd x;
520 if (dv >= dvl)
521 return;
522 a = (av < avl) ? *av++ : 0;
523 b = (bv < bvl) ? *bv++ : 0;
524 x = (mpd)a - (mpd)b - c;
525 *dv++ = MPW(x);
526 if (x >> MPW_BITS)
527 c = 1;
528 else
529 c = 0;
530 }
531 if (c)
532 c = MPW_MAX;
533 while (dv < dvl)
534 *dv++ = c;
535 }
536
537 /* --- @mpx_usubn@ --- *
538 *
539 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
540 * @n@ = subtrahend
541 *
542 * Returns: ---
543 *
544 * Use: Subtracts a small integer from a multiprecision number.
545 */
546
547 void mpx_usubn(mpw *dv, mpw *dvl, mpw n) { MPX_USUBN(dv, dvl, n); }
548
549 /* --- @mpx_umul@ --- *
550 *
551 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
552 * @const mpw *av, *avl@ = multiplicand vector base and limit
553 * @const mpw *bv, *bvl@ = multiplier vector base and limit
554 *
555 * Returns: ---
556 *
557 * Use: Performs unsigned integer multiplication. If the result
558 * overflows the desination vector, high-order bits are
559 * discarded. The result vector may not overlap the argument
560 * vectors in any way.
561 */
562
563 void mpx_umul(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl,
564 const mpw *bv, const mpw *bvl)
565 {
566 /* --- This is probably worthwhile on a multiply --- */
567
568 MPX_SHRINK(av, avl);
569 MPX_SHRINK(bv, bvl);
570
571 /* --- Deal with a multiply by zero --- */
572
573 if (bv == bvl) {
574 MPX_ZERO(dv, dvl);
575 return;
576 }
577
578 /* --- Do the initial multiply and initialize the accumulator --- */
579
580 MPX_UMULN(dv, dvl, av, avl, *bv++);
581
582 /* --- Do the remaining multiply/accumulates --- */
583
584 while (dv < dvl && bv < bvl) {
585 mpw m = *bv++;
586 mpw c = 0;
587 const mpw *avv = av;
588 mpw *dvv = ++dv;
589
590 while (avv < avl) {
591 mpd x;
592 if (dvv >= dvl)
593 goto next;
594 x = (mpd)*dvv + (mpd)m * (mpd)*avv++ + c;
595 *dvv++ = MPW(x);
596 c = x >> MPW_BITS;
597 }
598 MPX_UADDN(dvv, dvl, c);
599 next:;
600 }
601 }
602
603 /* --- @mpx_umuln@ --- *
604 *
605 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
606 * @const mpw *av, *avl@ = multiplicand vector base and limit
607 * @mpw m@ = multiplier
608 *
609 * Returns: ---
610 *
611 * Use: Multiplies a multiprecision integer by a single-word value.
612 * The destination and source may be equal. The destination
613 * is completely cleared after use.
614 */
615
616 void mpx_umuln(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m)
617 {
618 MPX_UMULN(dv, dvl, av, avl, m);
619 }
620
621 /* --- @mpx_umlan@ --- *
622 *
623 * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit
624 * @const mpw *av, *avl@ = multiplicand vector base and limit
625 * @mpw m@ = multiplier
626 *
627 * Returns: ---
628 *
629 * Use: Multiplies a multiprecision integer by a single-word value
630 * and adds the result to an accumulator.
631 */
632
633 void mpx_umlan(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl, mpw m)
634 {
635 MPX_UMLAN(dv, dvl, av, avl, m);
636 }
637
638 /* --- @mpx_usqr@ --- *
639 *
640 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
641 * @const mpw *av, *av@ = source vector base and limit
642 *
643 * Returns: ---
644 *
645 * Use: Performs unsigned integer squaring. The result vector must
646 * not overlap the source vector in any way.
647 */
648
649 void mpx_usqr(mpw *dv, mpw *dvl, const mpw *av, const mpw *avl)
650 {
651 MPX_ZERO(dv, dvl);
652
653 /* --- Main loop --- */
654
655 while (av < avl) {
656 const mpw *avv = av;
657 mpw *dvv = dv;
658 mpw a = *av;
659 mpd c;
660
661 /* --- Stop if I've run out of destination --- */
662
663 if (dvv >= dvl)
664 break;
665
666 /* --- Work out the square at this point in the proceedings --- */
667
668 {
669 mpd x = (mpd)a * (mpd)a + *dvv;
670 *dvv++ = MPW(x);
671 c = MPW(x >> MPW_BITS);
672 }
673
674 /* --- Now fix up the rest of the vector upwards --- */
675
676 avv++;
677 while (dvv < dvl && avv < avl) {
678 mpd x = (mpd)a * (mpd)*avv++;
679 mpd y = ((x << 1) & MPW_MAX) + c + *dvv;
680 c = (x >> (MPW_BITS - 1)) + (y >> MPW_BITS);
681 *dvv++ = MPW(y);
682 }
683 while (dvv < dvl && c) {
684 mpd x = c + *dvv;
685 *dvv++ = MPW(x);
686 c = x >> MPW_BITS;
687 }
688
689 /* --- Get ready for the next round --- */
690
691 av++;
692 dv += 2;
693 }
694 }
695
696 /* --- @mpx_udiv@ --- *
697 *
698 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
699 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
700 * @const mpw *dv, *dvl@ = divisor vector base and limit
701 * @mpw *sv, *svl@ = scratch workspace
702 *
703 * Returns: ---
704 *
705 * Use: Performs unsigned integer division. If the result overflows
706 * the quotient vector, high-order bits are discarded. (Clearly
707 * the remainder vector can't overflow.) The various vectors
708 * may not overlap in any way. Yes, I know it's a bit odd
709 * requiring the dividend to be in the result position but it
710 * does make some sense really. The remainder must have
711 * headroom for at least two extra words. The scratch space
712 * must be at least one word larger than the divisor.
713 */
714
715 void mpx_udiv(mpw *qv, mpw *qvl, mpw *rv, mpw *rvl,
716 const mpw *dv, const mpw *dvl,
717 mpw *sv, mpw *svl)
718 {
719 unsigned norm = 0;
720 size_t scale;
721 mpw d, dd;
722
723 /* --- Initialize the quotient --- */
724
725 MPX_ZERO(qv, qvl);
726
727 /* --- Perform some sanity checks --- */
728
729 MPX_SHRINK(dv, dvl);
730 assert(((void)"division by zero in mpx_udiv", dv < dvl));
731
732 /* --- Normalize the divisor --- *
733 *
734 * The algorithm requires that the divisor be at least two digits long.
735 * This is easy to fix.
736 */
737
738 {
739 unsigned b;
740
741 d = dvl[-1];
742 for (b = MPW_BITS / 2; b; b >>= 1) {
743 if (d < (MPW_MAX >> b)) {
744 d <<= b;
745 norm += b;
746 }
747 }
748 if (dv + 1 == dvl)
749 norm += MPW_BITS;
750 }
751
752 /* --- Normalize the dividend/remainder to match --- */
753
754 if (norm) {
755 mpx_lsl(rv, rvl, rv, rvl, norm);
756 mpx_lsl(sv, svl, dv, dvl, norm);
757 dv = sv;
758 dvl = svl;
759 MPX_SHRINK(dv, dvl);
760 }
761
762 MPX_SHRINK(rv, rvl);
763 d = dvl[-1];
764 dd = dvl[-2];
765
766 /* --- Work out the relative scales --- */
767
768 {
769 size_t rvn = rvl - rv;
770 size_t dvn = dvl - dv;
771
772 /* --- If the divisor is clearly larger, notice this --- */
773
774 if (dvn > rvn) {
775 mpx_lsr(rv, rvl, rv, rvl, norm);
776 return;
777 }
778
779 scale = rvn - dvn;
780 }
781
782 /* --- Calculate the most significant quotient digit --- *
783 *
784 * Because the divisor has its top bit set, this can only happen once. The
785 * pointer arithmetic is a little contorted, to make sure that the
786 * behaviour is defined.
787 */
788
789 if (MPX_UCMP(rv + scale, rvl, >=, dv, dvl)) {
790 mpx_usub(rv + scale, rvl, rv + scale, rvl, dv, dvl);
791 if (qvl - qv > scale)
792 qv[scale] = 1;
793 }
794
795 /* --- Now for the main loop --- */
796
797 {
798 mpw *rvv = rvl - 2;
799
800 while (scale) {
801 mpw q;
802 mpd rh;
803
804 /* --- Get an estimate for the next quotient digit --- */
805
806 mpw r = rvv[1];
807 mpw rr = rvv[0];
808 mpw rrr = *--rvv;
809
810 scale--;
811 rh = ((mpd)r << MPW_BITS) | rr;
812 if (r == d)
813 q = MPW_MAX;
814 else
815 q = MPW(rh / d);
816
817 /* --- Refine the estimate --- */
818
819 {
820 mpd yh = (mpd)d * q;
821 mpd yl = (mpd)dd * q;
822
823 if (yl > MPW_MAX) {
824 yh += yl >> MPW_BITS;
825 yl &= MPW_MAX;
826 }
827
828 while (yh > rh || (yh == rh && yl > rrr)) {
829 q--;
830 yh -= d;
831 if (yl < dd) {
832 yh++;
833 yl += MPW_MAX;
834 }
835 yl -= dd;
836 }
837 }
838
839 /* --- Remove a chunk from the dividend --- */
840
841 {
842 mpw *svv;
843 const mpw *dvv;
844 mpw mc = 0, sc = 0;
845
846 /* --- Calculate the size of the chunk --- *
847 *
848 * This does the whole job of calculating @r >> scale - qd@.
849 */
850
851 for (svv = rv + scale, dvv = dv;
852 dvv < dvl && svv < rvl;
853 svv++, dvv++) {
854 mpd x = (mpd)*dvv * (mpd)q + mc;
855 mc = x >> MPW_BITS;
856 x = (mpd)*svv - MPW(x) - sc;
857 *svv = MPW(x);
858 if (x >> MPW_BITS)
859 sc = 1;
860 else
861 sc = 0;
862 }
863
864 if (svv < rvl) {
865 mpd x = (mpd)*svv - mc - sc;
866 *svv++ = MPW(x);
867 if (x >> MPW_BITS)
868 sc = MPW_MAX;
869 else
870 sc = 0;
871 while (svv < rvl)
872 *svv++ = sc;
873 }
874
875 /* --- Fix if the quotient was too large --- *
876 *
877 * This doesn't seem to happen very often.
878 */
879
880 if (rvl[-1] > MPW_MAX / 2) {
881 mpx_uadd(rv + scale, rvl, rv + scale, rvl, dv, dvl);
882 q--;
883 }
884 }
885
886 /* --- Done for another iteration --- */
887
888 if (qvl - qv > scale)
889 qv[scale] = q;
890 r = rr;
891 rr = rrr;
892 }
893 }
894
895 /* --- Now fiddle with unnormalizing and things --- */
896
897 mpx_lsr(rv, rvl, rv, rvl, norm);
898 }
899
900 /*----- That's all, folks -------------------------------------------------*/