3 * $Id: mpx.c,v 1.5 1999/11/20 22:23:27 mdw Exp $
5 * Low-level multiprecision arithmetic
7 * (c) 1999 Straylight/Edgeware
10 /*----- Licensing notice --------------------------------------------------*
12 * This file is part of Catacomb.
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
30 /*----- Revision history --------------------------------------------------*
33 * Revision 1.5 1999/11/20 22:23:27 mdw
34 * Add function versions of some low-level macros with wider use.
36 * Revision 1.4 1999/11/17 18:04:09 mdw
37 * Add two's-complement functionality. Improve mpx_udiv a little by
38 * performing the multiplication of the divisor by q with the subtraction
41 * Revision 1.3 1999/11/13 01:57:31 mdw
42 * Remove stray debugging code.
44 * Revision 1.2 1999/11/13 01:50:59 mdw
45 * Multiprecision routines finished and tested.
47 * Revision 1.1 1999/09/03 08:41:12 mdw
52 /*----- Header files ------------------------------------------------------*/
59 #include <mLib/bits.h>
64 /*----- Loading and storing -----------------------------------------------*/
66 /* --- @mpx_storel@ --- *
68 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
69 * @void *pp@ = pointer to octet array
70 * @size_t sz@ = size of octet array
74 * Use: Stores an MP in an octet array, least significant octet
75 * first. High-end octets are silently discarded if there
76 * isn't enough space for them.
79 void mpx_storel(const mpw
*v
, const mpw
*vl
, void *pp
, size_t sz
)
82 octet
*p
= pp
, *q
= p
+ sz
;
92 *p
++ = U8(w
| n
<< bits
);
104 /* --- @mpx_loadl@ --- *
106 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
107 * @const void *pp@ = pointer to octet array
108 * @size_t sz@ = size of octet array
112 * Use: Loads an MP in an octet array, least significant octet
113 * first. High-end octets are ignored if there isn't enough
117 void mpx_loadl(mpw
*v
, mpw
*vl
, const void *pp
, size_t sz
)
121 const octet
*p
= pp
, *q
= p
+ sz
;
130 if (bits
>= MPW_BITS
) {
132 w
= n
>> (MPW_BITS
- bits
+ 8);
142 /* --- @mpx_storeb@ --- *
144 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
145 * @void *pp@ = pointer to octet array
146 * @size_t sz@ = size of octet array
150 * Use: Stores an MP in an octet array, most significant octet
151 * first. High-end octets are silently discarded if there
152 * isn't enough space for them.
155 void mpx_storeb(const mpw
*v
, const mpw
*vl
, void *pp
, size_t sz
)
158 octet
*p
= pp
, *q
= p
+ sz
;
168 *--q
= U8(w
| n
<< bits
);
170 bits
+= MPW_BITS
- 8;
180 /* --- @mpx_loadb@ --- *
182 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
183 * @const void *pp@ = pointer to octet array
184 * @size_t sz@ = size of octet array
188 * Use: Loads an MP in an octet array, most significant octet
189 * first. High-end octets are ignored if there isn't enough
193 void mpx_loadb(mpw
*v
, mpw
*vl
, const void *pp
, size_t sz
)
197 const octet
*p
= pp
, *q
= p
+ sz
;
206 if (bits
>= MPW_BITS
) {
208 w
= n
>> (MPW_BITS
- bits
+ 8);
218 /*----- Logical shifting --------------------------------------------------*/
220 /* --- @mpx_lsl@ --- *
222 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
223 * @const mpw *av, *avl@ = source vector base and limit
224 * @size_t n@ = number of bit positions to shift by
228 * Use: Performs a logical shift left operation on an integer.
231 void mpx_lsl(mpw
*dv
, mpw
*dvl
, const mpw
*av
, const mpw
*avl
, size_t n
)
236 /* --- Trivial special case --- */
239 MPX_COPY(dv
, dvl
, av
, avl
);
241 /* --- Single bit shifting --- */
250 *dv
++ = MPW((t
<< 1) | w
);
251 w
= t
>> (MPW_BITS
- 1);
260 /* --- Break out word and bit shifts for more sophisticated work --- */
265 /* --- Handle a shift by a multiple of the word size --- */
268 MPX_COPY(dv
+ nw
, dvl
, av
, avl
);
269 memset(dv
, 0, MPWS(nw
));
272 /* --- And finally the difficult case --- *
274 * This is a little convoluted, because I have to start from the end and
275 * work backwards to avoid overwriting the source, if they're both the same
281 size_t nr
= MPW_BITS
- nb
;
282 size_t dvn
= dvl
- dv
;
283 size_t avn
= avl
- av
;
290 if (dvn
> avn
+ nw
) {
291 size_t off
= avn
+ nw
+ 1;
292 MPX_ZERO(dv
+ off
, dvl
);
302 *--dvl
= (t
>> nr
) | w
;
313 /* --- @mpx_lsr@ --- *
315 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
316 * @const mpw *av, *avl@ = source vector base and limit
317 * @size_t n@ = number of bit positions to shift by
321 * Use: Performs a logical shift right operation on an integer.
324 void mpx_lsr(mpw
*dv
, mpw
*dvl
, const mpw
*av
, const mpw
*avl
, size_t n
)
329 /* --- Trivial special case --- */
332 MPX_COPY(dv
, dvl
, av
, avl
);
334 /* --- Single bit shifting --- */
343 *dv
++ = MPW((t
<< (MPW_BITS
- 1)) | w
);
353 /* --- Break out word and bit shifts for more sophisticated work --- */
358 /* --- Handle a shift by a multiple of the word size --- */
361 MPX_COPY(dv
, dvl
, av
+ nw
, avl
);
363 /* --- And finally the difficult case --- */
367 size_t nr
= MPW_BITS
- nb
;
376 *dv
++ = MPW((w
>> nb
) | (t
<< nr
));
380 *dv
++ = MPW(w
>> nb
);
388 /*----- Unsigned arithmetic -----------------------------------------------*/
390 /* --- @mpx_2c@ --- *
392 * Arguments: @mpw *dv, *dvl@ = destination vector
393 * @const mpw *v, *vl@ = source vector
397 * Use: Calculates the two's complement of @v@.
400 void mpx_2c(mpw
*dv
, mpw
*dvl
, const mpw
*v
, const mpw
*vl
)
403 while (dv
< dvl
&& v
< vl
)
404 *dv
++ = c
= MPW(~*v
++);
411 MPX_UADDN(dv
, dvl
, 1);
414 /* --- @mpx_ucmp@ --- *
416 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
417 * @const mpw *bv, *bvl@ = second argument vector base and limit
419 * Returns: Less than, equal to, or greater than zero depending on
420 * whether @a@ is less than, equal to or greater than @b@,
423 * Use: Performs an unsigned integer comparison.
426 int mpx_ucmp(const mpw
*av
, const mpw
*avl
, const mpw
*bv
, const mpw
*bvl
)
431 if (avl
- av
> bvl
- bv
)
433 else if (avl
- av
< bvl
- bv
)
435 else while (avl
> av
) {
436 mpw a
= *--avl
, b
= *--bvl
;
445 /* --- @mpx_uadd@ --- *
447 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
448 * @const mpw *av, *avl@ = first addend vector base and limit
449 * @const mpw *bv, *bvl@ = second addend vector base and limit
453 * Use: Performs unsigned integer addition. If the result overflows
454 * the destination vector, high-order bits are discarded. This
455 * means that two's complement addition happens more or less for
456 * free, although that's more a side-effect than anything else.
457 * The result vector may be equal to either or both source
458 * vectors, but may not otherwise overlap them.
461 void mpx_uadd(mpw
*dv
, mpw
*dvl
, const mpw
*av
, const mpw
*avl
,
462 const mpw
*bv
, const mpw
*bvl
)
466 while (av
< avl
|| bv
< bvl
) {
471 a
= (av
< avl
) ?
*av
++ : 0;
472 b
= (bv
< bvl
) ?
*bv
++ : 0;
473 x
= (mpd
)a
+ (mpd
)b
+ c
;
483 /* --- @mpx_uaddn@ --- *
485 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
486 * @mpw n@ = other addend
490 * Use: Adds a small integer to a multiprecision number.
493 void mpx_uaddn(mpw
*dv
, mpw
*dvl
, mpw n
) { MPX_UADDN(dv
, dvl
, n
); }
495 /* --- @mpx_usub@ --- *
497 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
498 * @const mpw *av, *avl@ = first argument vector base and limit
499 * @const mpw *bv, *bvl@ = second argument vector base and limit
503 * Use: Performs unsigned integer subtraction. If the result
504 * overflows the destination vector, high-order bits are
505 * discarded. This means that two's complement subtraction
506 * happens more or less for free, althuogh that's more a side-
507 * effect than anything else. The result vector may be equal to
508 * either or both source vectors, but may not otherwise overlap
512 void mpx_usub(mpw
*dv
, mpw
*dvl
, const mpw
*av
, const mpw
*avl
,
513 const mpw
*bv
, const mpw
*bvl
)
517 while (av
< avl
|| bv
< bvl
) {
522 a
= (av
< avl
) ?
*av
++ : 0;
523 b
= (bv
< bvl
) ?
*bv
++ : 0;
524 x
= (mpd
)a
- (mpd
)b
- c
;
537 /* --- @mpx_usubn@ --- *
539 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
544 * Use: Subtracts a small integer from a multiprecision number.
547 void mpx_usubn(mpw
*dv
, mpw
*dvl
, mpw n
) { MPX_USUBN(dv
, dvl
, n
); }
549 /* --- @mpx_umul@ --- *
551 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
552 * @const mpw *av, *avl@ = multiplicand vector base and limit
553 * @const mpw *bv, *bvl@ = multiplier vector base and limit
557 * Use: Performs unsigned integer multiplication. If the result
558 * overflows the desination vector, high-order bits are
559 * discarded. The result vector may not overlap the argument
560 * vectors in any way.
563 void mpx_umul(mpw
*dv
, mpw
*dvl
, const mpw
*av
, const mpw
*avl
,
564 const mpw
*bv
, const mpw
*bvl
)
566 /* --- This is probably worthwhile on a multiply --- */
571 /* --- Deal with a multiply by zero --- */
578 /* --- Do the initial multiply and initialize the accumulator --- */
580 MPX_UMULN(dv
, dvl
, av
, avl
, *bv
++);
582 /* --- Do the remaining multiply/accumulates --- */
584 while (dv
< dvl
&& bv
< bvl
) {
594 x
= (mpd
)*dvv
+ (mpd
)m
* (mpd
)*avv
++ + c
;
598 MPX_UADDN(dvv
, dvl
, c
);
603 /* --- @mpx_umuln@ --- *
605 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
606 * @const mpw *av, *avl@ = multiplicand vector base and limit
607 * @mpw m@ = multiplier
611 * Use: Multiplies a multiprecision integer by a single-word value.
612 * The destination and source may be equal. The destination
613 * is completely cleared after use.
616 void mpx_umuln(mpw
*dv
, mpw
*dvl
, const mpw
*av
, const mpw
*avl
, mpw m
)
618 MPX_UMULN(dv
, dvl
, av
, avl
, m
);
621 /* --- @mpx_umlan@ --- *
623 * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit
624 * @const mpw *av, *avl@ = multiplicand vector base and limit
625 * @mpw m@ = multiplier
629 * Use: Multiplies a multiprecision integer by a single-word value
630 * and adds the result to an accumulator.
633 void mpx_umlan(mpw
*dv
, mpw
*dvl
, const mpw
*av
, const mpw
*avl
, mpw m
)
635 MPX_UMLAN(dv
, dvl
, av
, avl
, m
);
638 /* --- @mpx_usqr@ --- *
640 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
641 * @const mpw *av, *av@ = source vector base and limit
645 * Use: Performs unsigned integer squaring. The result vector must
646 * not overlap the source vector in any way.
649 void mpx_usqr(mpw
*dv
, mpw
*dvl
, const mpw
*av
, const mpw
*avl
)
653 /* --- Main loop --- */
661 /* --- Stop if I've run out of destination --- */
666 /* --- Work out the square at this point in the proceedings --- */
669 mpd x
= (mpd
)a
* (mpd
)a
+ *dvv
;
671 c
= MPW(x
>> MPW_BITS
);
674 /* --- Now fix up the rest of the vector upwards --- */
677 while (dvv
< dvl
&& avv
< avl
) {
678 mpd x
= (mpd
)a
* (mpd
)*avv
++;
679 mpd y
= ((x
<< 1) & MPW_MAX
) + c
+ *dvv
;
680 c
= (x
>> (MPW_BITS
- 1)) + (y
>> MPW_BITS
);
683 while (dvv
< dvl
&& c
) {
689 /* --- Get ready for the next round --- */
696 /* --- @mpx_udiv@ --- *
698 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
699 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
700 * @const mpw *dv, *dvl@ = divisor vector base and limit
701 * @mpw *sv, *svl@ = scratch workspace
705 * Use: Performs unsigned integer division. If the result overflows
706 * the quotient vector, high-order bits are discarded. (Clearly
707 * the remainder vector can't overflow.) The various vectors
708 * may not overlap in any way. Yes, I know it's a bit odd
709 * requiring the dividend to be in the result position but it
710 * does make some sense really. The remainder must have
711 * headroom for at least two extra words. The scratch space
712 * must be at least one word larger than the divisor.
715 void mpx_udiv(mpw
*qv
, mpw
*qvl
, mpw
*rv
, mpw
*rvl
,
716 const mpw
*dv
, const mpw
*dvl
,
723 /* --- Initialize the quotient --- */
727 /* --- Perform some sanity checks --- */
730 assert(((void)"division by zero in mpx_udiv", dv
< dvl
));
732 /* --- Normalize the divisor --- *
734 * The algorithm requires that the divisor be at least two digits long.
735 * This is easy to fix.
742 for (b
= MPW_BITS
/ 2; b
; b
>>= 1) {
743 if (d
< (MPW_MAX
>> b
)) {
752 /* --- Normalize the dividend/remainder to match --- */
755 mpx_lsl(rv
, rvl
, rv
, rvl
, norm
);
756 mpx_lsl(sv
, svl
, dv
, dvl
, norm
);
766 /* --- Work out the relative scales --- */
769 size_t rvn
= rvl
- rv
;
770 size_t dvn
= dvl
- dv
;
772 /* --- If the divisor is clearly larger, notice this --- */
775 mpx_lsr(rv
, rvl
, rv
, rvl
, norm
);
782 /* --- Calculate the most significant quotient digit --- *
784 * Because the divisor has its top bit set, this can only happen once. The
785 * pointer arithmetic is a little contorted, to make sure that the
786 * behaviour is defined.
789 if (MPX_UCMP(rv
+ scale
, rvl
, >=, dv
, dvl
)) {
790 mpx_usub(rv
+ scale
, rvl
, rv
+ scale
, rvl
, dv
, dvl
);
791 if (qvl
- qv
> scale
)
795 /* --- Now for the main loop --- */
804 /* --- Get an estimate for the next quotient digit --- */
811 rh
= ((mpd
)r
<< MPW_BITS
) | rr
;
817 /* --- Refine the estimate --- */
821 mpd yl
= (mpd
)dd
* q
;
824 yh
+= yl
>> MPW_BITS
;
828 while (yh
> rh
|| (yh
== rh
&& yl
> rrr
)) {
839 /* --- Remove a chunk from the dividend --- */
846 /* --- Calculate the size of the chunk --- *
848 * This does the whole job of calculating @r >> scale - qd@.
851 for (svv
= rv
+ scale
, dvv
= dv
;
852 dvv
< dvl
&& svv
< rvl
;
854 mpd x
= (mpd
)*dvv
* (mpd
)q
+ mc
;
856 x
= (mpd
)*svv
- MPW(x
) - sc
;
865 mpd x
= (mpd
)*svv
- mc
- sc
;
875 /* --- Fix if the quotient was too large --- *
877 * This doesn't seem to happen very often.
880 if (rvl
[-1] > MPW_MAX
/ 2) {
881 mpx_uadd(rv
+ scale
, rvl
, rv
+ scale
, rvl
, dv
, dvl
);
886 /* --- Done for another iteration --- */
888 if (qvl
- qv
> scale
)
895 /* --- Now fiddle with unnormalizing and things --- */
897 mpx_lsr(rv
, rvl
, rv
, rvl
, norm
);
900 /*----- That's all, folks -------------------------------------------------*/