Rename Karatsuba constants now that we have @gfx_kmul@ too.
[u/mdw/catacomb] / mpx.h
1 /* -*-c-*-
2 *
3 * $Id: mpx.h,v 1.11 2000/10/08 15:48:35 mdw Exp $
4 *
5 * Low level multiprecision arithmetic
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10 /*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30 /*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mpx.h,v $
33 * Revision 1.11 2000/10/08 15:48:35 mdw
34 * Rename Karatsuba constants now that we have @gfx_kmul@ too.
35 *
36 * Revision 1.10 2000/10/08 12:06:12 mdw
37 * Provide @mpx_ueq@ for rapidly testing equality of two integers.
38 *
39 * Revision 1.9 1999/12/22 15:49:07 mdw
40 * New function for division by a small integer.
41 *
42 * Revision 1.8 1999/12/11 10:57:43 mdw
43 * Karatsuba squaring algorithm.
44 *
45 * Revision 1.7 1999/12/11 01:51:28 mdw
46 * Change Karatsuba parameters slightly.
47 *
48 * Revision 1.6 1999/12/10 23:23:51 mdw
49 * Karatsuba-Ofman multiplication algorithm.
50 *
51 * Revision 1.5 1999/11/20 22:23:27 mdw
52 * Add function versions of some low-level macros with wider use.
53 *
54 * Revision 1.4 1999/11/17 18:04:43 mdw
55 * Add two's complement support. Fix a bug in MPX_UMLAN.
56 *
57 * Revision 1.3 1999/11/13 01:51:29 mdw
58 * Minor interface changes. Should be stable now.
59 *
60 * Revision 1.2 1999/11/11 17:47:55 mdw
61 * Minor changes for different `mptypes.h' format.
62 *
63 * Revision 1.1 1999/09/03 08:41:12 mdw
64 * Initial import.
65 *
66 */
67
68 #ifndef CATACOMB_MPX_H
69 #define CATACOMB_MPX_H
70
71 #ifdef __cplusplus
72 extern "C" {
73 #endif
74
75 /*----- The idea ----------------------------------------------------------*
76 *
77 * This file provides functions and macros which work on vectors of words as
78 * unsigned multiprecision integers. The interface works in terms of base
79 * and limit pointers (i.e., a pointer to the start of a vector, and a
80 * pointer just past its end) rather than base pointer and length, because
81 * that requires more arithmetic and state to work on.
82 *
83 * The interfaces are slightly bizarre in other ways. Try to use the
84 * higher-level functions where you can: they're rather better designed to
85 * actually be friendly and useful.
86 */
87
88 /*----- Header files ------------------------------------------------------*/
89
90 #include <string.h>
91
92 #ifndef CATACOMB_MPW_H
93 # include "mpw.h"
94 #endif
95
96 /*----- General manipulation ----------------------------------------------*/
97
/* --- @MPX_SHRINK@ --- *
 *
 * Arguments:	@const mpw *v@ = base of the vector of words
 *		@const mpw *vl@ = limit of the vector (an lvalue; updated)
 *
 * Use:		Steps the limit down past any zero words at the top end of
 *		the vector, stopping early if it reaches the base, and
 *		stores the new limit back in @vl@.  The locals are called
 *		@_vv@ and @_vvl@ so that @MPX_BITS@ and @MPX_OCTETS@ can
 *		pass in their own @_v@ and @_vl@ without capture.
 */

#define MPX_SHRINK(v, vl) do {                                          \
  const mpw *_vv = (v), *_vvl = (vl);                                   \
  for (; _vvl > _vv; _vvl--) {                                          \
    if (_vvl[-1])                                                       \
      break;                                                            \
  }                                                                     \
  (vl) = (mpw *)_vvl;                                                   \
} while (0)
112
/* --- @MPX_BITS@ --- *
 *
 * Arguments:	@unsigned long b@ = result variable (an lvalue)
 *		@const mpw *v@ = base of the vector of words
 *		@const mpw *vl@ = limit of the vector
 *
 * Use:		Calculates the number of significant bits in a
 *		multiprecision value; the answer is zero if every word is
 *		zero.  The vector is shrunk internally (on local copies of
 *		the pointers), so the caller's limit is left alone.
 *
 *		The top word is measured by trying shifts of
 *		@MPW_BITS/2@, @MPW_BITS/4@, ..., 1 bits.  Each shift is
 *		repeated for as long as it leaves something behind: when
 *		@MPW_BITS@ is a power of two that is at most once per
 *		size (the classic binary search), but repeating keeps the
 *		count exact for other word widths too, where a single pass
 *		would come up short.
 */

#define MPX_BITS(b, v, vl) do {                                         \
  const mpw *_v = (v), *_vl = (vl);                                     \
  MPX_SHRINK(_v, _vl);                                                  \
  if (_v == _vl)                                                        \
    (b) = 0;                                                            \
  else {                                                                \
    /* Whole words below the top one, plus the top word's lowest bit */ \
    unsigned long _b = MPW_BITS * (_vl - _v - 1) + 1;                   \
    mpw _w = _vl[-1];                                                   \
    unsigned _k = MPW_BITS / 2;                                         \
    while (_k) {                                                        \
      while (_w >> _k) {                                                \
        _w >>= _k;                                                      \
        _b += _k;                                                       \
      }                                                                 \
      _k >>= 1;                                                         \
    }                                                                   \
    (b) = _b;                                                           \
  }                                                                     \
} while (0)
141
/* --- @MPX_OCTETS@ --- *
 *
 * Arguments:	@size_t o@ = result variable (an lvalue)
 *		@const mpw *v, *vl@ = base and limit of the vector of words
 *
 * Use:		Calculates the number of octets needed to hold a
 *		multiprecision value, i.e., its bit length rounded up to a
 *		multiple of eight; the answer is zero if every word is
 *		zero.  The vector is shrunk internally, on local copies of
 *		the pointers.
 *
 *		The count is built from the bit length rather than from
 *		@MPW_BITS/8@ octets per word, so it stays exact even when
 *		@MPW_BITS@ is not a multiple of eight or not a power of two.
 *		To avoid overflowing a bit count on huge vectors, complete
 *		words are taken eight at a time: eight words hold exactly
 *		@MPW_BITS@ octets.
 */

#define MPX_OCTETS(o, v, vl) do {                                       \
  const mpw *_v = (v), *_vl = (vl);                                     \
  MPX_SHRINK(_v, _vl);                                                  \
  if (_v == _vl)                                                        \
    (o) = 0;                                                            \
  else {                                                                \
    size_t _ow = _vl - _v - 1;          /* complete words below top */  \
    size_t _o = (_ow >> 3) * MPW_BITS;  /* octets in groups of eight */ \
    /* Bits in the leftover words, plus the top word's lowest bit */    \
    unsigned _ob = (unsigned)(_ow & 7) * MPW_BITS + 1;                  \
    mpw _w = _vl[-1];                                                   \
    unsigned _k = MPW_BITS / 2;                                         \
    while (_k) {                                                        \
      while (_w >> _k) {                                                \
        _w >>= _k;                                                      \
        _ob += _k;                                                      \
      }                                                                 \
      _k >>= 1;                                                         \
    }                                                                   \
    (o) = _o + ((_ob + 7) >> 3);                                        \
  }                                                                     \
} while (0)
171
/* --- @MPX_COPY@ --- *
 *
 * Arguments:	@dv, dvl@ = destination vector base and limit
 *		@av, avl@ = source vector base and limit
 *
 * Use:		Copies a multiprecision integer.  If the source is longer
 *		than the destination, only the low words that fit are
 *		copied; if it is shorter, the destination words above the
 *		copy are cleared.  The vectors may overlap (@memmove@ is
 *		used), and if they share a base the copy itself is skipped.
 *
 *		In the shared-base case the padding must start at
 *		@dv + an@, just as in the general case: clearing from @dv@
 *		would wipe out the low words of the very value being kept.
 */

#define MPX_COPY(dv, dvl, av, avl) do {                                 \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  size_t _dn = _dvl - _dv;                                              \
  const mpw *_av = (av), *_avl = (avl);                                 \
  size_t _an = _avl - _av;                                              \
  size_t _cn = _an < _dn ? _an : _dn;   /* words actually copied */     \
  if (_av != _dv)                                                       \
    memmove(_dv, _av, MPWS(_cn));                                       \
  if (_an < _dn)                                                        \
    memset(_dv + _an, 0, MPWS(_dn - _an));                              \
} while (0)
195
/* --- @MPX_ZERO@ --- *
 *
 * Arguments:	@v, vl@ = base and limit of vector to clear
 *
 * Use:		Sets every word from @v@ up to (but not including) @vl@ to
 *		zero.  Nothing is written if the limit isn't above the base.
 */

#define MPX_ZERO(v, vl) do {                                            \
  mpw *_v = (v), *_vl = (vl);                                           \
  if (_v >= _vl)                                                        \
    break;                              /* empty range */               \
  memset(_v, 0, MPWS(_vl - _v));                                        \
} while (0)
208
209 /*----- Loading and storing -----------------------------------------------*/
210
211 /* --- @mpx_storel@ --- *
212 *
213 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
214 * @void *p@ = pointer to octet array
215 * @size_t sz@ = size of octet array
216 *
217 * Returns: ---
218 *
219 * Use: Stores an MP in an octet array, least significant octet
220 * first. High-end octets are silently discarded if there
221 * isn't enough space for them.
222 */
223
224 extern void mpx_storel(const mpw */*v*/, const mpw */*vl*/,
225 void */*p*/, size_t /*sz*/);
226
227 /* --- @mpx_loadl@ --- *
228 *
229 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
230 * @const void *p@ = pointer to octet array
231 * @size_t sz@ = size of octet array
232 *
233 * Returns: ---
234 *
235 * Use: Loads an MP from an octet array, least significant octet
236 * first. High-end octets are ignored if there isn't enough
237 * space for them.
238 */
239
240 extern void mpx_loadl(mpw */*v*/, mpw */*vl*/,
241 const void */*p*/, size_t /*sz*/);
242
243 /* --- @mpx_storeb@ --- *
244 *
245 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
246 * @void *p@ = pointer to octet array
247 * @size_t sz@ = size of octet array
248 *
249 * Returns: ---
250 *
251 * Use: Stores an MP in an octet array, most significant octet
252 * first. High-end octets are silently discarded if there
253 * isn't enough space for them.
254 */
255
256 extern void mpx_storeb(const mpw */*v*/, const mpw */*vl*/,
257 void */*p*/, size_t /*sz*/);
258
259 /* --- @mpx_loadb@ --- *
260 *
261 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
262 * @const void *p@ = pointer to octet array
263 * @size_t sz@ = size of octet array
264 *
265 * Returns: ---
266 *
267 * Use: Loads an MP from an octet array, most significant octet
268 * first. High-end octets are ignored if there isn't enough
269 * space for them.
270 */
271
272 extern void mpx_loadb(mpw */*v*/, mpw */*vl*/,
273 const void */*p*/, size_t /*sz*/);
274
275 /*----- Logical shifting --------------------------------------------------*/
276
277 /* --- @mpx_lsl@ --- *
278 *
279 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
280 * @const mpw *av, *avl@ = source vector base and limit
281 * @size_t n@ = number of bit positions to shift by
282 *
283 * Returns: ---
284 *
285 * Use: Performs a logical shift left operation on an integer.
286 */
287
288 extern void mpx_lsl(mpw */*dv*/, mpw */*dvl*/,
289 const mpw */*av*/, const mpw */*avl*/,
290 size_t /*n*/);
291
292 /* --- @mpx_lsr@ --- *
293 *
294 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
295 * @const mpw *av, *avl@ = source vector base and limit
296 * @size_t n@ = number of bit positions to shift by
297 *
298 * Returns: ---
299 *
300 * Use: Performs a logical shift right operation on an integer.
301 */
302
303 extern void mpx_lsr(mpw */*dv*/, mpw */*dvl*/,
304 const mpw */*av*/, const mpw */*avl*/,
305 size_t /*n*/);
306
307 /*----- Unsigned arithmetic -----------------------------------------------*/
308
309 /* --- @mpx_2c@ --- *
310 *
311 * Arguments: @mpw *dv, *dvl@ = destination vector
312 * @const mpw *v, *vl@ = source vector
313 *
314 * Returns: ---
315 *
316 * Use: Calculates the two's complement of @v@.
317 */
318
319 extern void mpx_2c(mpw */*dv*/, mpw */*dvl*/,
320 const mpw */*v*/, const mpw */*vl*/);
321
322 /* --- @mpx_ueq@ --- *
323 *
324 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
325 * @const mpw *bv, *bvl@ = second argument vector base and limit
326 *
327 * Returns: Nonzero if the two vectors are equal.
328 *
329 * Use: Performs an unsigned integer test for equality.
330 */
331
332 extern int mpx_ueq(const mpw */*av*/, const mpw */*avl*/,
333 const mpw */*bv*/, const mpw */*bvl*/);
334
/* --- @mpx_ucmp@ --- *
 *
 * Arguments:	@const mpw *av, *avl@ = first argument vector base and limit
 *		@const mpw *bv, *bvl@ = second argument vector base and limit
 *
 * Returns:	Less than, equal to, or greater than zero depending on
 *		whether @a@ is less than, equal to or greater than @b@,
 *		respectively.
 *
 * Use:		Performs an unsigned integer comparison.  The macro
 *		@MPX_UCMP@ wraps the function so that a comparison can be
 *		written with the operator in the middle: the operator token
 *		@op@ is applied between the function's result and zero, so
 *		@MPX_UCMP(av, avl, <, bv, bvl)@ tests whether @a < b@.
 */

#define MPX_UCMP(av, avl, op, bv, bvl)                                  \
  (mpx_ucmp((av), (avl), (bv), (bvl)) op 0)
349
350 extern int mpx_ucmp(const mpw */*av*/, const mpw */*avl*/,
351 const mpw */*bv*/, const mpw */*bvl*/);
352
353 /* --- @mpx_uadd@ --- *
354 *
355 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
356 * @const mpw *av, *avl@ = first addend vector base and limit
357 * @const mpw *bv, *bvl@ = second addend vector base and limit
358 *
359 * Returns: ---
360 *
361 * Use: Performs unsigned integer addition. If the result overflows
362 * the destination vector, high-order bits are discarded. This
363 * means that two's complement addition happens more or less for
364 * free, although that's more a side-effect than anything else.
365 * The result vector may be equal to either or both source
366 * vectors, but may not otherwise overlap them.
367 */
368
369 extern void mpx_uadd(mpw */*dv*/, mpw */*dvl*/,
370 const mpw */*av*/, const mpw */*avl*/,
371 const mpw */*bv*/, const mpw */*bvl*/);
372
/* --- @mpx_uaddn@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = source and destination base and limit
 *		@mpw n@ = other addend
 *
 * Returns:	---
 *
 * Use:		Adds a small integer to a multiprecision number, in place.
 *		The addend is treated as a carry into the lowest word and
 *		rippled upwards; work stops as soon as the carry dies out.
 *		A carry out of the top of the vector is dropped.  The
 *		locals are called @_ddv@, @_ddvl@ and @_c@ so that
 *		@MPX_UMLAN@ can pass in its own @_dv@, @_dvl@ and @_cc@.
 */

#define MPX_UADDN(dv, dvl, n) do {                                      \
  mpw *_ddv = (dv), *_ddvl = (dvl);                                     \
  mpw _c = (n);                                                         \
                                                                        \
  for (; _c && _ddv < _ddvl; _ddv++) {                                  \
    mpd _x = (mpd)*_ddv + (mpd)_c;                                      \
    *_ddv = MPW(_x);                                                    \
    _c = _x >> MPW_BITS;                /* carry into the next word */  \
  }                                                                     \
} while (0)
393
394 extern void mpx_uaddn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
395
396 /* --- @mpx_usub@ --- *
397 *
398 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
399 * @const mpw *av, *avl@ = first argument vector base and limit
400 * @const mpw *bv, *bvl@ = second argument vector base and limit
401 *
402 * Returns: ---
403 *
404 * Use: Performs unsigned integer subtraction. If the result
405 * overflows the destination vector, high-order bits are
406 * discarded. This means that two's complement subtraction
407 * happens more or less for free, although that's more a side-
408 * effect than anything else. The result vector may be equal to
409 * either or both source vectors, but may not otherwise overlap
410 * them.
411 */
412
413 extern void mpx_usub(mpw */*dv*/, mpw */*dvl*/,
414 const mpw */*av*/, const mpw */*avl*/,
415 const mpw */*bv*/, const mpw */*bvl*/);
416
/* --- @mpx_usubn@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = source and destination base and limit
 *		@n@ = subtrahend
 *
 * Returns:	---
 *
 * Use:		Subtracts a small integer from a multiprecision number, in
 *		place.  The subtrahend is taken from the lowest word; for as
 *		long as a word underflows, a borrow of one is taken from the
 *		next word up.  A borrow out of the top of the vector is
 *		dropped.
 */

#define MPX_USUBN(dv, dvl, n) do {                                      \
  mpw *_ddv = (dv), *_ddvl = (dvl);                                     \
  mpw _c = (n);                                                         \
                                                                        \
  for (; _ddv < _ddvl; _ddv++) {                                        \
    mpd _x = (mpd)*_ddv - (mpd)_c;                                      \
    *_ddv = MPW(_x);                                                    \
    if (!(_x >> MPW_BITS))                                              \
      break;                            /* no underflow: finished */    \
    _c = 1;                             /* borrow from the next word */ \
  }                                                                     \
} while (0)
440
441 extern void mpx_usubn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
442
443 /* --- @mpx_umul@ --- *
444 *
445 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
446 * @const mpw *av, *avl@ = multiplicand vector base and limit
447 * @const mpw *bv, *bvl@ = multiplier vector base and limit
448 *
449 * Returns: ---
450 *
451 * Use: Performs unsigned integer multiplication. If the result
452 * overflows the destination vector, high-order bits are
453 * discarded. The result vector may not overlap the argument
454 * vectors in any way.
455 */
456
457 extern void mpx_umul(mpw */*dv*/, mpw */*dvl*/,
458 const mpw */*av*/, const mpw */*avl*/,
459 const mpw */*bv*/, const mpw */*bvl*/);
460
/* --- @mpx_umuln@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination vector base and limit
 *		@const mpw *av, *avl@ = multiplicand vector base and limit
 *		@mpw m@ = multiplier
 *
 * Returns:	---
 *
 * Use:		Multiplies a multiprecision integer by a single-word value.
 *		The destination and source may be equal: each source word
 *		is read before the matching destination word is written.
 *		If the destination has room beyond the multiplicand's
 *		length, the final carry is stored in the next word and
 *		anything above that is cleared; otherwise the product is
 *		truncated to fit.
 */

#define MPX_UMULN(dv, dvl, av, avl, m) do {                             \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  const mpw *_av = (av), *_avl = (avl);                                 \
  mpw _c = 0;                                                           \
  mpd _m = (m);                                                         \
                                                                        \
  for (; _av < _avl && _dv < _dvl; _av++, _dv++) {                      \
    mpd _x = _m * (mpd)*_av + _c;                                       \
    *_dv = MPW(_x);                                                     \
    _c = _x >> MPW_BITS;                /* high half carries upwards */ \
  }                                                                     \
  if (_dv < _dvl) {                                                     \
    *_dv++ = MPW(_c);                                                   \
    MPX_ZERO(_dv, _dvl);                                                \
  }                                                                     \
} while (0)
491 } \
492 } while (0)
493
494 extern void mpx_umuln(mpw */*dv*/, mpw */*dvl*/,
495 const mpw */*av*/, const mpw */*avl*/, mpw m);
496
/* --- @mpx_umlan@ --- *
 *
 * Arguments:	@mpw *dv, *dvl@ = destination/accumulator base and limit
 *		@const mpw *av, *avl@ = multiplicand vector base and limit
 *		@mpw m@ = multiplier
 *
 * Returns:	---
 *
 * Use:		Multiplies a multiprecision integer by a single-word value
 *		and adds the result to an accumulator.  Once the
 *		multiplicand is used up, the outstanding carry is added
 *		into the remaining accumulator words; anything which
 *		doesn't fit in the accumulator is dropped.  The carry is
 *		called @_cc@ so as not to collide with @MPX_UADDN@'s @_c@.
 */

#define MPX_UMLAN(dv, dvl, av, avl, m) do {                             \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  const mpw *_av = (av), *_avl = (avl);                                 \
  mpw _cc = 0;                                                          \
  mpd _m = (m);                                                         \
                                                                        \
  for (; _dv < _dvl && _av < _avl; _dv++, _av++) {                      \
    mpd _x = (mpd)*_dv + _m * (mpd)*_av + _cc;                          \
    *_dv = MPW(_x);                                                     \
    _cc = _x >> MPW_BITS;                                               \
  }                                                                     \
  MPX_UADDN(_dv, _dvl, _cc);                                            \
} while (0)
521 MPX_UADDN(_dv, _dvl, _cc); \
522 } while (0)
523
524 extern void mpx_umlan(mpw */*dv*/, mpw */*dvl*/,
525 const mpw */*av*/, const mpw */*avl*/, mpw m);
526
527 /* --- @mpx_usqr@ --- *
528 *
529 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
530 * @const mpw *av, *avl@ = source vector base and limit
531 *
532 * Returns: ---
533 *
534 * Use: Performs unsigned integer squaring. The result vector must
535 * not overlap the source vector in any way.
536 */
537
538 extern void mpx_usqr(mpw */*dv*/, mpw */*dvl*/,
539 const mpw */*av*/, const mpw */*avl*/);
540
541 /* --- @mpx_udiv@ --- *
542 *
543 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
544 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
545 * @const mpw *dv, *dvl@ = divisor vector base and limit
546 * @mpw *sv, *svl@ = scratch workspace
547 *
548 * Returns: ---
549 *
550 * Use: Performs unsigned integer division. If the result overflows
551 * the quotient vector, high-order bits are discarded. (Clearly
552 * the remainder vector can't overflow.) The various vectors
553 * may not overlap in any way. Yes, I know it's a bit odd
554 * requiring the dividend to be in the result position but it
555 * does make some sense really. The remainder must have
556 * headroom for at least two extra words. The scratch space
557 * must be at least one word larger than the divisor.
558 */
559
560 extern void mpx_udiv(mpw */*qv*/, mpw */*qvl*/, mpw */*rv*/, mpw */*rvl*/,
561 const mpw */*dv*/, const mpw */*dvl*/,
562 mpw */*sv*/, mpw */*svl*/);
563
564 /* --- @mpx_udivn@ --- *
565 *
566 * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap
567 * dividend)
568 * @const mpw *rv, *rvl@ = dividend
569 * @mpw d@ = single-precision divisor
570 *
571 * Returns: Remainder after division.
572 *
573 * Use: Performs a single-precision division operation.
574 */
575
576 extern mpw mpx_udivn(mpw */*qv*/, mpw */*qvl*/,
577 const mpw */*rv*/, const mpw */*rvl*/, mpw /*d*/);
578
579 /*----- Karatsuba multiplication algorithms -------------------------------*/
580
/* --- @MPK_THRESH@ --- *
 *
 * The vector length, in words, below which the Karatsuba algorithms aren't
 * worth using: numbers shorter than this are better handled by the simpler
 * classical multiplication method.
 */

#define MPK_THRESH 16
589
/* --- @MPK_SLOP@ --- *
 *
 * The number of extra words of scratch space which the Karatsuba routines
 * need over and above twice the argument size.  The true requirement is
 * proportional to the recursion depth, so this is only a (generous) guess.
 */

#define MPK_SLOP 64
598
599 /* --- @mpx_kmul@ --- *
600 *
601 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
602 * @const mpw *av, *avl@ = pointer to first argument
603 * @const mpw *bv, *bvl@ = pointer to second argument
604 * @mpw *sv, *svl@ = pointer to scratch workspace
605 *
606 * Returns: ---
607 *
608 * Use: Multiplies two multiprecision integers using Karatsuba's
609 * algorithm. This is rather faster than traditional long
610 * multiplication (e.g., @mpx_umul@) on large numbers, although
611 * more expensive on small ones.
612 *
613 * The destination buffer must be twice as large as the larger
614 * argument. The scratch space must be twice as large as the
615 * larger argument, plus the magic number
616 * @MPK_SLOP@.
617 */
618
619 extern void mpx_kmul(mpw */*dv*/, mpw */*dvl*/,
620 const mpw */*av*/, const mpw */*avl*/,
621 const mpw */*bv*/, const mpw */*bvl*/,
622 mpw */*sv*/, mpw */*svl*/);
623
624 /* --- @mpx_ksqr@ --- *
625 *
626 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
627 * @const mpw *av, *avl@ = pointer to first argument
628 * @mpw *sv, *svl@ = pointer to scratch workspace
629 *
630 * Returns: ---
631 *
632 * Use: Squares a multiprecision integer using something similar to
633 * Karatsuba's multiplication algorithm. This is rather faster
634 * than traditional long multiplication (e.g., @mpx_umul@) on
635 * large numbers, although more expensive on small ones, and
636 * rather simpler than full-blown Karatsuba multiplication.
637 *
638 * The destination must be twice as large as the argument. The
639 * scratch space must be twice as large as the argument, plus
640 * the magic number @MPK_SLOP@.
641 */
642
643 extern void mpx_ksqr(mpw */*dv*/, mpw */*dvl*/,
644 const mpw */*av*/, const mpw */*avl*/,
645 mpw */*sv*/, mpw */*svl*/);
646
647 /*----- That's all, folks -------------------------------------------------*/
648
649 #ifdef __cplusplus
650 }
651 #endif
652
653 #endif