More changes. Still embryonic.
[u/mdw/catacomb] / mpx.h
CommitLineData
d03ab969 1/* -*-c-*-
2 *
5bf74dea 3 * $Id: mpx.h,v 1.8 1999/12/11 10:57:43 mdw Exp $
d03ab969 4 *
5 * Low level multiprecision arithmetic
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mpx.h,v $
5bf74dea 33 * Revision 1.8 1999/12/11 10:57:43 mdw
34 * Karatsuba squaring algorithm.
35 *
652a6acf 36 * Revision 1.7 1999/12/11 01:51:28 mdw
37 * Change Karatsuba parameters slightly.
38 *
a86e33af 39 * Revision 1.6 1999/12/10 23:23:51 mdw
40 * Karatsuba-Ofman multiplication algorithm.
41 *
dd517851 42 * Revision 1.5 1999/11/20 22:23:27 mdw
43 * Add function versions of some low-level macros with wider use.
44 *
7c13f461 45 * Revision 1.4 1999/11/17 18:04:43 mdw
46 * Add two's complement support. Fix a bug in MPX_UMLAN.
47 *
3c9ede17 48 * Revision 1.3 1999/11/13 01:51:29 mdw
49 * Minor interface changes. Should be stable now.
50 *
b9b1c853 51 * Revision 1.2 1999/11/11 17:47:55 mdw
52 * Minor changes for different `mptypes.h' format.
53 *
d03ab969 54 * Revision 1.1 1999/09/03 08:41:12 mdw
55 * Initial import.
56 *
57 */
58
a86e33af 59#ifndef CATACOMB_MPX_H
60#define CATACOMB_MPX_H
d03ab969 61
62#ifdef __cplusplus
63 extern "C" {
64#endif
65
66/*----- The idea ----------------------------------------------------------*
67 *
68 * This file provides functions and macros which work on vectors of words as
69 * unsigned multiprecision integers. The interface works in terms of base
70 * and limit pointers (i.e., a pointer to the start of a vector, and a
71 * pointer just past its end) rather than base pointer and length, because
72 * the latter requires more arithmetic and state to work with.
73 *
74 * The interfaces are slightly bizarre in other ways. Try to use the
75 * higher-level functions where you can: they're rather better designed to
76 * actually be friendly and useful.
77 */
78
79/*----- Header files ------------------------------------------------------*/
80
81#include <string.h>
82
a86e33af 83#ifndef CATACOMB_MPW_H
3c9ede17 84# include "mpw.h"
d03ab969 85#endif
86
87/*----- General manipulation ----------------------------------------------*/
88
/* --- @MPX_SHRINK@ --- *
 *
 * Arguments:   @const mpw *v@ = pointer to vector of words
 *              @const mpw *vl@ = (updated) current limit of vector
 *
 * Use:         Shrinks down the limit of a multiprecision integer vector:
 *              @vl@ is stepped back over zero words at the top end of the
 *              vector, stopping at the first nonzero word or at @v@.
 *              @vl@ must be an lvalue; it is read once and then assigned
 *              the new limit.
 */

#define MPX_SHRINK(v, vl) do {                                          \
  const mpw *_vv = (v), *_vvl = (vl);                                   \
  while (_vvl > _vv && !_vvl[-1])                                       \
    _vvl--;                                                             \
  (vl) = (mpw *)_vvl;                                                   \
} while (0)
103
/* --- @MPX_BITS@ --- *
 *
 * Arguments:   @unsigned long b@ = result variable
 *              @const mpw *v@ = pointer to array of words
 *              @const mpw *vl@ = limit of vector
 *
 * Use:         Calculates the number of bits in a multiprecision value,
 *              i.e., the position of its most significant set bit, counting
 *              from one.  High-order zero words are ignored; the result for
 *              a zero value is zero.
 */

#define MPX_BITS(b, v, vl) do {                                         \
  const mpw *_v = (v), *_vl = (vl);                                     \
  MPX_SHRINK(_v, _vl);                                                  \
  if (_v == _vl)                                                        \
    (b) = 0;                                                            \
  else {                                                                \
    /* Whole words below the top one, plus one bit for the top word's   \
     * (nonzero) leading bit.                                           \
     */                                                                 \
    unsigned long _b = MPW_BITS * (_vl - _v - 1) + 1;                   \
    mpw _w = _vl[-1];                                                   \
    unsigned _k = MPW_BITS / 2;                                         \
    /* Binary search for the top bit. */                                \
    while (_k) {                                                        \
      if (_w >> _k) {                                                   \
        _w >>= _k;                                                      \
        _b += _k;                                                       \
      }                                                                 \
      _k >>= 1;                                                         \
    }                                                                   \
    /* If @MPW_BITS@ isn't a power of two, the halving steps above can  \
     * leave more than one bit in @_w@; count whatever is left.         \
     */                                                                 \
    while (_w >> 1) {                                                   \
      _w >>= 1;                                                         \
      _b++;                                                             \
    }                                                                   \
    (b) = _b;                                                           \
  }                                                                     \
} while (0)
132
/* --- @MPX_OCTETS@ --- *
 *
 * Arguments:   @size_t o@ = result variable
 *              @const mpw *v, *vl@ = base and limit of array of words
 *
 * Use:         Calculates the number of octets in a multiprecision value,
 *              i.e., the number of 8-bit units needed to hold all of its
 *              significant bits.  High-order zero words are ignored; the
 *              result for a zero value is zero.
 */

#define MPX_OCTETS(o, v, vl) do {                                       \
  const mpw *_v = (v), *_vl = (vl);                                     \
  MPX_SHRINK(_v, _vl);                                                  \
  if (_v == _vl)                                                        \
    (o) = 0;                                                            \
  else {                                                                \
    /* Work out the length in bits and round up to whole octets.  This  \
     * is right even if a word isn't a whole number of octets or        \
     * @MPW_BITS@ isn't a power of two.                                 \
     */                                                                 \
    unsigned long _b = MPW_BITS * (_vl - _v - 1);                       \
    mpw _w = _vl[-1];                                                   \
    unsigned _k = MPW_BITS / 2;                                         \
    while (_k) {                                                        \
      if (_w >> _k) {                                                   \
        _w >>= _k;                                                      \
        _b += _k;                                                       \
      }                                                                 \
      _k >>= 1;                                                         \
    }                                                                   \
    /* Count the bits still left in @_w@ (always at least one). */      \
    while (_w) {                                                        \
      _w >>= 1;                                                         \
      _b++;                                                             \
    }                                                                   \
    (o) = (_b + 7) >> 3;                                                \
  }                                                                     \
} while (0)
162
/* --- @MPX_COPY@ --- *
 *
 * Arguments:   @dv, dvl@ = destination vector base and limit
 *              @av, avl@ = source vector base and limit
 *
 * Use:         Copies a multiprecision integer.  If the destination is
 *              shorter than the source, only the low-order words which fit
 *              are copied; if it is longer, the extra high-order words are
 *              set to zero.  The vectors may overlap (the copy is done with
 *              @memmove@); in particular, if they share a base, the
 *              existing words are left alone and only the destination's
 *              tail beyond the source is zeroed.
 */

#define MPX_COPY(dv, dvl, av, avl) do {                                 \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  size_t _dn = _dvl - _dv;                                              \
  const mpw *_av = (av), *_avl = (avl);                                 \
  size_t _an = _avl - _av;                                              \
  if (_av == _dv) {                                                     \
    /* In place: nothing to move, but the words above the source must   \
     * be cleared -- starting at the end of the source, not the base.   \
     */                                                                 \
    if (_dvl > _avl)                                                    \
      memset(_dv + _an, 0, MPWS(_dn - _an));                            \
  } else if (_an >= _dn)                                                \
    memmove(_dv, _av, MPWS(_dn));                                       \
  else {                                                                \
    memmove(_dv, _av, MPWS(_an));                                       \
    memset(_dv + _an, 0, MPWS(_dn - _an));                              \
  }                                                                     \
} while (0)
186
/* --- @MPX_ZERO@ --- *
 *
 * Arguments:   @v, vl@ = base and limit of vector to clear
 *
 * Use:         Zeroes the area between the two vector pointers.  Nothing
 *              is done if the vector is empty (or the limit is below the
 *              base).
 */

#define MPX_ZERO(v, vl) do {                                            \
  mpw *_v = (v), *_vl = (vl);                                           \
  if (_v < _vl)                                                         \
    memset(_v, 0, MPWS(_vl - _v));                                      \
} while (0)
199
200/*----- Loading and storing -----------------------------------------------*/
201
202/* --- @mpx_storel@ --- *
203 *
204 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
3c9ede17 205 * @void *p@ = pointer to octet array
d03ab969 206 * @size_t sz@ = size of octet array
207 *
208 * Returns: ---
209 *
210 * Use: Stores an MP in an octet array, least significant octet
211 * first. High-end octets are silently discarded if there
212 * isn't enough space for them.
213 */
214
215extern void mpx_storel(const mpw */*v*/, const mpw */*vl*/,
3c9ede17 216 void */*p*/, size_t /*sz*/);
d03ab969 217
218/* --- @mpx_loadl@ --- *
219 *
220 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
3c9ede17 221 * @const void *p@ = pointer to octet array
d03ab969 222 * @size_t sz@ = size of octet array
223 *
224 * Returns: ---
225 *
226 * Use: Loads an MP from an octet array, least significant octet
227 * first. High-end octets are ignored if there isn't enough
228 * space for them.
229 */
230
231extern void mpx_loadl(mpw */*v*/, mpw */*vl*/,
3c9ede17 232 const void */*p*/, size_t /*sz*/);
d03ab969 233
234/* --- @mpx_storeb@ --- *
235 *
236 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
3c9ede17 237 * @void *p@ = pointer to octet array
d03ab969 238 * @size_t sz@ = size of octet array
239 *
240 * Returns: ---
241 *
242 * Use: Stores an MP in an octet array, most significant octet
243 * first. High-end octets are silently discarded if there
244 * isn't enough space for them.
245 */
246
247extern void mpx_storeb(const mpw */*v*/, const mpw */*vl*/,
3c9ede17 248 void */*p*/, size_t /*sz*/);
d03ab969 249
250/* --- @mpx_loadb@ --- *
251 *
252 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
3c9ede17 253 * @const void *p@ = pointer to octet array
d03ab969 254 * @size_t sz@ = size of octet array
255 *
256 * Returns: ---
257 *
258 * Use: Loads an MP from an octet array, most significant octet
259 * first. High-end octets are ignored if there isn't enough
260 * space for them.
261 */
262
263extern void mpx_loadb(mpw */*v*/, mpw */*vl*/,
3c9ede17 264 const void */*p*/, size_t /*sz*/);
d03ab969 265
266/*----- Logical shifting --------------------------------------------------*/
267
268/* --- @mpx_lsl@ --- *
269 *
270 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
271 * @const mpw *av, *avl@ = source vector base and limit
272 * @size_t n@ = number of bit positions to shift by
273 *
274 * Returns: ---
275 *
276 * Use: Performs a logical shift left operation on an integer.
277 */
278
279extern void mpx_lsl(mpw */*dv*/, mpw */*dvl*/,
280 const mpw */*av*/, const mpw */*avl*/,
281 size_t /*n*/);
282
283/* --- @mpx_lsr@ --- *
284 *
285 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
286 * @const mpw *av, *avl@ = source vector base and limit
287 * @size_t n@ = number of bit positions to shift by
288 *
289 * Returns: ---
290 *
291 * Use: Performs a logical shift right operation on an integer.
292 */
293
294extern void mpx_lsr(mpw */*dv*/, mpw */*dvl*/,
295 const mpw */*av*/, const mpw */*avl*/,
296 size_t /*n*/);
297
298/*----- Unsigned arithmetic -----------------------------------------------*/
299
7c13f461 300/* --- @mpx_2c@ --- *
301 *
302 * Arguments: @mpw *dv, *dvl@ = destination vector
303 * @const mpw *v, *vl@ = source vector
304 *
305 * Returns: ---
306 *
307 * Use: Calculates the two's complement of @v@.
308 */
309
310extern void mpx_2c(mpw */*dv*/, mpw */*dvl*/,
311 const mpw */*v*/, const mpw */*vl*/);
312
/* --- @mpx_ucmp@ --- *
 *
 * Arguments:   @const mpw *av, *avl@ = first argument vector base and limit
 *              @const mpw *bv, *bvl@ = second argument vector base and limit
 *
 * Returns:     Less than, equal to, or greater than zero depending on
 *              whether @a@ is less than, equal to or greater than @b@,
 *              respectively.
 *
 * Use:         Performs an unsigned integer comparison.
 *
 *              The macro @MPX_UCMP@ is a convenience wrapper: it takes a
 *              relational operator @op@ between the two operands and
 *              compares the function's result against zero using it, so
 *              that e.g. @MPX_UCMP(av, avl, <, bv, bvl)@ is nonzero
 *              exactly when @mpx_ucmp@ returns a negative value.
 */

#define MPX_UCMP(av, avl, op, bv, bvl)                                  \
  (mpx_ucmp((av), (avl), (bv), (bvl)) op 0)
327
328extern int mpx_ucmp(const mpw */*av*/, const mpw */*avl*/,
329 const mpw */*bv*/, const mpw */*bvl*/);
330
331/* --- @mpx_uadd@ --- *
332 *
333 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
334 * @const mpw *av, *avl@ = first addend vector base and limit
335 * @const mpw *bv, *bvl@ = second addend vector base and limit
336 *
337 * Returns: ---
338 *
339 * Use: Performs unsigned integer addition. If the result overflows
340 * the destination vector, high-order bits are discarded. This
341 * means that two's complement addition happens more or less for
342 * free, although that's more a side-effect than anything else.
343 * The result vector may be equal to either or both source
344 * vectors, but may not otherwise overlap them.
345 */
346
347extern void mpx_uadd(mpw */*dv*/, mpw */*dvl*/,
348 const mpw */*av*/, const mpw */*avl*/,
349 const mpw */*bv*/, const mpw */*bvl*/);
350
/* --- @mpx_uaddn@ --- *
 *
 * Arguments:   @mpw *dv, *dvl@ = source and destination base and limit
 *              @mpw n@ = other addend
 *
 * Returns:     ---
 *
 * Use:         Adds a small integer to a multiprecision number, in place.
 *              The carry is propagated upwards only for as long as it is
 *              nonzero; a carry out of the top of the vector is discarded.
 *              @MPX_UADDN@ is the macro version of the function.
 */

#define MPX_UADDN(dv, dvl, n) do {                                      \
  mpw *_ddv = (dv), *_ddvl = (dvl);                                     \
  mpw _c = (n);                                                         \
                                                                        \
  while (_c && _ddv < _ddvl) {                                          \
    mpd _x = (mpd)*_ddv + (mpd)_c;                                      \
    *_ddv++ = MPW(_x);                                                  \
    _c = _x >> MPW_BITS;                                                \
  }                                                                     \
} while (0)
371
dd517851 372extern void mpx_uaddn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
373
d03ab969 374/* --- @mpx_usub@ --- *
375 *
376 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
377 * @const mpw *av, *avl@ = first argument vector base and limit
378 * @const mpw *bv, *bvl@ = second argument vector base and limit
379 *
380 * Returns: ---
381 *
382 * Use: Performs unsigned integer subtraction. If the result
383 * overflows the destination vector, high-order bits are
384 * discarded. This means that two's complement subtraction
3c9ede17 385 * happens more or less for free, although that's more a side-
d03ab969 386 * effect than anything else. The result vector may be equal to
387 * either or both source vectors, but may not otherwise overlap
388 * them.
389 */
390
391extern void mpx_usub(mpw */*dv*/, mpw */*dvl*/,
392 const mpw */*av*/, const mpw */*avl*/,
393 const mpw */*bv*/, const mpw */*bvl*/);
394
/* --- @mpx_usubn@ --- *
 *
 * Arguments:   @mpw *dv, *dvl@ = source and destination base and limit
 *              @n@ = subtrahend
 *
 * Returns:     ---
 *
 * Use:         Subtracts a small integer from a multiprecision number, in
 *              place.  The borrow is propagated upwards only for as long
 *              as it is needed; a borrow out of the top of the vector is
 *              discarded, so the result wraps around.  @MPX_USUBN@ is the
 *              macro version of the function.
 */

#define MPX_USUBN(dv, dvl, n) do {                                      \
  mpw *_ddv = (dv), *_ddvl = (dvl);                                     \
  mpw _c = (n);                                                         \
                                                                        \
  while (_ddv < _ddvl) {                                                \
    mpd _x = (mpd)*_ddv - (mpd)_c;                                      \
    *_ddv++ = MPW(_x);                                                  \
    /* The difference went negative iff bits above the word are set. */ \
    if (_x >> MPW_BITS)                                                 \
      _c = 1;                                                           \
    else                                                                \
      break;                                                            \
  }                                                                     \
} while (0)
418
dd517851 419extern void mpx_usubn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
420
3c9ede17 421/* --- @mpx_umul@ --- *
422 *
423 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
424 * @const mpw *av, *avl@ = multiplicand vector base and limit
425 * @const mpw *bv, *bvl@ = multiplier vector base and limit
426 *
427 * Returns: ---
428 *
429 * Use: Performs unsigned integer multiplication. If the result
430 * overflows the destination vector, high-order bits are
431 * discarded. The result vector may not overlap the argument
432 * vectors in any way.
433 */
434
435extern void mpx_umul(mpw */*dv*/, mpw */*dvl*/,
436 const mpw */*av*/, const mpw */*avl*/,
437 const mpw */*bv*/, const mpw */*bvl*/);
438
/* --- @mpx_umuln@ --- *
 *
 * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 *              @const mpw *av, *avl@ = multiplicand vector base and limit
 *              @mpw m@ = multiplier
 *
 * Returns:     ---
 *
 * Use:         Multiplies a multiprecision integer by a single-word value.
 *              The destination and source may be equal.  If the
 *              destination is longer than the source, the final carry is
 *              stored and any remaining high-order destination words are
 *              set to zero; if it is too short, the high-order part of the
 *              product is discarded.  @MPX_UMULN@ is the macro version of
 *              the function.
 */

#define MPX_UMULN(dv, dvl, av, avl, m) do {                             \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  const mpw *_av = (av), *_avl = (avl);                                 \
  mpw _c = 0;                                                           \
  mpd _m = (m);                                                         \
                                                                        \
  while (_av < _avl) {                                                  \
    mpd _x;                                                             \
    if (_dv >= _dvl)                                                    \
      break;                                                            \
    _x = (mpd)_m * (mpd)*_av++ + _c;                                    \
    *_dv++ = MPW(_x);                                                   \
    _c = _x >> MPW_BITS;                                                \
  }                                                                     \
  if (_dv < _dvl) {                                                     \
    *_dv++ = MPW(_c);                                                   \
    MPX_ZERO(_dv, _dvl);                                                \
  }                                                                     \
} while (0)
471
dd517851 472extern void mpx_umuln(mpw */*dv*/, mpw */*dvl*/,
473 const mpw */*av*/, const mpw */*avl*/, mpw m);
474
/* --- @mpx_umlan@ --- *
 *
 * Arguments:   @mpw *dv, *dvl@ = destination/accumulator base and limit
 *              @const mpw *av, *avl@ = multiplicand vector base and limit
 *              @mpw m@ = multiplier
 *
 * Returns:     ---
 *
 * Use:         Multiplies a multiprecision integer by a single-word value
 *              and adds the result to an accumulator.  The carry left over
 *              once the multiplicand is used up is added into the rest of
 *              the accumulator; anything which doesn't fit in the
 *              accumulator is discarded.  @MPX_UMLAN@ is the macro version
 *              of the function.
 */

#define MPX_UMLAN(dv, dvl, av, avl, m) do {                             \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  const mpw *_av = (av), *_avl = (avl);                                 \
  mpw _cc = 0;                                                          \
  mpd _m = (m);                                                         \
                                                                        \
  while (_dv < _dvl && _av < _avl) {                                    \
    mpd _x;                                                             \
    _x = (mpd)*_dv + (mpd)_m * (mpd)*_av++ + _cc;                       \
    *_dv++ = MPW(_x);                                                   \
    _cc = _x >> MPW_BITS;                                               \
  }                                                                     \
  MPX_UADDN(_dv, _dvl, _cc);                                            \
} while (0)
501
dd517851 502extern void mpx_umlan(mpw */*dv*/, mpw */*dvl*/,
503 const mpw */*av*/, const mpw */*avl*/, mpw m);
504
3c9ede17 505/* --- @mpx_usqr@ --- *
d03ab969 506 *
507 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
3c9ede17 508 * @const mpw *av, *avl@ = source vector base and limit
d03ab969 509 *
510 * Returns: ---
511 *
3c9ede17 512 * Use: Performs unsigned integer squaring. The result vector must
513 * not overlap the source vector in any way.
d03ab969 514 */
515
3c9ede17 516extern void mpx_usqr(mpw */*dv*/, mpw */*dvl*/,
517 const mpw */*av*/, const mpw */*avl*/);
d03ab969 518
5bf74dea 519/* --- @mpx_udiv@ --- *
520 *
521 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
522 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
523 * @const mpw *dv, *dvl@ = divisor vector base and limit
524 * @mpw *sv, *svl@ = scratch workspace
525 *
526 * Returns: ---
527 *
528 * Use: Performs unsigned integer division. If the result overflows
529 * the quotient vector, high-order bits are discarded. (Clearly
530 * the remainder vector can't overflow.) The various vectors
531 * may not overlap in any way. Yes, I know it's a bit odd
532 * requiring the dividend to be in the result position but it
533 * does make some sense really. The remainder must have
534 * headroom for at least two extra words. The scratch space
535 * must be at least one word larger than the divisor.
536 */
537
538extern void mpx_udiv(mpw */*qv*/, mpw */*qvl*/, mpw */*rv*/, mpw */*rvl*/,
539 const mpw */*dv*/, const mpw */*dvl*/,
540 mpw */*sv*/, mpw */*svl*/);
541
542/*----- Karatsuba multiplication algorithms -------------------------------*/
543
544/* --- @KARATSUBA_CUTOFF@ --- *
545 *
546 * This is the limiting length for using Karatsuba algorithms. It's best to
547 * use the simpler classical multiplication method on numbers smaller than
548 * this.
549 */
550
551#define KARATSUBA_CUTOFF 16
552
553/* --- @KARATSUBA_SLOP@ --- *
554 *
555 * The extra number of words required as scratch space by the Karatsuba
556 * routines. This is a (generous) guess, since the actual amount of space
557 * required is proportional to the recursion depth.
558 */
559
560#define KARATSUBA_SLOP 32
561
a86e33af 562/* --- @mpx_kmul@ --- *
563 *
564 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
565 * @const mpw *av, *avl@ = pointer to first argument
566 * @const mpw *bv, *bvl@ = pointer to second argument
567 * @mpw *sv, *svl@ = pointer to scratch workspace
568 *
569 * Returns: ---
570 *
571 * Use: Multiplies two multiprecision integers using Karatsuba's
572 * algorithm. This is rather faster than traditional long
573 * multiplication (e.g., @mpx_umul@) on large numbers, although
574 * more expensive on small ones.
575 *
576 * The destination buffer must be twice as large as
5bf74dea 577 * the larger argument. The scratch space must be twice as
578 * large as the larger argument, plus the magic number
579 * @KARATSUBA_SLOP@.
a86e33af 580 */
581
a86e33af 582extern void mpx_kmul(mpw */*dv*/, mpw */*dvl*/,
583 const mpw */*av*/, const mpw */*avl*/,
584 const mpw */*bv*/, const mpw */*bvl*/,
585 mpw */*sv*/, mpw */*svl*/);
586
5bf74dea 587/* --- @mpx_ksqr@ --- *
d03ab969 588 *
5bf74dea 589 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
590 * @const mpw *av, *avl@ = pointer to first argument
591 * @mpw *sv, *svl@ = pointer to scratch workspace
d03ab969 592 *
593 * Returns: ---
594 *
5bf74dea 595 * Use: Squares a multiprecision integer using something similar to
596 * Karatsuba's multiplication algorithm. This is rather faster
597 * than traditional long multiplication (e.g., @mpx_umul@) on
598 * large numbers, although more expensive on small ones, and
599 * rather simpler than full-blown Karatsuba multiplication.
600 *
601 * The destination must be twice as large as the argument. The
602 * scratch space must be twice as large as the argument, plus
603 * the magic number @KARATSUBA_SLOP@.
d03ab969 604 */
605
5bf74dea 606extern void mpx_ksqr(mpw */*dv*/, mpw */*dvl*/,
607 const mpw */*av*/, const mpw */*avl*/,
3c9ede17 608 mpw */*sv*/, mpw */*svl*/);
d03ab969 609
610/*----- That's all, folks -------------------------------------------------*/
611
612#ifdef __cplusplus
613 }
614#endif
615
616#endif