Change Karatsuba parameters slightly.
[u/mdw/catacomb] / mpx.h
CommitLineData
d03ab969 1/* -*-c-*-
2 *
652a6acf 3 * $Id: mpx.h,v 1.7 1999/12/11 01:51:28 mdw Exp $
d03ab969 4 *
5 * Low level multiprecision arithmetic
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mpx.h,v $
652a6acf 33 * Revision 1.7 1999/12/11 01:51:28 mdw
34 * Change Karatsuba parameters slightly.
35 *
a86e33af 36 * Revision 1.6 1999/12/10 23:23:51 mdw
37 * Karatsuba-Ofman multiplication algorithm.
38 *
dd517851 39 * Revision 1.5 1999/11/20 22:23:27 mdw
40 * Add function versions of some low-level macros with wider use.
41 *
7c13f461 42 * Revision 1.4 1999/11/17 18:04:43 mdw
43 * Add two's complement support. Fix a bug in MPX_UMLAN.
44 *
3c9ede17 45 * Revision 1.3 1999/11/13 01:51:29 mdw
46 * Minor interface changes. Should be stable now.
47 *
b9b1c853 48 * Revision 1.2 1999/11/11 17:47:55 mdw
49 * Minor changes for different `mptypes.h' format.
50 *
d03ab969 51 * Revision 1.1 1999/09/03 08:41:12 mdw
52 * Initial import.
53 *
54 */
55
a86e33af 56#ifndef CATACOMB_MPX_H
57#define CATACOMB_MPX_H
d03ab969 58
59#ifdef __cplusplus
60 extern "C" {
61#endif
62
63/*----- The idea ----------------------------------------------------------*
64 *
65 * This file provides functions and macros which work on vectors of words as
66 * unsigned multiprecision integers. The interface works in terms of base
67 * and limit pointers (i.e., a pointer to the start of a vector, and a
68 * pointer just past its end) rather than base pointer and length, because
69 * that requires more arithmetic and state to work on.
70 *
71 * The interfaces are slightly bizarre in other ways. Try to use the
72 * higher-level functions where you can: they're rather better designed to
73 * actually be friendly and useful.
74 */
75
76/*----- Header files ------------------------------------------------------*/
77
78#include <string.h>
79
a86e33af 80#ifndef CATACOMB_MPW_H
3c9ede17 81# include "mpw.h"
d03ab969 82#endif
83
84/*----- General manipulation ----------------------------------------------*/
85
/* --- @MPX_SHRINK@ --- *
 *
 * Arguments:   @const mpw *v@ = base of the vector of words
 *              @const mpw *vl@ = limit of the vector; must be an lvalue,
 *                      because the new limit is assigned back to it
 *
 * Use:         Lowers the limit past any zero words at the top of the
 *              vector, stopping at the first nonzero word or when the
 *              limit meets the base.  The new limit is stored through a
 *              cast to @mpw *@.
 *
 *              The temporaries are named @_vv@ and @_vvl@ (not @_v@ and
 *              @_vl@) because @MPX_BITS@ and @MPX_OCTETS@ invoke this
 *              macro on their own @_v@ and @_vl@.
 */

#define MPX_SHRINK(v, vl) do { \
  const mpw *_vv = (v); \
  const mpw *_vvl = (vl); \
  for (; _vvl > _vv; _vvl--) { \
    if (_vvl[-1]) \
      break; \
  } \
  (vl) = (mpw *)_vvl; \
} while (0)
100
/* --- @MPX_BITS@ --- *
 *
 * Arguments:   @unsigned long b@ = lvalue which receives the result
 *              @const mpw *v@ = base of the vector of words
 *              @const mpw *vl@ = limit of the vector
 *
 * Use:         Stores in @b@ the number of significant bits in the value:
 *              zero if the vector is empty or all zero, otherwise the
 *              bit length up to and including the highest set bit.
 *              Leading zero words are dropped with @MPX_SHRINK@ on local
 *              copies, so the caller's pointers are left alone.
 *
 *              NOTE(review): the halving search over the top word looks
 *              like it assumes @MPW_BITS@ is a power of two -- confirm
 *              against mpw.h.
 */

#define MPX_BITS(b, v, vl) do { \
  const mpw *_v = (v), *_vl = (vl); \
  MPX_SHRINK(_v, _vl); \
  if (_v != _vl) { \
    /* Whole words below the top one, plus one bit for the top word, */ \
    /* which is nonzero after shrinking. */ \
    unsigned long _b = MPW_BITS * (_vl - _v - 1) + 1; \
    mpw _w = _vl[-1]; \
    unsigned _k; \
    /* Find the rest of the top word's length by successive halving. */ \
    for (_k = MPW_BITS / 2; _k; _k >>= 1) { \
      if (_w >> _k) { \
        _w >>= _k; \
        _b += _k; \
      } \
    } \
    (b) = _b; \
  } else \
    (b) = 0; \
} while (0)
129
/* --- @MPX_OCTETS@ --- *
 *
 * Arguments:   @size_t o@ = lvalue which receives the result
 *              @const mpw *v, *vl@ = base and limit of the vector of words
 *
 * Use:         Stores in @o@ the number of octets needed to hold the
 *              value: zero if the vector is empty or all zero, otherwise
 *              @MPW_BITS / 8@ octets for each word below the top nonzero
 *              word plus however many octets the top word occupies.
 *              Leading zero words are dropped with @MPX_SHRINK@ on local
 *              copies, so the caller's pointers are left alone.
 */

#define MPX_OCTETS(o, v, vl) do { \
  const mpw *_v = (v), *_vl = (vl); \
  MPX_SHRINK(_v, _vl); \
  if (_v != _vl) { \
    size_t _o = (MPW_BITS / 8) * (_vl - _v - 1); \
    mpw _w = _vl[-1]; \
    unsigned _k; \
    /* Halving search, but only down to single-octet granularity. */ \
    for (_k = MPW_BITS / 2; _k >= 8; _k >>= 1) { \
      if (_w >> _k) { \
        _w >>= _k; \
        _o += _k / 8; \
      } \
    } \
    /* Whatever is left over occupies one final octet. */ \
    if (_w) \
      _o++; \
    (o) = _o; \
  } else \
    (o) = 0; \
} while (0)
159
/* --- @MPX_COPY@ --- *
 *
 * Arguments:   @dv, dvl@ = destination vector base and limit
 *              @av, avl@ = source vector base and limit
 *
 * Use:         Copies a multiprecision integer.  If the destination is
 *              shorter than the source, only the low-order words which
 *              fit are copied; if it is longer, the extra high-order
 *              words are set to zero.  The vectors may overlap
 *              (@memmove@ is used), and when source and destination
 *              start at the same address nothing is moved at all: only
 *              the destination words above the source's limit are
 *              cleared.
 *
 *              @MPWS@ comes from mpw.h; it is used here as the byte size
 *              of a given number of words.
 */

#define MPX_COPY(dv, dvl, av, avl) do { \
  mpw *_dv = (dv), *_dvl = (dvl); \
  size_t _dn = _dvl - _dv; \
  const mpw *_av = (av), *_avl = (avl); \
  size_t _an = _avl - _av; \
  if (_av == _dv) { \
    /* In place: the low @_an@ words are already right.  Zero-extend */ \
    /* by clearing from the source's limit upwards, not from the base */ \
    /* (which would wipe out the value itself). */ \
    if (_dvl > _avl) \
      memset(_dv + _an, 0, MPWS(_dn - _an)); \
  } else if (_an >= _dn) \
    memmove(_dv, _av, MPWS(_dn)); \
  else { \
    memmove(_dv, _av, MPWS(_an)); \
    memset(_dv + _an, 0, MPWS(_dn - _an)); \
  } \
} while (0)
183
/* --- @MPX_ZERO@ --- *
 *
 * Arguments:   @v, vl@ = base and limit of the vector to clear
 *
 * Use:         Fills the words from @v@ up to (not including) @vl@ with
 *              zero.  Nothing is written if the limit is not above the
 *              base.
 */

#define MPX_ZERO(v, vl) do { \
  mpw *_v = (v), *_vl = (vl); \
  if (_vl > _v) { \
    memset(_v, 0, MPWS(_vl - _v)); \
  } \
} while (0)
196
197/*----- Loading and storing -----------------------------------------------*/
198
199/* --- @mpx_storel@ --- *
200 *
201 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
3c9ede17 202 * @void *p@ = pointer to octet array
d03ab969 203 * @size_t sz@ = size of octet array
204 *
205 * Returns: ---
206 *
207 * Use: Stores an MP in an octet array, least significant octet
208 * first. High-end octets are silently discarded if there
209 * isn't enough space for them.
210 */
211
212extern void mpx_storel(const mpw */*v*/, const mpw */*vl*/,
3c9ede17 213 void */*p*/, size_t /*sz*/);
d03ab969 214
215/* --- @mpx_loadl@ --- *
216 *
217 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
3c9ede17 218 * @const void *p@ = pointer to octet array
d03ab969 219 * @size_t sz@ = size of octet array
220 *
221 * Returns: ---
222 *
223 * Use: Loads an MP in an octet array, least significant octet
224 * first. High-end octets are ignored if there isn't enough
225 * space for them.
226 */
227
228extern void mpx_loadl(mpw */*v*/, mpw */*vl*/,
3c9ede17 229 const void */*p*/, size_t /*sz*/);
d03ab969 230
231/* --- @mpx_storeb@ --- *
232 *
233 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
3c9ede17 234 * @void *p@ = pointer to octet array
d03ab969 235 * @size_t sz@ = size of octet array
236 *
237 * Returns: ---
238 *
239 * Use: Stores an MP in an octet array, most significant octet
240 * first. High-end octets are silently discarded if there
241 * isn't enough space for them.
242 */
243
244extern void mpx_storeb(const mpw */*v*/, const mpw */*vl*/,
3c9ede17 245 void */*p*/, size_t /*sz*/);
d03ab969 246
247/* --- @mpx_loadb@ --- *
248 *
249 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
3c9ede17 250 * @const void *p@ = pointer to octet array
d03ab969 251 * @size_t sz@ = size of octet array
252 *
253 * Returns: ---
254 *
255 * Use: Loads an MP in an octet array, most significant octet
256 * first. High-end octets are ignored if there isn't enough
257 * space for them.
258 */
259
260extern void mpx_loadb(mpw */*v*/, mpw */*vl*/,
3c9ede17 261 const void */*p*/, size_t /*sz*/);
d03ab969 262
263/*----- Logical shifting --------------------------------------------------*/
264
265/* --- @mpx_lsl@ --- *
266 *
267 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
268 * @const mpw *av, *avl@ = source vector base and limit
269 * @size_t n@ = number of bit positions to shift by
270 *
271 * Returns: ---
272 *
273 * Use: Performs a logical shift left operation on an integer.
274 */
275
276extern void mpx_lsl(mpw */*dv*/, mpw */*dvl*/,
277 const mpw */*av*/, const mpw */*avl*/,
278 size_t /*n*/);
279
280/* --- @mpx_lsr@ --- *
281 *
282 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
283 * @const mpw *av, *avl@ = source vector base and limit
284 * @size_t n@ = number of bit positions to shift by
285 *
286 * Returns: ---
287 *
288 * Use: Performs a logical shift right operation on an integer.
289 */
290
291extern void mpx_lsr(mpw */*dv*/, mpw */*dvl*/,
292 const mpw */*av*/, const mpw */*avl*/,
293 size_t /*n*/);
294
295/*----- Unsigned arithmetic -----------------------------------------------*/
296
7c13f461 297/* --- @mpx_2c@ --- *
298 *
299 * Arguments: @mpw *dv, *dvl@ = destination vector
300 * @const mpw *v, *vl@ = source vector
301 *
302 * Returns: ---
303 *
304 * Use: Calculates the two's complement of @v@.
305 */
306
307extern void mpx_2c(mpw */*dv*/, mpw */*dvl*/,
308 const mpw */*v*/, const mpw */*vl*/);
309
/* --- @mpx_ucmp@ --- *
 *
 * Arguments:   @const mpw *av, *avl@ = first argument vector base and limit
 *              @const mpw *bv, *bvl@ = second argument vector base and limit
 *
 * Returns:     Less than, equal to, or greater than zero depending on
 *              whether @a@ is less than, equal to or greater than @b@,
 *              respectively.
 *
 * Use:         Performs an unsigned integer comparison.
 *
 *              The @MPX_UCMP@ macro wraps the function so that a
 *              comparison reads naturally: the relational operator is
 *              written between the two operands and is applied to the
 *              function's result and zero, e.g.
 *              @MPX_UCMP(av, avl, <=, bv, bvl)@.
 */

#define MPX_UCMP(av, avl, op, bv, bvl) \
  (mpx_ucmp((av), (avl), (bv), (bvl)) op 0)
324
325extern int mpx_ucmp(const mpw */*av*/, const mpw */*avl*/,
326 const mpw */*bv*/, const mpw */*bvl*/);
327
328/* --- @mpx_uadd@ --- *
329 *
330 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
331 * @const mpw *av, *avl@ = first addend vector base and limit
332 * @const mpw *bv, *bvl@ = second addend vector base and limit
333 *
334 * Returns: ---
335 *
336 * Use: Performs unsigned integer addition. If the result overflows
337 * the destination vector, high-order bits are discarded. This
338 * means that two's complement addition happens more or less for
339 * free, although that's more a side-effect than anything else.
340 * The result vector may be equal to either or both source
341 * vectors, but may not otherwise overlap them.
342 */
343
344extern void mpx_uadd(mpw */*dv*/, mpw */*dvl*/,
345 const mpw */*av*/, const mpw */*avl*/,
346 const mpw */*bv*/, const mpw */*bvl*/);
347
/* --- @mpx_uaddn@ --- *
 *
 * Arguments:   @mpw *dv, *dvl@ = source and destination base and limit
 *              @mpw n@ = other addend
 *
 * Returns:     ---
 *
 * Use:         Adds a small integer to a multiprecision number, in place.
 *              The addend is added to the lowest word and the carry is
 *              rippled upwards; work stops as soon as the carry is zero
 *              or the limit is reached, so a carry out of the top word
 *              is dropped.
 *
 *              The macro's temporaries are @_ddv@, @_ddvl@ and @_c@;
 *              @MPX_UMLAN@ passes its own @_dv@, @_dvl@ and @_cc@ here.
 *              @mpd@ and @MPW@ come from mpw.h -- presumably a type wide
 *              enough for the sum of two words and a mask down to one
 *              word; confirm there.
 */

#define MPX_UADDN(dv, dvl, n) do { \
  mpw *_ddv = (dv), *_ddvl = (dvl); \
  mpw _c = (n); \
  \
  for (; _c && _ddv < _ddvl; _ddv++) { \
    mpd _x = (mpd)*_ddv + (mpd)_c; \
    *_ddv = MPW(_x); \
    _c = _x >> MPW_BITS; \
  } \
} while (0)
368
dd517851 369extern void mpx_uaddn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
370
d03ab969 371/* --- @mpx_usub@ --- *
372 *
373 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
374 * @const mpw *av, *avl@ = first argument vector base and limit
375 * @const mpw *bv, *bvl@ = second argument vector base and limit
376 *
377 * Returns: ---
378 *
379 * Use: Performs unsigned integer subtraction. If the result
380 * overflows the destination vector, high-order bits are
381 * discarded. This means that two's complement subtraction
3c9ede17 382 * happens more or less for free, although that's more a side-
d03ab969 383 * effect than anything else. The result vector may be equal to
384 * either or both source vectors, but may not otherwise overlap
385 * them.
386 */
387
388extern void mpx_usub(mpw */*dv*/, mpw */*dvl*/,
389 const mpw */*av*/, const mpw */*avl*/,
390 const mpw */*bv*/, const mpw */*bvl*/);
391
/* --- @mpx_usubn@ --- *
 *
 * Arguments:   @mpw *dv, *dvl@ = source and destination base and limit
 *              @mpw n@ = subtrahend
 *
 * Returns:     ---
 *
 * Use:         Subtracts a small integer from a multiprecision number,
 *              in place.  The subtrahend is taken from the lowest word;
 *              while a word's subtraction leaves anything above the low
 *              @MPW_BITS@ bits of the wide result, a borrow of one is
 *              taken from the next word up.  Work stops at the first
 *              word which doesn't borrow, or at the limit, so a borrow
 *              out of the top word is dropped.
 *
 *              NOTE(review): the borrow test relies on @mpd@ being an
 *              unsigned type wider than a word, so that a negative
 *              difference wraps to set high bits -- confirm in mpw.h.
 */

#define MPX_USUBN(dv, dvl, n) do { \
  mpw *_ddv = (dv), *_ddvl = (dvl); \
  mpw _c = (n); \
  \
  while (_ddv < _ddvl) { \
    mpd _x = (mpd)*_ddv - (mpd)_c; \
    *_ddv++ = MPW(_x); \
    if (!(_x >> MPW_BITS)) \
      break; \
    _c = 1; \
  } \
} while (0)
415
dd517851 416extern void mpx_usubn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
417
3c9ede17 418/* --- @mpx_umul@ --- *
419 *
420 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
421 * @const mpw *av, *avl@ = multiplicand vector base and limit
422 * @const mpw *bv, *bvl@ = multiplier vector base and limit
423 *
424 * Returns: ---
425 *
426 * Use: Performs unsigned integer multiplication. If the result
427 * overflows the destination vector, high-order bits are
428 * discarded. The result vector may not overlap the argument
429 * vectors in any way.
430 */
431
432extern void mpx_umul(mpw */*dv*/, mpw */*dvl*/,
433 const mpw */*av*/, const mpw */*avl*/,
434 const mpw */*bv*/, const mpw */*bvl*/);
435
/* --- @mpx_umuln@ --- *
 *
 * Arguments:   @mpw *dv, *dvl@ = destination vector base and limit
 *              @const mpw *av, *avl@ = multiplicand vector base and limit
 *              @mpw m@ = multiplier
 *
 * Returns:     ---
 *
 * Use:         Multiplies a multiprecision integer by a single-word value.
 *              The destination and source may be equal.  Product words
 *              are written until either vector is exhausted; if there
 *              is still room in the destination, the final carry is
 *              stored in the next word and every word above it is set
 *              to zero.  If there isn't room, the high-order part of
 *              the product is discarded.
 */

#define MPX_UMULN(dv, dvl, av, avl, m) do { \
  mpw *_dv = (dv), *_dvl = (dvl); \
  const mpw *_av = (av), *_avl = (avl); \
  mpw _c = 0; \
  mpd _m = (m); \
  \
  while (_av < _avl && _dv < _dvl) { \
    mpd _x = _m * (mpd)*_av++ + _c; \
    *_dv++ = MPW(_x); \
    _c = _x >> MPW_BITS; \
  } \
  if (_dv < _dvl) { \
    *_dv = MPW(_c); \
    MPX_ZERO(_dv + 1, _dvl); \
  } \
} while (0)
468
dd517851 469extern void mpx_umuln(mpw */*dv*/, mpw */*dvl*/,
470 const mpw */*av*/, const mpw */*avl*/, mpw m);
471
/* --- @mpx_umlan@ --- *
 *
 * Arguments:   @mpw *dv, *dvl@ = destination/accumulator base and limit
 *              @const mpw *av, *avl@ = multiplicand vector base and limit
 *              @mpw m@ = multiplier
 *
 * Returns:     ---
 *
 * Use:         Multiplies a multiprecision integer by a single-word value
 *              and adds the result to an accumulator.  Each accumulator
 *              word has the matching product word and the running carry
 *              added to it, until either vector is exhausted; the final
 *              carry is then rippled into the remaining accumulator
 *              words with @MPX_UADDN@.  Anything which would carry
 *              beyond the accumulator's limit is discarded.
 *
 *              The carry is called @_cc@ rather than @_c@ because it is
 *              handed to @MPX_UADDN@, which has a @_c@ of its own.
 */

#define MPX_UMLAN(dv, dvl, av, avl, m) do { \
  mpw *_dv = (dv), *_dvl = (dvl); \
  const mpw *_av = (av), *_avl = (avl); \
  mpw _cc = 0; \
  mpd _m = (m); \
  \
  while (_av < _avl && _dv < _dvl) { \
    mpd _x = (mpd)*_dv + _m * (mpd)*_av++ + _cc; \
    *_dv++ = MPW(_x); \
    _cc = _x >> MPW_BITS; \
  } \
  MPX_UADDN(_dv, _dvl, _cc); \
} while (0)
500
dd517851 501extern void mpx_umlan(mpw */*dv*/, mpw */*dvl*/,
502 const mpw */*av*/, const mpw */*avl*/, mpw m);
503
3c9ede17 504/* --- @mpx_usqr@ --- *
d03ab969 505 *
506 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
3c9ede17 507 * @const mpw *av, *avl@ = source vector base and limit
d03ab969 508 *
509 * Returns: ---
510 *
3c9ede17 511 * Use: Performs unsigned integer squaring. The result vector must
512 * not overlap the source vector in any way.
d03ab969 513 */
514
3c9ede17 515extern void mpx_usqr(mpw */*dv*/, mpw */*dvl*/,
516 const mpw */*av*/, const mpw */*avl*/);
d03ab969 517
a86e33af 518/* --- @mpx_kmul@ --- *
519 *
520 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
521 * @const mpw *av, *avl@ = pointer to first argument
522 * @const mpw *bv, *bvl@ = pointer to second argument
523 * @mpw *sv, *svl@ = pointer to scratch workspace
524 *
525 * Returns: ---
526 *
527 * Use: Multiplies two multiprecision integers using Karatsuba's
528 * algorithm. This is rather faster than traditional long
529 * multiplication (e.g., @mpx_umul@) on large numbers, although
530 * more expensive on small ones.
531 *
532 * The destination and scratch buffers must be twice as large as
533 * the larger argument.
534 */
535
/* Tuning parameters for @mpx_kmul@.
 *
 * NOTE(review): presumably @KARATSUBA_CUTOFF@ is the operand size (in
 * words) below which plain multiplication is used instead, and
 * @KARATSUBA_SLOP@ is extra scratch-space allowance -- neither is used in
 * this header, so confirm against the implementation.
 */

#define KARATSUBA_CUTOFF        20
#define KARATSUBA_SLOP          32
538
539extern void mpx_kmul(mpw */*dv*/, mpw */*dvl*/,
540 const mpw */*av*/, const mpw */*avl*/,
541 const mpw */*bv*/, const mpw */*bvl*/,
542 mpw */*sv*/, mpw */*svl*/);
543
d03ab969 544/* --- @mpx_udiv@ --- *
545 *
546 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
547 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
548 * @const mpw *dv, *dvl@ = divisor vector base and limit
3c9ede17 549 * @mpw *sv, *svl@ = scratch workspace
d03ab969 550 *
551 * Returns: ---
552 *
553 * Use: Performs unsigned integer division. If the result overflows
554 * the quotient vector, high-order bits are discarded. (Clearly
555 * the remainder vector can't overflow.) The various vectors
556 * may not overlap in any way. Yes, I know it's a bit odd
557 * requiring the dividend to be in the result position but it
3c9ede17 558 * does make some sense really. The remainder must have
559 * headroom for at least two extra words. The scratch space
7c13f461 560 * must be at least one word larger than the divisor.
d03ab969 561 */
562
563extern void mpx_udiv(mpw */*qv*/, mpw */*qvl*/, mpw */*rv*/, mpw */*rvl*/,
3c9ede17 564 const mpw */*dv*/, const mpw */*dvl*/,
565 mpw */*sv*/, mpw */*svl*/);
d03ab969 566
567/*----- That's all, folks -------------------------------------------------*/
568
569#ifdef __cplusplus
570 }
571#endif
572
573#endif