Fix bogus type name.
[u/mdw/catacomb] / mpx.h
CommitLineData
d03ab969 1/* -*-c-*-
2 *
f09e814a 3 * $Id: mpx.h,v 1.13 2002/10/06 22:52:50 mdw Exp $
d03ab969 4 *
5 * Low level multiprecision arithmetic
6 *
7 * (c) 1999 Straylight/Edgeware
8 */
9
10/*----- Licensing notice --------------------------------------------------*
11 *
12 * This file is part of Catacomb.
13 *
14 * Catacomb is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
18 *
19 * Catacomb is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
23 *
24 * You should have received a copy of the GNU Library General Public
25 * License along with Catacomb; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27 * MA 02111-1307, USA.
28 */
29
30/*----- Revision history --------------------------------------------------*
31 *
32 * $Log: mpx.h,v $
f09e814a 33 * Revision 1.13 2002/10/06 22:52:50 mdw
34 * Pile of changes for supporting two's complement properly.
35 *
0f32e0f8 36 * Revision 1.12 2001/04/03 19:36:05 mdw
37 * Add some simple bitwise operations so that Perl can use them.
38 *
52cdaca9 39 * Revision 1.11 2000/10/08 15:48:35 mdw
40 * Rename Karatsuba constants now that we have @gfx_kmul@ too.
41 *
1a05a8ef 42 * Revision 1.10 2000/10/08 12:06:12 mdw
43 * Provide @mpx_ueq@ for rapidly testing equality of two integers.
44 *
698bd937 45 * Revision 1.9 1999/12/22 15:49:07 mdw
46 * New function for division by a small integer.
47 *
5bf74dea 48 * Revision 1.8 1999/12/11 10:57:43 mdw
49 * Karatsuba squaring algorithm.
50 *
652a6acf 51 * Revision 1.7 1999/12/11 01:51:28 mdw
52 * Change Karatsuba parameters slightly.
53 *
a86e33af 54 * Revision 1.6 1999/12/10 23:23:51 mdw
55 * Karatsuba-Ofman multiplication algorithm.
56 *
dd517851 57 * Revision 1.5 1999/11/20 22:23:27 mdw
58 * Add function versions of some low-level macros with wider use.
59 *
7c13f461 60 * Revision 1.4 1999/11/17 18:04:43 mdw
61 * Add two's complement support. Fix a bug in MPX_UMLAN.
62 *
3c9ede17 63 * Revision 1.3 1999/11/13 01:51:29 mdw
64 * Minor interface changes. Should be stable now.
65 *
b9b1c853 66 * Revision 1.2 1999/11/11 17:47:55 mdw
67 * Minor changes for different `mptypes.h' format.
68 *
d03ab969 69 * Revision 1.1 1999/09/03 08:41:12 mdw
70 * Initial import.
71 *
72 */
73
a86e33af 74#ifndef CATACOMB_MPX_H
75#define CATACOMB_MPX_H
d03ab969 76
77#ifdef __cplusplus
78 extern "C" {
79#endif
80
81/*----- The idea ----------------------------------------------------------*
82 *
83 * This file provides functions and macros which work on vectors of words as
84 * unsigned multiprecision integers. The interface works in terms of base
85 * and limit pointers (i.e., a pointer to the start of a vector, and a
86 * pointer just past its end) rather than base pointer and length, because
87 * that requires more arithmetic and state to work on.
88 *
89 * The interfaces are slightly bizarre in other ways. Try to use the
90 * higher-level functions where you can: they're rather better designed to
91 * actually be friendly and useful.
92 */
93
94/*----- Header files ------------------------------------------------------*/
95
96#include <string.h>
97
a86e33af 98#ifndef CATACOMB_MPW_H
3c9ede17 99# include "mpw.h"
d03ab969 100#endif
101
102/*----- General manipulation ----------------------------------------------*/
103
104/* --- @MPX_SHRINK@ --- *
105 *
106 * Arguments: @const mpw *v@ = pointer to vector of words
107 * @const mpw *vl@ = (updated) current limit of vector
108 *
109 * Use: Shrinks down the limit of a multiprecision integer vector.
110 */
111
#define MPX_SHRINK(v, vl) do {                                          \
  const mpw *_vv = (v), *_vvl = (vl);                                   \
  /* Step the limit down past every zero word at the top end. */        \
  while (_vvl > _vv && !_vvl[-1])                                       \
    _vvl--;                                                             \
  /* The cast lets @vl@ be either @mpw *@ or @const mpw *@. */          \
  (vl) = (mpw *)_vvl;                                                   \
} while (0)
118
119/* --- @MPX_BITS@ --- *
120 *
121 * Arguments: @unsigned long b@ = result variable
122 * @const mpw *v@ = pointer to array of words
123 * @const mpw *vl@ = limit of vector (from @MPX_SHRINK@)
124 *
125 * Use: Calculates the number of bits in a multiprecision value.
126 */
127
#define MPX_BITS(b, v, vl) do {                                         \
  const mpw *_v = (v), *_vl = (vl);                                     \
  MPX_SHRINK(_v, _vl);                                                  \
  if (_v == _vl)                                                        \
    (b) = 0;                                                            \
  else {                                                                \
    /* Every word below the top is full; the top has at least 1 bit. */ \
    unsigned long _b = MPW_BITS * (_vl - _v - 1) + 1;                   \
    mpw _w = _vl[-1];                                                   \
    /* Binary search for the top set bit.  Start from the largest    */ \
    /* power of two below @MPW_BITS@ so that the step sizes cover    */ \
    /* every bit position even when @MPW_BITS@ is not a power of two.*/ \
    unsigned _k = 1;                                                    \
    while ((_k << 1) < MPW_BITS)                                        \
      _k <<= 1;                                                         \
    while (_k) {                                                        \
      if (_w >> _k) {                                                   \
        _w >>= _k;                                                      \
        _b += _k;                                                       \
      }                                                                 \
      _k >>= 1;                                                         \
    }                                                                   \
    (b) = _b;                                                           \
  }                                                                     \
} while (0)
147
148/* --- @MPX_OCTETS@ --- *
149 *
150 * Arguments: @size_t o@ = result variable
3c9ede17 151 * @const mpw *v, *vl@ = pointer to array of words
d03ab969 152 *
153 * Use: Calculates the number of octets in a multiprecision value.
154 */
155
#define MPX_OCTETS(o, v, vl) do {                                       \
  unsigned long _bb;                                                    \
  MPX_BITS(_bb, (v), (vl));                                             \
  /* Round the bit count up to a whole number of octets. */             \
  (o) = (_bb + 7) >> 3;                                                 \
} while (0)
161
162/* --- @MPX_OCTETS2C@ --- *
163 *
164 * Arguments: @size_t o@ = result variable
165 * @const mpw *v, *vl@ = pointer to array of words
166 *
167 * Use: Calculates the number of octets in a multiprecision value, if
168 * you represent it as two's complement.
169 */
170
#define MPX_OCTETS2C(o, v, vl) do {                                     \
  unsigned long _bb;                                                    \
  MPX_BITS(_bb, (v), (vl));                                             \
  /* One more bit than the magnitude is needed for the sign, so   */    \
  /* this is the count of octets holding @_bb + 1@ bits.          */    \
  (o) = (_bb >> 3) + 1;                                                 \
} while (0)
176
177/* --- @MPX_COPY@ --- *
178 *
179 * Arguments: @dv, dvl@ = destination vector base and limit
180 * @av, avl@ = source vector base and limit
181 *
182 * Use: Copies a multiprecision integer.
183 */
184
#define MPX_COPY(dv, dvl, av, avl) do {                                 \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  size_t _dn = _dvl - _dv;                                              \
  const mpw *_av = (av), *_avl = (avl);                                 \
  size_t _an = _avl - _av;                                              \
  if (_av == _dv) {                                                     \
    /* In place: the low words are already correct, so only the   */    \
    /* destination words beyond the source's limit need clearing. */    \
    if (_dvl > _avl)                                                    \
      memset(_dv + _an, 0, MPWS(_dn - _an));                            \
  } else if (_an >= _dn)                                                \
    /* Source at least as long: copy what fits, drop the rest. */       \
    memmove(_dv, _av, MPWS(_dn));                                       \
  else {                                                                \
    /* Source shorter: copy it all and zero-extend. */                  \
    memmove(_dv, _av, MPWS(_an));                                       \
    memset(_dv + _an, 0, MPWS(_dn - _an));                              \
  }                                                                     \
} while (0)
200
201/* --- @MPX_ZERO@ --- *
202 *
203 * Arguments: @v, vl@ = base and limit of vector to clear
204 *
205 * Use: Zeroes the area between the two vector pointers.
206 */
207
#define MPX_ZERO(v, vl) do {                                            \
  mpw *_v = (v), *_vl = (vl);                                           \
  /* Do nothing if the limit is at or below the base. */                \
  if (_v < _vl)                                                         \
    memset(_v, 0, MPWS(_vl - _v));                                      \
} while (0)
213
214/*----- Loading and storing -----------------------------------------------*/
215
216/* --- @mpx_storel@ --- *
217 *
218 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
3c9ede17 219 * @void *p@ = pointer to octet array
d03ab969 220 * @size_t sz@ = size of octet array
221 *
222 * Returns: ---
223 *
224 * Use: Stores an MP in an octet array, least significant octet
225 * first. High-end octets are silently discarded if there
226 * isn't enough space for them.
227 */
228
229extern void mpx_storel(const mpw */*v*/, const mpw */*vl*/,
3c9ede17 230 void */*p*/, size_t /*sz*/);
d03ab969 231
232/* --- @mpx_loadl@ --- *
233 *
234 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
3c9ede17 235 * @const void *p@ = pointer to octet array
d03ab969 236 * @size_t sz@ = size of octet array
237 *
238 * Returns: ---
239 *
240 * Use: Loads an MP from an octet array, least significant octet
241 * first. High-end octets are ignored if there isn't enough
242 * space for them.
243 */
244
245extern void mpx_loadl(mpw */*v*/, mpw */*vl*/,
3c9ede17 246 const void */*p*/, size_t /*sz*/);
d03ab969 247
248/* --- @mpx_storeb@ --- *
249 *
250 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
3c9ede17 251 * @void *p@ = pointer to octet array
d03ab969 252 * @size_t sz@ = size of octet array
253 *
254 * Returns: ---
255 *
256 * Use: Stores an MP in an octet array, most significant octet
257 * first. High-end octets are silently discarded if there
258 * isn't enough space for them.
259 */
260
261extern void mpx_storeb(const mpw */*v*/, const mpw */*vl*/,
3c9ede17 262 void */*p*/, size_t /*sz*/);
d03ab969 263
264/* --- @mpx_loadb@ --- *
265 *
266 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
3c9ede17 267 * @const void *p@ = pointer to octet array
d03ab969 268 * @size_t sz@ = size of octet array
269 *
270 * Returns: ---
271 *
272 * Use: Loads an MP from an octet array, most significant octet
273 * first. High-end octets are ignored if there isn't enough
274 * space for them.
275 */
276
277extern void mpx_loadb(mpw */*v*/, mpw */*vl*/,
3c9ede17 278 const void */*p*/, size_t /*sz*/);
d03ab969 279
f09e814a 280/* --- @mpx_storel2cn@ --- *
281 *
282 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
283 * @void *pp@ = pointer to octet array
284 * @size_t sz@ = size of octet array
285 *
286 * Returns: ---
287 *
288 * Use: Stores a negative MP in an octet array, least significant
289 * octet first, as two's complement. High-end octets are
290 * silently discarded if there isn't enough space for them.
291 * This obviously makes the output bad.
292 */
293
294extern void mpx_storel2cn(const mpw */*v*/, const mpw */*vl*/,
295 void */*p*/, size_t /*sz*/);
296
297/* --- @mpx_loadl2cn@ --- *
298 *
299 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
300 * @const void *pp@ = pointer to octet array
301 * @size_t sz@ = size of octet array
302 *
303 * Returns: ---
304 *
305 * Use: Loads a negative MP from an octet array, least significant
306 * octet first, as two's complement. High-end octets are
307 * ignored if there isn't enough space for them. This probably
308 * means you made the wrong choice coming here.
309 */
310
311extern void mpx_loadl2cn(mpw */*v*/, mpw */*vl*/,
312 const void */*p*/, size_t /*sz*/);
313
314/* --- @mpx_storeb2cn@ --- *
315 *
316 * Arguments: @const mpw *v, *vl@ = base and limit of source vector
317 * @void *pp@ = pointer to octet array
318 * @size_t sz@ = size of octet array
319 *
320 * Returns: ---
321 *
322 * Use: Stores a negative MP in an octet array, most significant
323 * octet first, as two's complement. High-end octets are
324 * silently discarded if there isn't enough space for them,
325 * which probably isn't what you meant.
326 */
327
328extern void mpx_storeb2cn(const mpw */*v*/, const mpw */*vl*/,
329 void */*p*/, size_t /*sz*/);
330
331/* --- @mpx_loadb2cn@ --- *
332 *
333 * Arguments: @mpw *v, *vl@ = base and limit of destination vector
334 * @const void *pp@ = pointer to octet array
335 * @size_t sz@ = size of octet array
336 *
337 * Returns: ---
338 *
339 * Use: Loads a negative MP from an octet array, most significant octet
340 * first as two's complement. High-end octets are ignored if
341 * there isn't enough space for them. This probably means you
342 * chose this function wrongly.
343 */
344
345extern void mpx_loadb2cn(mpw */*v*/, mpw */*vl*/,
346 const void */*p*/, size_t /*sz*/);
347
348
d03ab969 349/*----- Logical shifting --------------------------------------------------*/
350
351/* --- @mpx_lsl@ --- *
352 *
353 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
354 * @const mpw *av, *avl@ = source vector base and limit
355 * @size_t n@ = number of bit positions to shift by
356 *
357 * Returns: ---
358 *
359 * Use: Performs a logical shift left operation on an integer.
360 */
361
362extern void mpx_lsl(mpw */*dv*/, mpw */*dvl*/,
363 const mpw */*av*/, const mpw */*avl*/,
364 size_t /*n*/);
365
366/* --- @mpx_lsr@ --- *
367 *
368 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
369 * @const mpw *av, *avl@ = source vector base and limit
370 * @size_t n@ = number of bit positions to shift by
371 *
372 * Returns: ---
373 *
374 * Use: Performs a logical shift right operation on an integer.
375 */
376
377extern void mpx_lsr(mpw */*dv*/, mpw */*dvl*/,
378 const mpw */*av*/, const mpw */*avl*/,
379 size_t /*n*/);
380
0f32e0f8 381/*----- Bitwise operations ------------------------------------------------*/
382
f09e814a 383/* --- How to implement them --- *
384 *
385 * x: 0011
386 * y: 0101
387 */
388
/* Each name's digits are the results for (x, y) = (0, 0), (0, 1), (1, 0),
 * (1, 1), reading left to right.
 */
#define MPX_B0000(x, y) (0u)
#define MPX_B0001(x, y) ((x) & (y))
#define MPX_B0010(x, y) ((x) & ~(y))
#define MPX_B0011(x, y) (x)
#define MPX_B0100(x, y) (~(x) & (y))
#define MPX_B0101(x, y) (y)
#define MPX_B0110(x, y) ((x) ^ (y))
#define MPX_B0111(x, y) ((x) | (y))
#define MPX_B1000(x, y) (~((x) | (y)))
#define MPX_B1001(x, y) (~((x) ^ (y)))
#define MPX_B1010(x, y) (~(y))
#define MPX_B1011(x, y) ((x) | ~(y))
#define MPX_B1100(x, y) (~(x))
#define MPX_B1101(x, y) (~(x) | (y))
#define MPX_B1110(x, y) (~((x) & (y)))
#define MPX_B1111(x, y) (~0u)
405
406/* --- @mpx_bitop@ --- *
0f32e0f8 407 *
408 * Arguments: @mpw *dv, *dvl@ = destination vector
409 * @const mpw *av, *avl@ = first source vector
410 * @const mpw *bv, *bvl@ = second source vector
411 *
412 * Returns: ---
413 *
f09e814a 414 * Use: Provide the dyadic boolean functions. The functions are
415 * named after the truth table they generate:
416 *
417 * a: 0011
418 * b: 0101
419 * @mpx_bitXXXX@
0f32e0f8 420 */
421
f09e814a 422#define MPX_DOBIN(what) \
423 what(0000) what(0001) what(0010) what(0011) \
424 what(0100) what(0101) what(0110) what(0111) \
425 what(1000) what(1001) what(1010) what(1011) \
426 what(1100) what(1101) what(1110) what(1111)
0f32e0f8 427
f09e814a 428#define MPX_BITDECL(string) \
429 extern void mpx_bit##string(mpw */*dv*/, mpw */*dvl*/, \
430 const mpw */*av*/, const mpw */*avl*/, \
431 const mpw */*bv*/, const mpw */*bvl*/);
432MPX_DOBIN(MPX_BITDECL)
0f32e0f8 433
f09e814a 434/* --- @mpx_[n]and@, @mpx_[n]or@, @mpx_xor@ --- *
435 *
436 * Synonyms for the commonly-used functions above.
437 */
438
#define mpx_and mpx_bit0001
#define mpx_or mpx_bit0111
#define mpx_nand mpx_bit1110
#define mpx_nor mpx_bit1000
#define mpx_xor mpx_bit0110
444
445/* --- @mpx_not@ --- *
446 *
447 * Arguments: @mpw *dv, *dvl@ = destination vector
448 * @const mpw *av, *avl@ = source vector
449 *
450 * Returns: ---
451 *
452 * Use: Bitwise NOT.
453 */
0f32e0f8 454
455extern void mpx_not(mpw */*dv*/, mpw */*dvl*/,
456 const mpw */*av*/, const mpw */*avl*/);
457
d03ab969 458/*----- Unsigned arithmetic -----------------------------------------------*/
459
7c13f461 460/* --- @mpx_2c@ --- *
461 *
462 * Arguments: @mpw *dv, *dvl@ = destination vector
463 * @const mpw *v, *vl@ = source vector
464 *
465 * Returns: ---
466 *
467 * Use: Calculates the two's complement of @v@.
468 */
469
470extern void mpx_2c(mpw */*dv*/, mpw */*dvl*/,
471 const mpw */*v*/, const mpw */*vl*/);
472
1a05a8ef 473/* --- @mpx_ueq@ --- *
474 *
475 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
476 * @const mpw *bv, *bvl@ = second argument vector base and limit
477 *
478 * Returns: Nonzero if the two vectors are equal.
479 *
480 * Use: Performs an unsigned integer test for equality.
481 */
482
483extern int mpx_ueq(const mpw */*av*/, const mpw */*avl*/,
484 const mpw */*bv*/, const mpw */*bvl*/);
485
d03ab969 486/* --- @mpx_ucmp@ --- *
487 *
488 * Arguments: @const mpw *av, *avl@ = first argument vector base and limit
489 * @const mpw *bv, *bvl@ = second argument vector base and limit
490 *
491 * Returns: Less than, equal to, or greater than zero depending on
492 * whether @a@ is less than, equal to or greater than @b@,
493 * respectively.
494 *
495 * Use: Performs an unsigned integer comparison.
496 */
497
/* Compare two vectors using the relational operator @op@, e.g.,
 * @MPX_UCMP(av, avl, <, bv, bvl)@.  Expands to a call to @mpx_ucmp@ whose
 * result is compared against zero with @op@.
 */
#define MPX_UCMP(av, avl, op, bv, bvl)                                  \
  (mpx_ucmp((av), (avl), (bv), (bvl)) op 0)
500
501extern int mpx_ucmp(const mpw */*av*/, const mpw */*avl*/,
502 const mpw */*bv*/, const mpw */*bvl*/);
503
504/* --- @mpx_uadd@ --- *
505 *
506 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
507 * @const mpw *av, *avl@ = first addend vector base and limit
508 * @const mpw *bv, *bvl@ = second addend vector base and limit
509 *
510 * Returns: ---
511 *
512 * Use: Performs unsigned integer addition. If the result overflows
513 * the destination vector, high-order bits are discarded. This
514 * means that two's complement addition happens more or less for
515 * free, although that's more a side-effect than anything else.
516 * The result vector may be equal to either or both source
517 * vectors, but may not otherwise overlap them.
518 */
519
520extern void mpx_uadd(mpw */*dv*/, mpw */*dvl*/,
521 const mpw */*av*/, const mpw */*avl*/,
522 const mpw */*bv*/, const mpw */*bvl*/);
523
dd517851 524/* --- @mpx_uaddn@ --- *
525 *
526 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
527 * @mpw n@ = other addend
3c9ede17 528 *
dd517851 529 * Returns: ---
3c9ede17 530 *
531 * Use: Adds a small integer to a multiprecision number.
532 */
533
#define MPX_UADDN(dv, dvl, n) do {                                      \
  mpw *_ddv = (dv), *_ddvl = (dvl);                                     \
  mpw _c = (n);                                                         \
                                                                        \
  /* Ripple the carry upwards; stop as soon as it dies out.  A    */    \
  /* carry out of the top word is discarded.                      */    \
  while (_c && _ddv < _ddvl) {                                          \
    mpd _x = (mpd)*_ddv + (mpd)_c;                                      \
    *_ddv++ = MPW(_x);                                                  \
    _c = _x >> MPW_BITS;                                                \
  }                                                                     \
} while (0)
544
dd517851 545extern void mpx_uaddn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
546
d03ab969 547/* --- @mpx_usub@ --- *
548 *
549 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
550 * @const mpw *av, *avl@ = first argument vector base and limit
551 * @const mpw *bv, *bvl@ = second argument vector base and limit
552 *
553 * Returns: ---
554 *
555 * Use: Performs unsigned integer subtraction. If the result
556 * overflows the destination vector, high-order bits are
557 * discarded. This means that two's complement subtraction
3c9ede17 558 * happens more or less for free, although that's more a side-
d03ab969 559 * effect than anything else. The result vector may be equal to
560 * either or both source vectors, but may not otherwise overlap
561 * them.
562 */
563
564extern void mpx_usub(mpw */*dv*/, mpw */*dvl*/,
565 const mpw */*av*/, const mpw */*avl*/,
566 const mpw */*bv*/, const mpw */*bvl*/);
567
dd517851 568/* --- @mpx_usubn@ --- *
3c9ede17 569 *
dd517851 570 * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
571 * @mpw n@ = subtrahend
572 *
573 * Returns: ---
3c9ede17 574 *
575 * Use: Subtracts a small integer from a multiprecision number.
576 */
577
#define MPX_USUBN(dv, dvl, n) do {                                      \
  mpw *_ddv = (dv), *_ddvl = (dvl);                                     \
  mpw _c = (n);                                                         \
                                                                        \
  while (_ddv < _ddvl) {                                                \
    mpd _x = (mpd)*_ddv - (mpd)_c;                                      \
    *_ddv++ = MPW(_x);                                                  \
    /* Bits above the word mean the subtraction wrapped, so borrow */   \
    /* one from the next word up; otherwise we are finished.       */   \
    if (_x >> MPW_BITS)                                                 \
      _c = 1;                                                           \
    else                                                                \
      break;                                                            \
  }                                                                     \
} while (0)
591
dd517851 592extern void mpx_usubn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
593
3c9ede17 594/* --- @mpx_umul@ --- *
595 *
596 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
597 * @const mpw *av, *avl@ = multiplicand vector base and limit
598 * @const mpw *bv, *bvl@ = multiplier vector base and limit
599 *
600 * Returns: ---
601 *
602 * Use: Performs unsigned integer multiplication. If the result
603 * overflows the destination vector, high-order bits are
604 * discarded. The result vector may not overlap the argument
605 * vectors in any way.
606 */
607
608extern void mpx_umul(mpw */*dv*/, mpw */*dvl*/,
609 const mpw */*av*/, const mpw */*avl*/,
610 const mpw */*bv*/, const mpw */*bvl*/);
611
dd517851 612/* --- @mpx_umuln@ --- *
d03ab969 613 *
dd517851 614 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
615 * @const mpw *av, *avl@ = multiplicand vector base and limit
616 * @mpw m@ = multiplier
617 *
618 * Returns: ---
d03ab969 619 *
620 * Use: Multiplies a multiprecision integer by a single-word value.
621 * The destination and source may be equal. Any high-order
622 * words of the destination beyond the product are cleared.
623 */
624
#define MPX_UMULN(dv, dvl, av, avl, m) do {                             \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  const mpw *_av = (av), *_avl = (avl);                                 \
  mpw _c = 0;                                                           \
  mpd _m = (m);                                                         \
                                                                        \
  while (_av < _avl) {                                                  \
    mpd _x;                                                             \
    /* Out of destination space: the remaining product is dropped. */   \
    if (_dv >= _dvl)                                                    \
      break;                                                            \
    _x = (mpd)_m * (mpd)*_av++ + _c;                                    \
    *_dv++ = MPW(_x);                                                   \
    _c = _x >> MPW_BITS;                                                \
  }                                                                     \
  /* Store the final carry and clear whatever is left above it. */      \
  if (_dv < _dvl) {                                                     \
    *_dv++ = MPW(_c);                                                   \
    MPX_ZERO(_dv, _dvl);                                                \
  }                                                                     \
} while (0)
644
dd517851 645extern void mpx_umuln(mpw */*dv*/, mpw */*dvl*/,
646 const mpw */*av*/, const mpw */*avl*/, mpw m);
647
648/* --- @mpx_umlan@ --- *
d03ab969 649 *
dd517851 650 * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit
651 * @const mpw *av, *avl@ = multiplicand vector base and limit
652 * @mpw m@ = multiplier
653 *
654 * Returns: ---
d03ab969 655 *
656 * Use: Multiplies a multiprecision integer by a single-word value
657 * and adds the result to an accumulator.
658 */
659
#define MPX_UMLAN(dv, dvl, av, avl, m) do {                             \
  mpw *_dv = (dv), *_dvl = (dvl);                                       \
  const mpw *_av = (av), *_avl = (avl);                                 \
  /* Named @_cc@ rather than @_c@ so that it is not captured by  */     \
  /* the @_c@ local inside @MPX_UADDN@ below.                    */     \
  mpw _cc = 0;                                                          \
  mpd _m = (m);                                                         \
                                                                        \
  while (_dv < _dvl && _av < _avl) {                                    \
    mpd _x;                                                             \
    _x = (mpd)*_dv + (mpd)_m * (mpd)*_av++ + _cc;                       \
    *_dv++ = MPW(_x);                                                   \
    _cc = _x >> MPW_BITS;                                               \
  }                                                                     \
  /* Propagate the final carry through the rest of the accumulator. */  \
  MPX_UADDN(_dv, _dvl, _cc);                                            \
} while (0)
674
dd517851 675extern void mpx_umlan(mpw */*dv*/, mpw */*dvl*/,
676 const mpw */*av*/, const mpw */*avl*/, mpw m);
677
3c9ede17 678/* --- @mpx_usqr@ --- *
d03ab969 679 *
680 * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
3c9ede17 681 * @const mpw *av, *avl@ = source vector base and limit
d03ab969 682 *
683 * Returns: ---
684 *
3c9ede17 685 * Use: Performs unsigned integer squaring. The result vector must
686 * not overlap the source vector in any way.
d03ab969 687 */
688
3c9ede17 689extern void mpx_usqr(mpw */*dv*/, mpw */*dvl*/,
690 const mpw */*av*/, const mpw */*avl*/);
d03ab969 691
5bf74dea 692/* --- @mpx_udiv@ --- *
693 *
694 * Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
695 * @mpw *rv, *rvl@ = dividend/remainder vector base and limit
696 * @const mpw *dv, *dvl@ = divisor vector base and limit
697 * @mpw *sv, *svl@ = scratch workspace
698 *
699 * Returns: ---
700 *
701 * Use: Performs unsigned integer division. If the result overflows
702 * the quotient vector, high-order bits are discarded. (Clearly
703 * the remainder vector can't overflow.) The various vectors
704 * may not overlap in any way. Yes, I know it's a bit odd
705 * requiring the dividend to be in the result position but it
706 * does make some sense really. The remainder must have
707 * headroom for at least two extra words. The scratch space
708 * must be at least one word larger than the divisor.
709 */
710
711extern void mpx_udiv(mpw */*qv*/, mpw */*qvl*/, mpw */*rv*/, mpw */*rvl*/,
712 const mpw */*dv*/, const mpw */*dvl*/,
713 mpw */*sv*/, mpw */*svl*/);
714
698bd937 715/* --- @mpx_udivn@ --- *
716 *
717 * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap
718 * dividend)
719 * @const mpw *rv, *rvl@ = dividend
720 * @mpw d@ = single-precision divisor
721 *
722 * Returns: Remainder after division.
723 *
724 * Use: Performs a single-precision division operation.
725 */
726
727extern mpw mpx_udivn(mpw */*qv*/, mpw */*qvl*/,
728 const mpw */*rv*/, const mpw */*rvl*/, mpw /*d*/);
729
5bf74dea 730/*----- Karatsuba multiplication algorithms -------------------------------*/
731
52cdaca9 732/* --- @MPK_THRESH@ --- *
5bf74dea 733 *
734 * This is the limiting length for using Karatsuba algorithms. It's best to
735 * use the simpler classical multiplication method on numbers smaller than
736 * this.
737 */
738
#define MPK_THRESH 16
5bf74dea 740
52cdaca9 741/* --- @MPK_SLOP@ --- *
5bf74dea 742 *
743 * The extra number of words required as scratch space by the Karatsuba
744 * routines. This is a (generous) guess, since the actual amount of space
745 * required is proportional to the recursion depth.
746 */
747
#define MPK_SLOP 64
5bf74dea 749
a86e33af 750/* --- @mpx_kmul@ --- *
751 *
752 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
753 * @const mpw *av, *avl@ = pointer to first argument
754 * @const mpw *bv, *bvl@ = pointer to second argument
755 * @mpw *sv, *svl@ = pointer to scratch workspace
756 *
757 * Returns: ---
758 *
759 * Use: Multiplies two multiprecision integers using Karatsuba's
760 * algorithm. This is rather faster than traditional long
761 * multiplication (e.g., @mpx_umul@) on large numbers, although
762 * more expensive on small ones.
763 *
764 * The destination buffer must be twice as large as
5bf74dea 765 * the larger argument. The scratch space must be twice as
766 * large as the larger argument, plus the magic number
52cdaca9 767 * @MPK_SLOP@.
a86e33af 768 */
769
a86e33af 770extern void mpx_kmul(mpw */*dv*/, mpw */*dvl*/,
771 const mpw */*av*/, const mpw */*avl*/,
772 const mpw */*bv*/, const mpw */*bvl*/,
773 mpw */*sv*/, mpw */*svl*/);
774
5bf74dea 775/* --- @mpx_ksqr@ --- *
d03ab969 776 *
5bf74dea 777 * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
778 * @const mpw *av, *avl@ = pointer to first argument
779 * @mpw *sv, *svl@ = pointer to scratch workspace
d03ab969 780 *
781 * Returns: ---
782 *
5bf74dea 783 * Use: Squares a multiprecision integer using something similar to
784 * Karatsuba's multiplication algorithm. This is rather faster
785 * than traditional long multiplication (e.g., @mpx_umul@) on
786 * large numbers, although more expensive on small ones, and
787 * rather simpler than full-blown Karatsuba multiplication.
788 *
789 * The destination must be twice as large as the argument. The
790 * scratch space must be twice as large as the argument, plus
52cdaca9 791 * the magic number @MPK_SLOP@.
d03ab969 792 */
793
5bf74dea 794extern void mpx_ksqr(mpw */*dv*/, mpw */*dvl*/,
795 const mpw */*av*/, const mpw */*avl*/,
3c9ede17 796 mpw */*sv*/, mpw */*svl*/);
d03ab969 797
798/*----- That's all, folks -------------------------------------------------*/
799
800#ifdef __cplusplus
801 }
802#endif
803
804#endif