/* -*-c-*-
*
- * $Id: mpx.h,v 1.2 1999/11/11 17:47:55 mdw Exp $
+ * $Id: mpx.h,v 1.9 1999/12/22 15:49:07 mdw Exp $
*
* Low level multiprecision arithmetic
*
/*----- Revision history --------------------------------------------------*
*
* $Log: mpx.h,v $
+ * Revision 1.9 1999/12/22 15:49:07 mdw
+ * New function for division by a small integer.
+ *
+ * Revision 1.8 1999/12/11 10:57:43 mdw
+ * Karatsuba squaring algorithm.
+ *
+ * Revision 1.7 1999/12/11 01:51:28 mdw
+ * Change Karatsuba parameters slightly.
+ *
+ * Revision 1.6 1999/12/10 23:23:51 mdw
+ * Karatsuba-Ofman multiplication algorithm.
+ *
+ * Revision 1.5 1999/11/20 22:23:27 mdw
+ * Add function versions of some low-level macros with wider use.
+ *
+ * Revision 1.4 1999/11/17 18:04:43 mdw
+ * Add two's complement support. Fix a bug in MPX_UMLAN.
+ *
+ * Revision 1.3 1999/11/13 01:51:29 mdw
+ * Minor interface changes. Should be stable now.
+ *
* Revision 1.2 1999/11/11 17:47:55 mdw
* Minor changes for different `mptypes.h' format.
*
*
*/
-#ifndef MPX_H
-#define MPX_H
+#ifndef CATACOMB_MPX_H
+#define CATACOMB_MPX_H
#ifdef __cplusplus
extern "C" {
#include <string.h>
-#ifndef MPTYPES_H
-# include "mptypes.h"
+#ifndef CATACOMB_MPW_H
+# include "mpw.h"
#endif
-/*----- Basic macros ------------------------------------------------------*/
-
-/* --- @MPW@ --- *
- *
- * Arguments: @x@ = an unsigned value
- *
- * Use: Expands to the value of @x@ masked and typecast to a
- * multiprecision integer word.
- */
-
-#define MPW(x) ((mpw)((x) & MPW_MAX))
-
-/* --- @MPWS@ --- *
- *
- * Arguments: @n@ = number of words
- *
- * Use: Expands to the number of bytes occupied by a given number of
- * words.
- */
-
-#define MPWS(n) ((n) * sizeof(mpw))
-
/*----- General manipulation ----------------------------------------------*/
/* --- @MPX_SHRINK@ --- *
*/
#define MPX_SHRINK(v, vl) do { \
- const mpw *_v = (v), *_vl = (vl); \
- while (_vl > _v && *--_vl) \
- ; \
- (vl) = _vl; \
+ const mpw *_vv = (v), *_vvl = (vl); /* not `_v'/`_vl': callers such as @MPX_BITS@ pass variables with those names */ \
+ while (_vvl > _vv && !_vvl[-1]) /* the word just below the limit is zero... */ \
+ _vvl--; /* ...so pull the limit down over it */ \
+ (vl) = (mpw *)_vvl; /* hand the trimmed limit back; the cast drops the local `const' */ \
} while (0)
/* --- @MPX_BITS@ --- *
#define MPX_BITS(b, v, vl) do { \
const mpw *_v = (v), *_vl = (vl); \
+ MPX_SHRINK(_v, _vl); \
if (_v == _vl) \
(b) = 0; \
else { \
/* --- @MPX_OCTETS@ --- *
*
* Arguments: @size_t o@ = result variable
- * @const mpw *v@ = pointer to array of words
- * @size_t len@ = length of the array (from @MPX_LEN@)
+ * @const mpw *v, *vl@ = pointer to array of words
*
* Use: Calculates the number of octets in a multiprecision value.
*/
-#define MPX_OCTETS(o, v, len) do { \
+#define MPX_OCTETS(o, v, vl) do { \
const mpw *_v = (v), *_vl = (vl); \
+ MPX_SHRINK(_v, _vl); \
if (_v == _vl) \
(o) = 0; \
else { \
- _size_t _o = (MPW_BITS / 8) * (_vl - _v - 1); \
+ size_t _o = (MPW_BITS / 8) * (_vl - _v - 1); \
mpw _w = _vl[-1]; \
unsigned _k = MPW_BITS / 2; \
- while (_k > 3) { \
+ while (_k >= 8) { \
if (_w >> _k) { \
_w >>= _k; \
- _o += _k - 3; \
+ _o += _k >> 3; \
} \
_k >>= 1; \
} \
* Use: Copies a multiprecision integer.
*/
-#define MPX_COPY(dv, dvl, av, dvl) do { \
- mpw *_dv = (dv); \
- size_t _dn = (dvl) - _dv; \
- const mpw *_av = (av); \
- size_t _an = (avl) - _av; \
+#define MPX_COPY(dv, dvl, av, avl) do { \
+ mpw *_dv = (dv), *_dvl = (dvl); \
+ size_t _dn = _dvl - _dv; \
+ const mpw *_av = (av), *_avl = (avl); \
+ size_t _an = _avl - _av; \
if (_av == _dv) { \
if (_dvl > _avl) \
- memset(_avl, 0, MPWS(_dn - _an)); \
+ memset(_dv, 0, MPWS(_dn - _an)); \
} else if (_an >= _dn) \
memmove(_dv, _av, MPWS(_dn)); \
else { \
* Use: Zeroes the area between the two vector pointers.
*/
-#define MPX_ZERO(v, vl) { \
+#define MPX_ZERO(v, vl) do { \
mpw *_v = (v), *_vl = (vl); \
- memset(_v, 0, MPWS(_vl - _v)); \
+ if (_v < _vl) /* nothing to clear unless the limit lies above the base */ \
+ memset(_v, 0, MPWS(_vl - _v)); /* zero the words in [_v, _vl); @MPWS@ presumably converts words to bytes */ \
} while (0)
/*----- Loading and storing -----------------------------------------------*/
/* --- @mpx_storel@ --- *
*
* Arguments: @const mpw *v, *vl@ = base and limit of source vector
- * @octet *p@ = pointer to octet array
+ * @void *p@ = pointer to octet array
* @size_t sz@ = size of octet array
*
* Returns: ---
*/
extern void mpx_storel(const mpw */*v*/, const mpw */*vl*/,
- octet */*p*/, size_t /*sz*/);
+ void */*p*/, size_t /*sz*/);
/* --- @mpx_loadl@ --- *
*
* Arguments: @mpw *v, *vl@ = base and limit of destination vector
- * @const octet *p@ = pointer to octet array
+ * @const void *p@ = pointer to octet array
* @size_t sz@ = size of octet array
*
* Returns: ---
*/
extern void mpx_loadl(mpw */*v*/, mpw */*vl*/,
- const octet */*p*/, size_t /*sz*/);
+ const void */*p*/, size_t /*sz*/);
/* --- @mpx_storeb@ --- *
*
* Arguments: @const mpw *v, *vl@ = base and limit of source vector
- * @octet *p@ = pointer to octet array
+ * @void *p@ = pointer to octet array
* @size_t sz@ = size of octet array
*
* Returns: ---
*/
extern void mpx_storeb(const mpw */*v*/, const mpw */*vl*/,
- octet */*p*/, size_t /*sz*/);
+ void */*p*/, size_t /*sz*/);
/* --- @mpx_loadb@ --- *
*
* Arguments: @mpw *v, *vl@ = base and limit of destination vector
- * @const octet *p@ = pointer to octet array
+ * @const void *p@ = pointer to octet array
* @size_t sz@ = size of octet array
*
* Returns: ---
*/
extern void mpx_loadb(mpw */*v*/, mpw */*vl*/,
- const octet */*p*/, size_t /*sz*/);
+ const void */*p*/, size_t /*sz*/);
/*----- Logical shifting --------------------------------------------------*/
/*----- Unsigned arithmetic -----------------------------------------------*/
+/* --- @mpx_2c@ --- *
+ *
+ * Arguments: @mpw *dv, *dvl@ = destination vector
+ * @const mpw *v, *vl@ = source vector
+ *
+ * Returns: ---
+ *
+ * Use: Calculates the two's complement of @v@.
+ */
+
+extern void mpx_2c(mpw */*dv*/, mpw */*dvl*/,
+ const mpw */*v*/, const mpw */*vl*/);
+
/* --- @mpx_ucmp@ --- *
*
* Arguments: @const mpw *av, *avl@ = first argument vector base and limit
const mpw */*av*/, const mpw */*avl*/,
const mpw */*bv*/, const mpw */*bvl*/);
+/* --- @mpx_uaddn@ --- *
+ *
+ * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
+ * @mpw n@ = other addend
+ *
+ * Returns: ---
+ *
+ * Use: Adds a small integer to a multiprecision number.
+ */
+
+#define MPX_UADDN(dv, dvl, n) do { \
+ mpw *_ddv = (dv), *_ddvl = (dvl); \
+ mpw _c = (n); /* running carry, seeded with the addend */ \
+ \
+ while (_c && _ddv < _ddvl) { /* stop once the carry dies out or the vector ends */ \
+ mpd _x = (mpd)*_ddv + (mpd)_c; /* sum held in @mpd@ (presumably double-width; see mpw.h) */ \
+ *_ddv++ = MPW(_x); /* store the part that fits in one word */ \
+ _c = _x >> MPW_BITS; /* remainder carries on; it is dropped if @dvl@ is reached */ \
+ } \
+} while (0)
+
+extern void mpx_uaddn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
+
/* --- @mpx_usub@ --- *
*
* Arguments: @mpw *dv, *dvl@ = destination vector base and limit
* Use: Performs unsigned integer subtraction. If the result
* overflows the destination vector, high-order bits are
* discarded. This means that two's complement subtraction
- * happens more or less for free, althuogh that's more a side-
+ * happens more or less for free, although that's more a side-
* effect than anything else. The result vector may be equal to
* either or both source vectors, but may not otherwise overlap
* them.
const mpw */*av*/, const mpw */*avl*/,
const mpw */*bv*/, const mpw */*bvl*/);
-/* --- @MPX_UMULN@ --- *
+/* --- @mpx_usubn@ --- *
*
- * Arguments: @dv, dvl@ = destination vector base and limit
- * @av, avl@ = multiplicand vector base and limit
- * @m@ = multiplier
+ * Arguments: @mpw *dv, *dvl@ = source and destination base and limit
+ * @mpw n@ = subtrahend
+ *
+ * Returns: ---
+ *
+ * Use: Subtracts a small integer from a multiprecision number.
+ */
+
+#define MPX_USUBN(dv, dvl, n) do { \
+ mpw *_ddv = (dv), *_ddvl = (dvl); \
+ mpw _c = (n); /* amount to take from the current word: @n@ first, then a borrow of 1 */ \
+ \
+ while (_ddv < _ddvl) { \
+ mpd _x = (mpd)*_ddv - (mpd)_c; /* assumes @mpd@ is unsigned and wider than @mpw@ -- confirm in mpw.h */ \
+ *_ddv++ = MPW(_x); /* store the low word of the difference */ \
+ if (_x >> MPW_BITS) /* bits above the low word: the subtraction wrapped */ \
+ _c = 1; /* borrow one from the next word up */ \
+ else \
+ break; /* no borrow left, the remaining words are unchanged */ \
+ } \
+} while (0)
+
+extern void mpx_usubn(mpw */*dv*/, mpw */*dvl*/, mpw /*n*/);
+
+/* --- @mpx_umul@ --- *
+ *
+ * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
+ * @const mpw *av, *avl@ = multiplicand vector base and limit
+ * @const mpw *bv, *bvl@ = multiplier vector base and limit
+ *
+ * Returns: ---
+ *
+ * Use: Performs unsigned integer multiplication. If the result
+ * overflows the destination vector, high-order bits are
+ * discarded. The result vector may not overlap the argument
+ * vectors in any way.
+ */
+
+extern void mpx_umul(mpw */*dv*/, mpw */*dvl*/,
+ const mpw */*av*/, const mpw */*avl*/,
+ const mpw */*bv*/, const mpw */*bvl*/);
+
+/* --- @mpx_umuln@ --- *
+ *
+ * Arguments: @mpw *dv, *dvl@ = destination vector base and limit
+ * @const mpw *av, *avl@ = multiplicand vector base and limit
+ * @mpw m@ = multiplier
+ *
+ * Returns: ---
*
* Use: Multiplies a multiprecision integer by a single-word value.
* The destination and source may be equal. The destination
mpd _x; \
if (_dv >= _dvl) \
break; \
- _x = _m * *_av++ + c; \
+ _x = (mpd)_m * (mpd)*_av++ + _c; \
*_dv++ = MPW(_x); \
_c = _x >> MPW_BITS; \
} \
} \
} while (0)
-/* --- @MPX_UMLAN@ --- *
+extern void mpx_umuln(mpw */*dv*/, mpw */*dvl*/,
+ const mpw */*av*/, const mpw */*avl*/, mpw m);
+
+/* --- @mpx_umlan@ --- *
+ *
+ * Arguments: @mpw *dv, *dvl@ = destination/accumulator base and limit
+ * @const mpw *av, *avl@ = multiplicand vector base and limit
+ * @mpw m@ = multiplier
*
- * Arguments: @dv, dvl@ = destination/accumulator vector base and limit
- * @av, avl@ = multiplicand vector base and limit
- * @m@ = multiplier
+ * Returns: ---
*
* Use: Multiplies a multiprecision integer by a single-word value
* and adds the result to an accumulator.
#define MPX_UMLAN(dv, dvl, av, avl, m) do { \
mpw *_dv = (dv), *_dvl = (dvl); \
const mpw *_av = (av), *_avl = (avl); \
- mpw _c = 0; \
+ mpw _cc = 0; /* named `_cc' because @MPX_UADDN@, expanded below, declares its own `_c' */ \
mpd _m = (m); \
\
- while (_av < _avl) { \
+ while (_dv < _dvl && _av < _avl) { /* stop at whichever vector ends first */ \
mpd _x; \
- if (_dv >= _dvl) \
- break; \
- _x = *_dv + _m * *_av++ + _c; \
+ _x = (mpd)*_dv + (mpd)_m * (mpd)*_av++ + _cc; /* accumulator word + product + carry in, all widened to @mpd@ */ \
*_dv++ = MPW(_x); \
- _c = _x >> MPW_BITS; \
- } \
- if (_dv < _dvl) { \
- *_dv++ = MPW(_c); \
- MPX_ZERO(_dv, _dvl); \
+ _cc = _x >> MPW_BITS; /* carry out for the next word */ \
} \
+ MPX_UADDN(_dv, _dvl, _cc); /* add the last carry into the remaining accumulator words rather than overwriting them */ \
} while (0)
-/* --- @mpx_umul@ --- *
+extern void mpx_umlan(mpw */*dv*/, mpw */*dvl*/,
+ const mpw */*av*/, const mpw */*avl*/, mpw m);
+
+/* --- @mpx_usqr@ --- *
*
* Arguments: @mpw *dv, *dvl@ = destination vector base and limit
- * @const mpw *av, *avl@ = multiplicand vector base and limit
- * @const mpw *bv, *bvl@ = multiplier vector base and limit
+ * @const mpw *av, *avl@ = source vector base and limit
*
* Returns: ---
*
- * Use: Performs unsigned integer multiplication. If the result
- * overflows the desination vector, high-order bits are
- * discarded. The result vector may not overlap the argument
- * vectors in any way.
+ * Use: Performs unsigned integer squaring. The result vector must
+ * not overlap the source vector in any way.
*/
-extern void mpx_umul(mpw */*dv*/, mpw */*dvl*/,
- const mpw */*av*/, const mpw */*avl*/,
- const mpw */*bv*/, const mpw */*bvl*/);
+extern void mpx_usqr(mpw */*dv*/, mpw */*dvl*/,
+ const mpw */*av*/, const mpw */*avl*/);
/* --- @mpx_udiv@ --- *
*
* Arguments: @mpw *qv, *qvl@ = quotient vector base and limit
* @mpw *rv, *rvl@ = dividend/remainder vector base and limit
* @const mpw *dv, *dvl@ = divisor vector base and limit
+ * @mpw *sv, *svl@ = scratch workspace
*
* Returns: ---
*
* the remainder vector can't overflow.) The various vectors
* may not overlap in any way. Yes, I know it's a bit odd
* requiring the dividend to be in the result position but it
- * does make some sense really.
+ * does make some sense really. The remainder must have
+ * headroom for at least two extra words. The scratch space
+ * must be at least one word larger than the divisor.
*/
extern void mpx_udiv(mpw */*qv*/, mpw */*qvl*/, mpw */*rv*/, mpw */*rvl*/,
- const mpw */*dv*/, const mpw */*dvl*/);
+ const mpw */*dv*/, const mpw */*dvl*/,
+ mpw */*sv*/, mpw */*svl*/);
+
+/* --- @mpx_udivn@ --- *
+ *
+ * Arguments: @mpw *qv, *qvl@ = storage for the quotient (may overlap
+ * dividend)
+ * @const mpw *rv, *rvl@ = dividend
+ * @mpw d@ = single-precision divisor
+ *
+ * Returns: Remainder after division.
+ *
+ * Use: Performs a single-precision division operation.
+ */
+
+extern mpw mpx_udivn(mpw */*qv*/, mpw */*qvl*/,
+ const mpw */*rv*/, const mpw */*rvl*/, mpw /*d*/);
+
+/*----- Karatsuba multiplication algorithms -------------------------------*/
+
+/* --- @KARATSUBA_CUTOFF@ --- *
+ *
+ * This is the limiting length for using Karatsuba algorithms. It's best to
+ * use the simpler classical multiplication method on numbers smaller than
+ * this.
+ */
+
+#define KARATSUBA_CUTOFF 16
+
+/* --- @KARATSUBA_SLOP@ --- *
+ *
+ * The extra number of words required as scratch space by the Karatsuba
+ * routines. This is a (generous) guess, since the actual amount of space
+ * required is proportional to the recursion depth.
+ */
+
+#define KARATSUBA_SLOP 64
+
+/* --- @mpx_kmul@ --- *
+ *
+ * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
+ * @const mpw *av, *avl@ = pointer to first argument
+ * @const mpw *bv, *bvl@ = pointer to second argument
+ * @mpw *sv, *svl@ = pointer to scratch workspace
+ *
+ * Returns: ---
+ *
+ * Use: Multiplies two multiprecision integers using Karatsuba's
+ * algorithm. This is rather faster than traditional long
+ * multiplication (e.g., @mpx_umul@) on large numbers, although
+ * more expensive on small ones.
+ *
+ * The destination buffer must be twice as large as the larger
+ * argument. The scratch space must be twice as
+ * large as the larger argument, plus the magic number
+ * @KARATSUBA_SLOP@.
+ */
+
+extern void mpx_kmul(mpw */*dv*/, mpw */*dvl*/,
+ const mpw */*av*/, const mpw */*avl*/,
+ const mpw */*bv*/, const mpw */*bvl*/,
+ mpw */*sv*/, mpw */*svl*/);
+
+/* --- @mpx_ksqr@ --- *
+ *
+ * Arguments: @mpw *dv, *dvl@ = pointer to destination buffer
+ * @const mpw *av, *avl@ = pointer to argument
+ * @mpw *sv, *svl@ = pointer to scratch workspace
+ *
+ * Returns: ---
+ *
+ * Use: Squares a multiprecision integer using something similar to
+ * Karatsuba's multiplication algorithm. This is rather faster
+ * than traditional long multiplication (e.g., @mpx_umul@) on
+ * large numbers, although more expensive on small ones, and
+ * rather simpler than full-blown Karatsuba multiplication.
+ *
+ * The destination must be twice as large as the argument. The
+ * scratch space must be twice as large as the argument, plus
+ * the magic number @KARATSUBA_SLOP@.
+ */
+
+extern void mpx_ksqr(mpw */*dv*/, mpw */*dvl*/,
+ const mpw */*av*/, const mpw */*avl*/,
+ mpw */*sv*/, mpw */*svl*/);
/*----- That's all, folks -------------------------------------------------*/