The internal arithmetic used to be done on a big-endian representation
of the integers, while the external format was defined to be
little-endian. This causes a bunch of indexing operations to be more
complicated than they really need to be, and, more importantly,
introduces an unfortunate mismatch between the two different strata of
the bignum library, which will be annoying later. Finally, and least
important of all, in my very brief testing, the bignum test runs about
4.7% faster with this new version.
Signed-off-by: Mark Wooding <mdw@distorted.org.uk>
/*
* Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
/*
* Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
- * big-endian arrays of 'len' BignumInts. Returns a BignumInt carried
+ * little-endian arrays of 'len' BignumInts. Returns a BignumInt carried
* off the top.
*/
static BignumInt internal_add(const BignumInt *a, const BignumInt *b,
* off the top.
*/
static BignumInt internal_add(const BignumInt *a, const BignumInt *b,
int i;
BignumDblInt carry = 0;
int i;
BignumDblInt carry = 0;
- for (i = len-1; i >= 0; i--) {
+ for (i = 0; i < len; i++) {
carry += (BignumDblInt)a[i] + b[i];
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
carry += (BignumDblInt)a[i] + b[i];
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
/*
* Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
/*
* Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
- * all big-endian arrays of 'len' BignumInts. Any borrow from the top
+ * all little-endian arrays of 'len' BignumInts. Any borrow from the top
* is ignored.
*/
static void internal_sub(const BignumInt *a, const BignumInt *b,
* is ignored.
*/
static void internal_sub(const BignumInt *a, const BignumInt *b,
int i;
BignumDblInt carry = 1;
int i;
BignumDblInt carry = 1;
- for (i = len-1; i >= 0; i--) {
+ for (i = 0; i < len; i++) {
carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
printf("a1,a0 = 0x");
for (i = 0; i < len; i++) {
if (i == toplen) printf(", 0x");
printf("a1,a0 = 0x");
for (i = 0; i < len; i++) {
if (i == toplen) printf(", 0x");
- printf("%0*x", BIGNUM_INT_BITS/4, a[i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, a[len - 1 - i]);
}
printf("\n");
printf("b1,b0 = 0x");
for (i = 0; i < len; i++) {
if (i == toplen) printf(", 0x");
}
printf("\n");
printf("b1,b0 = 0x");
for (i = 0; i < len; i++) {
if (i == toplen) printf(", 0x");
- printf("%0*x", BIGNUM_INT_BITS/4, b[i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, b[len - 1 - i]);
}
printf("\n");
#endif
/* a_1 b_1 */
}
printf("\n");
#endif
/* a_1 b_1 */
- internal_mul(a, b, c, toplen, scratch);
+ internal_mul(a + botlen, b + botlen, c + 2*botlen, toplen, scratch);
#ifdef KARA_DEBUG
printf("a1b1 = 0x");
for (i = 0; i < 2*toplen; i++) {
#ifdef KARA_DEBUG
printf("a1b1 = 0x");
for (i = 0; i < 2*toplen; i++) {
- printf("%0*x", BIGNUM_INT_BITS/4, c[i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, c[2*len - 1 - i]);
}
printf("\n");
#endif
/* a_0 b_0 */
}
printf("\n");
#endif
/* a_0 b_0 */
- internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen, scratch);
+ internal_mul(a, b, c, botlen, scratch);
#ifdef KARA_DEBUG
printf("a0b0 = 0x");
for (i = 0; i < 2*botlen; i++) {
#ifdef KARA_DEBUG
printf("a0b0 = 0x");
for (i = 0; i < 2*botlen; i++) {
- printf("%0*x", BIGNUM_INT_BITS/4, c[2*toplen+i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, c[2*botlen - 1 - i]);
- /* Zero padding. midlen exceeds toplen by at most 2, so just
- * zero the first two words of each input and the rest will be
- * copied over. */
- scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;
+ /* Zero padding. botlen exceeds toplen by at most 1, and we'll set
+ * the extra carry explicitly below, so we only need to zero at most
+ * one of the top words here.
+ */
+ scratch[midlen - 2] = scratch[2*midlen - 2] = 0;
for (i = 0; i < toplen; i++) {
for (i = 0; i < toplen; i++) {
- scratch[midlen - toplen + i] = a[i]; /* a_1 */
- scratch[2*midlen - toplen + i] = b[i]; /* b_1 */
+ scratch[i] = a[i + botlen]; /* a_1 */
+ scratch[midlen + i] = b[i + botlen]; /* b_1 */
}
/* compute a_1 + a_0 */
}
/* compute a_1 + a_0 */
- scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen);
+ scratch[midlen - 1] = internal_add(scratch, a, scratch, botlen);
#ifdef KARA_DEBUG
printf("a1plusa0 = 0x");
for (i = 0; i < midlen; i++) {
#ifdef KARA_DEBUG
printf("a1plusa0 = 0x");
for (i = 0; i < midlen; i++) {
- printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen - 1 - i]);
}
printf("\n");
#endif
/* compute b_1 + b_0 */
}
printf("\n");
#endif
/* compute b_1 + b_0 */
- scratch[midlen] = internal_add(scratch+midlen+1, b+toplen,
- scratch+midlen+1, botlen);
+ scratch[2*midlen - 1] = internal_add(scratch+midlen, b,
+ scratch+midlen, botlen);
#ifdef KARA_DEBUG
printf("b1plusb0 = 0x");
for (i = 0; i < midlen; i++) {
#ifdef KARA_DEBUG
printf("b1plusb0 = 0x");
for (i = 0; i < midlen; i++) {
- printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen+i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen - 1 - i]);
#ifdef KARA_DEBUG
printf("a1plusa0timesb1plusb0 = 0x");
for (i = 0; i < 2*midlen; i++) {
#ifdef KARA_DEBUG
printf("a1plusa0timesb1plusb0 = 0x");
for (i = 0; i < 2*midlen; i++) {
- printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, scratch[4*midlen - 1 - i]);
* sum of the outer two coefficients, to subtract from that
* product to obtain the middle one.
*/
* sum of the outer two coefficients, to subtract from that
* product to obtain the middle one.
*/
- scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;
+ scratch[2*botlen - 2] = scratch[2*botlen - 1] = 0;
for (i = 0; i < 2*toplen; i++)
for (i = 0; i < 2*toplen; i++)
- scratch[2*midlen - 2*toplen + i] = c[i];
- scratch[1] = internal_add(scratch+2, c + 2*toplen,
- scratch+2, 2*botlen);
+ scratch[i] = c[2*botlen + i];
+ scratch[2*botlen] = internal_add(scratch, c, scratch, 2*botlen);
+ scratch[2*botlen + 1] = 0;
#ifdef KARA_DEBUG
printf("a1b1plusa0b0 = 0x");
for (i = 0; i < 2*midlen; i++) {
#ifdef KARA_DEBUG
printf("a1b1plusa0b0 = 0x");
for (i = 0; i < 2*midlen; i++) {
- printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen - 1 - i]);
- internal_sub(scratch + 2*midlen, scratch,
- scratch + 2*midlen, 2*midlen);
+ internal_sub(scratch + 2*midlen, scratch, scratch, 2*midlen);
#ifdef KARA_DEBUG
printf("a1b0plusa0b1 = 0x");
for (i = 0; i < 2*midlen; i++) {
#ifdef KARA_DEBUG
printf("a1b0plusa0b1 = 0x");
for (i = 0; i < 2*midlen; i++) {
- printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, scratch[4*midlen - 1 - i]);
* further up the output, but we can be sure it won't
* propagate right the way off the top.
*/
* further up the output, but we can be sure it won't
* propagate right the way off the top.
*/
- carry = internal_add(c + 2*len - botlen - 2*midlen,
- scratch + 2*midlen,
- c + 2*len - botlen - 2*midlen, 2*midlen);
- i = 2*len - botlen - 2*midlen - 1;
+ carry = internal_add(c + botlen, scratch, c + botlen, 2*midlen);
+ i = botlen + 2*midlen;
carry += c[i];
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
carry += c[i];
c[i] = (BignumInt)carry;
carry >>= BIGNUM_INT_BITS;
}
#ifdef KARA_DEBUG
printf("ab = 0x");
for (i = 0; i < 2*len; i++) {
}
#ifdef KARA_DEBUG
printf("ab = 0x");
for (i = 0; i < 2*len; i++) {
- printf("%0*x", BIGNUM_INT_BITS/4, c[i]);
+ printf("%0*x", BIGNUM_INT_BITS/4, c[2*len - i]);
int i;
BignumInt carry;
BignumDblInt t;
int i;
BignumInt carry;
BignumDblInt t;
- const BignumInt *ap, *bp;
+ const BignumInt *ap, *alim = a + len, *bp, *blim = b + len;
for (i = 0; i < 2 * len; i++)
c[i] = 0;
for (i = 0; i < 2 * len; i++)
c[i] = 0;
- for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) {
+ for (cps = c, ap = a; ap < alim; ap++, cps++) {
- for (cp = cps, bp = b + len; cp--, bp-- > b ;) {
+ for (cp = cps, bp = b, i = blim - bp; i--; bp++, cp++) {
t = (MUL_WORD(*ap, *bp) + carry) + *cp;
*cp = (BignumInt) t;
carry = (BignumInt)(t >> BIGNUM_INT_BITS);
t = (MUL_WORD(*ap, *bp) + carry) + *cp;
*cp = (BignumInt) t;
carry = (BignumInt)(t >> BIGNUM_INT_BITS);
- internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen,
- scratch + 2*len);
+ internal_mul(a, b, scratch + 2*toplen, botlen, scratch + 2*len);
- internal_mul_low(a, b + len - toplen, scratch + toplen, toplen,
+ internal_mul_low(a + botlen, b, scratch + toplen, toplen,
scratch + 2*len);
/* a_0 b_1 */
scratch + 2*len);
/* a_0 b_1 */
- internal_mul_low(a + len - toplen, b, scratch, toplen,
- scratch + 2*len);
+ internal_mul_low(a, b + botlen, scratch, toplen, scratch + 2*len);
/* Copy the bottom half of the big coefficient into place */
for (i = 0; i < botlen; i++)
/* Copy the bottom half of the big coefficient into place */
for (i = 0; i < botlen; i++)
- c[toplen + i] = scratch[2*toplen + botlen + i];
+ c[i] = scratch[2*toplen + i];
/* Add the two small coefficients, throwing away the returned carry */
internal_add(scratch, scratch + toplen, scratch, toplen);
/* And add that to the large coefficient, leaving the result in c. */
/* Add the two small coefficients, throwing away the returned carry */
internal_add(scratch, scratch + toplen, scratch, toplen);
/* And add that to the large coefficient, leaving the result in c. */
- internal_add(scratch, scratch + 2*toplen + botlen - toplen,
- c, toplen);
+ internal_add(scratch, scratch + 2*toplen + botlen,
+ c + botlen, toplen);
} else {
int i;
BignumInt carry;
BignumDblInt t;
} else {
int i;
BignumInt carry;
BignumDblInt t;
- const BignumInt *ap, *bp;
- BignumInt *cp, *cps;
+ const BignumInt *ap, *alim = a + len, *bp;
+ BignumInt *cp, *cps, *clim = c + len;
/*
* Multiply in the ordinary O(N^2) way.
/*
* Multiply in the ordinary O(N^2) way.
for (i = 0; i < len; i++)
c[i] = 0;
for (i = 0; i < len; i++)
c[i] = 0;
- for (cps = c + len, ap = a + len; ap-- > a; cps--) {
+ for (cps = c, ap = a; ap < alim; ap++, cps++) {
- for (cp = cps, bp = b + len; bp--, cp-- > c ;) {
+ for (cp = cps, bp = b, i = clim - cp; i--; bp++, cp++) {
t = (MUL_WORD(*ap, *bp) + carry) + *cp;
*cp = (BignumInt) t;
carry = (BignumInt)(t >> BIGNUM_INT_BITS);
t = (MUL_WORD(*ap, *bp) + carry) + *cp;
*cp = (BignumInt) t;
carry = (BignumInt)(t >> BIGNUM_INT_BITS);
- * Montgomery reduction. Expects x to be a big-endian array of 2*len
+ * Montgomery reduction. Expects x to be a little-endian array of 2*len
* BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
* BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
* a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
* x' < n.
*
* BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
* BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
* a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
* x' < n.
*
- * 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts
+ * 'n' and 'mninv' should be little-endian arrays of 'len' BignumInts
* each, containing respectively n and the multiplicative inverse of
* -n mod r.
*
* each, containing respectively n and the multiplicative inverse of
* -n mod r.
*
* that mn is congruent to -x mod r. Hence, mn+x is an exact
* multiple of r, and is also (obviously) congruent to x mod n.
*/
* that mn is congruent to -x mod r. Hence, mn+x is an exact
* multiple of r, and is also (obviously) congruent to x mod n.
*/
- internal_mul_low(x + len, mninv, tmp, len, tmp + 3*len);
+ internal_mul_low(x, mninv, tmp, len, tmp + 3*len);
/*
* Compute t = (mn+x)/r in ordinary, non-modular, integer
/*
* Compute t = (mn+x)/r in ordinary, non-modular, integer
internal_mul(tmp, n, tmp+len, len, tmp + 3*len);
carry = internal_add(x, tmp+len, x, 2*len);
for (i = 0; i < len; i++)
internal_mul(tmp, n, tmp+len, len, tmp + 3*len);
carry = internal_add(x, tmp+len, x, 2*len);
for (i = 0; i < len; i++)
- x[len + i] = x[i], x[i] = 0;
+ x[i] = x[len + i], x[len + i] = 0;
/*
* Reduce t mod n. This doesn't require a full-on division by n,
/*
* Reduce t mod n. This doesn't require a full-on division by n,
* + yielding 0 <= (mn+x)/r < 2n as required.
*/
if (!carry) {
* + yielding 0 <= (mn+x)/r < 2n as required.
*/
if (!carry) {
- for (i = 0; i < len; i++)
- if (x[len + i] != n[i])
+ for (i = len; i-- > 0; )
+ if (x[i] != n[i])
- if (carry || i >= len || x[len + i] > n[i])
- internal_sub(x+len, n, x+len, len);
+ if (carry || i < 0 || x[i] > n[i])
+ internal_sub(x, n, x, len);
}
static void internal_add_shifted(BignumInt *number,
}
static void internal_add_shifted(BignumInt *number,
* Compute a = a % m.
* Input in first alen words of a and first mlen words of m.
* Output in first alen words of a
* Compute a = a % m.
* Input in first alen words of a and first mlen words of m.
* Output in first alen words of a
- * (of which first alen-mlen words will be zero).
+ * (of which last alen-mlen words will be zero).
* The MSW of m MUST have its high bit set.
* The MSW of m MUST have its high bit set.
- * Quotient is accumulated in the `quotient' array, which is a Bignum
- * rather than the internal bigendian format. Quotient parts are shifted
- * left by `qshift' before adding into quot.
+ * Quotient is accumulated in the `quotient' array. Quotient parts
+ * are shifted left by `qshift' before adding into quot.
*/
static void internal_mod(BignumInt *a, int alen,
BignumInt *m, int mlen,
*/
static void internal_mod(BignumInt *a, int alen,
BignumInt *m, int mlen,
{
BignumInt m0, m1;
unsigned int h;
{
BignumInt m0, m1;
unsigned int h;
- for (i = 0; i <= alen - mlen; i++) {
+ for (i = alen, h = 0; i-- >= mlen; ) {
BignumDblInt t;
unsigned int q, r, c, ai1;
BignumDblInt t;
unsigned int q, r, c, ai1;
- if (i == 0) {
- h = 0;
- } else {
- h = a[i - 1];
- a[i - 1] = 0;
- }
-
- if (i == alen - 1)
- ai1 = 0;
- else
- ai1 = a[i + 1];
+ if (i)
+ ai1 = a[i - 1];
+ else
+ ai1 = 0;
/* Find q = h:a[i] / m0 */
if (h >= m0) {
/* Find q = h:a[i] / m0 */
if (h >= m0) {
DIVMOD_WORD(q, r, h, tmplo, m0);
/* Refine our estimate of q by looking at
DIVMOD_WORD(q, r, h, tmplo, m0);
/* Refine our estimate of q by looking at
- h:a[i]:a[i+1] / m0:m1 */
+ h:a[i]:a[i-1] / m0:m1 */
t = MUL_WORD(m1, q);
if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
q--;
t = MUL_WORD(m1, q);
if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
q--;
/* Subtract q * m from a[i...] */
c = 0;
/* Subtract q * m from a[i...] */
c = 0;
- for (k = mlen - 1; k >= 0; k--) {
+ for (k = 0; k < mlen; k++) {
t = MUL_WORD(q, m[k]);
t += c;
c = (unsigned)(t >> BIGNUM_INT_BITS);
t = MUL_WORD(q, m[k]);
t += c;
c = (unsigned)(t >> BIGNUM_INT_BITS);
- if ((BignumInt) t > a[i + k])
+ if ((BignumInt) t > a[j + k])
- a[i + k] -= (BignumInt) t;
+ a[j + k] -= (BignumInt) t;
}
/* Add back m in case of borrow */
if (c != h) {
t = 0;
}
/* Add back m in case of borrow */
if (c != h) {
t = 0;
- for (k = mlen - 1; k >= 0; k--) {
+ for (k = 0; k < mlen; k++) {
- t += a[i + k];
- a[i + k] = (BignumInt) t;
+ t += a[j + k];
+ a[j + k] = (BignumInt) t;
t = t >> BIGNUM_INT_BITS;
}
q--;
}
t = t >> BIGNUM_INT_BITS;
}
q--;
}
- internal_add_shifted(quot, q, qshift + BIGNUM_INT_BITS * (alen - mlen - i));
+ internal_add_shifted(quot, q,
+ qshift + BIGNUM_INT_BITS * (i + 1 - mlen));
+
+ if (i >= mlen) {
+ h = a[i];
+ a[i] = 0;
+ }
+static void shift_left(BignumInt *x, int xlen, int shift)
+{
+ int i;
+
+ if (!shift)
+ return;
+ for (i = xlen; --i > 0; )
+ x[i] = (x[i] << shift) | (x[i - 1] >> (BIGNUM_INT_BITS - shift));
+ x[0] = x[0] << shift;
+}
+
+static void shift_right(BignumInt *x, int xlen, int shift)
+{
+ int i;
+
+ if (!shift || !xlen)
+ return;
+ xlen--;
+ for (i = 0; i < xlen; i++)
+ x[i] = (x[i] >> shift) | (x[i + 1] << (BIGNUM_INT_BITS - shift));
+ x[i] = x[i] >> shift;
+}
+
/*
* Compute (base ^ exp) % mod, the pedestrian way.
*/
/*
* Compute (base ^ exp) % mod, the pedestrian way.
*/
base = bigmod(base_in, mod);
/* Allocate m of size mlen, copy mod to m */
base = bigmod(base_in, mod);
/* Allocate m of size mlen, copy mod to m */
- /* We use big endian internally */
mlen = mod[0];
m = snewn(mlen, BignumInt);
for (j = 0; j < mlen; j++)
mlen = mod[0];
m = snewn(mlen, BignumInt);
for (j = 0; j < mlen; j++)
- m[j] = mod[mod[0] - j];
/* Shift m left to make msb bit set */
for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
/* Shift m left to make msb bit set */
for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
- if ((m[0] << mshift) & BIGNUM_TOP_BIT)
+ if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
- if (mshift) {
- for (i = 0; i < mlen - 1; i++)
- m[i] = (m[i] << mshift) | (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
- m[mlen - 1] = m[mlen - 1] << mshift;
- }
+ if (mshift)
+ shift_left(m, mlen, mshift);
/* Allocate n of size mlen, copy base to n */
n = snewn(mlen, BignumInt);
/* Allocate n of size mlen, copy base to n */
n = snewn(mlen, BignumInt);
- i = mlen - base[0];
- for (j = 0; j < i; j++)
- n[j] = 0;
- for (j = 0; j < (int)base[0]; j++)
- n[i + j] = base[base[0] - j];
+ for (i = 0; i < (int)base[0]; i++)
+ n[i] = base[i + 1];
+ for (; i < mlen; i++)
+ n[i] = 0;
/* Allocate a and b of size 2*mlen. Set a = 1 */
a = snewn(2 * mlen, BignumInt);
b = snewn(2 * mlen, BignumInt);
/* Allocate a and b of size 2*mlen. Set a = 1 */
a = snewn(2 * mlen, BignumInt);
b = snewn(2 * mlen, BignumInt);
- for (i = 0; i < 2 * mlen; i++)
+ a[0] = 1;
+ for (i = 1; i < 2 * mlen; i++)
/* Scratch space for multiplies */
scratchlen = mul_compute_scratch(mlen);
/* Scratch space for multiplies */
scratchlen = mul_compute_scratch(mlen);
/* Main computation */
while (i < (int)exp[0]) {
while (j >= 0) {
/* Main computation */
while (i < (int)exp[0]) {
while (j >= 0) {
- internal_mul(a + mlen, a + mlen, b, mlen, scratch);
+ internal_mul(a, a, b, mlen, scratch);
internal_mod(b, mlen * 2, m, mlen, NULL, 0);
if ((exp[exp[0] - i] & (1 << j)) != 0) {
internal_mod(b, mlen * 2, m, mlen, NULL, 0);
if ((exp[exp[0] - i] & (1 << j)) != 0) {
- internal_mul(b + mlen, n, a, mlen, scratch);
+ internal_mul(b, n, a, mlen, scratch);
internal_mod(a, mlen * 2, m, mlen, NULL, 0);
} else {
BignumInt *t;
internal_mod(a, mlen * 2, m, mlen, NULL, 0);
} else {
BignumInt *t;
/* Fixup result in case the modulus was shifted */
if (mshift) {
/* Fixup result in case the modulus was shifted */
if (mshift) {
- for (i = mlen - 1; i < 2 * mlen - 1; i++)
- a[i] = (a[i] << mshift) | (a[i + 1] >> (BIGNUM_INT_BITS - mshift));
- a[2 * mlen - 1] = a[2 * mlen - 1] << mshift;
- internal_mod(a, mlen * 2, m, mlen, NULL, 0);
- for (i = 2 * mlen - 1; i >= mlen; i--)
- a[i] = (a[i] >> mshift) | (a[i - 1] << (BIGNUM_INT_BITS - mshift));
+ shift_left(a, mlen + 1, mshift);
+ internal_mod(a, mlen + 1, m, mlen, NULL, 0);
+ shift_right(a, mlen, mshift);
}
/* Copy result to buffer */
result = newbn(mod[0]);
for (i = 0; i < mlen; i++)
}
/* Copy result to buffer */
result = newbn(mod[0]);
for (i = 0; i < mlen; i++)
- result[result[0] - i] = a[i + mlen];
while (result[0] > 1 && result[result[0]] == 0)
result[0]--;
while (result[0] > 1 && result[result[0]] == 0)
result[0]--;
freebn(r); /* won't need this any more */
/*
freebn(r); /* won't need this any more */
/*
- * Set up internal arrays of the right lengths, in big-endian
- * format, containing the base, the modulus, and the modulus's
- * inverse.
+ * Set up internal arrays of the right lengths containing the base,
+ * the modulus, and the modulus's inverse.
*/
n = snewn(len, BignumInt);
for (j = 0; j < len; j++)
*/
n = snewn(len, BignumInt);
for (j = 0; j < len; j++)
- n[len - 1 - j] = mod[j + 1];
mninv = snewn(len, BignumInt);
for (j = 0; j < len; j++)
mninv = snewn(len, BignumInt);
for (j = 0; j < len; j++)
- mninv[len - 1 - j] = (j < (int)inv[0] ? inv[j + 1] : 0);
+ mninv[j] = (j < (int)inv[0] ? inv[j + 1] : 0);
freebn(inv); /* we don't need this copy of it any more */
/* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
x = snewn(len, BignumInt);
freebn(inv); /* we don't need this copy of it any more */
/* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
x = snewn(len, BignumInt);
/* x = snewn(len, BignumInt); */ /* already done above */
for (j = 0; j < len; j++)
/* x = snewn(len, BignumInt); */ /* already done above */
for (j = 0; j < len; j++)
- x[len - 1 - j] = (j < (int)base[0] ? base[j + 1] : 0);
+ x[j] = (j < (int)base[0] ? base[j + 1] : 0);
freebn(base); /* we don't need this copy of it any more */
a = snewn(2*len, BignumInt);
b = snewn(2*len, BignumInt);
for (j = 0; j < len; j++)
freebn(base); /* we don't need this copy of it any more */
a = snewn(2*len, BignumInt);
b = snewn(2*len, BignumInt);
for (j = 0; j < len; j++)
- a[2*len - 1 - j] = (j < (int)rn[0] ? rn[j + 1] : 0);
+ a[j] = (j < (int)rn[0] ? rn[j + 1] : 0);
freebn(rn);
/* Scratch space for multiplies */
freebn(rn);
/* Scratch space for multiplies */
/* Main computation */
while (i < (int)exp[0]) {
while (j >= 0) {
/* Main computation */
while (i < (int)exp[0]) {
while (j >= 0) {
- internal_mul(a + len, a + len, b, len, scratch);
+ internal_mul(a, a, b, len, scratch);
monty_reduce(b, n, mninv, scratch, len);
if ((exp[exp[0] - i] & (1 << j)) != 0) {
monty_reduce(b, n, mninv, scratch, len);
if ((exp[exp[0] - i] & (1 << j)) != 0) {
- internal_mul(b + len, x, a, len, scratch);
+ internal_mul(b, x, a, len, scratch);
monty_reduce(a, n, mninv, scratch, len);
} else {
BignumInt *t;
monty_reduce(a, n, mninv, scratch, len);
} else {
BignumInt *t;
/* Copy result to buffer */
result = newbn(mod[0]);
for (i = 0; i < len; i++)
/* Copy result to buffer */
result = newbn(mod[0]);
for (i = 0; i < len; i++)
- result[result[0] - i] = a[i + len];
while (result[0] > 1 && result[result[0]] == 0)
result[0]--;
while (result[0] > 1 && result[result[0]] == 0)
result[0]--;
Bignum result;
/* Allocate m of size mlen, copy mod to m */
Bignum result;
/* Allocate m of size mlen, copy mod to m */
- /* We use big endian internally */
mlen = mod[0];
m = snewn(mlen, BignumInt);
for (j = 0; j < mlen; j++)
mlen = mod[0];
m = snewn(mlen, BignumInt);
for (j = 0; j < mlen; j++)
- m[j] = mod[mod[0] - j];
/* Shift m left to make msb bit set */
for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
/* Shift m left to make msb bit set */
for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
- if ((m[0] << mshift) & BIGNUM_TOP_BIT)
+ if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
- if (mshift) {
- for (i = 0; i < mlen - 1; i++)
- m[i] = (m[i] << mshift) | (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
- m[mlen - 1] = m[mlen - 1] << mshift;
- }
+ if (mshift)
+ shift_left(m, mlen, mshift);
pqlen = (p[0] > q[0] ? p[0] : q[0]);
pqlen = (p[0] > q[0] ? p[0] : q[0]);
/* Allocate n of size pqlen, copy p to n */
n = snewn(pqlen, BignumInt);
/* Allocate n of size pqlen, copy p to n */
n = snewn(pqlen, BignumInt);
- i = pqlen - p[0];
- for (j = 0; j < i; j++)
- n[j] = 0;
- for (j = 0; j < (int)p[0]; j++)
- n[i + j] = p[p[0] - j];
+ for (i = 0; i < (int)p[0]; i++)
+ n[i] = p[i + 1];
+ for (; i < pqlen; i++)
+ n[i] = 0;
/* Allocate o of size pqlen, copy q to o */
o = snewn(pqlen, BignumInt);
/* Allocate o of size pqlen, copy q to o */
o = snewn(pqlen, BignumInt);
- i = pqlen - q[0];
- for (j = 0; j < i; j++)
- o[j] = 0;
- for (j = 0; j < (int)q[0]; j++)
- o[i + j] = q[q[0] - j];
+ for (i = 0; i < (int)q[0]; i++)
+ o[i] = q[i + 1];
+ for (; i < pqlen; i++)
+ o[i] = 0;
/* Allocate a of size 2*pqlen for result */
a = snewn(2 * pqlen, BignumInt);
/* Allocate a of size 2*pqlen for result */
a = snewn(2 * pqlen, BignumInt);
/* Fixup result in case the modulus was shifted */
if (mshift) {
/* Fixup result in case the modulus was shifted */
if (mshift) {
- for (i = 2 * pqlen - mlen - 1; i < 2 * pqlen - 1; i++)
- a[i] = (a[i] << mshift) | (a[i + 1] >> (BIGNUM_INT_BITS - mshift));
- a[2 * pqlen - 1] = a[2 * pqlen - 1] << mshift;
- internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
- for (i = 2 * pqlen - 1; i >= 2 * pqlen - mlen; i--)
- a[i] = (a[i] >> mshift) | (a[i - 1] << (BIGNUM_INT_BITS - mshift));
+ shift_left(a, mlen + 1, mshift);
+ internal_mod(a, mlen + 1, m, mlen, NULL, 0);
+ shift_right(a, mlen, mshift);
}
/* Copy result to buffer */
rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
result = newbn(rlen);
for (i = 0; i < rlen; i++)
}
/* Copy result to buffer */
rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
result = newbn(rlen);
for (i = 0; i < rlen; i++)
- result[result[0] - i] = a[i + 2 * pqlen - rlen];
while (result[0] > 1 && result[result[0]] == 0)
result[0]--;
while (result[0] > 1 && result[result[0]] == 0)
result[0]--;
int plen, mlen, i, j;
/* Allocate m of size mlen, copy mod to m */
int plen, mlen, i, j;
/* Allocate m of size mlen, copy mod to m */
- /* We use big endian internally */
mlen = mod[0];
m = snewn(mlen, BignumInt);
for (j = 0; j < mlen; j++)
mlen = mod[0];
m = snewn(mlen, BignumInt);
for (j = 0; j < mlen; j++)
- m[j] = mod[mod[0] - j];
/* Shift m left to make msb bit set */
for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
/* Shift m left to make msb bit set */
for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
- if ((m[0] << mshift) & BIGNUM_TOP_BIT)
+ if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
- if (mshift) {
- for (i = 0; i < mlen - 1; i++)
- m[i] = (m[i] << mshift) | (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
- m[mlen - 1] = m[mlen - 1] << mshift;
- }
+ if (mshift)
+ shift_left(m, mlen, mshift);
plen = p[0];
/* Ensure plen > mlen */
plen = p[0];
/* Ensure plen > mlen */
/* Allocate n of size plen, copy p to n */
n = snewn(plen, BignumInt);
/* Allocate n of size plen, copy p to n */
n = snewn(plen, BignumInt);
- for (j = 0; j < plen; j++)
- n[j] = 0;
- for (j = 1; j <= (int)p[0]; j++)
- n[plen - j] = p[j];
+ for (i = 0; i < (int)p[0]; i++)
+ n[i] = p[i + 1];
+ for (; i < plen; i++)
+ n[i] = 0;
/* Main computation */
internal_mod(n, plen, m, mlen, quotient, mshift);
/* Fixup result in case the modulus was shifted */
if (mshift) {
/* Main computation */
internal_mod(n, plen, m, mlen, quotient, mshift);
/* Fixup result in case the modulus was shifted */
if (mshift) {
- for (i = plen - mlen - 1; i < plen - 1; i++)
- n[i] = (n[i] << mshift) | (n[i + 1] >> (BIGNUM_INT_BITS - mshift));
- n[plen - 1] = n[plen - 1] << mshift;
+ shift_left(n, mlen + 1, mshift);
internal_mod(n, plen, m, mlen, quotient, 0);
internal_mod(n, plen, m, mlen, quotient, 0);
- for (i = plen - 1; i >= plen - mlen; i--)
- n[i] = (n[i] >> mshift) | (n[i - 1] << (BIGNUM_INT_BITS - mshift));
+ shift_right(n, mlen, mshift);
}
/* Copy result to buffer */
if (result) {
}
/* Copy result to buffer */
if (result) {
- for (i = 1; i <= (int)result[0]; i++) {
- int j = plen - i;
- result[i] = j >= 0 ? n[j] : 0;
- }
+ for (i = 0; i < (int)result[0]; i++)
+ result[i + 1] = i < plen ? n[i] : 0;
+ bn_restore_invariant(result);
}
/* Free temporary arrays */
}
/* Free temporary arrays */
wslen = mlen * 4 + mul_compute_scratch(mlen);
workspace = snewn(wslen, BignumInt);
for (i = 0; i < mlen; i++) {
wslen = mlen * 4 + mul_compute_scratch(mlen);
workspace = snewn(wslen, BignumInt);
for (i = 0; i < mlen; i++) {
- workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0);
- workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? b[mlen - i] : 0);
+ workspace[0 * mlen + i] = i < (int)a[0] ? a[i + 1] : 0;
+ workspace[1 * mlen + i] = i < (int)b[0] ? b[i + 1] : 0;
}
internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
}
internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
rlen = addend[0] + 1;
ret = newbn(rlen);
maxspot = 0;
rlen = addend[0] + 1;
ret = newbn(rlen);
maxspot = 0;
- for (i = 1; i <= (int)ret[0]; i++) {
- ret[i] = (i <= 2 * mlen ? workspace[4 * mlen - i] : 0);
- if (ret[i] != 0)
- maxspot = i;
+ for (i = 0; i < (int)ret[0]; i++) {
+ ret[i + 1] = (i < 2 * mlen ? workspace[2 * mlen + i] : 0);
+ if (ret[i + 1] != 0)
+ maxspot = i + 1;