git.distorted.org.uk Git - u/mdw/putty/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Bignum routines for RSA and DH and stuff.
	3	*/
	4
	5	#include <stdio.h>
	6	#include <assert.h>
	7	#include <stdlib.h>
	8	#include <string.h>
	9
	10	#include "misc.h"
	11	#include "bn-internal.h"
	12	#include "ssh.h"
	13
	14	BignumInt bnZero[1] = { 0 };
	15	BignumInt bnOne[2] = { 1, 1 };
	16
	17	/*
	18	* The Bignum format is an array of `BignumInt'. The first
	19	* element of the array counts the remaining elements. The
	20	* remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_
	21	* significant digit first. (So it's trivial to extract the bit
	22	* with value 2^n for any n.)
	23	*
	24	* All Bignums in this module are positive. Negative numbers must
	25	* be dealt with outside it.
	26	*
	27	* INVARIANT: the most significant word of any Bignum must be
	28	* nonzero.
	29	*/
	30
	31	Bignum Zero = bnZero, One = bnOne;
	32
	33	static Bignum newbn(int length)
	34	{
	35	Bignum b = snewn(length + 1, BignumInt);
	36	if (!b)
	37	abort(); /* FIXME */
	38	memset(b, 0, (length + 1) * sizeof(*b));
	39	b[0] = length;
	40	return b;
	41	}
	42
	43	void bn_restore_invariant(Bignum b)
	44	{
	45	while (b[0] > 1 && b[b[0]] == 0)
	46	b[0]--;
	47	}
	48
	49	Bignum copybn(Bignum orig)
	50	{
	51	Bignum b = snewn(orig[0] + 1, BignumInt);
	52	if (!b)
	53	abort(); /* FIXME */
	54	memcpy(b, orig, (orig[0] + 1) * sizeof(*b));
	55	return b;
	56	}
	57
	58	void freebn(Bignum b)
	59	{
	60	/*
	61	* Burn the evidence, just in case.
	62	*/
	63	smemclr(b, sizeof(b[0]) * (b[0] + 1));
	64	sfree(b);
	65	}
	66
	67	Bignum bn_power_2(int n)
	68	{
	69	Bignum ret = newbn(n / BIGNUM_INT_BITS + 1);
	70	bignum_set_bit(ret, n, 1);
	71	return ret;
	72	}
	73
	74	/*
	75	* Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
	76	* little-endian arrays of 'len' BignumInts. Returns a BignumInt carried
	77	* off the top.
	78	*/
	79	static BignumInt internal_add(const BignumInt a, const BignumInt b,
	80	BignumInt *c, int len)
	81	{
	82	int i;
	83	BignumDblInt carry = 0;
	84
	85	for (i = 0; i < len; i++) {
	86	carry += (BignumDblInt)a[i] + b[i];
	87	c[i] = (BignumInt)carry;
	88	carry >>= BIGNUM_INT_BITS;
	89	}
	90
	91	return (BignumInt)carry;
	92	}
	93
	94	/*
	95	* Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
	96	* all little-endian arrays of 'len' BignumInts. Any borrow from the top
	97	* is ignored.
	98	*/
	99	static void internal_sub(const BignumInt a, const BignumInt b,
	100	BignumInt *c, int len)
	101	{
	102	int i;
	103	BignumDblInt carry = 1;
	104
	105	for (i = 0; i < len; i++) {
	106	carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
	107	c[i] = (BignumInt)carry;
	108	carry >>= BIGNUM_INT_BITS;
	109	}
	110	}
	111
	112	/*
	113	* Compute c = a * b.
	114	* Input is in the first len words of a and b.
	115	* Result is returned in the first 2*len words of c.
	116	*
	117	* 'scratch' must point to an array of BignumInt of size at least
	118	* mul_compute_scratch(len). (This covers the needs of internal_mul
	119	* and all its recursive calls to itself.)
	120	*/
	121	#define KARATSUBA_THRESHOLD 50
	122	static int mul_compute_scratch(int len)
	123	{
	124	int ret = 0;
	125	while (len > KARATSUBA_THRESHOLD) {
	126	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	127	int midlen = botlen + 1;
	128	ret += 4*midlen;
	129	len = midlen;
	130	}
	131	return ret;
	132	}
	133	static void internal_mul(const BignumInt a, const BignumInt b,
	134	BignumInt c, int len, BignumInt scratch)
	135	{
	136	if (len > KARATSUBA_THRESHOLD) {
	137	int i;
	138
	139	/*
	140	* Karatsuba divide-and-conquer algorithm. Cut each input in
	141	* half, so that it's expressed as two big 'digits' in a giant
	142	* base D:
	143	*
	144	* a = a_1 D + a_0
	145	* b = b_1 D + b_0
	146	*
	147	* Then the product is of course
	148	*
	149	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	150	*
	151	* and we compute the three coefficients by recursively
	152	* calling ourself to do half-length multiplications.
	153	*
	154	* The clever bit that makes this worth doing is that we only
	155	* need _one_ half-length multiplication for the central
	156	* coefficient rather than the two that it obviouly looks
	157	* like, because we can use a single multiplication to compute
	158	*
	159	* (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
	160	*
	161	* and then we subtract the other two coefficients (a_1 b_1
	162	* and a_0 b_0) which we were computing anyway.
	163	*
	164	* Hence we get to multiply two numbers of length N in about
	165	* three times as much work as it takes to multiply numbers of
	166	* length N/2, which is obviously better than the four times
	167	* as much work it would take if we just did a long
	168	* conventional multiply.
	169	*/
	170
	171	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	172	int midlen = botlen + 1;
	173	BignumDblInt carry;
	174
	175	/*
	176	* The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping
	177	* in the output array, so we can compute them immediately in
	178	* place.
	179	*/
	180
	181	#ifdef KARA_DEBUG
	182	printf("a1,a0 = 0x");
	183	for (i = 0; i < len; i++) {
	184	if (i == toplen) printf(", 0x");
	185	printf("%0*x", BIGNUM_INT_BITS/4, a[len - 1 - i]);
	186	}
	187	printf("\n");
	188	printf("b1,b0 = 0x");
	189	for (i = 0; i < len; i++) {
	190	if (i == toplen) printf(", 0x");
	191	printf("%0*x", BIGNUM_INT_BITS/4, b[len - 1 - i]);
	192	}
	193	printf("\n");
	194	#endif
	195
	196	/* a_1 b_1 */
	197	internal_mul(a + botlen, b + botlen, c + 2*botlen, toplen, scratch);
	198	#ifdef KARA_DEBUG
	199	printf("a1b1 = 0x");
	200	for (i = 0; i < 2*toplen; i++) {
	201	printf("%0x", BIGNUM_INT_BITS/4, c[2len - 1 - i]);
	202	}
	203	printf("\n");
	204	#endif
	205
	206	/* a_0 b_0 */
	207	internal_mul(a, b, c, botlen, scratch);
	208	#ifdef KARA_DEBUG
	209	printf("a0b0 = 0x");
	210	for (i = 0; i < 2*botlen; i++) {
	211	printf("%0x", BIGNUM_INT_BITS/4, c[2botlen - 1 - i]);
	212	}
	213	printf("\n");
	214	#endif
	215
	216	/* Zero padding. botlen exceeds toplen by at most 1, and we'll set
	217	* the extra carry explicitly below, so we only need to zero at most
	218	* one of the top words here.
	219	*/
	220	scratch[midlen - 2] = scratch[2*midlen - 2] = 0;
	221
	222	for (i = 0; i < toplen; i++) {
	223	scratch[i] = a[i + botlen]; /* a_1 */
	224	scratch[midlen + i] = b[i + botlen]; /* b_1 */
	225	}
	226
	227	/* compute a_1 + a_0 */
	228	scratch[midlen - 1] = internal_add(scratch, a, scratch, botlen);
	229	#ifdef KARA_DEBUG
	230	printf("a1plusa0 = 0x");
	231	for (i = 0; i < midlen; i++) {
	232	printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen - 1 - i]);
	233	}
	234	printf("\n");
	235	#endif
	236	/* compute b_1 + b_0 */
	237	scratch[2*midlen - 1] = internal_add(scratch+midlen, b,
	238	scratch+midlen, botlen);
	239	#ifdef KARA_DEBUG
	240	printf("b1plusb0 = 0x");
	241	for (i = 0; i < midlen; i++) {
	242	printf("%0x", BIGNUM_INT_BITS/4, scratch[2midlen - 1 - i]);
	243	}
	244	printf("\n");
	245	#endif
	246
	247	/*
	248	* Now we can do the third multiplication.
	249	*/
	250	internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen,
	251	scratch + 4*midlen);
	252	#ifdef KARA_DEBUG
	253	printf("a1plusa0timesb1plusb0 = 0x");
	254	for (i = 0; i < 2*midlen; i++) {
	255	printf("%0x", BIGNUM_INT_BITS/4, scratch[4midlen - 1 - i]);
	256	}
	257	printf("\n");
	258	#endif
	259
	260	/*
	261	* Now we can reuse the first half of 'scratch' to compute the
	262	* sum of the outer two coefficients, to subtract from that
	263	* product to obtain the middle one.
	264	*/
	265	scratch[2botlen - 2] = scratch[2botlen - 1] = 0;
	266	for (i = 0; i < 2*toplen; i++)
	267	scratch[i] = c[2*botlen + i];
	268	scratch[2botlen] = internal_add(scratch, c, scratch, 2botlen);
	269	scratch[2*botlen + 1] = 0;
	270	#ifdef KARA_DEBUG
	271	printf("a1b1plusa0b0 = 0x");
	272	for (i = 0; i < 2*midlen; i++) {
	273	printf("%0x", BIGNUM_INT_BITS/4, scratch[2midlen - 1 - i]);
	274	}
	275	printf("\n");
	276	#endif
	277
	278	internal_sub(scratch + 2midlen, scratch, scratch, 2midlen);
	279	#ifdef KARA_DEBUG
	280	printf("a1b0plusa0b1 = 0x");
	281	for (i = 0; i < 2*midlen; i++) {
	282	printf("%0x", BIGNUM_INT_BITS/4, scratch[4midlen - 1 - i]);
	283	}
	284	printf("\n");
	285	#endif
	286
	287	/*
	288	* And now all we need to do is to add that middle coefficient
	289	* back into the output. We may have to propagate a carry
	290	* further up the output, but we can be sure it won't
	291	* propagate right the way off the top.
	292	*/
	293	carry = internal_add(c + botlen, scratch, c + botlen, 2*midlen);
	294	i = botlen + 2*midlen;
	295	while (carry) {
	296	assert(i <= 2*len);
	297	carry += c[i];
	298	c[i] = (BignumInt)carry;
	299	carry >>= BIGNUM_INT_BITS;
	300	i++;
	301	}
	302	#ifdef KARA_DEBUG
	303	printf("ab = 0x");
	304	for (i = 0; i < 2*len; i++) {
	305	printf("%0x", BIGNUM_INT_BITS/4, c[2len - i]);
	306	}
	307	printf("\n");
	308	#endif
	309
	310	} else {
	311	int i;
	312	BignumInt carry;
	313	BignumDblInt t;
	314	const BignumInt ap, alim = a + len, bp, blim = b + len;
	315	BignumInt cp, cps;
	316
	317	/*
	318	* Multiply in the ordinary O(N^2) way.
	319	*/
	320
	321	for (i = 0; i < 2 * len; i++)
	322	c[i] = 0;
	323
	324	for (cps = c, ap = a; ap < alim; ap++, cps++) {
	325	carry = 0;
	326	for (cp = cps, bp = b, i = blim - bp; i--; bp++, cp++) {
	327	t = (MUL_WORD(ap, bp) + carry) + *cp;
	328	*cp = (BignumInt) t;
	329	carry = (BignumInt)(t >> BIGNUM_INT_BITS);
	330	}
	331	*cp = carry;
	332	}
	333	}
	334	}
	335
	336	/*
	337	* Variant form of internal_mul used for the initial step of
	338	* Montgomery reduction. Only bothers outputting 'len' words
	339	* (everything above that is thrown away).
	340	*/
	341	static void internal_mul_low(const BignumInt a, const BignumInt b,
	342	BignumInt c, int len, BignumInt scratch)
	343	{
	344	if (len > KARATSUBA_THRESHOLD) {
	345	int i;
	346
	347	/*
	348	* Karatsuba-aware version of internal_mul_low. As before, we
	349	* express each input value as a shifted combination of two
	350	* halves:
	351	*
	352	* a = a_1 D + a_0
	353	* b = b_1 D + b_0
	354	*
	355	* Then the full product is, as before,
	356	*
	357	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	358	*
	359	* Provided we choose D on the large side (so that a_0 and b_0
	360	* are _at least_ as long as a_1 and b_1), we don't need the
	361	* topmost term at all, and we only need half of the middle
	362	* term. So there's no point in doing the proper Karatsuba
	363	* optimisation which computes the middle term using the top
	364	* one, because we'd take as long computing the top one as
	365	* just computing the middle one directly.
	366	*
	367	* So instead, we do a much more obvious thing: we call the
	368	* fully optimised internal_mul to compute a_0 b_0, and we
	369	* recursively call ourself to compute the _bottom halves_ of
	370	* a_1 b_0 and a_0 b_1, each of which we add into the result
	371	* in the obvious way.
	372	*
	373	* In other words, there's no actual Karatsuba _optimisation_
	374	* in this function; the only benefit in doing it this way is
	375	* that we call internal_mul proper for a large part of the
	376	* work, and _that_ can optimise its operation.
	377	*/
	378
	379	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	380
	381	/*
	382	* Scratch space for the various bits and pieces we're going
	383	* to be adding together: we need botlen*2 words for a_0 b_0
	384	* (though we may end up throwing away its topmost word), and
	385	* toplen words for each of a_1 b_0 and a_0 b_1. That adds up
	386	* to exactly 2*len.
	387	*/
	388
	389	/* a_0 b_0 */
	390	internal_mul(a, b, scratch + 2toplen, botlen, scratch + 2len);
	391
	392	/* a_1 b_0 */
	393	internal_mul_low(a + botlen, b, scratch + toplen, toplen,
	394	scratch + 2*len);
	395
	396	/* a_0 b_1 */
	397	internal_mul_low(a, b + botlen, scratch, toplen, scratch + 2*len);
	398
	399	/* Copy the bottom half of the big coefficient into place */
	400	for (i = 0; i < botlen; i++)
	401	c[i] = scratch[2*toplen + i];
	402
	403	/* Add the two small coefficients, throwing away the returned carry */
	404	internal_add(scratch, scratch + toplen, scratch, toplen);
	405
	406	/* And add that to the large coefficient, leaving the result in c. */
	407	internal_add(scratch, scratch + 2*toplen + botlen,
	408	c + botlen, toplen);
	409
	410	} else {
	411	int i;
	412	BignumInt carry;
	413	BignumDblInt t;
	414	const BignumInt ap, alim = a + len, *bp;
	415	BignumInt cp, cps, *clim = c + len;
	416
	417	/*
	418	* Multiply in the ordinary O(N^2) way.
	419	*/
	420
	421	for (i = 0; i < len; i++)
	422	c[i] = 0;
	423
	424	for (cps = c, ap = a; ap < alim; ap++, cps++) {
	425	carry = 0;
	426	for (cp = cps, bp = b, i = clim - cp; i--; bp++, cp++) {
	427	t = (MUL_WORD(ap, bp) + carry) + *cp;
	428	*cp = (BignumInt) t;
	429	carry = (BignumInt)(t >> BIGNUM_INT_BITS);
	430	}
	431	}
	432	}
	433	}
	434
	435	/*
	436	* Montgomery reduction. Expects x to be a little-endian array of 2*len
	437	* BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
	438	* BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
	439	* a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
	440	* x' < n.
	441	*
	442	* 'n' and 'mninv' should be little-endian arrays of 'len' BignumInts
	443	* each, containing respectively n and the multiplicative inverse of
	444	* -n mod r.
	445	*
	446	* 'tmp' is an array of BignumInt used as scratch space, of length at
	447	* least 3*len + mul_compute_scratch(len).
	448	*/
	449	static void monty_reduce(BignumInt x, const BignumInt n,
	450	const BignumInt mninv, BignumInt tmp, int len)
	451	{
	452	int i;
	453	BignumInt carry;
	454
	455	/*
	456	* Multiply x by (-n)^{-1} mod r. This gives us a value m such
	457	* that mn is congruent to -x mod r. Hence, mn+x is an exact
	458	* multiple of r, and is also (obviously) congruent to x mod n.
	459	*/
	460	internal_mul_low(x, mninv, tmp, len, tmp + 3*len);
	461
	462	/*
	463	* Compute t = (mn+x)/r in ordinary, non-modular, integer
	464	* arithmetic. By construction this is exact, and is congruent mod
	465	* n to x * r^{-1}, i.e. the answer we want.
	466	*
	467	* The following multiply leaves that answer in the _most_
	468	* significant half of the 'x' array, so then we must shift it
	469	* down.
	470	*/
	471	internal_mul(tmp, n, tmp+len, len, tmp + 3*len);
	472	carry = internal_add(x, tmp+len, x, 2*len);
	473	for (i = 0; i < len; i++)
	474	x[i] = x[len + i], x[len + i] = 0;
	475
	476	/*
	477	* Reduce t mod n. This doesn't require a full-on division by n,
	478	* but merely a test and single optional subtraction, since we can
	479	* show that 0 <= t < 2n.
	480	*
	481	* Proof:
	482	* + we computed m mod r, so 0 <= m < r.
	483	* + so 0 <= mn < rn, obviously
	484	* + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn
	485	* + yielding 0 <= (mn+x)/r < 2n as required.
	486	*/
	487	if (!carry) {
	488	for (i = len; i-- > 0; )
	489	if (x[i] != n[i])
	490	break;
	491	}
	492	if (carry \|\| i < 0 \|\| x[i] > n[i])
	493	internal_sub(x, n, x, len);
	494	}
	495
	496	static void internal_add_shifted(BignumInt *number,
	497	unsigned n, int shift)
	498	{
	499	int word = 1 + (shift / BIGNUM_INT_BITS);
	500	int bshift = shift % BIGNUM_INT_BITS;
	501	BignumDblInt addend;
	502
	503	addend = (BignumDblInt)n << bshift;
	504
	505	while (addend) {
	506	addend += number[word];
	507	number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
	508	addend >>= BIGNUM_INT_BITS;
	509	word++;
	510	}
	511	}
	512
	513	/*
	514	* Compute a = a % m.
	515	* Input in first alen words of a and first mlen words of m.
	516	* Output in first alen words of a
	517	* (of which last alen-mlen words will be zero).
	518	* The MSW of m MUST have its high bit set.
	519	* Quotient is accumulated in the `quotient' array. Quotient parts
	520	* are shifted left by `qshift' before adding into quot.
	521	*/
	522	static void internal_mod(BignumInt *a, int alen,
	523	BignumInt *m, int mlen,
	524	BignumInt *quot, int qshift)
	525	{
	526	BignumInt m0, m1;
	527	unsigned int h;
	528	int i, j, k;
	529
	530	m0 = m[mlen - 1];
	531	if (mlen > 1)
	532	m1 = m[mlen - 2];
	533	else
	534	m1 = 0;
	535
	536	for (i = alen, h = 0; i-- >= mlen; ) {
	537	BignumDblInt t;
	538	unsigned int q, r, c, ai1;
	539
	540	if (i)
	541	ai1 = a[i - 1];
	542	else
	543	ai1 = 0;
	544
	545	/* Find q = h:a[i] / m0 */
	546	if (h >= m0) {
	547	/*
	548	* Special case.
	549	*
	550	* To illustrate it, suppose a BignumInt is 8 bits, and
	551	* we are dividing (say) A1:23:45:67 by A1:B2:C3. Then
	552	* our initial division will be 0xA123 / 0xA1, which
	553	* will give a quotient of 0x100 and a divide overflow.
	554	* However, the invariants in this division algorithm
	555	* are not violated, since the full number A1:23:... is
	556	* _less_ than the quotient prefix A1:B2:... and so the
	557	* following correction loop would have sorted it out.
	558	*
	559	* In this situation we set q to be the largest
	560	* quotient we _can_ stomach (0xFF, of course).
	561	*/
	562	q = BIGNUM_INT_MASK;
	563	} else {
	564	/* Macro doesn't want an array subscript expression passed
	565	* into it (see definition), so use a temporary. */
	566	BignumInt tmplo = a[i];
	567	DIVMOD_WORD(q, r, h, tmplo, m0);
	568
	569	/* Refine our estimate of q by looking at
	570	h:a[i]:a[i-1] / m0:m1 */
	571	t = MUL_WORD(m1, q);
	572	if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
	573	q--;
	574	t -= m1;
	575	r = (r + m0) & BIGNUM_INT_MASK; /* overflow? */
	576	if (r >= (BignumDblInt) m0 &&
	577	t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
	578	}
	579	}
	580
	581	j = i + 1 - mlen;
	582
	583	/* Subtract q * m from a[i...] */
	584	c = 0;
	585	for (k = 0; k < mlen; k++) {
	586	t = MUL_WORD(q, m[k]);
	587	t += c;
	588	c = (unsigned)(t >> BIGNUM_INT_BITS);
	589	if ((BignumInt) t > a[j + k])
	590	c++;
	591	a[j + k] -= (BignumInt) t;
	592	}
	593
	594	/* Add back m in case of borrow */
	595	if (c != h) {
	596	t = 0;
	597	for (k = 0; k < mlen; k++) {
	598	t += m[k];
	599	t += a[j + k];
	600	a[j + k] = (BignumInt) t;
	601	t = t >> BIGNUM_INT_BITS;
	602	}
	603	q--;
	604	}
	605
	606	if (quot)
	607	internal_add_shifted(quot, q,
	608	qshift + BIGNUM_INT_BITS * (i + 1 - mlen));
	609
	610	if (i >= mlen) {
	611	h = a[i];
	612	a[i] = 0;
	613	}
	614	}
	615	}
	616
	617	static void shift_left(BignumInt *x, int xlen, int shift)
	618	{
	619	int i;
	620
	621	if (!shift)
	622	return;
	623	for (i = xlen; --i > 0; )
	624	x[i] = (x[i] << shift) \| (x[i - 1] >> (BIGNUM_INT_BITS - shift));
	625	x[0] = x[0] << shift;
	626	}
	627
	628	static void shift_right(BignumInt *x, int xlen, int shift)
	629	{
	630	int i;
	631
	632	if (!shift \|\| !xlen)
	633	return;
	634	xlen--;
	635	for (i = 0; i < xlen; i++)
	636	x[i] = (x[i] >> shift) \| (x[i + 1] << (BIGNUM_INT_BITS - shift));
	637	x[i] = x[i] >> shift;
	638	}
	639
	640	/*
	641	* Compute (base ^ exp) % mod, the pedestrian way.
	642	*/
	643	Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)
	644	{
	645	BignumInt a, b, n, m, *scratch;
	646	int mshift;
	647	int mlen, scratchlen, i, j;
	648	Bignum base, result;
	649
	650	/*
	651	* The most significant word of mod needs to be non-zero. It
	652	* should already be, but let's make sure.
	653	*/
	654	assert(mod[mod[0]] != 0);
	655
	656	/*
	657	* Make sure the base is smaller than the modulus, by reducing
	658	* it modulo the modulus if not.
	659	*/
	660	base = bigmod(base_in, mod);
	661
	662	/* Allocate m of size mlen, copy mod to m */
	663	mlen = mod[0];
	664	m = snewn(mlen, BignumInt);
	665	for (j = 0; j < mlen; j++)
	666	m[j] = mod[j + 1];
	667
	668	/* Shift m left to make msb bit set */
	669	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	670	if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
	671	break;
	672	if (mshift)
	673	shift_left(m, mlen, mshift);
	674
	675	/* Allocate n of size mlen, copy base to n */
	676	n = snewn(mlen, BignumInt);
	677	for (i = 0; i < (int)base[0]; i++)
	678	n[i] = base[i + 1];
	679	for (; i < mlen; i++)
	680	n[i] = 0;
	681
	682	/* Allocate a and b of size 2mlen. Set a = 1 /
	683	a = snewn(2 * mlen, BignumInt);
	684	b = snewn(2 * mlen, BignumInt);
	685	a[0] = 1;
	686	for (i = 1; i < 2 * mlen; i++)
	687	a[i] = 0;
	688
	689	/* Scratch space for multiplies */
	690	scratchlen = mul_compute_scratch(mlen);
	691	scratch = snewn(scratchlen, BignumInt);
	692
	693	/* Skip leading zero bits of exp. */
	694	i = 0;
	695	j = BIGNUM_INT_BITS-1;
	696	while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
	697	j--;
	698	if (j < 0) {
	699	i++;
	700	j = BIGNUM_INT_BITS-1;
	701	}
	702	}
	703
	704	/* Main computation */
	705	while (i < (int)exp[0]) {
	706	while (j >= 0) {
	707	internal_mul(a, a, b, mlen, scratch);
	708	internal_mod(b, mlen * 2, m, mlen, NULL, 0);
	709	if ((exp[exp[0] - i] & (1 << j)) != 0) {
	710	internal_mul(b, n, a, mlen, scratch);
	711	internal_mod(a, mlen * 2, m, mlen, NULL, 0);
	712	} else {
	713	BignumInt *t;
	714	t = a;
	715	a = b;
	716	b = t;
	717	}
	718	j--;
	719	}
	720	i++;
	721	j = BIGNUM_INT_BITS-1;
	722	}
	723
	724	/* Fixup result in case the modulus was shifted */
	725	if (mshift) {
	726	shift_left(a, mlen + 1, mshift);
	727	internal_mod(a, mlen + 1, m, mlen, NULL, 0);
	728	shift_right(a, mlen, mshift);
	729	}
	730
	731	/* Copy result to buffer */
	732	result = newbn(mod[0]);
	733	for (i = 0; i < mlen; i++)
	734	result[i + 1] = a[i];
	735	while (result[0] > 1 && result[result[0]] == 0)
	736	result[0]--;
	737
	738	/* Free temporary arrays */
	739	for (i = 0; i < 2 * mlen; i++)
	740	a[i] = 0;
	741	sfree(a);
	742	for (i = 0; i < scratchlen; i++)
	743	scratch[i] = 0;
	744	sfree(scratch);
	745	for (i = 0; i < 2 * mlen; i++)
	746	b[i] = 0;
	747	sfree(b);
	748	for (i = 0; i < mlen; i++)
	749	m[i] = 0;
	750	sfree(m);
	751	for (i = 0; i < mlen; i++)
	752	n[i] = 0;
	753	sfree(n);
	754
	755	freebn(base);
	756
	757	return result;
	758	}
	759
	760	/*
	761	* Compute (base ^ exp) % mod. Uses the Montgomery multiplication
	762	* technique where possible, falling back to modpow_simple otherwise.
	763	*/
	764	Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
	765	{
	766	BignumInt a, b, x, n, mninv, scratch;
	767	int len, scratchlen, i, j;
	768	Bignum base, base2, r, rn, inv, result;
	769
	770	/*
	771	* The most significant word of mod needs to be non-zero. It
	772	* should already be, but let's make sure.
	773	*/
	774	assert(mod[mod[0]] != 0);
	775
	776	/*
	777	* mod had better be odd, or we can't do Montgomery multiplication
	778	* using a power of two at all.
	779	*/
	780	if (!(mod[1] & 1))
	781	return modpow_simple(base_in, exp, mod);
	782
	783	/*
	784	* Make sure the base is smaller than the modulus, by reducing
	785	* it modulo the modulus if not.
	786	*/
	787	base = bigmod(base_in, mod);
	788
	789	/*
	790	* Compute the inverse of n mod r, for monty_reduce. (In fact we
	791	* want the inverse of _minus_ n mod r, but we'll sort that out
	792	* below.)
	793	*/
	794	len = mod[0];
	795	r = bn_power_2(BIGNUM_INT_BITS * len);
	796	inv = modinv(mod, r);
	797
	798	/*
	799	* Multiply the base by r mod n, to get it into Montgomery
	800	* representation.
	801	*/
	802	base2 = modmul(base, r, mod);
	803	freebn(base);
	804	base = base2;
	805
	806	rn = bigmod(r, mod); /* r mod n, i.e. Montgomerified 1 */
	807
	808	freebn(r); /* won't need this any more */
	809
	810	/*
	811	* Set up internal arrays of the right lengths containing the base,
	812	* the modulus, and the modulus's inverse.
	813	*/
	814	n = snewn(len, BignumInt);
	815	for (j = 0; j < len; j++)
	816	n[j] = mod[j + 1];
	817
	818	mninv = snewn(len, BignumInt);
	819	for (j = 0; j < len; j++)
	820	mninv[j] = (j < (int)inv[0] ? inv[j + 1] : 0);
	821	freebn(inv); /* we don't need this copy of it any more */
	822	/* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
	823	x = snewn(len, BignumInt);
	824	for (j = 0; j < len; j++)
	825	x[j] = 0;
	826	internal_sub(x, mninv, mninv, len);
	827
	828	/* x = snewn(len, BignumInt); / / already done above */
	829	for (j = 0; j < len; j++)
	830	x[j] = (j < (int)base[0] ? base[j + 1] : 0);
	831	freebn(base); /* we don't need this copy of it any more */
	832
	833	a = snewn(2*len, BignumInt);
	834	b = snewn(2*len, BignumInt);
	835	for (j = 0; j < len; j++)
	836	a[j] = (j < (int)rn[0] ? rn[j + 1] : 0);
	837	freebn(rn);
	838
	839	/* Scratch space for multiplies */
	840	scratchlen = 3*len + mul_compute_scratch(len);
	841	scratch = snewn(scratchlen, BignumInt);
	842
	843	/* Skip leading zero bits of exp. */
	844	i = 0;
	845	j = BIGNUM_INT_BITS-1;
	846	while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
	847	j--;
	848	if (j < 0) {
	849	i++;
	850	j = BIGNUM_INT_BITS-1;
	851	}
	852	}
	853
	854	/* Main computation */
	855	while (i < (int)exp[0]) {
	856	while (j >= 0) {
	857	internal_mul(a, a, b, len, scratch);
	858	monty_reduce(b, n, mninv, scratch, len);
	859	if ((exp[exp[0] - i] & (1 << j)) != 0) {
	860	internal_mul(b, x, a, len, scratch);
	861	monty_reduce(a, n, mninv, scratch, len);
	862	} else {
	863	BignumInt *t;
	864	t = a;
	865	a = b;
	866	b = t;
	867	}
	868	j--;
	869	}
	870	i++;
	871	j = BIGNUM_INT_BITS-1;
	872	}
	873
	874	/*
	875	* Final monty_reduce to get back from the adjusted Montgomery
	876	* representation.
	877	*/
	878	monty_reduce(a, n, mninv, scratch, len);
	879
	880	/* Copy result to buffer */
	881	result = newbn(mod[0]);
	882	for (i = 0; i < len; i++)
	883	result[i + 1] = a[i];
	884	while (result[0] > 1 && result[result[0]] == 0)
	885	result[0]--;
	886
	887	/* Free temporary arrays */
	888	for (i = 0; i < scratchlen; i++)
	889	scratch[i] = 0;
	890	sfree(scratch);
	891	for (i = 0; i < 2 * len; i++)
	892	a[i] = 0;
	893	sfree(a);
	894	for (i = 0; i < 2 * len; i++)
	895	b[i] = 0;
	896	sfree(b);
	897	for (i = 0; i < len; i++)
	898	mninv[i] = 0;
	899	sfree(mninv);
	900	for (i = 0; i < len; i++)
	901	n[i] = 0;
	902	sfree(n);
	903	for (i = 0; i < len; i++)
	904	x[i] = 0;
	905	sfree(x);
	906
	907	return result;
	908	}
	909
	910	/*
	911	* Compute (p * q) % mod.
	912	* The most significant word of mod MUST be non-zero.
	913	* We assume that the result array is the same size as the mod array.
	914	*/
	915	Bignum modmul(Bignum p, Bignum q, Bignum mod)
	916	{
	917	BignumInt a, n, m, o, *scratch;
	918	int mshift, scratchlen;
	919	int pqlen, mlen, rlen, i, j;
	920	Bignum result;
	921
	922	/* Allocate m of size mlen, copy mod to m */
	923	mlen = mod[0];
	924	m = snewn(mlen, BignumInt);
	925	for (j = 0; j < mlen; j++)
	926	m[j] = mod[j + 1];
	927
	928	/* Shift m left to make msb bit set */
	929	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	930	if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
	931	break;
	932	if (mshift)
	933	shift_left(m, mlen, mshift);
	934
	935	pqlen = (p[0] > q[0] ? p[0] : q[0]);
	936
	937	/* Make sure that we're allowing enough space. The shifting below will
	938	* underflow the vectors we allocate if `pqlen' is too small.
	939	*/
	940	if (2*pqlen <= mlen)
	941	pqlen = mlen/2 + 1;
	942
	943	/* Allocate n of size pqlen, copy p to n */
	944	n = snewn(pqlen, BignumInt);
	945	for (i = 0; i < (int)p[0]; i++)
	946	n[i] = p[i + 1];
	947	for (; i < pqlen; i++)
	948	n[i] = 0;
	949
	950	/* Allocate o of size pqlen, copy q to o */
	951	o = snewn(pqlen, BignumInt);
	952	for (i = 0; i < (int)q[0]; i++)
	953	o[i] = q[i + 1];
	954	for (; i < pqlen; i++)
	955	o[i] = 0;
	956
	957	/* Allocate a of size 2pqlen for result /
	958	a = snewn(2 * pqlen, BignumInt);
	959
	960	/* Scratch space for multiplies */
	961	scratchlen = mul_compute_scratch(pqlen);
	962	scratch = snewn(scratchlen, BignumInt);
	963
	964	/* Main computation */
	965	internal_mul(n, o, a, pqlen, scratch);
	966	internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
	967
	968	/* Fixup result in case the modulus was shifted */
	969	if (mshift) {
	970	shift_left(a, mlen + 1, mshift);
	971	internal_mod(a, mlen + 1, m, mlen, NULL, 0);
	972	shift_right(a, mlen, mshift);
	973	}
	974
	975	/* Copy result to buffer */
	976	rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
	977	result = newbn(rlen);
	978	for (i = 0; i < rlen; i++)
	979	result[i + 1] = a[i];
	980	while (result[0] > 1 && result[result[0]] == 0)
	981	result[0]--;
	982
	983	/* Free temporary arrays */
	984	for (i = 0; i < scratchlen; i++)
	985	scratch[i] = 0;
	986	sfree(scratch);
	987	for (i = 0; i < 2 * pqlen; i++)
	988	a[i] = 0;
	989	sfree(a);
	990	for (i = 0; i < mlen; i++)
	991	m[i] = 0;
	992	sfree(m);
	993	for (i = 0; i < pqlen; i++)
	994	n[i] = 0;
	995	sfree(n);
	996	for (i = 0; i < pqlen; i++)
	997	o[i] = 0;
	998	sfree(o);
	999
	1000	return result;
	1001	}
	1002
	1003	/*
	1004	* Compute p % mod.
	1005	* The most significant word of mod MUST be non-zero.
	1006	* We assume that the result array is the same size as the mod array.
	1007	* We optionally write out a quotient if `quotient' is non-NULL.
	1008	* We can avoid writing out the result if `result' is NULL.
	1009	*/
	1010	static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)
	1011	{
	1012	BignumInt n, m;
	1013	int mshift;
	1014	int plen, mlen, i, j;
	1015
	1016	/* Allocate m of size mlen, copy mod to m */
	1017	mlen = mod[0];
	1018	m = snewn(mlen, BignumInt);
	1019	for (j = 0; j < mlen; j++)
	1020	m[j] = mod[j + 1];
	1021
	1022	/* Shift m left to make msb bit set */
	1023	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	1024	if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
	1025	break;
	1026	if (mshift)
	1027	shift_left(m, mlen, mshift);
	1028
	1029	plen = p[0];
	1030	/* Ensure plen > mlen */
	1031	if (plen <= mlen)
	1032	plen = mlen + 1;
	1033
	1034	/* Allocate n of size plen, copy p to n */
	1035	n = snewn(plen, BignumInt);
	1036	for (i = 0; i < (int)p[0]; i++)
	1037	n[i] = p[i + 1];
	1038	for (; i < plen; i++)
	1039	n[i] = 0;
	1040
	1041	/* Main computation */
	1042	internal_mod(n, plen, m, mlen, quotient, mshift);
	1043
	1044	/* Fixup result in case the modulus was shifted */
	1045	if (mshift) {
	1046	shift_left(n, mlen + 1, mshift);
	1047	internal_mod(n, plen, m, mlen, quotient, 0);
	1048	shift_right(n, mlen, mshift);
	1049	}
	1050
	1051	/* Copy result to buffer */
	1052	if (result) {
	1053	for (i = 0; i < (int)result[0]; i++)
	1054	result[i + 1] = i < plen ? n[i] : 0;
	1055	bn_restore_invariant(result);
	1056	}
	1057
	1058	/* Free temporary arrays */
	1059	for (i = 0; i < mlen; i++)
	1060	m[i] = 0;
	1061	sfree(m);
	1062	for (i = 0; i < plen; i++)
	1063	n[i] = 0;
	1064	sfree(n);
	1065	}
	1066
	1067	/*
	1068	* Decrement a number.
	1069	*/
	1070	void decbn(Bignum bn)
	1071	{
	1072	int i = 1;
	1073	while (i < (int)bn[0] && bn[i] == 0)
	1074	bn[i++] = BIGNUM_INT_MASK;
	1075	bn[i]--;
	1076	}
	1077
	1078	Bignum bignum_from_bytes(const unsigned char *data, int nbytes)
	1079	{
	1080	Bignum result;
	1081	int w, i;
	1082
	1083	w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
	1084
	1085	result = newbn(w);
	1086	for (i = 1; i <= w; i++)
	1087	result[i] = 0;
	1088	for (i = nbytes; i--;) {
	1089	unsigned char byte = *data++;
	1090	result[1 + i / BIGNUM_INT_BYTES] \|= byte << (8*i % BIGNUM_INT_BITS);
	1091	}
	1092
	1093	while (result[0] > 1 && result[result[0]] == 0)
	1094	result[0]--;
	1095	return result;
	1096	}
	1097
	1098	/*
	1099	* Read an SSH-1-format bignum from a data buffer. Return the number
	1100	* of bytes consumed, or -1 if there wasn't enough data.
	1101	*/
	1102	int ssh1_read_bignum(const unsigned char data, int len, Bignum result)
	1103	{
	1104	const unsigned char *p = data;
	1105	int i;
	1106	int w, b;
	1107
	1108	if (len < 2)
	1109	return -1;
	1110
	1111	w = 0;
	1112	for (i = 0; i < 2; i++)
	1113	w = (w << 8) + *p++;
	1114	b = (w + 7) / 8; /* bits -> bytes */
	1115
	1116	if (len < b+2)
	1117	return -1;
	1118
	1119	if (!result) /* just return length */
	1120	return b + 2;
	1121
	1122	*result = bignum_from_bytes(p, b);
	1123
	1124	return p + b - data;
	1125	}
	1126
	1127	/*
	1128	* Return the bit count of a bignum, for SSH-1 encoding.
	1129	*/
	1130	int bignum_bitcount(Bignum bn)
	1131	{
	1132	int bitcount = bn[0] * BIGNUM_INT_BITS - 1;
	1133	while (bitcount >= 0
	1134	&& (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;
	1135	return bitcount + 1;
	1136	}
	1137
	1138	/*
	1139	* Return the byte length of a bignum when SSH-1 encoded.
	1140	*/
	1141	int ssh1_bignum_length(Bignum bn)
	1142	{
	1143	return 2 + (bignum_bitcount(bn) + 7) / 8;
	1144	}
	1145
	1146	/*
	1147	* Return the byte length of a bignum when SSH-2 encoded.
	1148	*/
	1149	int ssh2_bignum_length(Bignum bn)
	1150	{
	1151	return 4 + (bignum_bitcount(bn) + 8) / 8;
	1152	}
	1153
	1154	/*
	1155	* Return a byte from a bignum; 0 is least significant, etc.
	1156	*/
	1157	int bignum_byte(Bignum bn, int i)
	1158	{
	1159	if (i >= (int)(BIGNUM_INT_BYTES * bn[0]))
	1160	return 0; /* beyond the end */
	1161	else
	1162	return (bn[i / BIGNUM_INT_BYTES + 1] >>
	1163	((i % BIGNUM_INT_BYTES)*8)) & 0xFF;
	1164	}
	1165
	1166	/*
	1167	* Return a bit from a bignum; 0 is least significant, etc.
	1168	*/
	1169	int bignum_bit(Bignum bn, int i)
	1170	{
	1171	if (i >= (int)(BIGNUM_INT_BITS * bn[0]))
	1172	return 0; /* beyond the end */
	1173	else
	1174	return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;
	1175	}
	1176
	1177	/*
	1178	* Set a bit in a bignum; 0 is least significant, etc.
	1179	*/
	1180	void bignum_set_bit(Bignum bn, int bitnum, int value)
	1181	{
	1182	if (bitnum >= (int)(BIGNUM_INT_BITS * bn[0]))
	1183	abort(); /* beyond the end */
	1184	else {
	1185	int v = bitnum / BIGNUM_INT_BITS + 1;
	1186	int mask = 1 << (bitnum % BIGNUM_INT_BITS);
	1187	if (value)
	1188	bn[v] \|= mask;
	1189	else
	1190	bn[v] &= ~mask;
	1191	}
	1192	}
	1193
	1194	/*
	1195	* Write a SSH-1-format bignum into a buffer. It is assumed the
	1196	* buffer is big enough. Returns the number of bytes used.
	1197	*/
	1198	int ssh1_write_bignum(void *data, Bignum bn)
	1199	{
	1200	unsigned char *p = data;
	1201	int len = ssh1_bignum_length(bn);
	1202	int i;
	1203	int bitc = bignum_bitcount(bn);
	1204
	1205	*p++ = (bitc >> 8) & 0xFF;
	1206	*p++ = (bitc) & 0xFF;
	1207	for (i = len - 2; i--;)
	1208	*p++ = bignum_byte(bn, i);
	1209	return len;
	1210	}
	1211
	1212	/*
	1213	* Compare two bignums. Returns like strcmp.
	1214	*/
	1215	int bignum_cmp(Bignum a, Bignum b)
	1216	{
	1217	int amax = a[0], bmax = b[0];
	1218	int i = (amax > bmax ? amax : bmax);
	1219	while (i) {
	1220	BignumInt aval = (i > amax ? 0 : a[i]);
	1221	BignumInt bval = (i > bmax ? 0 : b[i]);
	1222	if (aval < bval)
	1223	return -1;
	1224	if (aval > bval)
	1225	return +1;
	1226	i--;
	1227	}
	1228	return 0;
	1229	}
	1230
	1231	/*
	1232	* Right-shift one bignum to form another.
	1233	*/
	1234	Bignum bignum_rshift(Bignum a, int shift)
	1235	{
	1236	Bignum ret;
	1237	int i, shiftw, shiftb, shiftbb, bits;
	1238	BignumInt ai, ai1;
	1239
	1240	bits = bignum_bitcount(a) - shift;
	1241	ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
	1242
	1243	if (ret) {
	1244	shiftw = shift / BIGNUM_INT_BITS;
	1245	shiftb = shift % BIGNUM_INT_BITS;
	1246	shiftbb = BIGNUM_INT_BITS - shiftb;
	1247
	1248	ai1 = a[shiftw + 1];
	1249	for (i = 1; i <= (int)ret[0]; i++) {
	1250	ai = ai1;
	1251	ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);
	1252	ret[i] = ((ai >> shiftb) \| (ai1 << shiftbb)) & BIGNUM_INT_MASK;
	1253	}
	1254	}
	1255
	1256	return ret;
	1257	}
	1258
	1259	/*
	1260	* Non-modular multiplication and addition.
	1261	*/
	1262	Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
	1263	{
	1264	int alen = a[0], blen = b[0];
	1265	int mlen = (alen > blen ? alen : blen);
	1266	int rlen, i, maxspot;
	1267	int wslen;
	1268	BignumInt *workspace;
	1269	Bignum ret;
	1270
	1271	/* mlen space for a, mlen space for b, 2*mlen for result,
	1272	* plus scratch space for multiplication */
	1273	wslen = mlen * 4 + mul_compute_scratch(mlen);
	1274	workspace = snewn(wslen, BignumInt);
	1275	for (i = 0; i < mlen; i++) {
	1276	workspace[0 * mlen + i] = i < (int)a[0] ? a[i + 1] : 0;
	1277	workspace[1 * mlen + i] = i < (int)b[0] ? b[i + 1] : 0;
	1278	}
	1279
	1280	internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
	1281	workspace + 2 * mlen, mlen, workspace + 4 * mlen);
	1282
	1283	/* now just copy the result back */
	1284	rlen = alen + blen + 1;
	1285	if (addend && rlen <= (int)addend[0])
	1286	rlen = addend[0] + 1;
	1287	ret = newbn(rlen);
	1288	maxspot = 0;
	1289	for (i = 0; i < (int)ret[0]; i++) {
	1290	ret[i + 1] = (i < 2 * mlen ? workspace[2 * mlen + i] : 0);
	1291	if (ret[i + 1] != 0)
	1292	maxspot = i + 1;
	1293	}
	1294	ret[0] = maxspot;
	1295
	1296	/* now add in the addend, if any */
	1297	if (addend) {
	1298	BignumDblInt carry = 0;
	1299	for (i = 1; i <= rlen; i++) {
	1300	carry += (i <= (int)ret[0] ? ret[i] : 0);
	1301	carry += (i <= (int)addend[0] ? addend[i] : 0);
	1302	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1303	carry >>= BIGNUM_INT_BITS;
	1304	if (ret[i] != 0 && i > maxspot)
	1305	maxspot = i;
	1306	}
	1307	}
	1308	ret[0] = maxspot;
	1309
	1310	for (i = 0; i < wslen; i++)
	1311	workspace[i] = 0;
	1312	sfree(workspace);
	1313	return ret;
	1314	}
	1315
	1316	/*
	1317	* Non-modular multiplication.
	1318	*/
	1319	Bignum bigmul(Bignum a, Bignum b)
	1320	{
	1321	return bigmuladd(a, b, NULL);
	1322	}
	1323
	1324	/*
	1325	* Simple addition.
	1326	*/
	1327	Bignum bigadd(Bignum a, Bignum b)
	1328	{
	1329	int alen = a[0], blen = b[0];
	1330	int rlen = (alen > blen ? alen : blen) + 1;
	1331	int i, maxspot;
	1332	Bignum ret;
	1333	BignumDblInt carry;
	1334
	1335	ret = newbn(rlen);
	1336
	1337	carry = 0;
	1338	maxspot = 0;
	1339	for (i = 1; i <= rlen; i++) {
	1340	carry += (i <= (int)a[0] ? a[i] : 0);
	1341	carry += (i <= (int)b[0] ? b[i] : 0);
	1342	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1343	carry >>= BIGNUM_INT_BITS;
	1344	if (ret[i] != 0 && i > maxspot)
	1345	maxspot = i;
	1346	}
	1347	ret[0] = maxspot;
	1348
	1349	return ret;
	1350	}
	1351
	1352	/*
	1353	* Subtraction. Returns a-b, or NULL if the result would come out
	1354	* negative (recall that this entire bignum module only handles
	1355	* positive numbers).
	1356	*/
	1357	Bignum bigsub(Bignum a, Bignum b)
	1358	{
	1359	int alen = a[0], blen = b[0];
	1360	int rlen = (alen > blen ? alen : blen);
	1361	int i, maxspot;
	1362	Bignum ret;
	1363	BignumDblInt carry;
	1364
	1365	ret = newbn(rlen);
	1366
	1367	carry = 1;
	1368	maxspot = 0;
	1369	for (i = 1; i <= rlen; i++) {
	1370	carry += (i <= (int)a[0] ? a[i] : 0);
	1371	carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK);
	1372	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1373	carry >>= BIGNUM_INT_BITS;
	1374	if (ret[i] != 0 && i > maxspot)
	1375	maxspot = i;
	1376	}
	1377	ret[0] = maxspot;
	1378
	1379	if (!carry) {
	1380	freebn(ret);
	1381	return NULL;
	1382	}
	1383
	1384	return ret;
	1385	}
	1386
	1387	/*
	1388	* Return a bignum which is the result of shifting another left by N bits.
	1389	* If N is negative then you get a right shift instead.
	1390	*/
	1391	Bignum biglsl(Bignum x, int n)
	1392	{
	1393	Bignum d;
	1394	unsigned o, i;
	1395
	1396	/* Eliminate some simple special cases. */
	1397	if (!n \|\| !x[0]) return copybn(x);
	1398	else if (n < 0) return biglsr(x, -n);
	1399
	1400	/* Some initial setup. */
	1401	o = n/BIGNUM_INT_BITS;
	1402	n %= BIGNUM_INT_BITS;
	1403	d = newbn(x[0] + o + !!n);
	1404
	1405	/* Clear the low-significant words of d. */
	1406	for (i = 1; i <= o; i++) d[i] = 0;
	1407
	1408	if (!n) {
	1409	/* Easy case: we're shifting by a multiple of the word size, so we
	1410	* can just copy whole words.
	1411	*/
	1412	for (i = 1; i <= x[0]; i++) d[o + i] = x[i];
	1413	} else {
	1414	/* Hard case: destination words can be a combination of two source
	1415	* words.
	1416	*/
	1417
	1418	/* Take the low bits from the least significant source word. */
	1419	d[o + 1] = x[1] << n;
	1420
	1421	/* The intermediate words really are a combination of two source
	1422	* words.
	1423	*/
	1424	for (i = 2; i <= x[0]; i++)
	1425	d[o + i] = (x[i] << n) \| (x[i - 1] >> (BIGNUM_INT_BITS - n));
	1426
	1427	/* Finally, the high bits of the most significant input word. */
	1428	d[o + i + 1] = x[i] >> (BIGNUM_INT_BITS - n);
	1429	}
	1430
	1431	/* The destination length is a conservative estimate, so we'll need to
	1432	* sort that out.
	1433	*/
	1434	bn_restore_invariant(d);
	1435
	1436	/* We're done. */
	1437	return d;
	1438	}
	1439
	1440	/*
	1441	* Return a bignum which is the result of shifting another right by N bits
	1442	* (discarding the least significant N bits, and shifting zeroes in at the
	1443	* most significant end). If N is negative then you get a left shift
	1444	* instead.
	1445	*/
	1446	Bignum biglsr(Bignum x, int n)
	1447	{
	1448	Bignum d;
	1449	unsigned o, i;
	1450
	1451	/* Eliminate some simple special cases. */
	1452	if (!n \|\| !x[0]) return copybn(x);
	1453	else if (n < 0) return biglsl(x, -n);
	1454
	1455	/* Some initial setup. */
	1456	o = n/BIGNUM_INT_BITS;
	1457	n %= BIGNUM_INT_BITS;
	1458	d = newbn(x[0] - o);
	1459
	1460	if (!n) {
	1461	/* Simple case: we're shifting by a multiple of the word size, so we
	1462	* can just copy whole words across.
	1463	*/
	1464	for (i = o + 1; i <= x[0]; i++) d[i - o] = x[i];
	1465	} else {
	1466	/* Hard case: some destination words will be a combination of two
	1467	* source words. We get to discard some of the input words.
	1468	*/
	1469
	1470	/* The intermediate words are combinations of two input words. */
	1471	for (i = o + 1; i < x[0]; i++)
	1472	d[i - o] = (x[i] >> n) \| (x[i + 1] << (BIGNUM_INT_BITS - n));
	1473
	1474	/* And finally the high-significance bits of the top source word. */
	1475	d[i - o + 1] = x[i] << (BIGNUM_INT_BITS - n);
	1476	}
	1477
	1478	/* The destination length is a conservative estimate, so we'll need to
	1479	* sort that out.
	1480	*/
	1481	bn_restore_invariant(d);
	1482
	1483	/* And we're done. */
	1484	return d;
	1485	}
	1486
	1487	/*
	1488	* Create a bignum which is the bitmask covering another one. That
	1489	* is, the smallest integer which is >= N and is also one less than
	1490	* a power of two.
	1491	*/
	1492	Bignum bignum_bitmask(Bignum n)
	1493	{
	1494	Bignum ret = copybn(n);
	1495	int i;
	1496	BignumInt j;
	1497
	1498	i = ret[0];
	1499	while (n[i] == 0 && i > 0)
	1500	i--;
	1501	if (i <= 0)
	1502	return ret; /* input was zero */
	1503	j = 1;
	1504	while (j < n[i])
	1505	j = 2 * j + 1;
	1506	ret[i] = j;
	1507	while (--i > 0)
	1508	ret[i] = BIGNUM_INT_MASK;
	1509	return ret;
	1510	}
	1511
	1512	/*
	1513	* Convert a (max 32-bit) long into a bignum.
	1514	*/
	1515	Bignum bignum_from_long(unsigned long nn)
	1516	{
	1517	Bignum ret;
	1518	BignumDblInt n = nn;
	1519
	1520	ret = newbn(3);
	1521	ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
	1522	ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
	1523	ret[3] = 0;
	1524	ret[0] = (ret[2] ? 2 : 1);
	1525	return ret;
	1526	}
	1527
	1528	/*
	1529	* Add a long to a bignum.
	1530	*/
	1531	Bignum bignum_add_long(Bignum number, unsigned long addendx)
	1532	{
	1533	Bignum ret = newbn(number[0] + 1);
	1534	int i, maxspot = 0;
	1535	BignumDblInt carry = 0, addend = addendx;
	1536
	1537	for (i = 1; i <= (int)ret[0]; i++) {
	1538	carry += addend & BIGNUM_INT_MASK;
	1539	carry += (i <= (int)number[0] ? number[i] : 0);
	1540	addend >>= BIGNUM_INT_BITS;
	1541	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1542	carry >>= BIGNUM_INT_BITS;
	1543	if (ret[i] != 0)
	1544	maxspot = i;
	1545	}
	1546	ret[0] = maxspot;
	1547	return ret;
	1548	}
	1549
	1550	/*
	1551	* Compute the residue of a bignum, modulo a (max 16-bit) short.
	1552	*/
	1553	unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
	1554	{
	1555	BignumDblInt mod, r;
	1556	int i;
	1557
	1558	r = 0;
	1559	mod = modulus;
	1560	for (i = number[0]; i > 0; i--)
	1561	r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
	1562	return (unsigned short) r;
	1563	}
	1564
	1565	#ifdef DEBUG
	1566	void diagbn(char *prefix, Bignum md)
	1567	{
	1568	int i, nibbles, morenibbles;
	1569	static const char hex[] = "0123456789ABCDEF";
	1570
	1571	debug(("%s0x", prefix ? prefix : ""));
	1572
	1573	nibbles = (3 + bignum_bitcount(md)) / 4;
	1574	if (nibbles < 1)
	1575	nibbles = 1;
	1576	morenibbles = 4 * md[0] - nibbles;
	1577	for (i = 0; i < morenibbles; i++)
	1578	debug(("-"));
	1579	for (i = nibbles; i--;)
	1580	debug(("%c",
	1581	hex[(bignum_byte(md, i / 2) >> (4 * (i % 2))) & 0xF]));
	1582
	1583	if (prefix)
	1584	debug(("\n"));
	1585	}
	1586	#endif
	1587
	1588	/*
	1589	* Simple division.
	1590	*/
	1591	Bignum bigdiv(Bignum a, Bignum b)
	1592	{
	1593	Bignum q = newbn(a[0]);
	1594	bigdivmod(a, b, NULL, q);
	1595	return q;
	1596	}
	1597
	1598	/*
	1599	* Simple remainder.
	1600	*/
	1601	Bignum bigmod(Bignum a, Bignum b)
	1602	{
	1603	Bignum r = newbn(b[0]);
	1604	bigdivmod(a, b, r, NULL);
	1605	return r;
	1606	}
	1607
	1608	/*
	1609	* Greatest common divisor.
	1610	*/
	1611	Bignum biggcd(Bignum av, Bignum bv)
	1612	{
	1613	Bignum a = copybn(av);
	1614	Bignum b = copybn(bv);
	1615
	1616	while (bignum_cmp(b, Zero) != 0) {
	1617	Bignum t = newbn(b[0]);
	1618	bigdivmod(a, b, t, NULL);
	1619	while (t[0] > 1 && t[t[0]] == 0)
	1620	t[0]--;
	1621	freebn(a);
	1622	a = b;
	1623	b = t;
	1624	}
	1625
	1626	freebn(b);
	1627	return a;
	1628	}
	1629
	1630	/*
	1631	* Modular inverse, using Euclid's extended algorithm.
	1632	*/
	1633	Bignum modinv(Bignum number, Bignum modulus)
	1634	{
	1635	Bignum a = copybn(modulus);
	1636	Bignum b = copybn(number);
	1637	Bignum xp = copybn(Zero);
	1638	Bignum x = copybn(One);
	1639	int sign = +1;
	1640
	1641	while (bignum_cmp(b, One) != 0) {
	1642	Bignum t = newbn(b[0]);
	1643	Bignum q = newbn(a[0]);
	1644	bigdivmod(a, b, t, q);
	1645	while (t[0] > 1 && t[t[0]] == 0)
	1646	t[0]--;
	1647	freebn(a);
	1648	a = b;
	1649	b = t;
	1650	t = xp;
	1651	xp = x;
	1652	x = bigmuladd(q, xp, t);
	1653	sign = -sign;
	1654	freebn(t);
	1655	freebn(q);
	1656	}
	1657
	1658	freebn(b);
	1659	freebn(a);
	1660	freebn(xp);
	1661
	1662	/* now we know that sign * x == 1, and that x < modulus */
	1663	if (sign < 0) {
	1664	/* set a new x to be modulus - x */
	1665	Bignum newx = newbn(modulus[0]);
	1666	BignumInt carry = 0;
	1667	int maxspot = 1;
	1668	int i;
	1669
	1670	for (i = 1; i <= (int)newx[0]; i++) {
	1671	BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);
	1672	BignumInt bword = (i <= (int)x[0] ? x[i] : 0);
	1673	newx[i] = aword - bword - carry;
	1674	bword = ~bword;
	1675	carry = carry ? (newx[i] >= bword) : (newx[i] > bword);
	1676	if (newx[i] != 0)
	1677	maxspot = i;
	1678	}
	1679	newx[0] = maxspot;
	1680	freebn(x);
	1681	x = newx;
	1682	}
	1683
	1684	/* and return. */
	1685	return x;
	1686	}
	1687
	1688	/*
	1689	* Extract the largest power of 2 dividing x, storing it in p2, and returning
	1690	* the product of the remaining factors.
	1691	*/
	1692	static Bignum extract_p2(Bignum x, unsigned *p2)
	1693	{
	1694	unsigned i, j, k, n;
	1695	Bignum y;
	1696
	1697	/* If x is zero then the following won't work. And if x is odd then
	1698	* there's nothing very useful to do.
	1699	*/
	1700	if (!x[0] \|\| (x[1] & 1)) {
	1701	*p2 = 0;
	1702	return copybn(x);
	1703	}
	1704
	1705	/* Find the power of two. */
	1706	for (i = 0; !x[i + 1]; i++);
	1707	for (j = 0; !((x[i + 1] >> j) & 1); j++);
	1708	p2 = iBIGNUM_INT_BITS + j;
	1709
	1710	/* Work out how big the copy should be. */
	1711	n = x[0] - i - 1;
	1712	if (x[x[0]] >> j) n++;
	1713
	1714	/* Copy and shift down. */
	1715	y = newbn(n);
	1716	for (k = 1; k <= n; k++) {
	1717	y[k] = x[k + i] >> j;
	1718	if (j && k < x[0]) y[k] \|= x[k + i + 1] << (BIGNUM_INT_BITS - j);
	1719	}
	1720
	1721	/* Done. */
	1722	return y;
	1723	}
	1724
	1725	/*
	1726	* Kronecker symbol (a\|n). The result is always in { -1, 0, +1 }, and is
	1727	* zero if and only if a and n have a nontrivial common factor. Most
	1728	* usefully, if n is prime, this is the Legendre symbol, taking the value +1
	1729	* if a is a quadratic residue mod n, and -1 otherwise; i.e., (a\|p) ==
	1730	* a^{(p-1)/2} (mod p).
	1731	*/
	1732	int kronecker(Bignum a, Bignum n)
	1733	{
	1734	unsigned s, nn;
	1735	int r = +1;
	1736	Bignum t;
	1737
	1738	/* Special case for n = 0. This is the same convention PARI uses,
	1739	* except that we can't represent negative numbers.
	1740	*/
	1741	if (bignum_cmp(n, Zero) == 0) {
	1742	if (bignum_cmp(a, One) == 0) return +1;
	1743	else return 0;
	1744	}
	1745
	1746	/* Write n = 2^s t, with t odd. If s > 0 and a is even, then the answer
	1747	* is zero; otherwise throw in a factor of (-1)^s if a == 3 or 5 (mod 8).
	1748	*
	1749	* At this point, we have a copy of n, and must remember to free it when
	1750	* we're done. It's convenient to take a copy of a at the same time.
	1751	*/
	1752	a = copybn(a);
	1753	n = extract_p2(n, &s);
	1754
	1755	if (s && (!a[0] \|\| !(a[1] & 1))) { r = 0; goto done; }
	1756	else if ((s & 1) && ((a[1] & 7) == 3 \|\| (a[1] & 7) == 5)) r = -r;
	1757
	1758	/* If n is (now) a unit then we're done. */
	1759	if (bignum_cmp(n, One) == 0) goto done;
	1760
	1761	/* Reduce a modulo n before we go any further. */
	1762	if (bignum_cmp(a, n) >= 0) { t = bigmod(a, n); freebn(a); a = t; }
	1763
	1764	/* Main loop. */
	1765	for (;;) {
	1766	if (bignum_cmp(a, Zero) == 0) { r = 0; goto done; }
	1767
	1768	/* Strip out and handle powers of two from a. */
	1769	t = extract_p2(a, &s); freebn(a); a = t;
	1770	nn = n[1] & 7;
	1771	if ((s & 1) && (nn == 3 \|\| nn == 5)) r = -r;
	1772	if (bignum_cmp(a, One) == 0) break;
	1773
	1774	/* Swap, applying quadratic reciprocity. */
	1775	if ((nn & 3) == 3 && (a[1] & 3) == 3) r = -r;
	1776	t = bigmod(n, a); freebn(n); n = a; a = t;
	1777	}
	1778
	1779	/* Tidy up: we're done. */
	1780	done:
	1781	freebn(a); freebn(n);
	1782	return r;
	1783	}
	1784
	1785	/*
	1786	* Modular square root. We must have p prime: extracting square roots modulo
	1787	* composites is equivalent to factoring (but we don't check: you'll just get
	1788	* the wrong answer). Returns NULL if x is not a quadratic residue mod p.
	1789	*/
	1790	Bignum modsqrt(Bignum x, Bignum p)
	1791	{
	1792	Bignum xinv, b, c, r, t, z, X, mone;
	1793	unsigned i, j, s;
	1794
	1795	/* If x is not a quadratic residue then we will not go to space today. */
	1796	if (kronecker(x, p) != +1) return NULL;
	1797
	1798	/* We need a quadratic nonresidue from somewhere. Exactly half of all
	1799	* units mod p are quadratic residues, but no efficient deterministic
	1800	* algorithm for finding one is known. So pick at random: we don't
	1801	* expect this to take long.
	1802	*/
	1803	z = newbn(p[0]);
	1804	do {
	1805	for (i = 1; i <= p[0]; i++) z[i] = rand();
	1806	z[0] = p[0]; bn_restore_invariant(z);
	1807	} while (kronecker(z, p) != -1);
	1808	b = bigmod(z, p); freebn(z);
	1809
	1810	/* We need to compute a few things before we really get started. */
	1811	xinv = modinv(x, p); /* x^{-1} mod p */
	1812	mone = bigsub(p, One); /* p - 1 == -1 (mod p) */
	1813	t = extract_p2(mone, &s); /* 2^s t = p - 1 */
	1814	c = modpow(b, t, p); /* b^t (mod p) */
	1815	z = bigadd(t, One); freebn(t); t = z; /* (t + 1) */
	1816	shift_right(t + 1, t[0], 1); if (!t[t[0]]) t[0]--;
	1817	r = modpow(x, t, p); /* x^{(t+1)/2} (mod p) */
	1818	freebn(b); freebn(mone); freebn(t);
	1819
	1820	/* OK, so how does this work anyway?
	1821	*
	1822	* We know that x^t is somewhere in the order-2^s subgroup of GF(p)^*;
	1823	* and g = c^{-1} is a generator for this subgroup (since we know that
	1824	* g^{2^{s-1}} = b^{(p-1)/2} = (b\|p) = -1); so x^t = g^m for some m. In
	1825	* fact, we know that m is even because x is a square. Suppose we can
	1826	* determine m; then we know that x^t/g^m = 1, so x^{t+1}/c^m = x -- but
	1827	* both t + 1 and m are even, so x^{(t+1)/2}/g^{m/2} is a square root of
	1828	* x.
	1829	*
	1830	* Conveniently, finding the discrete log of an element X in a group of
	1831	* order 2^s is easy. Write X = g^m = g^{m_0+2k'}; then X^{2^{s-1}} =
	1832	* g^{m_0 2^{s-1}} c^{m' 2^s} = g^{m_0 2^{s-1}} is either -1 or +1,
	1833	* telling us that m_0 is 1 or 0 respectively. Then X/g^{m_0} =
	1834	* (g^2)^{m'} has order 2^{s-1} so we can continue inductively. What we
	1835	* end up with at the end is X/g^m.
	1836	*
	1837	* There are a few wrinkles. As we proceed through the induction, the
	1838	* generator for the subgroup will be c^{-2}, since we know that m is
	1839	* even. While we want the discrete log of X = x^t, we're actually going
	1840	* to keep track of r, which will eventually be x^{(t+1)/2}/g^{m/2} =
	1841	* x^{(t+1)/2} c^m, recovering X/g^m = r^2/x as we go. We don't actually
	1842	* form the discrete log explicitly, because the final result will
	1843	* actually be the square root we want.
	1844	*/
	1845	for (i = 1; i < s; i++) {
	1846
	1847	/* Determine X. We could optimize this, only recomputing it when
	1848	* it's been invalidated, but that's fiddlier and this isn't
	1849	* performance critical.
	1850	*/
	1851	z = modmul(r, r, p);
	1852	X = modmul(z, xinv, p);
	1853	freebn(z);
	1854
	1855	/* Determine X^{2^{s-1-i}}. */
	1856	for (j = i + 1; j < s; j++)
	1857	z = modmul(X, X, p), freebn(X), X = z;
	1858
	1859	/* Maybe accumulate a factor of c. */
	1860	if (bignum_cmp(X, One) != 0)
	1861	z = modmul(r, c, p), freebn(r), r = z;
	1862
	1863	/* Move on to the next smaller subgroup. */
	1864	z = modmul(c, c, p), freebn(c), c = z;
	1865	freebn(X);
	1866	}
	1867
	1868	/* Of course, there are two square roots of x. For predictability's sake
	1869	* we'll always return the one in [1..(p - 1)/2]. The other is, of
	1870	* course, p - r.
	1871	*/
	1872	z = bigsub(p, r);
	1873	if (bignum_cmp(r, z) < 0)
	1874	freebn(z);
	1875	else {
	1876	freebn(r);
	1877	r = z;
	1878	}
	1879
	1880	/* We're done. */
	1881	freebn(xinv); freebn(c);
	1882	return r;
	1883	}
	1884
	1885	/*
	1886	* Render a bignum into decimal. Return a malloced string holding
	1887	* the decimal representation.
	1888	*/
	1889	char *bignum_decimal(Bignum x)
	1890	{
	1891	int ndigits, ndigit;
	1892	int i, iszero;
	1893	BignumDblInt carry;
	1894	char *ret;
	1895	BignumInt *workspace;
	1896
	1897	/*
	1898	* First, estimate the number of digits. Since log(10)/log(2)
	1899	* is just greater than 93/28 (the joys of continued fraction
	1900	* approximations...) we know that for every 93 bits, we need
	1901	* at most 28 digits. This will tell us how much to malloc.
	1902	*
	1903	* Formally: if x has i bits, that means x is strictly less
	1904	* than 2^i. Since 2 is less than 10^(28/93), this is less than
	1905	* 10^(28i/93). We need an integer power of ten, so we must
	1906	* round up (rounding down might make it less than x again).
	1907	* Therefore if we multiply the bit count by 28/93, rounding
	1908	* up, we will have enough digits.
	1909	*
	1910	* i=0 (i.e., x=0) is an irritating special case.
	1911	*/
	1912	i = bignum_bitcount(x);
	1913	if (!i)
	1914	ndigits = 1; /* x = 0 */
	1915	else
	1916	ndigits = (28 * i + 92) / 93; /* multiply by 28/93 and round up */
	1917	ndigits++; /* allow for trailing \0 */
	1918	ret = snewn(ndigits, char);
	1919
	1920	/*
	1921	* Now allocate some workspace to hold the binary form as we
	1922	* repeatedly divide it by ten. Initialise this to the
	1923	* big-endian form of the number.
	1924	*/
	1925	workspace = snewn(x[0], BignumInt);
	1926	for (i = 0; i < (int)x[0]; i++)
	1927	workspace[i] = x[x[0] - i];
	1928
	1929	/*
	1930	* Next, write the decimal number starting with the last digit.
	1931	* We use ordinary short division, dividing 10 into the
	1932	* workspace.
	1933	*/
	1934	ndigit = ndigits - 1;
	1935	ret[ndigit] = '\0';
	1936	do {
	1937	iszero = 1;
	1938	carry = 0;
	1939	for (i = 0; i < (int)x[0]; i++) {
	1940	carry = (carry << BIGNUM_INT_BITS) + workspace[i];
	1941	workspace[i] = (BignumInt) (carry / 10);
	1942	if (workspace[i])
	1943	iszero = 0;
	1944	carry %= 10;
	1945	}
	1946	ret[--ndigit] = (char) (carry + '0');
	1947	} while (!iszero);
	1948
	1949	/*
	1950	* There's a chance we've fallen short of the start of the
	1951	* string. Correct if so.
	1952	*/
	1953	if (ndigit > 0)
	1954	memmove(ret, ret + ndigit, ndigits - ndigit);
	1955
	1956	/*
	1957	* Done.
	1958	*/
	1959	sfree(workspace);
	1960	return ret;
	1961	}
	1962
	1963	#ifdef TESTBN
	1964
	1965	#include <stdio.h>
	1966	#include <stdlib.h>
	1967	#include <ctype.h>
	1968
	1969	/*
	1970	* gcc -Wall -g -O0 -DTESTBN -o testbn sshbn.c misc.c conf.c tree234.c unix/uxmisc.c -I. -I unix -I charset
	1971	*
	1972	* Then feed to this program's standard input the output of
	1973	* testdata/bignum.py .
	1974	*/
	1975
	1976	void modalfatalbox(char *p, ...)
	1977	{
	1978	va_list ap;
	1979	fprintf(stderr, "FATAL ERROR: ");
	1980	va_start(ap, p);
	1981	vfprintf(stderr, p, ap);
	1982	va_end(ap);
	1983	fputc('\n', stderr);
	1984	exit(1);
	1985	}
	1986
	1987	#define fromxdigit(c) ( (c)>'9' ? ((c)&0xDF) - 'A' + 10 : (c) - '0' )
	1988
	1989	int main(int argc, char **argv)
	1990	{
	1991	char *buf;
	1992	int line = 0;
	1993	int passes = 0, fails = 0;
	1994
	1995	while ((buf = fgetline(stdin)) != NULL) {
	1996	int maxlen = strlen(buf);
	1997	unsigned char *data = snewn(maxlen, unsigned char);
	1998	unsigned char ptrs[5], q;
	1999	int ptrnum;
	2000	char *bufp = buf;
	2001
	2002	line++;
	2003
	2004	q = data;
	2005	ptrnum = 0;
	2006
	2007	while (bufp && !isspace((unsigned char)bufp))
	2008	bufp++;
	2009	if (bufp)
	2010	*bufp++ = '\0';
	2011
	2012	while (*bufp) {
	2013	char start, end;
	2014	int i;
	2015
	2016	while (bufp && !isxdigit((unsigned char)bufp))
	2017	bufp++;
	2018	start = bufp;
	2019
	2020	if (!*bufp)
	2021	break;
	2022
	2023	while (bufp && isxdigit((unsigned char)bufp))
	2024	bufp++;
	2025	end = bufp;
	2026
	2027	if (ptrnum >= lenof(ptrs))
	2028	break;
	2029	ptrs[ptrnum++] = q;
	2030
	2031	for (i = -((end - start) & 1); i < end-start; i += 2) {
	2032	unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));
	2033	val = val * 16 + fromxdigit(start[i+1]);
	2034	*q++ = val;
	2035	}
	2036
	2037	ptrs[ptrnum] = q;
	2038	}
	2039
	2040	if (!strcmp(buf, "mul")) {
	2041	Bignum a, b, c, p;
	2042
	2043	if (ptrnum != 3) {
	2044	printf("%d: mul with %d parameters, expected 3\n", line, ptrnum);
	2045	exit(1);
	2046	}
	2047	a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	2048	b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	2049	c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	2050	p = bigmul(a, b);
	2051
	2052	if (bignum_cmp(c, p) == 0) {
	2053	passes++;
	2054	} else {
	2055	char *as = bignum_decimal(a);
	2056	char *bs = bignum_decimal(b);
	2057	char *cs = bignum_decimal(c);
	2058	char *ps = bignum_decimal(p);
	2059
	2060	printf("%d: fail: %s * %s gave %s expected %s\n",
	2061	line, as, bs, ps, cs);
	2062	fails++;
	2063
	2064	sfree(as);
	2065	sfree(bs);
	2066	sfree(cs);
	2067	sfree(ps);
	2068	}
	2069	freebn(a);
	2070	freebn(b);
	2071	freebn(c);
	2072	freebn(p);
	2073	} else if (!strcmp(buf, "pow")) {
	2074	Bignum base, expt, modulus, expected, answer;
	2075
	2076	if (ptrnum != 4) {
	2077	printf("%d: mul with %d parameters, expected 4\n", line, ptrnum);
	2078	exit(1);
	2079	}
	2080
	2081	base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	2082	expt = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	2083	modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	2084	expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
	2085	answer = modpow(base, expt, modulus);
	2086
	2087	if (bignum_cmp(expected, answer) == 0) {
	2088	passes++;
	2089	} else {
	2090	char *as = bignum_decimal(base);
	2091	char *bs = bignum_decimal(expt);
	2092	char *cs = bignum_decimal(modulus);
	2093	char *ds = bignum_decimal(answer);
	2094	char *ps = bignum_decimal(expected);
	2095
	2096	printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n",
	2097	line, as, bs, cs, ds, ps);
	2098	fails++;
	2099
	2100	sfree(as);
	2101	sfree(bs);
	2102	sfree(cs);
	2103	sfree(ds);
	2104	sfree(ps);
	2105	}
	2106	freebn(base);
	2107	freebn(expt);
	2108	freebn(modulus);
	2109	freebn(expected);
	2110	freebn(answer);
	2111	} else if (!strcmp(buf, "modsqrt")) {
	2112	Bignum x, p, expected, answer;
	2113
	2114	if (ptrnum != 3) {
	2115	printf("%d: modsqrt with %d parameters, expected 3\n", line, ptrnum);
	2116	exit(1);
	2117	}
	2118
	2119	x = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	2120	p = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	2121	expected = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	2122	answer = modsqrt(x, p);
	2123	if (!answer)
	2124	answer = copybn(Zero);
	2125
	2126	if (bignum_cmp(expected, answer) == 0) {
	2127	passes++;
	2128	} else {
	2129	char *xs = bignum_decimal(x);
	2130	char *ps = bignum_decimal(p);
	2131	char *qs = bignum_decimal(answer);
	2132	char *ws = bignum_decimal(expected);
	2133
	2134	printf("%d: fail: sqrt(%s) mod %s gave %s expected %s\n",
	2135	line, xs, ps, qs, ws);
	2136	fails++;
	2137
	2138	sfree(xs);
	2139	sfree(ps);
	2140	sfree(qs);
	2141	sfree(ws);
	2142	}
	2143	freebn(p);
	2144	freebn(x);
	2145	freebn(expected);
	2146	freebn(answer);
	2147	} else {
	2148	printf("%d: unrecognised test keyword: '%s'\n", line, buf);
	2149	exit(1);
	2150	}
	2151
	2152	sfree(buf);
	2153	sfree(data);
	2154	}
	2155
	2156	printf("passed %d failed %d total %d\n", passes, fails, passes+fails);
	2157	return fails != 0;
	2158	}
	2159
	2160	#endif