git.distorted.org.uk Git - u/mdw/putty/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Bignum routines for RSA and DH and stuff.
	3	*/
	4
	5	#include <stdio.h>
	6	#include <assert.h>
	7	#include <stdlib.h>
	8	#include <string.h>
	9
	10	#include "misc.h"
	11
	12	/*
	13	* Usage notes:
	14	* * Do not call the DIVMOD_WORD macro with expressions such as array
	15	* subscripts, as some implementations object to this (see below).
	16	* * Note that none of the division methods below will cope if the
	17	* quotient won't fit into BIGNUM_INT_BITS. Callers should be careful
	18	* to avoid this case.
	19	* If this condition occurs, in the case of the x86 DIV instruction,
	20	* an overflow exception will occur, which (according to a correspondent)
	21	* will manifest on Windows as something like
	22	* 0xC0000095: Integer overflow
	23	* The C variant won't give the right answer, either.
	24	*/
	25
	26	#if defined __GNUC__ && defined __i386__
	27	typedef unsigned long BignumInt;
	28	typedef unsigned long long BignumDblInt;
	29	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	30	#define BIGNUM_TOP_BIT 0x80000000UL
	31	#define BIGNUM_INT_BITS 32
	32	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	33	#define DIVMOD_WORD(q, r, hi, lo, w) \
	34	__asm__("div %2" : \
	35	"=d" (r), "=a" (q) : \
	36	"r" (w), "d" (hi), "a" (lo))
	37	#elif defined _MSC_VER && defined _M_IX86
	38	typedef unsigned __int32 BignumInt;
	39	typedef unsigned __int64 BignumDblInt;
	40	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	41	#define BIGNUM_TOP_BIT 0x80000000UL
	42	#define BIGNUM_INT_BITS 32
	43	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	44	/* Note: MASM interprets array subscripts in the macro arguments as
	45	* assembler syntax, which gives the wrong answer. Don't supply them.
	46	* <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */
	47	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	48	__asm mov edx, hi \
	49	__asm mov eax, lo \
	50	__asm div w \
	51	__asm mov r, edx \
	52	__asm mov q, eax \
	53	} while(0)
	54	#elif defined _LP64
	55	/* 64-bit architectures can do 32x32->64 chunks at a time */
	56	typedef unsigned int BignumInt;
	57	typedef unsigned long BignumDblInt;
	58	#define BIGNUM_INT_MASK 0xFFFFFFFFU
	59	#define BIGNUM_TOP_BIT 0x80000000U
	60	#define BIGNUM_INT_BITS 32
	61	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	62	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	63	BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) \| lo; \
	64	q = n / w; \
	65	r = n % w; \
	66	} while (0)
	67	#elif defined _LLP64
	68	/* 64-bit architectures in which unsigned long is 32 bits, not 64 */
	69	typedef unsigned long BignumInt;
	70	typedef unsigned long long BignumDblInt;
	71	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	72	#define BIGNUM_TOP_BIT 0x80000000UL
	73	#define BIGNUM_INT_BITS 32
	74	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	75	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	76	BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) \| lo; \
	77	q = n / w; \
	78	r = n % w; \
	79	} while (0)
	80	#else
	81	/* Fallback for all other cases */
	82	typedef unsigned short BignumInt;
	83	typedef unsigned long BignumDblInt;
	84	#define BIGNUM_INT_MASK 0xFFFFU
	85	#define BIGNUM_TOP_BIT 0x8000U
	86	#define BIGNUM_INT_BITS 16
	87	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	88	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	89	BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) \| lo; \
	90	q = n / w; \
	91	r = n % w; \
	92	} while (0)
	93	#endif
	94
	95	#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
	96
	97	#define BIGNUM_INTERNAL
	98	typedef BignumInt *Bignum;
	99
	100	#include "ssh.h"
	101
	102	BignumInt bnZero[1] = { 0 };
	103	BignumInt bnOne[2] = { 1, 1 };
	104
	105	/*
	106	* The Bignum format is an array of `BignumInt'. The first
	107	* element of the array counts the remaining elements. The
	108	* remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_
	109	* significant digit first. (So it's trivial to extract the bit
	110	* with value 2^n for any n.)
	111	*
	112	* All Bignums in this module are positive. Negative numbers must
	113	* be dealt with outside it.
	114	*
	115	* INVARIANT: the most significant word of any Bignum must be
	116	* nonzero.
	117	*/
	118
	119	Bignum Zero = bnZero, One = bnOne;
	120
	121	static Bignum newbn(int length)
	122	{
	123	Bignum b = snewn(length + 1, BignumInt);
	124	if (!b)
	125	abort(); /* FIXME */
	126	memset(b, 0, (length + 1) * sizeof(*b));
	127	b[0] = length;
	128	return b;
	129	}
	130
	131	void bn_restore_invariant(Bignum b)
	132	{
	133	while (b[0] > 1 && b[b[0]] == 0)
	134	b[0]--;
	135	}
	136
	137	Bignum copybn(Bignum orig)
	138	{
	139	Bignum b = snewn(orig[0] + 1, BignumInt);
	140	if (!b)
	141	abort(); /* FIXME */
	142	memcpy(b, orig, (orig[0] + 1) * sizeof(*b));
	143	return b;
	144	}
	145
	146	void freebn(Bignum b)
	147	{
	148	/*
	149	* Burn the evidence, just in case.
	150	*/
	151	memset(b, 0, sizeof(b[0]) * (b[0] + 1));
	152	sfree(b);
	153	}
	154
	155	Bignum bn_power_2(int n)
	156	{
	157	Bignum ret = newbn(n / BIGNUM_INT_BITS + 1);
	158	bignum_set_bit(ret, n, 1);
	159	return ret;
	160	}
	161
	162	/*
	163	* Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
	164	* big-endian arrays of 'len' BignumInts. Returns a BignumInt carried
	165	* off the top.
	166	*/
	167	static BignumInt internal_add(const BignumInt a, const BignumInt b,
	168	BignumInt *c, int len)
	169	{
	170	int i;
	171	BignumDblInt carry = 0;
	172
	173	for (i = len-1; i >= 0; i--) {
	174	carry += (BignumDblInt)a[i] + b[i];
	175	c[i] = (BignumInt)carry;
	176	carry >>= BIGNUM_INT_BITS;
	177	}
	178
	179	return (BignumInt)carry;
	180	}
	181
	182	/*
	183	* Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
	184	* all big-endian arrays of 'len' BignumInts. Any borrow from the top
	185	* is ignored.
	186	*/
	187	static void internal_sub(const BignumInt a, const BignumInt b,
	188	BignumInt *c, int len)
	189	{
	190	int i;
	191	BignumDblInt carry = 1;
	192
	193	for (i = len-1; i >= 0; i--) {
	194	carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
	195	c[i] = (BignumInt)carry;
	196	carry >>= BIGNUM_INT_BITS;
	197	}
	198	}
	199
	200	/*
	201	* Compute c = a * b.
	202	* Input is in the first len words of a and b.
	203	* Result is returned in the first 2*len words of c.
	204	*/
	205	#define KARATSUBA_THRESHOLD 50
	206	static void internal_mul(const BignumInt a, const BignumInt b,
	207	BignumInt *c, int len)
	208	{
	209	int i, j;
	210	BignumDblInt t;
	211
	212	if (len > KARATSUBA_THRESHOLD) {
	213
	214	/*
	215	* Karatsuba divide-and-conquer algorithm. Cut each input in
	216	* half, so that it's expressed as two big 'digits' in a giant
	217	* base D:
	218	*
	219	* a = a_1 D + a_0
	220	* b = b_1 D + b_0
	221	*
	222	* Then the product is of course
	223	*
	224	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	225	*
	226	* and we compute the three coefficients by recursively
	227	* calling ourself to do half-length multiplications.
	228	*
	229	* The clever bit that makes this worth doing is that we only
	230	* need _one_ half-length multiplication for the central
	231	* coefficient rather than the two that it obviouly looks
	232	* like, because we can use a single multiplication to compute
	233	*
	234	* (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
	235	*
	236	* and then we subtract the other two coefficients (a_1 b_1
	237	* and a_0 b_0) which we were computing anyway.
	238	*
	239	* Hence we get to multiply two numbers of length N in about
	240	* three times as much work as it takes to multiply numbers of
	241	* length N/2, which is obviously better than the four times
	242	* as much work it would take if we just did a long
	243	* conventional multiply.
	244	*/
	245
	246	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	247	int midlen = botlen + 1;
	248	BignumInt *scratch;
	249	BignumDblInt carry;
	250
	251	/*
	252	* The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping
	253	* in the output array, so we can compute them immediately in
	254	* place.
	255	*/
	256
	257	/* a_1 b_1 */
	258	internal_mul(a, b, c, toplen);
	259
	260	/* a_0 b_0 */
	261	internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen);
	262
	263	/*
	264	* We must allocate scratch space for the central coefficient,
	265	* and also for the two input values that we multiply when
	266	* computing it. Since either or both may carry into the
	267	* (botlen+1)th word, we must use a slightly longer length
	268	* 'midlen'.
	269	*/
	270	scratch = snewn(4 * midlen, BignumInt);
	271
	272	/* Zero padding. midlen exceeds toplen by at most 2, so just
	273	* zero the first two words of each input and the rest will be
	274	* copied over. */
	275	scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;
	276
	277	for (j = 0; j < toplen; j++) {
	278	scratch[midlen - toplen + j] = a[j]; /* a_1 */
	279	scratch[2midlen - toplen + j] = b[j]; / b_1 */
	280	}
	281
	282	/* compute a_1 + a_0 */
	283	scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen);
	284	/* compute b_1 + b_0 */
	285	scratch[midlen] = internal_add(scratch+midlen+1, b+toplen,
	286	scratch+midlen+1, botlen);
	287
	288	/*
	289	* Now we can do the third multiplication.
	290	*/
	291	internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen);
	292
	293	/*
	294	* Now we can reuse the first half of 'scratch' to compute the
	295	* sum of the outer two coefficients, to subtract from that
	296	* product to obtain the middle one.
	297	*/
	298	scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;
	299	for (j = 0; j < 2*toplen; j++)
	300	scratch[2midlen - 2toplen + j] = c[j];
	301	scratch[1] = internal_add(scratch+2, c + 2*toplen,
	302	scratch+2, 2*botlen);
	303
	304	internal_sub(scratch + 2*midlen, scratch,
	305	scratch + 2midlen, 2midlen);
	306
	307	/*
	308	* And now all we need to do is to add that middle coefficient
	309	* back into the output. We may have to propagate a carry
	310	* further up the output, but we can be sure it won't
	311	* propagate right the way off the top.
	312	*/
	313	carry = internal_add(c + 2len - botlen - 2midlen,
	314	scratch + 2*midlen,
	315	c + 2len - botlen - 2midlen, 2*midlen);
	316	j = 2len - botlen - 2midlen - 1;
	317	while (carry) {
	318	assert(j >= 0);
	319	carry += c[j];
	320	c[j] = (BignumInt)carry;
	321	carry >>= BIGNUM_INT_BITS;
	322	}
	323
	324	/* Free scratch. */
	325	for (j = 0; j < 4 * midlen; j++)
	326	scratch[j] = 0;
	327	sfree(scratch);
	328
	329	} else {
	330
	331	/*
	332	* Multiply in the ordinary O(N^2) way.
	333	*/
	334
	335	for (j = 0; j < 2 * len; j++)
	336	c[j] = 0;
	337
	338	for (i = len - 1; i >= 0; i--) {
	339	t = 0;
	340	for (j = len - 1; j >= 0; j--) {
	341	t += MUL_WORD(a[i], (BignumDblInt) b[j]);
	342	t += (BignumDblInt) c[i + j + 1];
	343	c[i + j + 1] = (BignumInt) t;
	344	t = t >> BIGNUM_INT_BITS;
	345	}
	346	c[i] = (BignumInt) t;
	347	}
	348	}
	349	}
	350
	351	/*
	352	* Variant form of internal_mul used for the initial step of
	353	* Montgomery reduction. Only bothers outputting 'len' words
	354	* (everything above that is thrown away).
	355	*/
	356	static void internal_mul_low(const BignumInt a, const BignumInt b,
	357	BignumInt *c, int len)
	358	{
	359	int i, j;
	360	BignumDblInt t;
	361
	362	if (len > KARATSUBA_THRESHOLD) {
	363
	364	/*
	365	* Karatsuba-aware version of internal_mul_low. As before, we
	366	* express each input value as a shifted combination of two
	367	* halves:
	368	*
	369	* a = a_1 D + a_0
	370	* b = b_1 D + b_0
	371	*
	372	* Then the full product is, as before,
	373	*
	374	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	375	*
	376	* Provided we choose D on the large side (so that a_0 and b_0
	377	* are _at least_ as long as a_1 and b_1), we don't need the
	378	* topmost term at all, and we only need half of the middle
	379	* term. So there's no point in doing the proper Karatsuba
	380	* optimisation which computes the middle term using the top
	381	* one, because we'd take as long computing the top one as
	382	* just computing the middle one directly.
	383	*
	384	* So instead, we do a much more obvious thing: we call the
	385	* fully optimised internal_mul to compute a_0 b_0, and we
	386	* recursively call ourself to compute the _bottom halves_ of
	387	* a_1 b_0 and a_0 b_1, each of which we add into the result
	388	* in the obvious way.
	389	*
	390	* In other words, there's no actual Karatsuba _optimisation_
	391	* in this function; the only benefit in doing it this way is
	392	* that we call internal_mul proper for a large part of the
	393	* work, and _that_ can optimise its operation.
	394	*/
	395
	396	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	397	BignumInt *scratch;
	398
	399	/*
	400	* Allocate scratch space for the various bits and pieces
	401	* we're going to be adding together. We need botlen*2 words
	402	* for a_0 b_0 (though we may end up throwing away its topmost
	403	* word), and toplen words for each of a_1 b_0 and a_0 b_1.
	404	* That adds up to exactly 2*len.
	405	*/
	406	scratch = snewn(len*2, BignumInt);
	407
	408	/* a_0 b_0 */
	409	internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen);
	410
	411	/* a_1 b_0 */
	412	internal_mul_low(a, b + len - toplen, scratch + toplen, toplen);
	413
	414	/* a_0 b_1 */
	415	internal_mul_low(a + len - toplen, b, scratch, toplen);
	416
	417	/* Copy the bottom half of the big coefficient into place */
	418	for (j = 0; j < botlen; j++)
	419	c[toplen + j] = scratch[2*toplen + botlen + j];
	420
	421	/* Add the two small coefficients, throwing away the returned carry */
	422	internal_add(scratch, scratch + toplen, scratch, toplen);
	423
	424	/* And add that to the large coefficient, leaving the result in c. */
	425	internal_add(scratch, scratch + 2*toplen + botlen - toplen,
	426	c, toplen);
	427
	428	/* Free scratch. */
	429	for (j = 0; j < len*2; j++)
	430	scratch[j] = 0;
	431	sfree(scratch);
	432
	433	} else {
	434
	435	for (j = 0; j < len; j++)
	436	c[j] = 0;
	437
	438	for (i = len - 1; i >= 0; i--) {
	439	t = 0;
	440	for (j = len - 1; j >= len - i - 1; j--) {
	441	t += MUL_WORD(a[i], (BignumDblInt) b[j]);
	442	t += (BignumDblInt) c[i + j + 1 - len];
	443	c[i + j + 1 - len] = (BignumInt) t;
	444	t = t >> BIGNUM_INT_BITS;
	445	}
	446	}
	447
	448	}
	449	}
	450
	451	/*
	452	* Montgomery reduction. Expects x to be a big-endian array of 2*len
	453	* BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
	454	* BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
	455	* a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
	456	* x' < n.
	457	*
	458	* 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts
	459	* each, containing respectively n and the multiplicative inverse of
	460	* -n mod r.
	461	*
	462	* 'tmp' is an array of at least '3*len' BignumInts used as scratch
	463	* space.
	464	*/
	465	static void monty_reduce(BignumInt x, const BignumInt n,
	466	const BignumInt mninv, BignumInt tmp, int len)
	467	{
	468	int i;
	469	BignumInt carry;
	470
	471	/*
	472	* Multiply x by (-n)^{-1} mod r. This gives us a value m such
	473	* that mn is congruent to -x mod r. Hence, mn+x is an exact
	474	* multiple of r, and is also (obviously) congruent to x mod n.
	475	*/
	476	internal_mul_low(x + len, mninv, tmp, len);
	477
	478	/*
	479	* Compute t = (mn+x)/r in ordinary, non-modular, integer
	480	* arithmetic. By construction this is exact, and is congruent mod
	481	* n to x * r^{-1}, i.e. the answer we want.
	482	*
	483	* The following multiply leaves that answer in the _most_
	484	* significant half of the 'x' array, so then we must shift it
	485	* down.
	486	*/
	487	internal_mul(tmp, n, tmp+len, len);
	488	carry = internal_add(x, tmp+len, x, 2*len);
	489	for (i = 0; i < len; i++)
	490	x[len + i] = x[i], x[i] = 0;
	491
	492	/*
	493	* Reduce t mod n. This doesn't require a full-on division by n,
	494	* but merely a test and single optional subtraction, since we can
	495	* show that 0 <= t < 2n.
	496	*
	497	* Proof:
	498	* + we computed m mod r, so 0 <= m < r.
	499	* + so 0 <= mn < rn, obviously
	500	* + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn
	501	* + yielding 0 <= (mn+x)/r < 2n as required.
	502	*/
	503	if (!carry) {
	504	for (i = 0; i < len; i++)
	505	if (x[len + i] != n[i])
	506	break;
	507	}
	508	if (carry \|\| i >= len \|\| x[len + i] > n[i])
	509	internal_sub(x+len, n, x+len, len);
	510	}
	511
	512	static void internal_add_shifted(BignumInt *number,
	513	unsigned n, int shift)
	514	{
	515	int word = 1 + (shift / BIGNUM_INT_BITS);
	516	int bshift = shift % BIGNUM_INT_BITS;
	517	BignumDblInt addend;
	518
	519	addend = (BignumDblInt)n << bshift;
	520
	521	while (addend) {
	522	addend += number[word];
	523	number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
	524	addend >>= BIGNUM_INT_BITS;
	525	word++;
	526	}
	527	}
	528
	529	/*
	530	* Compute a = a % m.
	531	* Input in first alen words of a and first mlen words of m.
	532	* Output in first alen words of a
	533	* (of which first alen-mlen words will be zero).
	534	* The MSW of m MUST have its high bit set.
	535	* Quotient is accumulated in the `quotient' array, which is a Bignum
	536	* rather than the internal bigendian format. Quotient parts are shifted
	537	* left by `qshift' before adding into quot.
	538	*/
	539	static void internal_mod(BignumInt *a, int alen,
	540	BignumInt *m, int mlen,
	541	BignumInt *quot, int qshift)
	542	{
	543	BignumInt m0, m1;
	544	unsigned int h;
	545	int i, k;
	546
	547	m0 = m[0];
	548	if (mlen > 1)
	549	m1 = m[1];
	550	else
	551	m1 = 0;
	552
	553	for (i = 0; i <= alen - mlen; i++) {
	554	BignumDblInt t;
	555	unsigned int q, r, c, ai1;
	556
	557	if (i == 0) {
	558	h = 0;
	559	} else {
	560	h = a[i - 1];
	561	a[i - 1] = 0;
	562	}
	563
	564	if (i == alen - 1)
	565	ai1 = 0;
	566	else
	567	ai1 = a[i + 1];
	568
	569	/* Find q = h:a[i] / m0 */
	570	if (h >= m0) {
	571	/*
	572	* Special case.
	573	*
	574	* To illustrate it, suppose a BignumInt is 8 bits, and
	575	* we are dividing (say) A1:23:45:67 by A1:B2:C3. Then
	576	* our initial division will be 0xA123 / 0xA1, which
	577	* will give a quotient of 0x100 and a divide overflow.
	578	* However, the invariants in this division algorithm
	579	* are not violated, since the full number A1:23:... is
	580	* _less_ than the quotient prefix A1:B2:... and so the
	581	* following correction loop would have sorted it out.
	582	*
	583	* In this situation we set q to be the largest
	584	* quotient we _can_ stomach (0xFF, of course).
	585	*/
	586	q = BIGNUM_INT_MASK;
	587	} else {
	588	/* Macro doesn't want an array subscript expression passed
	589	* into it (see definition), so use a temporary. */
	590	BignumInt tmplo = a[i];
	591	DIVMOD_WORD(q, r, h, tmplo, m0);
	592
	593	/* Refine our estimate of q by looking at
	594	h:a[i]:a[i+1] / m0:m1 */
	595	t = MUL_WORD(m1, q);
	596	if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
	597	q--;
	598	t -= m1;
	599	r = (r + m0) & BIGNUM_INT_MASK; /* overflow? */
	600	if (r >= (BignumDblInt) m0 &&
	601	t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
	602	}
	603	}
	604
	605	/* Subtract q * m from a[i...] */
	606	c = 0;
	607	for (k = mlen - 1; k >= 0; k--) {
	608	t = MUL_WORD(q, m[k]);
	609	t += c;
	610	c = (unsigned)(t >> BIGNUM_INT_BITS);
	611	if ((BignumInt) t > a[i + k])
	612	c++;
	613	a[i + k] -= (BignumInt) t;
	614	}
	615
	616	/* Add back m in case of borrow */
	617	if (c != h) {
	618	t = 0;
	619	for (k = mlen - 1; k >= 0; k--) {
	620	t += m[k];
	621	t += a[i + k];
	622	a[i + k] = (BignumInt) t;
	623	t = t >> BIGNUM_INT_BITS;
	624	}
	625	q--;
	626	}
	627	if (quot)
	628	internal_add_shifted(quot, q, qshift + BIGNUM_INT_BITS * (alen - mlen - i));
	629	}
	630	}
	631
	632	/*
	633	* Compute (base ^ exp) % mod. Uses the Montgomery multiplication
	634	* technique.
	635	*/
	636	Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
	637	{
	638	BignumInt a, b, x, n, mninv, tmp;
	639	int len, i, j;
	640	Bignum base, base2, r, rn, inv, result;
	641
	642	/*
	643	* The most significant word of mod needs to be non-zero. It
	644	* should already be, but let's make sure.
	645	*/
	646	assert(mod[mod[0]] != 0);
	647
	648	/*
	649	* Make sure the base is smaller than the modulus, by reducing
	650	* it modulo the modulus if not.
	651	*/
	652	base = bigmod(base_in, mod);
	653
	654	/*
	655	* mod had better be odd, or we can't do Montgomery multiplication
	656	* using a power of two at all.
	657	*/
	658	assert(mod[1] & 1);
	659
	660	/*
	661	* Compute the inverse of n mod r, for monty_reduce. (In fact we
	662	* want the inverse of _minus_ n mod r, but we'll sort that out
	663	* below.)
	664	*/
	665	len = mod[0];
	666	r = bn_power_2(BIGNUM_INT_BITS * len);
	667	inv = modinv(mod, r);
	668
	669	/*
	670	* Multiply the base by r mod n, to get it into Montgomery
	671	* representation.
	672	*/
	673	base2 = modmul(base, r, mod);
	674	freebn(base);
	675	base = base2;
	676
	677	rn = bigmod(r, mod); /* r mod n, i.e. Montgomerified 1 */
	678
	679	freebn(r); /* won't need this any more */
	680
	681	/*
	682	* Set up internal arrays of the right lengths, in big-endian
	683	* format, containing the base, the modulus, and the modulus's
	684	* inverse.
	685	*/
	686	n = snewn(len, BignumInt);
	687	for (j = 0; j < len; j++)
	688	n[len - 1 - j] = mod[j + 1];
	689
	690	mninv = snewn(len, BignumInt);
	691	for (j = 0; j < len; j++)
	692	mninv[len - 1 - j] = (j < inv[0] ? inv[j + 1] : 0);
	693	freebn(inv); /* we don't need this copy of it any more */
	694	/* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
	695	x = snewn(len, BignumInt);
	696	for (j = 0; j < len; j++)
	697	x[j] = 0;
	698	internal_sub(x, mninv, mninv, len);
	699
	700	/* x = snewn(len, BignumInt); / / already done above */
	701	for (j = 0; j < len; j++)
	702	x[len - 1 - j] = (j < base[0] ? base[j + 1] : 0);
	703	freebn(base); /* we don't need this copy of it any more */
	704
	705	a = snewn(2*len, BignumInt);
	706	b = snewn(2*len, BignumInt);
	707	for (j = 0; j < len; j++)
	708	a[2*len - 1 - j] = (j < rn[0] ? rn[j + 1] : 0);
	709	freebn(rn);
	710
	711	tmp = snewn(3*len, BignumInt);
	712
	713	/* Skip leading zero bits of exp. */
	714	i = 0;
	715	j = BIGNUM_INT_BITS-1;
	716	while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
	717	j--;
	718	if (j < 0) {
	719	i++;
	720	j = BIGNUM_INT_BITS-1;
	721	}
	722	}
	723
	724	/* Main computation */
	725	while (i < (int)exp[0]) {
	726	while (j >= 0) {
	727	internal_mul(a + len, a + len, b, len);
	728	monty_reduce(b, n, mninv, tmp, len);
	729	if ((exp[exp[0] - i] & (1 << j)) != 0) {
	730	internal_mul(b + len, x, a, len);
	731	monty_reduce(a, n, mninv, tmp, len);
	732	} else {
	733	BignumInt *t;
	734	t = a;
	735	a = b;
	736	b = t;
	737	}
	738	j--;
	739	}
	740	i++;
	741	j = BIGNUM_INT_BITS-1;
	742	}
	743
	744	/*
	745	* Final monty_reduce to get back from the adjusted Montgomery
	746	* representation.
	747	*/
	748	monty_reduce(a, n, mninv, tmp, len);
	749
	750	/* Copy result to buffer */
	751	result = newbn(mod[0]);
	752	for (i = 0; i < len; i++)
	753	result[result[0] - i] = a[i + len];
	754	while (result[0] > 1 && result[result[0]] == 0)
	755	result[0]--;
	756
	757	/* Free temporary arrays */
	758	for (i = 0; i < 3 * len; i++)
	759	tmp[i] = 0;
	760	sfree(tmp);
	761	for (i = 0; i < 2 * len; i++)
	762	a[i] = 0;
	763	sfree(a);
	764	for (i = 0; i < 2 * len; i++)
	765	b[i] = 0;
	766	sfree(b);
	767	for (i = 0; i < len; i++)
	768	mninv[i] = 0;
	769	sfree(mninv);
	770	for (i = 0; i < len; i++)
	771	n[i] = 0;
	772	sfree(n);
	773	for (i = 0; i < len; i++)
	774	x[i] = 0;
	775	sfree(x);
	776
	777	return result;
	778	}
	779
	780	/*
	781	* Compute (p * q) % mod.
	782	* The most significant word of mod MUST be non-zero.
	783	* We assume that the result array is the same size as the mod array.
	784	*/
	785	Bignum modmul(Bignum p, Bignum q, Bignum mod)
	786	{
	787	BignumInt a, n, m, o;
	788	int mshift;
	789	int pqlen, mlen, rlen, i, j;
	790	Bignum result;
	791
	792	/* Allocate m of size mlen, copy mod to m */
	793	/* We use big endian internally */
	794	mlen = mod[0];
	795	m = snewn(mlen, BignumInt);
	796	for (j = 0; j < mlen; j++)
	797	m[j] = mod[mod[0] - j];
	798
	799	/* Shift m left to make msb bit set */
	800	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	801	if ((m[0] << mshift) & BIGNUM_TOP_BIT)
	802	break;
	803	if (mshift) {
	804	for (i = 0; i < mlen - 1; i++)
	805	m[i] = (m[i] << mshift) \| (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
	806	m[mlen - 1] = m[mlen - 1] << mshift;
	807	}
	808
	809	pqlen = (p[0] > q[0] ? p[0] : q[0]);
	810
	811	/* Allocate n of size pqlen, copy p to n */
	812	n = snewn(pqlen, BignumInt);
	813	i = pqlen - p[0];
	814	for (j = 0; j < i; j++)
	815	n[j] = 0;
	816	for (j = 0; j < (int)p[0]; j++)
	817	n[i + j] = p[p[0] - j];
	818
	819	/* Allocate o of size pqlen, copy q to o */
	820	o = snewn(pqlen, BignumInt);
	821	i = pqlen - q[0];
	822	for (j = 0; j < i; j++)
	823	o[j] = 0;
	824	for (j = 0; j < (int)q[0]; j++)
	825	o[i + j] = q[q[0] - j];
	826
	827	/* Allocate a of size 2pqlen for result /
	828	a = snewn(2 * pqlen, BignumInt);
	829
	830	/* Main computation */
	831	internal_mul(n, o, a, pqlen);
	832	internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
	833
	834	/* Fixup result in case the modulus was shifted */
	835	if (mshift) {
	836	for (i = 2 * pqlen - mlen - 1; i < 2 * pqlen - 1; i++)
	837	a[i] = (a[i] << mshift) \| (a[i + 1] >> (BIGNUM_INT_BITS - mshift));
	838	a[2 * pqlen - 1] = a[2 * pqlen - 1] << mshift;
	839	internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
	840	for (i = 2 * pqlen - 1; i >= 2 * pqlen - mlen; i--)
	841	a[i] = (a[i] >> mshift) \| (a[i - 1] << (BIGNUM_INT_BITS - mshift));
	842	}
	843
	844	/* Copy result to buffer */
	845	rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
	846	result = newbn(rlen);
	847	for (i = 0; i < rlen; i++)
	848	result[result[0] - i] = a[i + 2 * pqlen - rlen];
	849	while (result[0] > 1 && result[result[0]] == 0)
	850	result[0]--;
	851
	852	/* Free temporary arrays */
	853	for (i = 0; i < 2 * pqlen; i++)
	854	a[i] = 0;
	855	sfree(a);
	856	for (i = 0; i < mlen; i++)
	857	m[i] = 0;
	858	sfree(m);
	859	for (i = 0; i < pqlen; i++)
	860	n[i] = 0;
	861	sfree(n);
	862	for (i = 0; i < pqlen; i++)
	863	o[i] = 0;
	864	sfree(o);
	865
	866	return result;
	867	}
	868
	869	/*
	870	* Compute p % mod.
	871	* The most significant word of mod MUST be non-zero.
	872	* We assume that the result array is the same size as the mod array.
	873	* We optionally write out a quotient if `quotient' is non-NULL.
	874	* We can avoid writing out the result if `result' is NULL.
	875	*/
	876	static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)
	877	{
	878	BignumInt n, m;
	879	int mshift;
	880	int plen, mlen, i, j;
	881
	882	/* Allocate m of size mlen, copy mod to m */
	883	/* We use big endian internally */
	884	mlen = mod[0];
	885	m = snewn(mlen, BignumInt);
	886	for (j = 0; j < mlen; j++)
	887	m[j] = mod[mod[0] - j];
	888
	889	/* Shift m left to make msb bit set */
	890	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	891	if ((m[0] << mshift) & BIGNUM_TOP_BIT)
	892	break;
	893	if (mshift) {
	894	for (i = 0; i < mlen - 1; i++)
	895	m[i] = (m[i] << mshift) \| (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
	896	m[mlen - 1] = m[mlen - 1] << mshift;
	897	}
	898
	899	plen = p[0];
	900	/* Ensure plen > mlen */
	901	if (plen <= mlen)
	902	plen = mlen + 1;
	903
	904	/* Allocate n of size plen, copy p to n */
	905	n = snewn(plen, BignumInt);
	906	for (j = 0; j < plen; j++)
	907	n[j] = 0;
	908	for (j = 1; j <= (int)p[0]; j++)
	909	n[plen - j] = p[j];
	910
	911	/* Main computation */
	912	internal_mod(n, plen, m, mlen, quotient, mshift);
	913
	914	/* Fixup result in case the modulus was shifted */
	915	if (mshift) {
	916	for (i = plen - mlen - 1; i < plen - 1; i++)
	917	n[i] = (n[i] << mshift) \| (n[i + 1] >> (BIGNUM_INT_BITS - mshift));
	918	n[plen - 1] = n[plen - 1] << mshift;
	919	internal_mod(n, plen, m, mlen, quotient, 0);
	920	for (i = plen - 1; i >= plen - mlen; i--)
	921	n[i] = (n[i] >> mshift) \| (n[i - 1] << (BIGNUM_INT_BITS - mshift));
	922	}
	923
	924	/* Copy result to buffer */
	925	if (result) {
	926	for (i = 1; i <= (int)result[0]; i++) {
	927	int j = plen - i;
	928	result[i] = j >= 0 ? n[j] : 0;
	929	}
	930	}
	931
	932	/* Free temporary arrays */
	933	for (i = 0; i < mlen; i++)
	934	m[i] = 0;
	935	sfree(m);
	936	for (i = 0; i < plen; i++)
	937	n[i] = 0;
	938	sfree(n);
	939	}
	940
	941	/*
	942	* Decrement a number.
	943	*/
	944	void decbn(Bignum bn)
	945	{
	946	int i = 1;
	947	while (i < (int)bn[0] && bn[i] == 0)
	948	bn[i++] = BIGNUM_INT_MASK;
	949	bn[i]--;
	950	}
	951
	952	Bignum bignum_from_bytes(const unsigned char *data, int nbytes)
	953	{
	954	Bignum result;
	955	int w, i;
	956
	957	w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
	958
	959	result = newbn(w);
	960	for (i = 1; i <= w; i++)
	961	result[i] = 0;
	962	for (i = nbytes; i--;) {
	963	unsigned char byte = *data++;
	964	result[1 + i / BIGNUM_INT_BYTES] \|= byte << (8*i % BIGNUM_INT_BITS);
	965	}
	966
	967	while (result[0] > 1 && result[result[0]] == 0)
	968	result[0]--;
	969	return result;
	970	}
	971
	972	/*
	973	* Read an SSH-1-format bignum from a data buffer. Return the number
	974	* of bytes consumed, or -1 if there wasn't enough data.
	975	*/
	976	int ssh1_read_bignum(const unsigned char data, int len, Bignum result)
	977	{
	978	const unsigned char *p = data;
	979	int i;
	980	int w, b;
	981
	982	if (len < 2)
	983	return -1;
	984
	985	w = 0;
	986	for (i = 0; i < 2; i++)
	987	w = (w << 8) + *p++;
	988	b = (w + 7) / 8; /* bits -> bytes */
	989
	990	if (len < b+2)
	991	return -1;
	992
	993	if (!result) /* just return length */
	994	return b + 2;
	995
	996	*result = bignum_from_bytes(p, b);
	997
	998	return p + b - data;
	999	}
	1000
	1001	/*
	1002	* Return the bit count of a bignum, for SSH-1 encoding.
	1003	*/
	1004	int bignum_bitcount(Bignum bn)
	1005	{
	1006	int bitcount = bn[0] * BIGNUM_INT_BITS - 1;
	1007	while (bitcount >= 0
	1008	&& (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;
	1009	return bitcount + 1;
	1010	}
	1011
	1012	/*
	1013	* Return the byte length of a bignum when SSH-1 encoded.
	1014	*/
	1015	int ssh1_bignum_length(Bignum bn)
	1016	{
	1017	return 2 + (bignum_bitcount(bn) + 7) / 8;
	1018	}
	1019
	1020	/*
	1021	* Return the byte length of a bignum when SSH-2 encoded.
	1022	*/
	1023	int ssh2_bignum_length(Bignum bn)
	1024	{
	1025	return 4 + (bignum_bitcount(bn) + 8) / 8;
	1026	}
	1027
	1028	/*
	1029	* Return a byte from a bignum; 0 is least significant, etc.
	1030	*/
	1031	int bignum_byte(Bignum bn, int i)
	1032	{
	1033	if (i >= (int)(BIGNUM_INT_BYTES * bn[0]))
	1034	return 0; /* beyond the end */
	1035	else
	1036	return (bn[i / BIGNUM_INT_BYTES + 1] >>
	1037	((i % BIGNUM_INT_BYTES)*8)) & 0xFF;
	1038	}
	1039
	1040	/*
	1041	* Return a bit from a bignum; 0 is least significant, etc.
	1042	*/
	1043	int bignum_bit(Bignum bn, int i)
	1044	{
	1045	if (i >= (int)(BIGNUM_INT_BITS * bn[0]))
	1046	return 0; /* beyond the end */
	1047	else
	1048	return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;
	1049	}
	1050
	1051	/*
	1052	* Set a bit in a bignum; 0 is least significant, etc.
	1053	*/
	1054	void bignum_set_bit(Bignum bn, int bitnum, int value)
	1055	{
	1056	if (bitnum >= (int)(BIGNUM_INT_BITS * bn[0]))
	1057	abort(); /* beyond the end */
	1058	else {
	1059	int v = bitnum / BIGNUM_INT_BITS + 1;
	1060	int mask = 1 << (bitnum % BIGNUM_INT_BITS);
	1061	if (value)
	1062	bn[v] \|= mask;
	1063	else
	1064	bn[v] &= ~mask;
	1065	}
	1066	}
	1067
	1068	/*
	1069	* Write a SSH-1-format bignum into a buffer. It is assumed the
	1070	* buffer is big enough. Returns the number of bytes used.
	1071	*/
	1072	int ssh1_write_bignum(void *data, Bignum bn)
	1073	{
	1074	unsigned char *p = data;
	1075	int len = ssh1_bignum_length(bn);
	1076	int i;
	1077	int bitc = bignum_bitcount(bn);
	1078
	1079	*p++ = (bitc >> 8) & 0xFF;
	1080	*p++ = (bitc) & 0xFF;
	1081	for (i = len - 2; i--;)
	1082	*p++ = bignum_byte(bn, i);
	1083	return len;
	1084	}
	1085
	1086	/*
	1087	* Compare two bignums. Returns like strcmp.
	1088	*/
	1089	int bignum_cmp(Bignum a, Bignum b)
	1090	{
	1091	int amax = a[0], bmax = b[0];
	1092	int i = (amax > bmax ? amax : bmax);
	1093	while (i) {
	1094	BignumInt aval = (i > amax ? 0 : a[i]);
	1095	BignumInt bval = (i > bmax ? 0 : b[i]);
	1096	if (aval < bval)
	1097	return -1;
	1098	if (aval > bval)
	1099	return +1;
	1100	i--;
	1101	}
	1102	return 0;
	1103	}
	1104
	1105	/*
	1106	* Right-shift one bignum to form another.
	1107	*/
	1108	Bignum bignum_rshift(Bignum a, int shift)
	1109	{
	1110	Bignum ret;
	1111	int i, shiftw, shiftb, shiftbb, bits;
	1112	BignumInt ai, ai1;
	1113
	1114	bits = bignum_bitcount(a) - shift;
	1115	ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
	1116
	1117	if (ret) {
	1118	shiftw = shift / BIGNUM_INT_BITS;
	1119	shiftb = shift % BIGNUM_INT_BITS;
	1120	shiftbb = BIGNUM_INT_BITS - shiftb;
	1121
	1122	ai1 = a[shiftw + 1];
	1123	for (i = 1; i <= (int)ret[0]; i++) {
	1124	ai = ai1;
	1125	ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);
	1126	ret[i] = ((ai >> shiftb) \| (ai1 << shiftbb)) & BIGNUM_INT_MASK;
	1127	}
	1128	}
	1129
	1130	return ret;
	1131	}
	1132
	1133	/*
	1134	* Non-modular multiplication and addition.
	1135	*/
	1136	Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
	1137	{
	1138	int alen = a[0], blen = b[0];
	1139	int mlen = (alen > blen ? alen : blen);
	1140	int rlen, i, maxspot;
	1141	BignumInt *workspace;
	1142	Bignum ret;
	1143
	1144	/* mlen space for a, mlen space for b, 2mlen for result /
	1145	workspace = snewn(mlen * 4, BignumInt);
	1146	for (i = 0; i < mlen; i++) {
	1147	workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0);
	1148	workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? b[mlen - i] : 0);
	1149	}
	1150
	1151	internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
	1152	workspace + 2 * mlen, mlen);
	1153
	1154	/* now just copy the result back */
	1155	rlen = alen + blen + 1;
	1156	if (addend && rlen <= (int)addend[0])
	1157	rlen = addend[0] + 1;
	1158	ret = newbn(rlen);
	1159	maxspot = 0;
	1160	for (i = 1; i <= (int)ret[0]; i++) {
	1161	ret[i] = (i <= 2 * mlen ? workspace[4 * mlen - i] : 0);
	1162	if (ret[i] != 0)
	1163	maxspot = i;
	1164	}
	1165	ret[0] = maxspot;
	1166
	1167	/* now add in the addend, if any */
	1168	if (addend) {
	1169	BignumDblInt carry = 0;
	1170	for (i = 1; i <= rlen; i++) {
	1171	carry += (i <= (int)ret[0] ? ret[i] : 0);
	1172	carry += (i <= (int)addend[0] ? addend[i] : 0);
	1173	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1174	carry >>= BIGNUM_INT_BITS;
	1175	if (ret[i] != 0 && i > maxspot)
	1176	maxspot = i;
	1177	}
	1178	}
	1179	ret[0] = maxspot;
	1180
	1181	sfree(workspace);
	1182	return ret;
	1183	}
	1184
	1185	/*
	1186	* Non-modular multiplication.
	1187	*/
	1188	Bignum bigmul(Bignum a, Bignum b)
	1189	{
	1190	return bigmuladd(a, b, NULL);
	1191	}
	1192
	1193	/*
	1194	* Create a bignum which is the bitmask covering another one. That
	1195	* is, the smallest integer which is >= N and is also one less than
	1196	* a power of two.
	1197	*/
	1198	Bignum bignum_bitmask(Bignum n)
	1199	{
	1200	Bignum ret = copybn(n);
	1201	int i;
	1202	BignumInt j;
	1203
	1204	i = ret[0];
	1205	while (n[i] == 0 && i > 0)
	1206	i--;
	1207	if (i <= 0)
	1208	return ret; /* input was zero */
	1209	j = 1;
	1210	while (j < n[i])
	1211	j = 2 * j + 1;
	1212	ret[i] = j;
	1213	while (--i > 0)
	1214	ret[i] = BIGNUM_INT_MASK;
	1215	return ret;
	1216	}
	1217
	1218	/*
	1219	* Convert a (max 32-bit) long into a bignum.
	1220	*/
	1221	Bignum bignum_from_long(unsigned long nn)
	1222	{
	1223	Bignum ret;
	1224	BignumDblInt n = nn;
	1225
	1226	ret = newbn(3);
	1227	ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
	1228	ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
	1229	ret[3] = 0;
	1230	ret[0] = (ret[2] ? 2 : 1);
	1231	return ret;
	1232	}
	1233
	1234	/*
	1235	* Add a long to a bignum.
	1236	*/
	1237	Bignum bignum_add_long(Bignum number, unsigned long addendx)
	1238	{
	1239	Bignum ret = newbn(number[0] + 1);
	1240	int i, maxspot = 0;
	1241	BignumDblInt carry = 0, addend = addendx;
	1242
	1243	for (i = 1; i <= (int)ret[0]; i++) {
	1244	carry += addend & BIGNUM_INT_MASK;
	1245	carry += (i <= (int)number[0] ? number[i] : 0);
	1246	addend >>= BIGNUM_INT_BITS;
	1247	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1248	carry >>= BIGNUM_INT_BITS;
	1249	if (ret[i] != 0)
	1250	maxspot = i;
	1251	}
	1252	ret[0] = maxspot;
	1253	return ret;
	1254	}
	1255
	1256	/*
	1257	* Compute the residue of a bignum, modulo a (max 16-bit) short.
	1258	*/
	1259	unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
	1260	{
	1261	BignumDblInt mod, r;
	1262	int i;
	1263
	1264	r = 0;
	1265	mod = modulus;
	1266	for (i = number[0]; i > 0; i--)
	1267	r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
	1268	return (unsigned short) r;
	1269	}
	1270
	1271	#ifdef DEBUG
	1272	void diagbn(char *prefix, Bignum md)
	1273	{
	1274	int i, nibbles, morenibbles;
	1275	static const char hex[] = "0123456789ABCDEF";
	1276
	1277	debug(("%s0x", prefix ? prefix : ""));
	1278
	1279	nibbles = (3 + bignum_bitcount(md)) / 4;
	1280	if (nibbles < 1)
	1281	nibbles = 1;
	1282	morenibbles = 4 * md[0] - nibbles;
	1283	for (i = 0; i < morenibbles; i++)
	1284	debug(("-"));
	1285	for (i = nibbles; i--;)
	1286	debug(("%c",
	1287	hex[(bignum_byte(md, i / 2) >> (4 * (i % 2))) & 0xF]));
	1288
	1289	if (prefix)
	1290	debug(("\n"));
	1291	}
	1292	#endif
	1293
	1294	/*
	1295	* Simple division.
	1296	*/
	1297	Bignum bigdiv(Bignum a, Bignum b)
	1298	{
	1299	Bignum q = newbn(a[0]);
	1300	bigdivmod(a, b, NULL, q);
	1301	return q;
	1302	}
	1303
	1304	/*
	1305	* Simple remainder.
	1306	*/
	1307	Bignum bigmod(Bignum a, Bignum b)
	1308	{
	1309	Bignum r = newbn(b[0]);
	1310	bigdivmod(a, b, r, NULL);
	1311	return r;
	1312	}
	1313
	1314	/*
	1315	* Greatest common divisor.
	1316	*/
	1317	Bignum biggcd(Bignum av, Bignum bv)
	1318	{
	1319	Bignum a = copybn(av);
	1320	Bignum b = copybn(bv);
	1321
	1322	while (bignum_cmp(b, Zero) != 0) {
	1323	Bignum t = newbn(b[0]);
	1324	bigdivmod(a, b, t, NULL);
	1325	while (t[0] > 1 && t[t[0]] == 0)
	1326	t[0]--;
	1327	freebn(a);
	1328	a = b;
	1329	b = t;
	1330	}
	1331
	1332	freebn(b);
	1333	return a;
	1334	}
	1335
	1336	/*
	1337	* Modular inverse, using Euclid's extended algorithm.
	1338	*/
	1339	Bignum modinv(Bignum number, Bignum modulus)
	1340	{
	1341	Bignum a = copybn(modulus);
	1342	Bignum b = copybn(number);
	1343	Bignum xp = copybn(Zero);
	1344	Bignum x = copybn(One);
	1345	int sign = +1;
	1346
	1347	while (bignum_cmp(b, One) != 0) {
	1348	Bignum t = newbn(b[0]);
	1349	Bignum q = newbn(a[0]);
	1350	bigdivmod(a, b, t, q);
	1351	while (t[0] > 1 && t[t[0]] == 0)
	1352	t[0]--;
	1353	freebn(a);
	1354	a = b;
	1355	b = t;
	1356	t = xp;
	1357	xp = x;
	1358	x = bigmuladd(q, xp, t);
	1359	sign = -sign;
	1360	freebn(t);
	1361	freebn(q);
	1362	}
	1363
	1364	freebn(b);
	1365	freebn(a);
	1366	freebn(xp);
	1367
	1368	/* now we know that sign * x == 1, and that x < modulus */
	1369	if (sign < 0) {
	1370	/* set a new x to be modulus - x */
	1371	Bignum newx = newbn(modulus[0]);
	1372	BignumInt carry = 0;
	1373	int maxspot = 1;
	1374	int i;
	1375
	1376	for (i = 1; i <= (int)newx[0]; i++) {
	1377	BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);
	1378	BignumInt bword = (i <= (int)x[0] ? x[i] : 0);
	1379	newx[i] = aword - bword - carry;
	1380	bword = ~bword;
	1381	carry = carry ? (newx[i] >= bword) : (newx[i] > bword);
	1382	if (newx[i] != 0)
	1383	maxspot = i;
	1384	}
	1385	newx[0] = maxspot;
	1386	freebn(x);
	1387	x = newx;
	1388	}
	1389
	1390	/* and return. */
	1391	return x;
	1392	}
	1393
	1394	/*
	1395	* Render a bignum into decimal. Return a malloced string holding
	1396	* the decimal representation.
	1397	*/
	1398	char *bignum_decimal(Bignum x)
	1399	{
	1400	int ndigits, ndigit;
	1401	int i, iszero;
	1402	BignumDblInt carry;
	1403	char *ret;
	1404	BignumInt *workspace;
	1405
	1406	/*
	1407	* First, estimate the number of digits. Since log(10)/log(2)
	1408	* is just greater than 93/28 (the joys of continued fraction
	1409	* approximations...) we know that for every 93 bits, we need
	1410	* at most 28 digits. This will tell us how much to malloc.
	1411	*
	1412	* Formally: if x has i bits, that means x is strictly less
	1413	* than 2^i. Since 2 is less than 10^(28/93), this is less than
	1414	* 10^(28i/93). We need an integer power of ten, so we must
	1415	* round up (rounding down might make it less than x again).
	1416	* Therefore if we multiply the bit count by 28/93, rounding
	1417	* up, we will have enough digits.
	1418	*
	1419	* i=0 (i.e., x=0) is an irritating special case.
	1420	*/
	1421	i = bignum_bitcount(x);
	1422	if (!i)
	1423	ndigits = 1; /* x = 0 */
	1424	else
	1425	ndigits = (28 * i + 92) / 93; /* multiply by 28/93 and round up */
	1426	ndigits++; /* allow for trailing \0 */
	1427	ret = snewn(ndigits, char);
	1428
	1429	/*
	1430	* Now allocate some workspace to hold the binary form as we
	1431	* repeatedly divide it by ten. Initialise this to the
	1432	* big-endian form of the number.
	1433	*/
	1434	workspace = snewn(x[0], BignumInt);
	1435	for (i = 0; i < (int)x[0]; i++)
	1436	workspace[i] = x[x[0] - i];
	1437
	1438	/*
	1439	* Next, write the decimal number starting with the last digit.
	1440	* We use ordinary short division, dividing 10 into the
	1441	* workspace.
	1442	*/
	1443	ndigit = ndigits - 1;
	1444	ret[ndigit] = '\0';
	1445	do {
	1446	iszero = 1;
	1447	carry = 0;
	1448	for (i = 0; i < (int)x[0]; i++) {
	1449	carry = (carry << BIGNUM_INT_BITS) + workspace[i];
	1450	workspace[i] = (BignumInt) (carry / 10);
	1451	if (workspace[i])
	1452	iszero = 0;
	1453	carry %= 10;
	1454	}
	1455	ret[--ndigit] = (char) (carry + '0');
	1456	} while (!iszero);
	1457
	1458	/*
	1459	* There's a chance we've fallen short of the start of the
	1460	* string. Correct if so.
	1461	*/
	1462	if (ndigit > 0)
	1463	memmove(ret, ret + ndigit, ndigits - ndigit);
	1464
	1465	/*
	1466	* Done.
	1467	*/
	1468	sfree(workspace);
	1469	return ret;
	1470	}