git.distorted.org.uk Git - u/mdw/putty/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Bignum routines for RSA and DH and stuff.
	3	*/
	4
	5	#include <stdio.h>
	6	#include <assert.h>
	7	#include <stdlib.h>
	8	#include <string.h>
	9
	10	#include "misc.h"
	11
	12	/*
	13	* Usage notes:
	14	* * Do not call the DIVMOD_WORD macro with expressions such as array
	15	* subscripts, as some implementations object to this (see below).
	16	* * Note that none of the division methods below will cope if the
	17	* quotient won't fit into BIGNUM_INT_BITS. Callers should be careful
	18	* to avoid this case.
	19	* If this condition occurs, in the case of the x86 DIV instruction,
	20	* an overflow exception will occur, which (according to a correspondent)
	21	* will manifest on Windows as something like
	22	* 0xC0000095: Integer overflow
	23	* The C variant won't give the right answer, either.
	24	*/
	25
	/*
	 * Word-size configuration.
	 *
	 * Each branch below selects:
	 *   BignumInt       - the type of one bignum word;
	 *   BignumDblInt    - a type wide enough for the product of two words;
	 *   BIGNUM_INT_MASK - all-ones value of a word;
	 *   BIGNUM_TOP_BIT  - the most significant bit of a word;
	 *   BIGNUM_INT_BITS - number of bits in a word;
	 *   MUL_WORD(w1, w2) - double-width product of two words;
	 *   DIVMOD_WORD(q, r, hi, lo, w) - divide the double word hi:lo by w,
	 *       storing the quotient in q and the remainder in r.
	 *
	 * DIVMOD_WORD must be given plain variables (the inline-assembler
	 * variants cannot take array subscripts) and the quotient must fit in
	 * a single word.
	 */
	#if defined __GNUC__ && defined __i386__
	typedef unsigned long BignumInt;
	typedef unsigned long long BignumDblInt;
	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	#define BIGNUM_TOP_BIT 0x80000000UL
	#define BIGNUM_INT_BITS 32
	#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
	#define DIVMOD_WORD(q, r, hi, lo, w) \
	    __asm__("div %2" : \
	            "=d" (r), "=a" (q) : \
	            "r" (w), "d" (hi), "a" (lo))
	#elif defined _MSC_VER && defined _M_IX86
	typedef unsigned __int32 BignumInt;
	typedef unsigned __int64 BignumDblInt;
	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	#define BIGNUM_TOP_BIT 0x80000000UL
	#define BIGNUM_INT_BITS 32
	#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
	/* Note: MASM interprets array subscripts in the macro arguments as
	 * assembler syntax, which gives the wrong answer. Don't supply them.
	 * <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */
	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	    __asm mov edx, hi \
	    __asm mov eax, lo \
	    __asm div w \
	    __asm mov r, edx \
	    __asm mov q, eax \
	} while(0)
	#elif defined _LP64
	/* 64-bit architectures can do 32x32->64 chunks at a time */
	typedef unsigned int BignumInt;
	typedef unsigned long BignumDblInt;
	#define BIGNUM_INT_MASK 0xFFFFFFFFU
	#define BIGNUM_TOP_BIT 0x80000000U
	#define BIGNUM_INT_BITS 32
	#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	    BignumDblInt divmod_word_n = \
	        (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
	    (q) = divmod_word_n / (w); \
	    (r) = divmod_word_n % (w); \
	} while (0)
	#elif defined _LLP64
	/* 64-bit architectures in which unsigned long is 32 bits, not 64 */
	typedef unsigned long BignumInt;
	typedef unsigned long long BignumDblInt;
	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	#define BIGNUM_TOP_BIT 0x80000000UL
	#define BIGNUM_INT_BITS 32
	#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	    BignumDblInt divmod_word_n = \
	        (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
	    (q) = divmod_word_n / (w); \
	    (r) = divmod_word_n % (w); \
	} while (0)
	#else
	/* Fallback for all other cases */
	typedef unsigned short BignumInt;
	typedef unsigned long BignumDblInt;
	#define BIGNUM_INT_MASK 0xFFFFU
	#define BIGNUM_TOP_BIT 0x8000U
	#define BIGNUM_INT_BITS 16
	#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	    BignumDblInt divmod_word_n = \
	        (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
	    (q) = divmod_word_n / (w); \
	    (r) = divmod_word_n % (w); \
	} while (0)
	#endif

	/* Number of bytes in one bignum word. */
	#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)

	/* Defined before ssh.h is included, together with the Bignum typedef. */
	#define BIGNUM_INTERNAL
	typedef BignumInt *Bignum;
	99
	100	#include "ssh.h"
	101
		/*
		 * Static storage behind the constants Zero and One, laid out in
		 * the Bignum format described below: bnZero has a length word of
		 * 0 and no data words; bnOne has a length word of 1 followed by
		 * the single data word 1.
		 */
	102	BignumInt bnZero[1] = { 0 };
	103	BignumInt bnOne[2] = { 1, 1 };
	104
	105	/*
	106	* The Bignum format is an array of `BignumInt'. The first
	107	* element of the array counts the remaining elements. The
	108	* remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_
	109	* significant digit first. (So it's trivial to extract the bit
	110	* with value 2^n for any n.)
	111	*
	112	* All Bignums in this module are positive. Negative numbers must
	113	* be dealt with outside it.
	114	*
	115	* INVARIANT: the most significant word of any Bignum must be
	116	* nonzero.
	117	*/
	118
		/* Bignum handles pointing at the static arrays defined above. */
	119	Bignum Zero = bnZero, One = bnOne;
	120
	121	static Bignum newbn(int length)
	122	{
	123	Bignum b = snewn(length + 1, BignumInt);
	124	if (!b)
	125	abort(); /* FIXME */
	126	memset(b, 0, (length + 1) * sizeof(*b));
	127	b[0] = length;
	128	return b;
	129	}
	130
	131	void bn_restore_invariant(Bignum b)
	132	{
	133	while (b[0] > 1 && b[b[0]] == 0)
	134	b[0]--;
	135	}
	136
	137	Bignum copybn(Bignum orig)
	138	{
	139	Bignum b = snewn(orig[0] + 1, BignumInt);
	140	if (!b)
	141	abort(); /* FIXME */
	142	memcpy(b, orig, (orig[0] + 1) * sizeof(*b));
	143	return b;
	144	}
	145
	146	void freebn(Bignum b)
	147	{
	148	/*
	149	* Burn the evidence, just in case.
	150	*/
	151	smemclr(b, sizeof(b[0]) * (b[0] + 1));
	152	sfree(b);
	153	}
	154
	155	Bignum bn_power_2(int n)
	156	{
	157	Bignum ret = newbn(n / BIGNUM_INT_BITS + 1);
	158	bignum_set_bit(ret, n, 1);
	159	return ret;
	160	}
	161
	162	/*
	163	* Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
	164	* little-endian arrays of 'len' BignumInts. Returns a BignumInt carried
	165	* off the top.
	166	*/
	167	static BignumInt internal_add(const BignumInt a, const BignumInt b,
	168	BignumInt *c, int len)
	169	{
	170	int i;
	171	BignumDblInt carry = 0;
	172
	173	for (i = 0; i < len; i++) {
	174	carry += (BignumDblInt)a[i] + b[i];
	175	c[i] = (BignumInt)carry;
	176	carry >>= BIGNUM_INT_BITS;
	177	}
	178
	179	return (BignumInt)carry;
	180	}
	181
	182	/*
	183	* Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
	184	* all little-endian arrays of 'len' BignumInts. Any borrow from the top
	185	* is ignored.
	186	*/
	187	static void internal_sub(const BignumInt a, const BignumInt b,
	188	BignumInt *c, int len)
	189	{
	190	int i;
	191	BignumDblInt carry = 1;
	192
	193	for (i = 0; i < len; i++) {
	194	carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
	195	c[i] = (BignumInt)carry;
	196	carry >>= BIGNUM_INT_BITS;
	197	}
	198	}
	199
	200	/*
	201	* Compute c = a * b.
	202	* Input is in the first len words of a and b.
	203	* Result is returned in the first 2*len words of c.
	204	*
	205	* 'scratch' must point to an array of BignumInt of size at least
	206	* mul_compute_scratch(len). (This covers the needs of internal_mul
	207	* and all its recursive calls to itself.)
	208	*/
	/* Inputs longer than this many words are multiplied by Karatsuba. */
	#define KARATSUBA_THRESHOLD 50

	/*
	 * Return the number of scratch words internal_mul needs for inputs
	 * of 'len' words: each Karatsuba level uses 4*midlen words and then
	 * recurses on a midlen-word multiplication, until the length drops
	 * to KARATSUBA_THRESHOLD or below (which needs no scratch at all).
	 */
	static int mul_compute_scratch(int len)
	{
	    int midlen;

	    if (len <= KARATSUBA_THRESHOLD)
	        return 0;

	    /* The bigger half of the split, plus one word for the carry. */
	    midlen = (len - len / 2) + 1;

	    return 4 * midlen + mul_compute_scratch(midlen);
	}
	221	static void internal_mul(const BignumInt a, const BignumInt b,
	222	BignumInt c, int len, BignumInt scratch)
	223	{
	224	if (len > KARATSUBA_THRESHOLD) {
	225	int i;
	226
	227	/*
	228	* Karatsuba divide-and-conquer algorithm. Cut each input in
	229	* half, so that it's expressed as two big 'digits' in a giant
	230	* base D:
	231	*
	232	* a = a_1 D + a_0
	233	* b = b_1 D + b_0
	234	*
	235	* Then the product is of course
	236	*
	237	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	238	*
	239	* and we compute the three coefficients by recursively
	240	* calling ourself to do half-length multiplications.
	241	*
	242	* The clever bit that makes this worth doing is that we only
	243	* need _one_ half-length multiplication for the central
	244	* coefficient rather than the two that it obviouly looks
	245	* like, because we can use a single multiplication to compute
	246	*
	247	* (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
	248	*
	249	* and then we subtract the other two coefficients (a_1 b_1
	250	* and a_0 b_0) which we were computing anyway.
	251	*
	252	* Hence we get to multiply two numbers of length N in about
	253	* three times as much work as it takes to multiply numbers of
	254	* length N/2, which is obviously better than the four times
	255	* as much work it would take if we just did a long
	256	* conventional multiply.
	257	*/
	258
	259	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	260	int midlen = botlen + 1;
	261	BignumDblInt carry;
	262
	263	/*
	264	* The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping
	265	* in the output array, so we can compute them immediately in
	266	* place.
	267	*/
	268
	269	#ifdef KARA_DEBUG
	270	printf("a1,a0 = 0x");
	271	for (i = 0; i < len; i++) {
	272	if (i == toplen) printf(", 0x");
	273	printf("%0*x", BIGNUM_INT_BITS/4, a[len - 1 - i]);
	274	}
	275	printf("\n");
	276	printf("b1,b0 = 0x");
	277	for (i = 0; i < len; i++) {
	278	if (i == toplen) printf(", 0x");
	279	printf("%0*x", BIGNUM_INT_BITS/4, b[len - 1 - i]);
	280	}
	281	printf("\n");
	282	#endif
	283
	284	/* a_1 b_1 */
	285	internal_mul(a + botlen, b + botlen, c + 2*botlen, toplen, scratch);
	286	#ifdef KARA_DEBUG
	287	printf("a1b1 = 0x");
	288	for (i = 0; i < 2*toplen; i++) {
	289	printf("%0x", BIGNUM_INT_BITS/4, c[2len - 1 - i]);
	290	}
	291	printf("\n");
	292	#endif
	293
	294	/* a_0 b_0 */
	295	internal_mul(a, b, c, botlen, scratch);
	296	#ifdef KARA_DEBUG
	297	printf("a0b0 = 0x");
	298	for (i = 0; i < 2*botlen; i++) {
	299	printf("%0x", BIGNUM_INT_BITS/4, c[2botlen - 1 - i]);
	300	}
	301	printf("\n");
	302	#endif
	303
	304	/* Zero padding. botlen exceeds toplen by at most 1, and we'll set
	305	* the extra carry explicitly below, so we only need to zero at most
	306	* one of the top words here.
	307	*/
	308	scratch[midlen - 2] = scratch[2*midlen - 2] = 0;
	309
	310	for (i = 0; i < toplen; i++) {
	311	scratch[i] = a[i + botlen]; /* a_1 */
	312	scratch[midlen + i] = b[i + botlen]; /* b_1 */
	313	}
	314
	315	/* compute a_1 + a_0 */
	316	scratch[midlen - 1] = internal_add(scratch, a, scratch, botlen);
	317	#ifdef KARA_DEBUG
	318	printf("a1plusa0 = 0x");
	319	for (i = 0; i < midlen; i++) {
	320	printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen - 1 - i]);
	321	}
	322	printf("\n");
	323	#endif
	324	/* compute b_1 + b_0 */
	325	scratch[2*midlen - 1] = internal_add(scratch+midlen, b,
	326	scratch+midlen, botlen);
	327	#ifdef KARA_DEBUG
	328	printf("b1plusb0 = 0x");
	329	for (i = 0; i < midlen; i++) {
	330	printf("%0x", BIGNUM_INT_BITS/4, scratch[2midlen - 1 - i]);
	331	}
	332	printf("\n");
	333	#endif
	334
	335	/*
	336	* Now we can do the third multiplication.
	337	*/
	338	internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen,
	339	scratch + 4*midlen);
	340	#ifdef KARA_DEBUG
	341	printf("a1plusa0timesb1plusb0 = 0x");
	342	for (i = 0; i < 2*midlen; i++) {
	343	printf("%0x", BIGNUM_INT_BITS/4, scratch[4midlen - 1 - i]);
	344	}
	345	printf("\n");
	346	#endif
	347
	348	/*
	349	* Now we can reuse the first half of 'scratch' to compute the
	350	* sum of the outer two coefficients, to subtract from that
	351	* product to obtain the middle one.
	352	*/
	353	scratch[2botlen - 2] = scratch[2botlen - 1] = 0;
	354	for (i = 0; i < 2*toplen; i++)
	355	scratch[i] = c[2*botlen + i];
	356	scratch[2botlen] = internal_add(scratch, c, scratch, 2botlen);
	357	scratch[2*botlen + 1] = 0;
	358	#ifdef KARA_DEBUG
	359	printf("a1b1plusa0b0 = 0x");
	360	for (i = 0; i < 2*midlen; i++) {
	361	printf("%0x", BIGNUM_INT_BITS/4, scratch[2midlen - 1 - i]);
	362	}
	363	printf("\n");
	364	#endif
	365
	366	internal_sub(scratch + 2midlen, scratch, scratch, 2midlen);
	367	#ifdef KARA_DEBUG
	368	printf("a1b0plusa0b1 = 0x");
	369	for (i = 0; i < 2*midlen; i++) {
	370	printf("%0x", BIGNUM_INT_BITS/4, scratch[4midlen - 1 - i]);
	371	}
	372	printf("\n");
	373	#endif
	374
	375	/*
	376	* And now all we need to do is to add that middle coefficient
	377	* back into the output. We may have to propagate a carry
	378	* further up the output, but we can be sure it won't
	379	* propagate right the way off the top.
	380	*/
	381	carry = internal_add(c + botlen, scratch, c + botlen, 2*midlen);
	382	i = botlen + 2*midlen;
	383	while (carry) {
	384	assert(i <= 2*len);
	385	carry += c[i];
	386	c[i] = (BignumInt)carry;
	387	carry >>= BIGNUM_INT_BITS;
	388	i++;
	389	}
	390	#ifdef KARA_DEBUG
	391	printf("ab = 0x");
	392	for (i = 0; i < 2*len; i++) {
	393	printf("%0x", BIGNUM_INT_BITS/4, c[2len - i]);
	394	}
	395	printf("\n");
	396	#endif
	397
	398	} else {
	399	int i;
	400	BignumInt carry;
	401	BignumDblInt t;
	402	const BignumInt ap, alim = a + len, bp, blim = b + len;
	403	BignumInt cp, cps;
	404
	405	/*
	406	* Multiply in the ordinary O(N^2) way.
	407	*/
	408
	409	for (i = 0; i < 2 * len; i++)
	410	c[i] = 0;
	411
	412	for (cps = c, ap = a; ap < alim; ap++, cps++) {
	413	carry = 0;
	414	for (cp = cps, bp = b, i = blim - bp; i--; bp++, cp++) {
	415	t = (MUL_WORD(ap, bp) + carry) + *cp;
	416	*cp = (BignumInt) t;
	417	carry = (BignumInt)(t >> BIGNUM_INT_BITS);
	418	}
	419	*cp = carry;
	420	}
	421	}
	422	}
	423
	424	/*
	425	* Variant form of internal_mul used for the initial step of
	426	* Montgomery reduction. Only bothers outputting 'len' words
	427	* (everything above that is thrown away).
	428	*/
	429	static void internal_mul_low(const BignumInt a, const BignumInt b,
	430	BignumInt c, int len, BignumInt scratch)
	431	{
	432	if (len > KARATSUBA_THRESHOLD) {
	433	int i;
	434
	435	/*
	436	* Karatsuba-aware version of internal_mul_low. As before, we
	437	* express each input value as a shifted combination of two
	438	* halves:
	439	*
	440	* a = a_1 D + a_0
	441	* b = b_1 D + b_0
	442	*
	443	* Then the full product is, as before,
	444	*
	445	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	446	*
	447	* Provided we choose D on the large side (so that a_0 and b_0
	448	* are _at least_ as long as a_1 and b_1), we don't need the
	449	* topmost term at all, and we only need half of the middle
	450	* term. So there's no point in doing the proper Karatsuba
	451	* optimisation which computes the middle term using the top
	452	* one, because we'd take as long computing the top one as
	453	* just computing the middle one directly.
	454	*
	455	* So instead, we do a much more obvious thing: we call the
	456	* fully optimised internal_mul to compute a_0 b_0, and we
	457	* recursively call ourself to compute the _bottom halves_ of
	458	* a_1 b_0 and a_0 b_1, each of which we add into the result
	459	* in the obvious way.
	460	*
	461	* In other words, there's no actual Karatsuba _optimisation_
	462	* in this function; the only benefit in doing it this way is
	463	* that we call internal_mul proper for a large part of the
	464	* work, and _that_ can optimise its operation.
	465	*/
	466
	467	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	468
	469	/*
	470	* Scratch space for the various bits and pieces we're going
	471	* to be adding together: we need botlen*2 words for a_0 b_0
	472	* (though we may end up throwing away its topmost word), and
	473	* toplen words for each of a_1 b_0 and a_0 b_1. That adds up
	474	* to exactly 2*len.
	475	*/
	476
	477	/* a_0 b_0 */
	478	internal_mul(a, b, scratch + 2toplen, botlen, scratch + 2len);
	479
	480	/* a_1 b_0 */
	481	internal_mul_low(a + botlen, b, scratch + toplen, toplen,
	482	scratch + 2*len);
	483
	484	/* a_0 b_1 */
	485	internal_mul_low(a, b + botlen, scratch, toplen, scratch + 2*len);
	486
	487	/* Copy the bottom half of the big coefficient into place */
	488	for (i = 0; i < botlen; i++)
	489	c[i] = scratch[2*toplen + i];
	490
	491	/* Add the two small coefficients, throwing away the returned carry */
	492	internal_add(scratch, scratch + toplen, scratch, toplen);
	493
	494	/* And add that to the large coefficient, leaving the result in c. */
	495	internal_add(scratch, scratch + 2*toplen + botlen,
	496	c + botlen, toplen);
	497
	498	} else {
	499	int i;
	500	BignumInt carry;
	501	BignumDblInt t;
	502	const BignumInt ap, alim = a + len, *bp;
	503	BignumInt cp, cps, *clim = c + len;
	504
	505	/*
	506	* Multiply in the ordinary O(N^2) way.
	507	*/
	508
	509	for (i = 0; i < len; i++)
	510	c[i] = 0;
	511
	512	for (cps = c, ap = a; ap < alim; ap++, cps++) {
	513	carry = 0;
	514	for (cp = cps, bp = b, i = clim - cp; i--; bp++, cp++) {
	515	t = (MUL_WORD(ap, bp) + carry) + *cp;
	516	*cp = (BignumInt) t;
	517	carry = (BignumInt)(t >> BIGNUM_INT_BITS);
	518	}
	519	}
	520	}
	521	}
	522
	523	/*
	524	* Montgomery reduction. Expects x to be a little-endian array of 2*len
	525	* BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
	526	* BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
	527	* a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
	528	* x' < n.
	529	*
	530	* 'n' and 'mninv' should be little-endian arrays of 'len' BignumInts
	531	* each, containing respectively n and the multiplicative inverse of
	532	* -n mod r.
	533	*
	534	* 'tmp' is an array of BignumInt used as scratch space, of length at
	535	* least 3*len + mul_compute_scratch(len).
	536	*/
	537	static void monty_reduce(BignumInt x, const BignumInt n,
	538	const BignumInt mninv, BignumInt tmp, int len)
	539	{
	540	int i;
	541	BignumInt carry;
	542
	543	/*
	544	* Multiply x by (-n)^{-1} mod r. This gives us a value m such
	545	* that mn is congruent to -x mod r. Hence, mn+x is an exact
	546	* multiple of r, and is also (obviously) congruent to x mod n.
	547	*/
	548	internal_mul_low(x, mninv, tmp, len, tmp + 3*len);
	549
	550	/*
	551	* Compute t = (mn+x)/r in ordinary, non-modular, integer
	552	* arithmetic. By construction this is exact, and is congruent mod
	553	* n to x * r^{-1}, i.e. the answer we want.
	554	*
	555	* The following multiply leaves that answer in the _most_
	556	* significant half of the 'x' array, so then we must shift it
	557	* down.
	558	*/
	559	internal_mul(tmp, n, tmp+len, len, tmp + 3*len);
	560	carry = internal_add(x, tmp+len, x, 2*len);
	561	for (i = 0; i < len; i++)
	562	x[i] = x[len + i], x[len + i] = 0;
	563
	564	/*
	565	* Reduce t mod n. This doesn't require a full-on division by n,
	566	* but merely a test and single optional subtraction, since we can
	567	* show that 0 <= t < 2n.
	568	*
	569	* Proof:
	570	* + we computed m mod r, so 0 <= m < r.
	571	* + so 0 <= mn < rn, obviously
	572	* + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn
	573	* + yielding 0 <= (mn+x)/r < 2n as required.
	574	*/
	575	if (!carry) {
	576	for (i = len; i-- > 0; )
	577	if (x[i] != n[i])
	578	break;
	579	}
	580	if (carry \|\| i < 0 \|\| x[i] > n[i])
	581	internal_sub(x, n, x, len);
	582	}
	583
	584	static void internal_add_shifted(BignumInt *number,
	585	unsigned n, int shift)
	586	{
	587	int word = 1 + (shift / BIGNUM_INT_BITS);
	588	int bshift = shift % BIGNUM_INT_BITS;
	589	BignumDblInt addend;
	590
	591	addend = (BignumDblInt)n << bshift;
	592
	593	while (addend) {
	594	addend += number[word];
	595	number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
	596	addend >>= BIGNUM_INT_BITS;
	597	word++;
	598	}
	599	}
	600
	601	/*
	602	* Compute a = a % m.
	603	* Input in first alen words of a and first mlen words of m.
	604	* Output in first alen words of a
	605	* (of which last alen-mlen words will be zero).
	606	* The MSW of m MUST have its high bit set.
	607	* Quotient is accumulated in the `quotient' array. Quotient parts
	608	* are shifted left by `qshift' before adding into quot.
	609	*/
	610	static void internal_mod(BignumInt *a, int alen,
	611	BignumInt *m, int mlen,
	612	BignumInt *quot, int qshift)
	613	{
		/*
		 * Long division of a by m, producing one quotient word per pass
		 * of the main loop and working from the top word of a downwards.
		 */
	614	BignumInt m0, m1;
	615	unsigned int h;
	616	int i, j, k;
	617
		/*
		 * m0 is the top word of m and m1 the word below it (0 if m has
		 * only one word); both are used to estimate each quotient word.
		 */
	618	m0 = m[mlen - 1];
	619	if (mlen > 1)
	620	m1 = m[mlen - 2];
	621	else
	622	m1 = 0;
	623
		/*
		 * i runs from alen-1 down to mlen-1. h holds the word that was
		 * at a[i+1] at the end of the previous pass (0 on the first).
		 */
	624	for (i = alen, h = 0; i-- >= mlen; ) {
	625	BignumDblInt t;
	626	unsigned int q, r, c, ai1;
	627
		/* ai1 is the word below a[i], or 0 when there is none. */
	628	if (i)
	629	ai1 = a[i - 1];
	630	else
	631	ai1 = 0;
	632
	633	/* Find q = h:a[i] / m0 */
	634	if (h >= m0) {
	635	/*
	636	* Special case.
	637	*
	638	* To illustrate it, suppose a BignumInt is 8 bits, and
	639	* we are dividing (say) A1:23:45:67 by A1:B2:C3. Then
	640	* our initial division will be 0xA123 / 0xA1, which
	641	* will give a quotient of 0x100 and a divide overflow.
	642	* However, the invariants in this division algorithm
	643	* are not violated, since the full number A1:23:... is
	644	* _less_ than the quotient prefix A1:B2:... and so the
	645	* following correction loop would have sorted it out.
	646	*
	647	* In this situation we set q to be the largest
	648	* quotient we _can_ stomach (0xFF, of course).
	649	*/
	650	q = BIGNUM_INT_MASK;
	651	} else {
	652	/* Macro doesn't want an array subscript expression passed
	653	* into it (see definition), so use a temporary. */
	654	BignumInt tmplo = a[i];
	655	DIVMOD_WORD(q, r, h, tmplo, m0);
	656
	657	/* Refine our estimate of q by looking at
	658	h:a[i]:a[i-1] / m0:m1 */
	659	t = MUL_WORD(m1, q);
	660	if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
	661	q--;
	662	t -= m1;
	663	r = (r + m0) & BIGNUM_INT_MASK; /* overflow? */
	664	if (r >= (BignumDblInt) m0 &&
	665	t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
	666	}
	667	}
	668
		/* j is the index in a that lines up with m[0] for this step. */
	669	j = i + 1 - mlen;
	670
	671	/* Subtract q * m from a[i...] */
		/*
		 * c collects the high word of each product plus one for every
		 * word-level borrow, i.e. the amount owed by the word above.
		 */
	672	c = 0;
	673	for (k = 0; k < mlen; k++) {
	674	t = MUL_WORD(q, m[k]);
	675	t += c;
	676	c = (unsigned)(t >> BIGNUM_INT_BITS);
	677	if ((BignumInt) t > a[j + k])
	678	c++;
	679	a[j + k] -= (BignumInt) t;
	680	}
	681
	682	/* Add back m in case of borrow */
		/*
		 * NOTE(review): c != h is taken to mean that q was one too
		 * large; this appears to rely on the estimate above never
		 * overshooting by more than one - confirm.
		 */
	683	if (c != h) {
	684	t = 0;
	685	for (k = 0; k < mlen; k++) {
	686	t += m[k];
	687	t += a[j + k];
	688	a[j + k] = (BignumInt) t;
	689	t = t >> BIGNUM_INT_BITS;
	690	}
	691	q--;
	692	}
	693
		/*
		 * Record this quotient word, if a quotient was asked for, at
		 * its bit position shifted left by a further qshift bits.
		 */
	694	if (quot)
	695	internal_add_shifted(quot, q,
	696	qshift + BIGNUM_INT_BITS * (i + 1 - mlen));
	697
		/* Move the current top word into h for the next pass and clear it. */
	698	if (i >= mlen) {
	699	h = a[i];
	700	a[i] = 0;
	701	}
	702	}
	703	}
	704
	705	static void shift_left(BignumInt *x, int xlen, int shift)
	706	{
	707	int i;
	708
	709	if (!shift)
	710	return;
	711	for (i = xlen; --i > 0; )
	712	x[i] = (x[i] << shift) \| (x[i - 1] >> (BIGNUM_INT_BITS - shift));
	713	x[0] = x[0] << shift;
	714	}
	715
	716	static void shift_right(BignumInt *x, int xlen, int shift)
	717	{
	718	int i;
	719
	720	if (!shift \|\| !xlen)
	721	return;
	722	xlen--;
	723	for (i = 0; i < xlen; i++)
	724	x[i] = (x[i] >> shift) \| (x[i + 1] << (BIGNUM_INT_BITS - shift));
	725	x[i] = x[i] >> shift;
	726	}
	727
	728	/*
	729	* Compute (base ^ exp) % mod, the pedestrian way.
	730	*/
	731	Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)
	732	{
	733	BignumInt a, b, n, m, *scratch;
	734	int mshift;
	735	int mlen, scratchlen, i, j;
	736	Bignum base, result;
	737
	738	/*
	739	* The most significant word of mod needs to be non-zero. It
	740	* should already be, but let's make sure.
	741	*/
	742	assert(mod[mod[0]] != 0);
	743
	744	/*
	745	* Make sure the base is smaller than the modulus, by reducing
	746	* it modulo the modulus if not.
	747	*/
	748	base = bigmod(base_in, mod);
	749
	750	/* Allocate m of size mlen, copy mod to m */
	751	mlen = mod[0];
	752	m = snewn(mlen, BignumInt);
	753	for (j = 0; j < mlen; j++)
	754	m[j] = mod[j + 1];
	755
	756	/* Shift m left to make msb bit set */
	757	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	758	if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
	759	break;
	760	if (mshift)
	761	shift_left(m, mlen, mshift);
	762
	763	/* Allocate n of size mlen, copy base to n */
	764	n = snewn(mlen, BignumInt);
	765	for (i = 0; i < (int)base[0]; i++)
	766	n[i] = base[i + 1];
	767	for (; i < mlen; i++)
	768	n[i] = 0;
	769
	770	/* Allocate a and b of size 2mlen. Set a = 1 /
	771	a = snewn(2 * mlen, BignumInt);
	772	b = snewn(2 * mlen, BignumInt);
	773	a[0] = 1;
	774	for (i = 1; i < 2 * mlen; i++)
	775	a[i] = 0;
	776
	777	/* Scratch space for multiplies */
	778	scratchlen = mul_compute_scratch(mlen);
	779	scratch = snewn(scratchlen, BignumInt);
	780
	781	/* Skip leading zero bits of exp. */
	782	i = 0;
	783	j = BIGNUM_INT_BITS-1;
	784	while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
	785	j--;
	786	if (j < 0) {
	787	i++;
	788	j = BIGNUM_INT_BITS-1;
	789	}
	790	}
	791
	792	/* Main computation */
	793	while (i < (int)exp[0]) {
	794	while (j >= 0) {
	795	internal_mul(a, a, b, mlen, scratch);
	796	internal_mod(b, mlen * 2, m, mlen, NULL, 0);
	797	if ((exp[exp[0] - i] & (1 << j)) != 0) {
	798	internal_mul(b, n, a, mlen, scratch);
	799	internal_mod(a, mlen * 2, m, mlen, NULL, 0);
	800	} else {
	801	BignumInt *t;
	802	t = a;
	803	a = b;
	804	b = t;
	805	}
	806	j--;
	807	}
	808	i++;
	809	j = BIGNUM_INT_BITS-1;
	810	}
	811
	812	/* Fixup result in case the modulus was shifted */
	813	if (mshift) {
	814	shift_left(a, mlen + 1, mshift);
	815	internal_mod(a, mlen + 1, m, mlen, NULL, 0);
	816	shift_right(a, mlen, mshift);
	817	}
	818
	819	/* Copy result to buffer */
	820	result = newbn(mod[0]);
	821	for (i = 0; i < mlen; i++)
	822	result[i + 1] = a[i];
	823	while (result[0] > 1 && result[result[0]] == 0)
	824	result[0]--;
	825
	826	/* Free temporary arrays */
	827	for (i = 0; i < 2 * mlen; i++)
	828	a[i] = 0;
	829	sfree(a);
	830	for (i = 0; i < scratchlen; i++)
	831	scratch[i] = 0;
	832	sfree(scratch);
	833	for (i = 0; i < 2 * mlen; i++)
	834	b[i] = 0;
	835	sfree(b);
	836	for (i = 0; i < mlen; i++)
	837	m[i] = 0;
	838	sfree(m);
	839	for (i = 0; i < mlen; i++)
	840	n[i] = 0;
	841	sfree(n);
	842
	843	freebn(base);
	844
	845	return result;
	846	}
	847
	848	/*
	849	* Compute (base ^ exp) % mod. Uses the Montgomery multiplication
	850	* technique where possible, falling back to modpow_simple otherwise.
	851	*/
	852	Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
	853	{
	854	BignumInt a, b, x, n, mninv, scratch;
	855	int len, scratchlen, i, j;
	856	Bignum base, base2, r, rn, inv, result;
	857
	858	/*
	859	* The most significant word of mod needs to be non-zero. It
	860	* should already be, but let's make sure.
	861	*/
	862	assert(mod[mod[0]] != 0);
	863
	864	/*
	865	* mod had better be odd, or we can't do Montgomery multiplication
	866	* using a power of two at all.
	867	*/
	868	if (!(mod[1] & 1))
	869	return modpow_simple(base_in, exp, mod);
	870
	871	/*
	872	* Make sure the base is smaller than the modulus, by reducing
	873	* it modulo the modulus if not.
	874	*/
	875	base = bigmod(base_in, mod);
	876
	877	/*
	878	* Compute the inverse of n mod r, for monty_reduce. (In fact we
	879	* want the inverse of _minus_ n mod r, but we'll sort that out
	880	* below.)
	881	*/
	882	len = mod[0];
	883	r = bn_power_2(BIGNUM_INT_BITS * len);
	884	inv = modinv(mod, r);
	885
	886	/*
	887	* Multiply the base by r mod n, to get it into Montgomery
	888	* representation.
	889	*/
	890	base2 = modmul(base, r, mod);
	891	freebn(base);
	892	base = base2;
	893
	894	rn = bigmod(r, mod); /* r mod n, i.e. Montgomerified 1 */
	895
	896	freebn(r); /* won't need this any more */
	897
	898	/*
	899	* Set up internal arrays of the right lengths containing the base,
	900	* the modulus, and the modulus's inverse.
	901	*/
	902	n = snewn(len, BignumInt);
	903	for (j = 0; j < len; j++)
	904	n[j] = mod[j + 1];
	905
	906	mninv = snewn(len, BignumInt);
	907	for (j = 0; j < len; j++)
	908	mninv[j] = (j < (int)inv[0] ? inv[j + 1] : 0);
	909	freebn(inv); /* we don't need this copy of it any more */
	910	/* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
	911	x = snewn(len, BignumInt);
	912	for (j = 0; j < len; j++)
	913	x[j] = 0;
	914	internal_sub(x, mninv, mninv, len);
	915
	916	/* x = snewn(len, BignumInt); / / already done above */
	917	for (j = 0; j < len; j++)
	918	x[j] = (j < (int)base[0] ? base[j + 1] : 0);
	919	freebn(base); /* we don't need this copy of it any more */
	920
	921	a = snewn(2*len, BignumInt);
	922	b = snewn(2*len, BignumInt);
	923	for (j = 0; j < len; j++)
	924	a[j] = (j < (int)rn[0] ? rn[j + 1] : 0);
	925	freebn(rn);
	926
	927	/* Scratch space for multiplies */
	928	scratchlen = 3*len + mul_compute_scratch(len);
	929	scratch = snewn(scratchlen, BignumInt);
	930
	931	/* Skip leading zero bits of exp. */
	932	i = 0;
	933	j = BIGNUM_INT_BITS-1;
	934	while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
	935	j--;
	936	if (j < 0) {
	937	i++;
	938	j = BIGNUM_INT_BITS-1;
	939	}
	940	}
	941
	942	/* Main computation */
	943	while (i < (int)exp[0]) {
	944	while (j >= 0) {
	945	internal_mul(a, a, b, len, scratch);
	946	monty_reduce(b, n, mninv, scratch, len);
	947	if ((exp[exp[0] - i] & (1 << j)) != 0) {
	948	internal_mul(b, x, a, len, scratch);
	949	monty_reduce(a, n, mninv, scratch, len);
	950	} else {
	951	BignumInt *t;
	952	t = a;
	953	a = b;
	954	b = t;
	955	}
	956	j--;
	957	}
	958	i++;
	959	j = BIGNUM_INT_BITS-1;
	960	}
	961
	962	/*
	963	* Final monty_reduce to get back from the adjusted Montgomery
	964	* representation.
	965	*/
	966	monty_reduce(a, n, mninv, scratch, len);
	967
	968	/* Copy result to buffer */
	969	result = newbn(mod[0]);
	970	for (i = 0; i < len; i++)
	971	result[i + 1] = a[i];
	972	while (result[0] > 1 && result[result[0]] == 0)
	973	result[0]--;
	974
	975	/* Free temporary arrays */
	976	for (i = 0; i < scratchlen; i++)
	977	scratch[i] = 0;
	978	sfree(scratch);
	979	for (i = 0; i < 2 * len; i++)
	980	a[i] = 0;
	981	sfree(a);
	982	for (i = 0; i < 2 * len; i++)
	983	b[i] = 0;
	984	sfree(b);
	985	for (i = 0; i < len; i++)
	986	mninv[i] = 0;
	987	sfree(mninv);
	988	for (i = 0; i < len; i++)
	989	n[i] = 0;
	990	sfree(n);
	991	for (i = 0; i < len; i++)
	992	x[i] = 0;
	993	sfree(x);
	994
	995	return result;
	996	}
	997
	998	/*
	999	* Compute (p * q) % mod.
	1000	* The most significant word of mod MUST be non-zero.
	1001	* We assume that the result array is the same size as the mod array.
	1002	*/
	1003	Bignum modmul(Bignum p, Bignum q, Bignum mod)
	1004	{
	1005	BignumInt a, n, m, o, *scratch;
	1006	int mshift, scratchlen;
	1007	int pqlen, mlen, rlen, i, j;
	1008	Bignum result;
	1009
	1010	/* Allocate m of size mlen, copy mod to m */
	1011	mlen = mod[0];
	1012	m = snewn(mlen, BignumInt);
	1013	for (j = 0; j < mlen; j++)
	1014	m[j] = mod[j + 1];
	1015
	1016	/* Shift m left to make msb bit set */
	1017	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	1018	if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
	1019	break;
	1020	if (mshift)
	1021	shift_left(m, mlen, mshift);
	1022
	1023	pqlen = (p[0] > q[0] ? p[0] : q[0]);
	1024
	1025	/* Make sure that we're allowing enough space. The shifting below will
	1026	* underflow the vectors we allocate if `pqlen' is too small.
	1027	*/
	1028	if (2*pqlen <= mlen)
	1029	pqlen = mlen/2 + 1;
	1030
	1031	/* Allocate n of size pqlen, copy p to n */
	1032	n = snewn(pqlen, BignumInt);
	1033	for (i = 0; i < (int)p[0]; i++)
	1034	n[i] = p[i + 1];
	1035	for (; i < pqlen; i++)
	1036	n[i] = 0;
	1037
	1038	/* Allocate o of size pqlen, copy q to o */
	1039	o = snewn(pqlen, BignumInt);
	1040	for (i = 0; i < (int)q[0]; i++)
	1041	o[i] = q[i + 1];
	1042	for (; i < pqlen; i++)
	1043	o[i] = 0;
	1044
	1045	/* Allocate a of size 2pqlen for result /
	1046	a = snewn(2 * pqlen, BignumInt);
	1047
	1048	/* Scratch space for multiplies */
	1049	scratchlen = mul_compute_scratch(pqlen);
	1050	scratch = snewn(scratchlen, BignumInt);
	1051
	1052	/* Main computation */
	1053	internal_mul(n, o, a, pqlen, scratch);
	1054	internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
	1055
	1056	/* Fixup result in case the modulus was shifted */
	1057	if (mshift) {
	1058	shift_left(a, mlen + 1, mshift);
	1059	internal_mod(a, mlen + 1, m, mlen, NULL, 0);
	1060	shift_right(a, mlen, mshift);
	1061	}
	1062
	1063	/* Copy result to buffer */
	1064	rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
	1065	result = newbn(rlen);
	1066	for (i = 0; i < rlen; i++)
	1067	result[i + 1] = a[i];
	1068	while (result[0] > 1 && result[result[0]] == 0)
	1069	result[0]--;
	1070
	1071	/* Free temporary arrays */
	1072	for (i = 0; i < scratchlen; i++)
	1073	scratch[i] = 0;
	1074	sfree(scratch);
	1075	for (i = 0; i < 2 * pqlen; i++)
	1076	a[i] = 0;
	1077	sfree(a);
	1078	for (i = 0; i < mlen; i++)
	1079	m[i] = 0;
	1080	sfree(m);
	1081	for (i = 0; i < pqlen; i++)
	1082	n[i] = 0;
	1083	sfree(n);
	1084	for (i = 0; i < pqlen; i++)
	1085	o[i] = 0;
	1086	sfree(o);
	1087
	1088	return result;
	1089	}
	1090
	1091	/*
	1092	* Compute p % mod.
	1093	* The most significant word of mod MUST be non-zero.
	1094	* We assume that the result array is the same size as the mod array.
	1095	* We optionally write out a quotient if `quotient' is non-NULL.
	1096	* We can avoid writing out the result if `result' is NULL.
	1097	*/
	1098	static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)
	1099	{
	1100	BignumInt n, m;
	1101	int mshift;
	1102	int plen, mlen, i, j;
	1103
	1104	/* Allocate m of size mlen, copy mod to m */
	1105	mlen = mod[0];
	1106	m = snewn(mlen, BignumInt);
	1107	for (j = 0; j < mlen; j++)
	1108	m[j] = mod[j + 1];
	1109
	1110	/* Shift m left to make msb bit set */
	1111	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	1112	if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
	1113	break;
	1114	if (mshift)
	1115	shift_left(m, mlen, mshift);
	1116
	1117	plen = p[0];
	1118	/* Ensure plen > mlen */
	1119	if (plen <= mlen)
	1120	plen = mlen + 1;
	1121
	1122	/* Allocate n of size plen, copy p to n */
	1123	n = snewn(plen, BignumInt);
	1124	for (i = 0; i < (int)p[0]; i++)
	1125	n[i] = p[i + 1];
	1126	for (; i < plen; i++)
	1127	n[i] = 0;
	1128
	1129	/* Main computation */
	1130	internal_mod(n, plen, m, mlen, quotient, mshift);
	1131
	1132	/* Fixup result in case the modulus was shifted */
	1133	if (mshift) {
	1134	shift_left(n, mlen + 1, mshift);
	1135	internal_mod(n, plen, m, mlen, quotient, 0);
	1136	shift_right(n, mlen, mshift);
	1137	}
	1138
	1139	/* Copy result to buffer */
	1140	if (result) {
	1141	for (i = 0; i < (int)result[0]; i++)
	1142	result[i + 1] = i < plen ? n[i] : 0;
	1143	bn_restore_invariant(result);
	1144	}
	1145
	1146	/* Free temporary arrays */
	1147	for (i = 0; i < mlen; i++)
	1148	m[i] = 0;
	1149	sfree(m);
	1150	for (i = 0; i < plen; i++)
	1151	n[i] = 0;
	1152	sfree(n);
	1153	}
	1154
	1155	/*
	1156	* Decrement a number.
	1157	*/
	1158	void decbn(Bignum bn)
	1159	{
	1160	int i = 1;
	1161	while (i < (int)bn[0] && bn[i] == 0)
	1162	bn[i++] = BIGNUM_INT_MASK;
	1163	bn[i]--;
	1164	}
	1165
	1166	Bignum bignum_from_bytes(const unsigned char *data, int nbytes)
	1167	{
	1168	Bignum result;
	1169	int w, i;
	1170
	1171	w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
	1172
	1173	result = newbn(w);
	1174	for (i = 1; i <= w; i++)
	1175	result[i] = 0;
	1176	for (i = nbytes; i--;) {
	1177	unsigned char byte = *data++;
	1178	result[1 + i / BIGNUM_INT_BYTES] \|= byte << (8*i % BIGNUM_INT_BITS);
	1179	}
	1180
	1181	while (result[0] > 1 && result[result[0]] == 0)
	1182	result[0]--;
	1183	return result;
	1184	}
	1185
	1186	/*
	1187	* Read an SSH-1-format bignum from a data buffer. Return the number
	1188	* of bytes consumed, or -1 if there wasn't enough data.
	1189	*/
	1190	int ssh1_read_bignum(const unsigned char data, int len, Bignum result)
	1191	{
	1192	const unsigned char *p = data;
	1193	int i;
	1194	int w, b;
	1195
	1196	if (len < 2)
	1197	return -1;
	1198
	1199	w = 0;
	1200	for (i = 0; i < 2; i++)
	1201	w = (w << 8) + *p++;
	1202	b = (w + 7) / 8; /* bits -> bytes */
	1203
	1204	if (len < b+2)
	1205	return -1;
	1206
	1207	if (!result) /* just return length */
	1208	return b + 2;
	1209
	1210	*result = bignum_from_bytes(p, b);
	1211
	1212	return p + b - data;
	1213	}
	1214
	1215	/*
	1216	* Return the bit count of a bignum, for SSH-1 encoding.
	1217	*/
	1218	int bignum_bitcount(Bignum bn)
	1219	{
	1220	int bitcount = bn[0] * BIGNUM_INT_BITS - 1;
	1221	while (bitcount >= 0
	1222	&& (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;
	1223	return bitcount + 1;
	1224	}
	1225
	1226	/*
	1227	* Return the byte length of a bignum when SSH-1 encoded.
	1228	*/
	1229	int ssh1_bignum_length(Bignum bn)
	1230	{
	1231	return 2 + (bignum_bitcount(bn) + 7) / 8;
	1232	}
	1233
	1234	/*
	1235	* Return the byte length of a bignum when SSH-2 encoded.
	1236	*/
	1237	int ssh2_bignum_length(Bignum bn)
	1238	{
	1239	return 4 + (bignum_bitcount(bn) + 8) / 8;
	1240	}
	1241
	1242	/*
	1243	* Return a byte from a bignum; 0 is least significant, etc.
	1244	*/
	1245	int bignum_byte(Bignum bn, int i)
	1246	{
	1247	if (i >= (int)(BIGNUM_INT_BYTES * bn[0]))
	1248	return 0; /* beyond the end */
	1249	else
	1250	return (bn[i / BIGNUM_INT_BYTES + 1] >>
	1251	((i % BIGNUM_INT_BYTES)*8)) & 0xFF;
	1252	}
	1253
	1254	/*
	1255	* Return a bit from a bignum; 0 is least significant, etc.
	1256	*/
	1257	int bignum_bit(Bignum bn, int i)
	1258	{
	1259	if (i >= (int)(BIGNUM_INT_BITS * bn[0]))
	1260	return 0; /* beyond the end */
	1261	else
	1262	return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;
	1263	}
	1264
	1265	/*
	1266	* Set a bit in a bignum; 0 is least significant, etc.
	1267	*/
	1268	void bignum_set_bit(Bignum bn, int bitnum, int value)
	1269	{
	1270	if (bitnum >= (int)(BIGNUM_INT_BITS * bn[0]))
	1271	abort(); /* beyond the end */
	1272	else {
	1273	int v = bitnum / BIGNUM_INT_BITS + 1;
	1274	int mask = 1 << (bitnum % BIGNUM_INT_BITS);
	1275	if (value)
	1276	bn[v] \|= mask;
	1277	else
	1278	bn[v] &= ~mask;
	1279	}
	1280	}
	1281
	1282	/*
	1283	* Write a SSH-1-format bignum into a buffer. It is assumed the
	1284	* buffer is big enough. Returns the number of bytes used.
	1285	*/
	1286	int ssh1_write_bignum(void *data, Bignum bn)
	1287	{
	1288	unsigned char *p = data;
	1289	int len = ssh1_bignum_length(bn);
	1290	int i;
	1291	int bitc = bignum_bitcount(bn);
	1292
	1293	*p++ = (bitc >> 8) & 0xFF;
	1294	*p++ = (bitc) & 0xFF;
	1295	for (i = len - 2; i--;)
	1296	*p++ = bignum_byte(bn, i);
	1297	return len;
	1298	}
	1299
	1300	/*
	1301	* Compare two bignums. Returns like strcmp.
	1302	*/
	1303	int bignum_cmp(Bignum a, Bignum b)
	1304	{
	1305	int amax = a[0], bmax = b[0];
	1306	int i = (amax > bmax ? amax : bmax);
	1307	while (i) {
	1308	BignumInt aval = (i > amax ? 0 : a[i]);
	1309	BignumInt bval = (i > bmax ? 0 : b[i]);
	1310	if (aval < bval)
	1311	return -1;
	1312	if (aval > bval)
	1313	return +1;
	1314	i--;
	1315	}
	1316	return 0;
	1317	}
	1318
	1319	/*
	1320	* Right-shift one bignum to form another.
	1321	*/
	1322	Bignum bignum_rshift(Bignum a, int shift)
	1323	{
	1324	Bignum ret;
	1325	int i, shiftw, shiftb, shiftbb, bits;
	1326	BignumInt ai, ai1;
	1327
	1328	bits = bignum_bitcount(a) - shift;
	1329	ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
	1330
	1331	if (ret) {
	1332	shiftw = shift / BIGNUM_INT_BITS;
	1333	shiftb = shift % BIGNUM_INT_BITS;
	1334	shiftbb = BIGNUM_INT_BITS - shiftb;
	1335
	1336	ai1 = a[shiftw + 1];
	1337	for (i = 1; i <= (int)ret[0]; i++) {
	1338	ai = ai1;
	1339	ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);
	1340	ret[i] = ((ai >> shiftb) \| (ai1 << shiftbb)) & BIGNUM_INT_MASK;
	1341	}
	1342	}
	1343
	1344	return ret;
	1345	}
	1346
	1347	/*
	1348	* Non-modular multiplication and addition.
	1349	*/
	1350	Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
	1351	{
	1352	int alen = a[0], blen = b[0];
	1353	int mlen = (alen > blen ? alen : blen);
	1354	int rlen, i, maxspot;
	1355	int wslen;
	1356	BignumInt *workspace;
	1357	Bignum ret;
	1358
	1359	/* mlen space for a, mlen space for b, 2*mlen for result,
	1360	* plus scratch space for multiplication */
	1361	wslen = mlen * 4 + mul_compute_scratch(mlen);
	1362	workspace = snewn(wslen, BignumInt);
	1363	for (i = 0; i < mlen; i++) {
	1364	workspace[0 * mlen + i] = i < (int)a[0] ? a[i + 1] : 0;
	1365	workspace[1 * mlen + i] = i < (int)b[0] ? b[i + 1] : 0;
	1366	}
	1367
	1368	internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
	1369	workspace + 2 * mlen, mlen, workspace + 4 * mlen);
	1370
	1371	/* now just copy the result back */
	1372	rlen = alen + blen + 1;
	1373	if (addend && rlen <= (int)addend[0])
	1374	rlen = addend[0] + 1;
	1375	ret = newbn(rlen);
	1376	maxspot = 0;
	1377	for (i = 0; i < (int)ret[0]; i++) {
	1378	ret[i + 1] = (i < 2 * mlen ? workspace[2 * mlen + i] : 0);
	1379	if (ret[i + 1] != 0)
	1380	maxspot = i + 1;
	1381	}
	1382	ret[0] = maxspot;
	1383
	1384	/* now add in the addend, if any */
	1385	if (addend) {
	1386	BignumDblInt carry = 0;
	1387	for (i = 1; i <= rlen; i++) {
	1388	carry += (i <= (int)ret[0] ? ret[i] : 0);
	1389	carry += (i <= (int)addend[0] ? addend[i] : 0);
	1390	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1391	carry >>= BIGNUM_INT_BITS;
	1392	if (ret[i] != 0 && i > maxspot)
	1393	maxspot = i;
	1394	}
	1395	}
	1396	ret[0] = maxspot;
	1397
	1398	for (i = 0; i < wslen; i++)
	1399	workspace[i] = 0;
	1400	sfree(workspace);
	1401	return ret;
	1402	}
	1403
	1404	/*
	1405	* Non-modular multiplication.
	1406	*/
	1407	Bignum bigmul(Bignum a, Bignum b)
	1408	{
	1409	return bigmuladd(a, b, NULL);
	1410	}
	1411
	1412	/*
	1413	* Simple addition.
	1414	*/
	1415	Bignum bigadd(Bignum a, Bignum b)
	1416	{
	1417	int alen = a[0], blen = b[0];
	1418	int rlen = (alen > blen ? alen : blen) + 1;
	1419	int i, maxspot;
	1420	Bignum ret;
	1421	BignumDblInt carry;
	1422
	1423	ret = newbn(rlen);
	1424
	1425	carry = 0;
	1426	maxspot = 0;
	1427	for (i = 1; i <= rlen; i++) {
	1428	carry += (i <= (int)a[0] ? a[i] : 0);
	1429	carry += (i <= (int)b[0] ? b[i] : 0);
	1430	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1431	carry >>= BIGNUM_INT_BITS;
	1432	if (ret[i] != 0 && i > maxspot)
	1433	maxspot = i;
	1434	}
	1435	ret[0] = maxspot;
	1436
	1437	return ret;
	1438	}
	1439
	1440	/*
	1441	* Subtraction. Returns a-b, or NULL if the result would come out
	1442	* negative (recall that this entire bignum module only handles
	1443	* positive numbers).
	1444	*/
	1445	Bignum bigsub(Bignum a, Bignum b)
	1446	{
	1447	int alen = a[0], blen = b[0];
	1448	int rlen = (alen > blen ? alen : blen);
	1449	int i, maxspot;
	1450	Bignum ret;
	1451	BignumDblInt carry;
	1452
	1453	ret = newbn(rlen);
	1454
	1455	carry = 1;
	1456	maxspot = 0;
	1457	for (i = 1; i <= rlen; i++) {
	1458	carry += (i <= (int)a[0] ? a[i] : 0);
	1459	carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK);
	1460	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1461	carry >>= BIGNUM_INT_BITS;
	1462	if (ret[i] != 0 && i > maxspot)
	1463	maxspot = i;
	1464	}
	1465	ret[0] = maxspot;
	1466
	1467	if (!carry) {
	1468	freebn(ret);
	1469	return NULL;
	1470	}
	1471
	1472	return ret;
	1473	}
	1474
	1475	/*
	1476	* Create a bignum which is the bitmask covering another one. That
	1477	* is, the smallest integer which is >= N and is also one less than
	1478	* a power of two.
	1479	*/
	1480	Bignum bignum_bitmask(Bignum n)
	1481	{
	1482	Bignum ret = copybn(n);
	1483	int i;
	1484	BignumInt j;
	1485
	1486	i = ret[0];
	1487	while (n[i] == 0 && i > 0)
	1488	i--;
	1489	if (i <= 0)
	1490	return ret; /* input was zero */
	1491	j = 1;
	1492	while (j < n[i])
	1493	j = 2 * j + 1;
	1494	ret[i] = j;
	1495	while (--i > 0)
	1496	ret[i] = BIGNUM_INT_MASK;
	1497	return ret;
	1498	}
	1499
	1500	/*
	1501	* Convert a (max 32-bit) long into a bignum.
	1502	*/
	1503	Bignum bignum_from_long(unsigned long nn)
	1504	{
	1505	Bignum ret;
	1506	BignumDblInt n = nn;
	1507
	1508	ret = newbn(3);
	1509	ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
	1510	ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
	1511	ret[3] = 0;
	1512	ret[0] = (ret[2] ? 2 : 1);
	1513	return ret;
	1514	}
	1515
	1516	/*
	1517	* Add a long to a bignum.
	1518	*/
	1519	Bignum bignum_add_long(Bignum number, unsigned long addendx)
	1520	{
	1521	Bignum ret = newbn(number[0] + 1);
	1522	int i, maxspot = 0;
	1523	BignumDblInt carry = 0, addend = addendx;
	1524
	1525	for (i = 1; i <= (int)ret[0]; i++) {
	1526	carry += addend & BIGNUM_INT_MASK;
	1527	carry += (i <= (int)number[0] ? number[i] : 0);
	1528	addend >>= BIGNUM_INT_BITS;
	1529	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1530	carry >>= BIGNUM_INT_BITS;
	1531	if (ret[i] != 0)
	1532	maxspot = i;
	1533	}
	1534	ret[0] = maxspot;
	1535	return ret;
	1536	}
	1537
	1538	/*
	1539	* Compute the residue of a bignum, modulo a (max 16-bit) short.
	1540	*/
	1541	unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
	1542	{
	1543	BignumDblInt mod, r;
	1544	int i;
	1545
	1546	r = 0;
	1547	mod = modulus;
	1548	for (i = number[0]; i > 0; i--)
	1549	r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
	1550	return (unsigned short) r;
	1551	}
	1552
	#ifdef DEBUG
	/*
	 * Debugging aid: print a bignum in hex through debug(), preceded by
	 * `prefix' (if non-NULL) and "0x". A run of '-' characters is
	 * printed before the digits as padding, and a newline is appended
	 * only when a prefix was supplied.
	 */
	void diagbn(char *prefix, Bignum md)
	{
	    static const char hex[] = "0123456789ABCDEF";
	    int pos, ndigits, padding;

	    debug(("%s0x", prefix ? prefix : ""));

	    /* Number of hex digits needed; always print at least one. */
	    ndigits = (3 + bignum_bitcount(md)) / 4;
	    if (ndigits < 1)
	        ndigits = 1;

	    padding = 4 * md[0] - ndigits;
	    for (pos = 0; pos < padding; pos++)
	        debug(("-"));

	    /* Most significant nibble first; two nibbles per byte. */
	    for (pos = ndigits - 1; pos >= 0; pos--) {
	        int nibble = (bignum_byte(md, pos / 2) >> (4 * (pos % 2))) & 0xF;
	        debug(("%c", hex[nibble]));
	    }

	    if (prefix)
	        debug(("\n"));
	}
	#endif
	1575
	1576	/*
	1577	* Simple division.
	1578	*/
	1579	Bignum bigdiv(Bignum a, Bignum b)
	1580	{
	1581	Bignum q = newbn(a[0]);
	1582	bigdivmod(a, b, NULL, q);
	1583	return q;
	1584	}
	1585
	1586	/*
	1587	* Simple remainder.
	1588	*/
	1589	Bignum bigmod(Bignum a, Bignum b)
	1590	{
	1591	Bignum r = newbn(b[0]);
	1592	bigdivmod(a, b, r, NULL);
	1593	return r;
	1594	}
	1595
	1596	/*
	1597	* Greatest common divisor.
	1598	*/
	1599	Bignum biggcd(Bignum av, Bignum bv)
	1600	{
	1601	Bignum a = copybn(av);
	1602	Bignum b = copybn(bv);
	1603
	1604	while (bignum_cmp(b, Zero) != 0) {
	1605	Bignum t = newbn(b[0]);
	1606	bigdivmod(a, b, t, NULL);
	1607	while (t[0] > 1 && t[t[0]] == 0)
	1608	t[0]--;
	1609	freebn(a);
	1610	a = b;
	1611	b = t;
	1612	}
	1613
	1614	freebn(b);
	1615	return a;
	1616	}
	1617
	1618	/*
	1619	* Modular inverse, using Euclid's extended algorithm.
	1620	*/
	1621	Bignum modinv(Bignum number, Bignum modulus)
	1622	{
	1623	Bignum a = copybn(modulus);
	1624	Bignum b = copybn(number);
	1625	Bignum xp = copybn(Zero);
	1626	Bignum x = copybn(One);
	1627	int sign = +1;
	1628
	1629	while (bignum_cmp(b, One) != 0) {
	1630	Bignum t = newbn(b[0]);
	1631	Bignum q = newbn(a[0]);
	1632	bigdivmod(a, b, t, q);
	1633	while (t[0] > 1 && t[t[0]] == 0)
	1634	t[0]--;
	1635	freebn(a);
	1636	a = b;
	1637	b = t;
	1638	t = xp;
	1639	xp = x;
	1640	x = bigmuladd(q, xp, t);
	1641	sign = -sign;
	1642	freebn(t);
	1643	freebn(q);
	1644	}
	1645
	1646	freebn(b);
	1647	freebn(a);
	1648	freebn(xp);
	1649
	1650	/* now we know that sign * x == 1, and that x < modulus */
	1651	if (sign < 0) {
	1652	/* set a new x to be modulus - x */
	1653	Bignum newx = newbn(modulus[0]);
	1654	BignumInt carry = 0;
	1655	int maxspot = 1;
	1656	int i;
	1657
	1658	for (i = 1; i <= (int)newx[0]; i++) {
	1659	BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);
	1660	BignumInt bword = (i <= (int)x[0] ? x[i] : 0);
	1661	newx[i] = aword - bword - carry;
	1662	bword = ~bword;
	1663	carry = carry ? (newx[i] >= bword) : (newx[i] > bword);
	1664	if (newx[i] != 0)
	1665	maxspot = i;
	1666	}
	1667	newx[0] = maxspot;
	1668	freebn(x);
	1669	x = newx;
	1670	}
	1671
	1672	/* and return. */
	1673	return x;
	1674	}
	1675
	1676	/*
	1677	* Render a bignum into decimal. Return a malloced string holding
	1678	* the decimal representation.
	1679	*/
	1680	char *bignum_decimal(Bignum x)
	1681	{
	1682	int ndigits, ndigit;
	1683	int i, iszero;
	1684	BignumDblInt carry;
	1685	char *ret;
	1686	BignumInt *workspace;
	1687
	1688	/*
	1689	* First, estimate the number of digits. Since log(10)/log(2)
	1690	* is just greater than 93/28 (the joys of continued fraction
	1691	* approximations...) we know that for every 93 bits, we need
	1692	* at most 28 digits. This will tell us how much to malloc.
	1693	*
	1694	* Formally: if x has i bits, that means x is strictly less
	1695	* than 2^i. Since 2 is less than 10^(28/93), this is less than
	1696	* 10^(28i/93). We need an integer power of ten, so we must
	1697	* round up (rounding down might make it less than x again).
	1698	* Therefore if we multiply the bit count by 28/93, rounding
	1699	* up, we will have enough digits.
	1700	*
	1701	* i=0 (i.e., x=0) is an irritating special case.
	1702	*/
	1703	i = bignum_bitcount(x);
	1704	if (!i)
	1705	ndigits = 1; /* x = 0 */
	1706	else
	1707	ndigits = (28 * i + 92) / 93; /* multiply by 28/93 and round up */
	1708	ndigits++; /* allow for trailing \0 */
	1709	ret = snewn(ndigits, char);
	1710
	1711	/*
	1712	* Now allocate some workspace to hold the binary form as we
	1713	* repeatedly divide it by ten. Initialise this to the
	1714	* big-endian form of the number.
	1715	*/
	1716	workspace = snewn(x[0], BignumInt);
	1717	for (i = 0; i < (int)x[0]; i++)
	1718	workspace[i] = x[x[0] - i];
	1719
	1720	/*
	1721	* Next, write the decimal number starting with the last digit.
	1722	* We use ordinary short division, dividing 10 into the
	1723	* workspace.
	1724	*/
	1725	ndigit = ndigits - 1;
	1726	ret[ndigit] = '\0';
	1727	do {
	1728	iszero = 1;
	1729	carry = 0;
	1730	for (i = 0; i < (int)x[0]; i++) {
	1731	carry = (carry << BIGNUM_INT_BITS) + workspace[i];
	1732	workspace[i] = (BignumInt) (carry / 10);
	1733	if (workspace[i])
	1734	iszero = 0;
	1735	carry %= 10;
	1736	}
	1737	ret[--ndigit] = (char) (carry + '0');
	1738	} while (!iszero);
	1739
	1740	/*
	1741	* There's a chance we've fallen short of the start of the
	1742	* string. Correct if so.
	1743	*/
	1744	if (ndigit > 0)
	1745	memmove(ret, ret + ndigit, ndigits - ndigit);
	1746
	1747	/*
	1748	* Done.
	1749	*/
	1750	sfree(workspace);
	1751	return ret;
	1752	}
	1753
	1754	#ifdef TESTBN
	1755
	1756	#include <stdio.h>
	1757	#include <stdlib.h>
	1758	#include <ctype.h>
	1759
	1760	/*
	1761	* gcc -Wall -g -O0 -DTESTBN -o testbn sshbn.c misc.c conf.c tree234.c unix/uxmisc.c -I. -I unix -I charset
	1762	*
	1763	* Then feed to this program's standard input the output of
	1764	* testdata/bignum.py .
	1765	*/
	1766
	/*
	 * Fatal-error handler for the standalone test build: print
	 * "FATAL ERROR: " plus the printf-style message and a newline to
	 * stderr, then exit with status 1.
	 */
	void modalfatalbox(char *p, ...)
	{
	    va_list args;

	    fputs("FATAL ERROR: ", stderr);
	    va_start(args, p);
	    vfprintf(stderr, p, args);
	    va_end(args);
	    putc('\n', stderr);
	    exit(1);
	}
	1777
	1778	#define fromxdigit(c) ( (c)>'9' ? ((c)&0xDF) - 'A' + 10 : (c) - '0' )
	1779
	1780	int main(int argc, char **argv)
	1781	{
	1782	char *buf;
	1783	int line = 0;
	1784	int passes = 0, fails = 0;
	1785
	1786	while ((buf = fgetline(stdin)) != NULL) {
	1787	int maxlen = strlen(buf);
	1788	unsigned char *data = snewn(maxlen, unsigned char);
	1789	unsigned char ptrs[5], q;
	1790	int ptrnum;
	1791	char *bufp = buf;
	1792
	1793	line++;
	1794
	1795	q = data;
	1796	ptrnum = 0;
	1797
	1798	while (bufp && !isspace((unsigned char)bufp))
	1799	bufp++;
	1800	if (bufp)
	1801	*bufp++ = '\0';
	1802
	1803	while (*bufp) {
	1804	char start, end;
	1805	int i;
	1806
	1807	while (bufp && !isxdigit((unsigned char)bufp))
	1808	bufp++;
	1809	start = bufp;
	1810
	1811	if (!*bufp)
	1812	break;
	1813
	1814	while (bufp && isxdigit((unsigned char)bufp))
	1815	bufp++;
	1816	end = bufp;
	1817
	1818	if (ptrnum >= lenof(ptrs))
	1819	break;
	1820	ptrs[ptrnum++] = q;
	1821
	1822	for (i = -((end - start) & 1); i < end-start; i += 2) {
	1823	unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));
	1824	val = val * 16 + fromxdigit(start[i+1]);
	1825	*q++ = val;
	1826	}
	1827
	1828	ptrs[ptrnum] = q;
	1829	}
	1830
	1831	if (!strcmp(buf, "mul")) {
	1832	Bignum a, b, c, p;
	1833
	1834	if (ptrnum != 3) {
	1835	printf("%d: mul with %d parameters, expected 3\n", line, ptrnum);
	1836	exit(1);
	1837	}
	1838	a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	1839	b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	1840	c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	1841	p = bigmul(a, b);
	1842
	1843	if (bignum_cmp(c, p) == 0) {
	1844	passes++;
	1845	} else {
	1846	char *as = bignum_decimal(a);
	1847	char *bs = bignum_decimal(b);
	1848	char *cs = bignum_decimal(c);
	1849	char *ps = bignum_decimal(p);
	1850
	1851	printf("%d: fail: %s * %s gave %s expected %s\n",
	1852	line, as, bs, ps, cs);
	1853	fails++;
	1854
	1855	sfree(as);
	1856	sfree(bs);
	1857	sfree(cs);
	1858	sfree(ps);
	1859	}
	1860	freebn(a);
	1861	freebn(b);
	1862	freebn(c);
	1863	freebn(p);
	1864	} else if (!strcmp(buf, "pow")) {
	1865	Bignum base, expt, modulus, expected, answer;
	1866
	1867	if (ptrnum != 4) {
	1868	printf("%d: mul with %d parameters, expected 4\n", line, ptrnum);
	1869	exit(1);
	1870	}
	1871
	1872	base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	1873	expt = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	1874	modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	1875	expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
	1876	answer = modpow(base, expt, modulus);
	1877
	1878	if (bignum_cmp(expected, answer) == 0) {
	1879	passes++;
	1880	} else {
	1881	char *as = bignum_decimal(base);
	1882	char *bs = bignum_decimal(expt);
	1883	char *cs = bignum_decimal(modulus);
	1884	char *ds = bignum_decimal(answer);
	1885	char *ps = bignum_decimal(expected);
	1886
	1887	printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n",
	1888	line, as, bs, cs, ds, ps);
	1889	fails++;
	1890
	1891	sfree(as);
	1892	sfree(bs);
	1893	sfree(cs);
	1894	sfree(ds);
	1895	sfree(ps);
	1896	}
	1897	freebn(base);
	1898	freebn(expt);
	1899	freebn(modulus);
	1900	freebn(expected);
	1901	freebn(answer);
	1902	} else {
	1903	printf("%d: unrecognised test keyword: '%s'\n", line, buf);
	1904	exit(1);
	1905	}
	1906
	1907	sfree(buf);
	1908	sfree(data);
	1909	}
	1910
	1911	printf("passed %d failed %d total %d\n", passes, fails, passes+fails);
	1912	return fails != 0;
	1913	}
	1914
	1915	#endif