git.distorted.org.uk Git - u/mdw/putty/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Bignum routines for RSA and DH and stuff.
	3	*/
	4
	5	#include <stdio.h>
	6	#include <assert.h>
	7	#include <stdlib.h>
	8	#include <string.h>
	9
	10	#include "misc.h"
	11
	12	/*
	13	* Usage notes:
	14	* * Do not call the DIVMOD_WORD macro with expressions such as array
	15	* subscripts, as some implementations object to this (see below).
	16	* * Note that none of the division methods below will cope if the
	17	* quotient won't fit into BIGNUM_INT_BITS. Callers should be careful
	18	* to avoid this case.
	19	* If this condition occurs, in the case of the x86 DIV instruction,
	20	* an overflow exception will occur, which (according to a correspondent)
	21	* will manifest on Windows as something like
	22	* 0xC0000095: Integer overflow
	23	* The C variant won't give the right answer, either.
	24	*/
	25
	26	#if defined __GNUC__ && defined __i386__
	27	typedef unsigned long BignumInt;
	28	typedef unsigned long long BignumDblInt;
	29	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	30	#define BIGNUM_TOP_BIT 0x80000000UL
	31	#define BIGNUM_INT_BITS 32
	32	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	33	#define DIVMOD_WORD(q, r, hi, lo, w) \
	34	__asm__("div %2" : \
	35	"=d" (r), "=a" (q) : \
	36	"r" (w), "d" (hi), "a" (lo))
	37	#elif defined _MSC_VER && defined _M_IX86
	38	typedef unsigned __int32 BignumInt;
	39	typedef unsigned __int64 BignumDblInt;
	40	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	41	#define BIGNUM_TOP_BIT 0x80000000UL
	42	#define BIGNUM_INT_BITS 32
	43	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	44	/* Note: MASM interprets array subscripts in the macro arguments as
	45	* assembler syntax, which gives the wrong answer. Don't supply them.
	46	* <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */
	47	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	48	__asm mov edx, hi \
	49	__asm mov eax, lo \
	50	__asm div w \
	51	__asm mov r, edx \
	52	__asm mov q, eax \
	53	} while(0)
	54	#elif defined _LP64
	55	/* 64-bit architectures can do 32x32->64 chunks at a time */
	56	typedef unsigned int BignumInt;
	57	typedef unsigned long BignumDblInt;
	58	#define BIGNUM_INT_MASK 0xFFFFFFFFU
	59	#define BIGNUM_TOP_BIT 0x80000000U
	60	#define BIGNUM_INT_BITS 32
	61	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	62	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	63	BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) \| lo; \
	64	q = n / w; \
	65	r = n % w; \
	66	} while (0)
	67	#elif defined _LLP64
	68	/* 64-bit architectures in which unsigned long is 32 bits, not 64 */
	69	typedef unsigned long BignumInt;
	70	typedef unsigned long long BignumDblInt;
	71	#define BIGNUM_INT_MASK 0xFFFFFFFFUL
	72	#define BIGNUM_TOP_BIT 0x80000000UL
	73	#define BIGNUM_INT_BITS 32
	74	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	75	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	76	BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) \| lo; \
	77	q = n / w; \
	78	r = n % w; \
	79	} while (0)
	80	#else
	81	/* Fallback for all other cases */
	82	typedef unsigned short BignumInt;
	83	typedef unsigned long BignumDblInt;
	84	#define BIGNUM_INT_MASK 0xFFFFU
	85	#define BIGNUM_TOP_BIT 0x8000U
	86	#define BIGNUM_INT_BITS 16
	87	#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
	88	#define DIVMOD_WORD(q, r, hi, lo, w) do { \
	89	BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) \| lo; \
	90	q = n / w; \
	91	r = n % w; \
	92	} while (0)
	93	#endif
	94
	95	#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
	96
	97	#define BIGNUM_INTERNAL
	98	typedef BignumInt *Bignum;
	99
	100	#include "ssh.h"
	101
	102	BignumInt bnZero[1] = { 0 };
	103	BignumInt bnOne[2] = { 1, 1 };
	104
	105	/*
	106	* The Bignum format is an array of `BignumInt'. The first
	107	* element of the array counts the remaining elements. The
	108	* remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_
	109	* significant digit first. (So it's trivial to extract the bit
	110	* with value 2^n for any n.)
	111	*
	112	* All Bignums in this module are positive. Negative numbers must
	113	* be dealt with outside it.
	114	*
	115	* INVARIANT: the most significant word of any Bignum must be
	116	* nonzero.
	117	*/
	118
	119	Bignum Zero = bnZero, One = bnOne;
	120
	121	static Bignum newbn(int length)
	122	{
	123	Bignum b = snewn(length + 1, BignumInt);
	124	if (!b)
	125	abort(); /* FIXME */
	126	memset(b, 0, (length + 1) * sizeof(*b));
	127	b[0] = length;
	128	return b;
	129	}
	130
	131	void bn_restore_invariant(Bignum b)
	132	{
	133	while (b[0] > 1 && b[b[0]] == 0)
	134	b[0]--;
	135	}
	136
	137	Bignum copybn(Bignum orig)
	138	{
	139	Bignum b = snewn(orig[0] + 1, BignumInt);
	140	if (!b)
	141	abort(); /* FIXME */
	142	memcpy(b, orig, (orig[0] + 1) * sizeof(*b));
	143	return b;
	144	}
	145
	146	void freebn(Bignum b)
	147	{
	148	/*
	149	* Burn the evidence, just in case.
	150	*/
	151	memset(b, 0, sizeof(b[0]) * (b[0] + 1));
	152	sfree(b);
	153	}
	154
	155	Bignum bn_power_2(int n)
	156	{
	157	Bignum ret = newbn(n / BIGNUM_INT_BITS + 1);
	158	bignum_set_bit(ret, n, 1);
	159	return ret;
	160	}
	161
	162	/*
	163	* Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
	164	* big-endian arrays of 'len' BignumInts. Returns a BignumInt carried
	165	* off the top.
	166	*/
	167	static BignumInt internal_add(const BignumInt a, const BignumInt b,
	168	BignumInt *c, int len)
	169	{
	170	int i;
	171	BignumDblInt carry = 0;
	172
	173	for (i = len-1; i >= 0; i--) {
	174	carry += (BignumDblInt)a[i] + b[i];
	175	c[i] = (BignumInt)carry;
	176	carry >>= BIGNUM_INT_BITS;
	177	}
	178
	179	return (BignumInt)carry;
	180	}
	181
	182	/*
	183	* Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
	184	* all big-endian arrays of 'len' BignumInts. Any borrow from the top
	185	* is ignored.
	186	*/
	187	static void internal_sub(const BignumInt a, const BignumInt b,
	188	BignumInt *c, int len)
	189	{
	190	int i;
	191	BignumDblInt carry = 1;
	192
	193	for (i = len-1; i >= 0; i--) {
	194	carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
	195	c[i] = (BignumInt)carry;
	196	carry >>= BIGNUM_INT_BITS;
	197	}
	198	}
	199
	200	/*
	201	* Compute c = a * b.
	202	* Input is in the first len words of a and b.
	203	* Result is returned in the first 2*len words of c.
	204	*/
	205	#define KARATSUBA_THRESHOLD 50
	206	static void internal_mul(const BignumInt a, const BignumInt b,
	207	BignumInt *c, int len)
	208	{
	209	int i, j;
	210	BignumDblInt t;
	211
	212	if (len > KARATSUBA_THRESHOLD) {
	213
	214	/*
	215	* Karatsuba divide-and-conquer algorithm. Cut each input in
	216	* half, so that it's expressed as two big 'digits' in a giant
	217	* base D:
	218	*
	219	* a = a_1 D + a_0
	220	* b = b_1 D + b_0
	221	*
	222	* Then the product is of course
	223	*
	224	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	225	*
	226	* and we compute the three coefficients by recursively
	227	* calling ourself to do half-length multiplications.
	228	*
	229	* The clever bit that makes this worth doing is that we only
	230	* need _one_ half-length multiplication for the central
	231	* coefficient rather than the two that it obviouly looks
	232	* like, because we can use a single multiplication to compute
	233	*
	234	* (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
	235	*
	236	* and then we subtract the other two coefficients (a_1 b_1
	237	* and a_0 b_0) which we were computing anyway.
	238	*
	239	* Hence we get to multiply two numbers of length N in about
	240	* three times as much work as it takes to multiply numbers of
	241	* length N/2, which is obviously better than the four times
	242	* as much work it would take if we just did a long
	243	* conventional multiply.
	244	*/
	245
	246	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	247	int midlen = botlen + 1;
	248	BignumInt *scratch;
	249	BignumDblInt carry;
	250	#ifdef KARA_DEBUG
	251	int i;
	252	#endif
	253
	254	/*
	255	* The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping
	256	* in the output array, so we can compute them immediately in
	257	* place.
	258	*/
	259
	260	#ifdef KARA_DEBUG
	261	printf("a1,a0 = 0x");
	262	for (i = 0; i < len; i++) {
	263	if (i == toplen) printf(", 0x");
	264	printf("%0*x", BIGNUM_INT_BITS/4, a[i]);
	265	}
	266	printf("\n");
	267	printf("b1,b0 = 0x");
	268	for (i = 0; i < len; i++) {
	269	if (i == toplen) printf(", 0x");
	270	printf("%0*x", BIGNUM_INT_BITS/4, b[i]);
	271	}
	272	printf("\n");
	273	#endif
	274
	275	/* a_1 b_1 */
	276	internal_mul(a, b, c, toplen);
	277	#ifdef KARA_DEBUG
	278	printf("a1b1 = 0x");
	279	for (i = 0; i < 2*toplen; i++) {
	280	printf("%0*x", BIGNUM_INT_BITS/4, c[i]);
	281	}
	282	printf("\n");
	283	#endif
	284
	285	/* a_0 b_0 */
	286	internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen);
	287	#ifdef KARA_DEBUG
	288	printf("a0b0 = 0x");
	289	for (i = 0; i < 2*botlen; i++) {
	290	printf("%0x", BIGNUM_INT_BITS/4, c[2toplen+i]);
	291	}
	292	printf("\n");
	293	#endif
	294
	295	/*
	296	* We must allocate scratch space for the central coefficient,
	297	* and also for the two input values that we multiply when
	298	* computing it. Since either or both may carry into the
	299	* (botlen+1)th word, we must use a slightly longer length
	300	* 'midlen'.
	301	*/
	302	scratch = snewn(4 * midlen, BignumInt);
	303
	304	/* Zero padding. midlen exceeds toplen by at most 2, so just
	305	* zero the first two words of each input and the rest will be
	306	* copied over. */
	307	scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;
	308
	309	for (j = 0; j < toplen; j++) {
	310	scratch[midlen - toplen + j] = a[j]; /* a_1 */
	311	scratch[2midlen - toplen + j] = b[j]; / b_1 */
	312	}
	313
	314	/* compute a_1 + a_0 */
	315	scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen);
	316	#ifdef KARA_DEBUG
	317	printf("a1plusa0 = 0x");
	318	for (i = 0; i < midlen; i++) {
	319	printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);
	320	}
	321	printf("\n");
	322	#endif
	323	/* compute b_1 + b_0 */
	324	scratch[midlen] = internal_add(scratch+midlen+1, b+toplen,
	325	scratch+midlen+1, botlen);
	326	#ifdef KARA_DEBUG
	327	printf("b1plusb0 = 0x");
	328	for (i = 0; i < midlen; i++) {
	329	printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen+i]);
	330	}
	331	printf("\n");
	332	#endif
	333
	334	/*
	335	* Now we can do the third multiplication.
	336	*/
	337	internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen);
	338	#ifdef KARA_DEBUG
	339	printf("a1plusa0timesb1plusb0 = 0x");
	340	for (i = 0; i < 2*midlen; i++) {
	341	printf("%0x", BIGNUM_INT_BITS/4, scratch[2midlen+i]);
	342	}
	343	printf("\n");
	344	#endif
	345
	346	/*
	347	* Now we can reuse the first half of 'scratch' to compute the
	348	* sum of the outer two coefficients, to subtract from that
	349	* product to obtain the middle one.
	350	*/
	351	scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;
	352	for (j = 0; j < 2*toplen; j++)
	353	scratch[2midlen - 2toplen + j] = c[j];
	354	scratch[1] = internal_add(scratch+2, c + 2*toplen,
	355	scratch+2, 2*botlen);
	356	#ifdef KARA_DEBUG
	357	printf("a1b1plusa0b0 = 0x");
	358	for (i = 0; i < 2*midlen; i++) {
	359	printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);
	360	}
	361	printf("\n");
	362	#endif
	363
	364	internal_sub(scratch + 2*midlen, scratch,
	365	scratch + 2midlen, 2midlen);
	366	#ifdef KARA_DEBUG
	367	printf("a1b0plusa0b1 = 0x");
	368	for (i = 0; i < 2*midlen; i++) {
	369	printf("%0x", BIGNUM_INT_BITS/4, scratch[2midlen+i]);
	370	}
	371	printf("\n");
	372	#endif
	373
	374	/*
	375	* And now all we need to do is to add that middle coefficient
	376	* back into the output. We may have to propagate a carry
	377	* further up the output, but we can be sure it won't
	378	* propagate right the way off the top.
	379	*/
	380	carry = internal_add(c + 2len - botlen - 2midlen,
	381	scratch + 2*midlen,
	382	c + 2len - botlen - 2midlen, 2*midlen);
	383	j = 2len - botlen - 2midlen - 1;
	384	while (carry) {
	385	assert(j >= 0);
	386	carry += c[j];
	387	c[j] = (BignumInt)carry;
	388	carry >>= BIGNUM_INT_BITS;
	389	j--;
	390	}
	391	#ifdef KARA_DEBUG
	392	printf("ab = 0x");
	393	for (i = 0; i < 2*len; i++) {
	394	printf("%0*x", BIGNUM_INT_BITS/4, c[i]);
	395	}
	396	printf("\n");
	397	#endif
	398
	399	/* Free scratch. */
	400	for (j = 0; j < 4 * midlen; j++)
	401	scratch[j] = 0;
	402	sfree(scratch);
	403
	404	} else {
	405
	406	/*
	407	* Multiply in the ordinary O(N^2) way.
	408	*/
	409
	410	for (j = 0; j < 2 * len; j++)
	411	c[j] = 0;
	412
	413	for (i = len - 1; i >= 0; i--) {
	414	t = 0;
	415	for (j = len - 1; j >= 0; j--) {
	416	t += MUL_WORD(a[i], (BignumDblInt) b[j]);
	417	t += (BignumDblInt) c[i + j + 1];
	418	c[i + j + 1] = (BignumInt) t;
	419	t = t >> BIGNUM_INT_BITS;
	420	}
	421	c[i] = (BignumInt) t;
	422	}
	423	}
	424	}
	425
	426	/*
	427	* Variant form of internal_mul used for the initial step of
	428	* Montgomery reduction. Only bothers outputting 'len' words
	429	* (everything above that is thrown away).
	430	*/
	431	static void internal_mul_low(const BignumInt a, const BignumInt b,
	432	BignumInt *c, int len)
	433	{
	434	int i, j;
	435	BignumDblInt t;
	436
	437	if (len > KARATSUBA_THRESHOLD) {
	438
	439	/*
	440	* Karatsuba-aware version of internal_mul_low. As before, we
	441	* express each input value as a shifted combination of two
	442	* halves:
	443	*
	444	* a = a_1 D + a_0
	445	* b = b_1 D + b_0
	446	*
	447	* Then the full product is, as before,
	448	*
	449	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	450	*
	451	* Provided we choose D on the large side (so that a_0 and b_0
	452	* are _at least_ as long as a_1 and b_1), we don't need the
	453	* topmost term at all, and we only need half of the middle
	454	* term. So there's no point in doing the proper Karatsuba
	455	* optimisation which computes the middle term using the top
	456	* one, because we'd take as long computing the top one as
	457	* just computing the middle one directly.
	458	*
	459	* So instead, we do a much more obvious thing: we call the
	460	* fully optimised internal_mul to compute a_0 b_0, and we
	461	* recursively call ourself to compute the _bottom halves_ of
	462	* a_1 b_0 and a_0 b_1, each of which we add into the result
	463	* in the obvious way.
	464	*
	465	* In other words, there's no actual Karatsuba _optimisation_
	466	* in this function; the only benefit in doing it this way is
	467	* that we call internal_mul proper for a large part of the
	468	* work, and _that_ can optimise its operation.
	469	*/
	470
	471	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	472	BignumInt *scratch;
	473
	474	/*
	475	* Allocate scratch space for the various bits and pieces
	476	* we're going to be adding together. We need botlen*2 words
	477	* for a_0 b_0 (though we may end up throwing away its topmost
	478	* word), and toplen words for each of a_1 b_0 and a_0 b_1.
	479	* That adds up to exactly 2*len.
	480	*/
	481	scratch = snewn(len*2, BignumInt);
	482
	483	/* a_0 b_0 */
	484	internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen);
	485
	486	/* a_1 b_0 */
	487	internal_mul_low(a, b + len - toplen, scratch + toplen, toplen);
	488
	489	/* a_0 b_1 */
	490	internal_mul_low(a + len - toplen, b, scratch, toplen);
	491
	492	/* Copy the bottom half of the big coefficient into place */
	493	for (j = 0; j < botlen; j++)
	494	c[toplen + j] = scratch[2*toplen + botlen + j];
	495
	496	/* Add the two small coefficients, throwing away the returned carry */
	497	internal_add(scratch, scratch + toplen, scratch, toplen);
	498
	499	/* And add that to the large coefficient, leaving the result in c. */
	500	internal_add(scratch, scratch + 2*toplen + botlen - toplen,
	501	c, toplen);
	502
	503	/* Free scratch. */
	504	for (j = 0; j < len*2; j++)
	505	scratch[j] = 0;
	506	sfree(scratch);
	507
	508	} else {
	509
	510	for (j = 0; j < len; j++)
	511	c[j] = 0;
	512
	513	for (i = len - 1; i >= 0; i--) {
	514	t = 0;
	515	for (j = len - 1; j >= len - i - 1; j--) {
	516	t += MUL_WORD(a[i], (BignumDblInt) b[j]);
	517	t += (BignumDblInt) c[i + j + 1 - len];
	518	c[i + j + 1 - len] = (BignumInt) t;
	519	t = t >> BIGNUM_INT_BITS;
	520	}
	521	}
	522
	523	}
	524	}
	525
	526	/*
	527	* Montgomery reduction. Expects x to be a big-endian array of 2*len
	528	* BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
	529	* BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
	530	* a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
	531	* x' < n.
	532	*
	533	* 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts
	534	* each, containing respectively n and the multiplicative inverse of
	535	* -n mod r.
	536	*
	537	* 'tmp' is an array of at least '3*len' BignumInts used as scratch
	538	* space.
	539	*/
	540	static void monty_reduce(BignumInt x, const BignumInt n,
	541	const BignumInt mninv, BignumInt tmp, int len)
	542	{
	543	int i;
	544	BignumInt carry;
	545
	546	/*
	547	* Multiply x by (-n)^{-1} mod r. This gives us a value m such
	548	* that mn is congruent to -x mod r. Hence, mn+x is an exact
	549	* multiple of r, and is also (obviously) congruent to x mod n.
	550	*/
	551	internal_mul_low(x + len, mninv, tmp, len);
	552
	553	/*
	554	* Compute t = (mn+x)/r in ordinary, non-modular, integer
	555	* arithmetic. By construction this is exact, and is congruent mod
	556	* n to x * r^{-1}, i.e. the answer we want.
	557	*
	558	* The following multiply leaves that answer in the _most_
	559	* significant half of the 'x' array, so then we must shift it
	560	* down.
	561	*/
	562	internal_mul(tmp, n, tmp+len, len);
	563	carry = internal_add(x, tmp+len, x, 2*len);
	564	for (i = 0; i < len; i++)
	565	x[len + i] = x[i], x[i] = 0;
	566
	567	/*
	568	* Reduce t mod n. This doesn't require a full-on division by n,
	569	* but merely a test and single optional subtraction, since we can
	570	* show that 0 <= t < 2n.
	571	*
	572	* Proof:
	573	* + we computed m mod r, so 0 <= m < r.
	574	* + so 0 <= mn < rn, obviously
	575	* + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn
	576	* + yielding 0 <= (mn+x)/r < 2n as required.
	577	*/
	578	if (!carry) {
	579	for (i = 0; i < len; i++)
	580	if (x[len + i] != n[i])
	581	break;
	582	}
	583	if (carry \|\| i >= len \|\| x[len + i] > n[i])
	584	internal_sub(x+len, n, x+len, len);
	585	}
	586
	587	static void internal_add_shifted(BignumInt *number,
	588	unsigned n, int shift)
	589	{
	590	int word = 1 + (shift / BIGNUM_INT_BITS);
	591	int bshift = shift % BIGNUM_INT_BITS;
	592	BignumDblInt addend;
	593
	594	addend = (BignumDblInt)n << bshift;
	595
	596	while (addend) {
	597	addend += number[word];
	598	number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
	599	addend >>= BIGNUM_INT_BITS;
	600	word++;
	601	}
	602	}
	603
	604	/*
	605	* Compute a = a % m.
	606	* Input in first alen words of a and first mlen words of m.
	607	* Output in first alen words of a
	608	* (of which first alen-mlen words will be zero).
	609	* The MSW of m MUST have its high bit set.
	610	* Quotient is accumulated in the `quotient' array, which is a Bignum
	611	* rather than the internal bigendian format. Quotient parts are shifted
	612	* left by `qshift' before adding into quot.
	613	*/
	614	static void internal_mod(BignumInt *a, int alen,
	615	BignumInt *m, int mlen,
	616	BignumInt *quot, int qshift)
	617	{
	618	BignumInt m0, m1;
	619	unsigned int h;
	620	int i, k;
	621
	622	m0 = m[0];
	623	if (mlen > 1)
	624	m1 = m[1];
	625	else
	626	m1 = 0;
	627
	628	for (i = 0; i <= alen - mlen; i++) {
	629	BignumDblInt t;
	630	unsigned int q, r, c, ai1;
	631
	632	if (i == 0) {
	633	h = 0;
	634	} else {
	635	h = a[i - 1];
	636	a[i - 1] = 0;
	637	}
	638
	639	if (i == alen - 1)
	640	ai1 = 0;
	641	else
	642	ai1 = a[i + 1];
	643
	644	/* Find q = h:a[i] / m0 */
	645	if (h >= m0) {
	646	/*
	647	* Special case.
	648	*
	649	* To illustrate it, suppose a BignumInt is 8 bits, and
	650	* we are dividing (say) A1:23:45:67 by A1:B2:C3. Then
	651	* our initial division will be 0xA123 / 0xA1, which
	652	* will give a quotient of 0x100 and a divide overflow.
	653	* However, the invariants in this division algorithm
	654	* are not violated, since the full number A1:23:... is
	655	* _less_ than the quotient prefix A1:B2:... and so the
	656	* following correction loop would have sorted it out.
	657	*
	658	* In this situation we set q to be the largest
	659	* quotient we _can_ stomach (0xFF, of course).
	660	*/
	661	q = BIGNUM_INT_MASK;
	662	} else {
	663	/* Macro doesn't want an array subscript expression passed
	664	* into it (see definition), so use a temporary. */
	665	BignumInt tmplo = a[i];
	666	DIVMOD_WORD(q, r, h, tmplo, m0);
	667
	668	/* Refine our estimate of q by looking at
	669	h:a[i]:a[i+1] / m0:m1 */
	670	t = MUL_WORD(m1, q);
	671	if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
	672	q--;
	673	t -= m1;
	674	r = (r + m0) & BIGNUM_INT_MASK; /* overflow? */
	675	if (r >= (BignumDblInt) m0 &&
	676	t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
	677	}
	678	}
	679
	680	/* Subtract q * m from a[i...] */
	681	c = 0;
	682	for (k = mlen - 1; k >= 0; k--) {
	683	t = MUL_WORD(q, m[k]);
	684	t += c;
	685	c = (unsigned)(t >> BIGNUM_INT_BITS);
	686	if ((BignumInt) t > a[i + k])
	687	c++;
	688	a[i + k] -= (BignumInt) t;
	689	}
	690
	691	/* Add back m in case of borrow */
	692	if (c != h) {
	693	t = 0;
	694	for (k = mlen - 1; k >= 0; k--) {
	695	t += m[k];
	696	t += a[i + k];
	697	a[i + k] = (BignumInt) t;
	698	t = t >> BIGNUM_INT_BITS;
	699	}
	700	q--;
	701	}
	702	if (quot)
	703	internal_add_shifted(quot, q, qshift + BIGNUM_INT_BITS * (alen - mlen - i));
	704	}
	705	}
	706
	707	/*
	708	* Compute (base ^ exp) % mod. Uses the Montgomery multiplication
	709	* technique.
	710	*/
	711	Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
	712	{
	713	BignumInt a, b, x, n, mninv, tmp;
	714	int len, i, j;
	715	Bignum base, base2, r, rn, inv, result;
	716
	717	/*
	718	* The most significant word of mod needs to be non-zero. It
	719	* should already be, but let's make sure.
	720	*/
	721	assert(mod[mod[0]] != 0);
	722
	723	/*
	724	* Make sure the base is smaller than the modulus, by reducing
	725	* it modulo the modulus if not.
	726	*/
	727	base = bigmod(base_in, mod);
	728
	729	/*
	730	* mod had better be odd, or we can't do Montgomery multiplication
	731	* using a power of two at all.
	732	*/
	733	assert(mod[1] & 1);
	734
	735	/*
	736	* Compute the inverse of n mod r, for monty_reduce. (In fact we
	737	* want the inverse of _minus_ n mod r, but we'll sort that out
	738	* below.)
	739	*/
	740	len = mod[0];
	741	r = bn_power_2(BIGNUM_INT_BITS * len);
	742	inv = modinv(mod, r);
	743
	744	/*
	745	* Multiply the base by r mod n, to get it into Montgomery
	746	* representation.
	747	*/
	748	base2 = modmul(base, r, mod);
	749	freebn(base);
	750	base = base2;
	751
	752	rn = bigmod(r, mod); /* r mod n, i.e. Montgomerified 1 */
	753
	754	freebn(r); /* won't need this any more */
	755
	756	/*
	757	* Set up internal arrays of the right lengths, in big-endian
	758	* format, containing the base, the modulus, and the modulus's
	759	* inverse.
	760	*/
	761	n = snewn(len, BignumInt);
	762	for (j = 0; j < len; j++)
	763	n[len - 1 - j] = mod[j + 1];
	764
	765	mninv = snewn(len, BignumInt);
	766	for (j = 0; j < len; j++)
	767	mninv[len - 1 - j] = (j < inv[0] ? inv[j + 1] : 0);
	768	freebn(inv); /* we don't need this copy of it any more */
	769	/* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
	770	x = snewn(len, BignumInt);
	771	for (j = 0; j < len; j++)
	772	x[j] = 0;
	773	internal_sub(x, mninv, mninv, len);
	774
	775	/* x = snewn(len, BignumInt); / / already done above */
	776	for (j = 0; j < len; j++)
	777	x[len - 1 - j] = (j < base[0] ? base[j + 1] : 0);
	778	freebn(base); /* we don't need this copy of it any more */
	779
	780	a = snewn(2*len, BignumInt);
	781	b = snewn(2*len, BignumInt);
	782	for (j = 0; j < len; j++)
	783	a[2*len - 1 - j] = (j < rn[0] ? rn[j + 1] : 0);
	784	freebn(rn);
	785
	786	tmp = snewn(3*len, BignumInt);
	787
	788	/* Skip leading zero bits of exp. */
	789	i = 0;
	790	j = BIGNUM_INT_BITS-1;
	791	while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
	792	j--;
	793	if (j < 0) {
	794	i++;
	795	j = BIGNUM_INT_BITS-1;
	796	}
	797	}
	798
	799	/* Main computation */
	800	while (i < (int)exp[0]) {
	801	while (j >= 0) {
	802	internal_mul(a + len, a + len, b, len);
	803	monty_reduce(b, n, mninv, tmp, len);
	804	if ((exp[exp[0] - i] & (1 << j)) != 0) {
	805	internal_mul(b + len, x, a, len);
	806	monty_reduce(a, n, mninv, tmp, len);
	807	} else {
	808	BignumInt *t;
	809	t = a;
	810	a = b;
	811	b = t;
	812	}
	813	j--;
	814	}
	815	i++;
	816	j = BIGNUM_INT_BITS-1;
	817	}
	818
	819	/*
	820	* Final monty_reduce to get back from the adjusted Montgomery
	821	* representation.
	822	*/
	823	monty_reduce(a, n, mninv, tmp, len);
	824
	825	/* Copy result to buffer */
	826	result = newbn(mod[0]);
	827	for (i = 0; i < len; i++)
	828	result[result[0] - i] = a[i + len];
	829	while (result[0] > 1 && result[result[0]] == 0)
	830	result[0]--;
	831
	832	/* Free temporary arrays */
	833	for (i = 0; i < 3 * len; i++)
	834	tmp[i] = 0;
	835	sfree(tmp);
	836	for (i = 0; i < 2 * len; i++)
	837	a[i] = 0;
	838	sfree(a);
	839	for (i = 0; i < 2 * len; i++)
	840	b[i] = 0;
	841	sfree(b);
	842	for (i = 0; i < len; i++)
	843	mninv[i] = 0;
	844	sfree(mninv);
	845	for (i = 0; i < len; i++)
	846	n[i] = 0;
	847	sfree(n);
	848	for (i = 0; i < len; i++)
	849	x[i] = 0;
	850	sfree(x);
	851
	852	return result;
	853	}
	854
	855	/*
	856	* Compute (p * q) % mod.
	857	* The most significant word of mod MUST be non-zero.
	858	* We assume that the result array is the same size as the mod array.
	859	*/
	860	Bignum modmul(Bignum p, Bignum q, Bignum mod)
	861	{
	862	BignumInt a, n, m, o;
	863	int mshift;
	864	int pqlen, mlen, rlen, i, j;
	865	Bignum result;
	866
	867	/* Allocate m of size mlen, copy mod to m */
	868	/* We use big endian internally */
	869	mlen = mod[0];
	870	m = snewn(mlen, BignumInt);
	871	for (j = 0; j < mlen; j++)
	872	m[j] = mod[mod[0] - j];
	873
	874	/* Shift m left to make msb bit set */
	875	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	876	if ((m[0] << mshift) & BIGNUM_TOP_BIT)
	877	break;
	878	if (mshift) {
	879	for (i = 0; i < mlen - 1; i++)
	880	m[i] = (m[i] << mshift) \| (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
	881	m[mlen - 1] = m[mlen - 1] << mshift;
	882	}
	883
	884	pqlen = (p[0] > q[0] ? p[0] : q[0]);
	885
	886	/* Allocate n of size pqlen, copy p to n */
	887	n = snewn(pqlen, BignumInt);
	888	i = pqlen - p[0];
	889	for (j = 0; j < i; j++)
	890	n[j] = 0;
	891	for (j = 0; j < (int)p[0]; j++)
	892	n[i + j] = p[p[0] - j];
	893
	894	/* Allocate o of size pqlen, copy q to o */
	895	o = snewn(pqlen, BignumInt);
	896	i = pqlen - q[0];
	897	for (j = 0; j < i; j++)
	898	o[j] = 0;
	899	for (j = 0; j < (int)q[0]; j++)
	900	o[i + j] = q[q[0] - j];
	901
	902	/* Allocate a of size 2pqlen for result /
	903	a = snewn(2 * pqlen, BignumInt);
	904
	905	/* Main computation */
	906	internal_mul(n, o, a, pqlen);
	907	internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
	908
	909	/* Fixup result in case the modulus was shifted */
	910	if (mshift) {
	911	for (i = 2 * pqlen - mlen - 1; i < 2 * pqlen - 1; i++)
	912	a[i] = (a[i] << mshift) \| (a[i + 1] >> (BIGNUM_INT_BITS - mshift));
	913	a[2 * pqlen - 1] = a[2 * pqlen - 1] << mshift;
	914	internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
	915	for (i = 2 * pqlen - 1; i >= 2 * pqlen - mlen; i--)
	916	a[i] = (a[i] >> mshift) \| (a[i - 1] << (BIGNUM_INT_BITS - mshift));
	917	}
	918
	919	/* Copy result to buffer */
	920	rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
	921	result = newbn(rlen);
	922	for (i = 0; i < rlen; i++)
	923	result[result[0] - i] = a[i + 2 * pqlen - rlen];
	924	while (result[0] > 1 && result[result[0]] == 0)
	925	result[0]--;
	926
	927	/* Free temporary arrays */
	928	for (i = 0; i < 2 * pqlen; i++)
	929	a[i] = 0;
	930	sfree(a);
	931	for (i = 0; i < mlen; i++)
	932	m[i] = 0;
	933	sfree(m);
	934	for (i = 0; i < pqlen; i++)
	935	n[i] = 0;
	936	sfree(n);
	937	for (i = 0; i < pqlen; i++)
	938	o[i] = 0;
	939	sfree(o);
	940
	941	return result;
	942	}
	943
	944	/*
	945	* Compute p % mod.
	946	* The most significant word of mod MUST be non-zero.
	947	* We assume that the result array is the same size as the mod array.
	948	* We optionally write out a quotient if `quotient' is non-NULL.
	949	* We can avoid writing out the result if `result' is NULL.
	950	*/
	951	static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)
	952	{
	953	BignumInt n, m;
	954	int mshift;
	955	int plen, mlen, i, j;
	956
	957	/* Allocate m of size mlen, copy mod to m */
	958	/* We use big endian internally */
	959	mlen = mod[0];
	960	m = snewn(mlen, BignumInt);
	961	for (j = 0; j < mlen; j++)
	962	m[j] = mod[mod[0] - j];
	963
	964	/* Shift m left to make msb bit set */
	965	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	966	if ((m[0] << mshift) & BIGNUM_TOP_BIT)
	967	break;
	968	if (mshift) {
	969	for (i = 0; i < mlen - 1; i++)
	970	m[i] = (m[i] << mshift) \| (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
	971	m[mlen - 1] = m[mlen - 1] << mshift;
	972	}
	973
	974	plen = p[0];
	975	/* Ensure plen > mlen */
	976	if (plen <= mlen)
	977	plen = mlen + 1;
	978
	979	/* Allocate n of size plen, copy p to n */
	980	n = snewn(plen, BignumInt);
	981	for (j = 0; j < plen; j++)
	982	n[j] = 0;
	983	for (j = 1; j <= (int)p[0]; j++)
	984	n[plen - j] = p[j];
	985
	986	/* Main computation */
	987	internal_mod(n, plen, m, mlen, quotient, mshift);
	988
	989	/* Fixup result in case the modulus was shifted */
	990	if (mshift) {
	991	for (i = plen - mlen - 1; i < plen - 1; i++)
	992	n[i] = (n[i] << mshift) \| (n[i + 1] >> (BIGNUM_INT_BITS - mshift));
	993	n[plen - 1] = n[plen - 1] << mshift;
	994	internal_mod(n, plen, m, mlen, quotient, 0);
	995	for (i = plen - 1; i >= plen - mlen; i--)
	996	n[i] = (n[i] >> mshift) \| (n[i - 1] << (BIGNUM_INT_BITS - mshift));
	997	}
	998
	999	/* Copy result to buffer */
	1000	if (result) {
	1001	for (i = 1; i <= (int)result[0]; i++) {
	1002	int j = plen - i;
	1003	result[i] = j >= 0 ? n[j] : 0;
	1004	}
	1005	}
	1006
	1007	/* Free temporary arrays */
	1008	for (i = 0; i < mlen; i++)
	1009	m[i] = 0;
	1010	sfree(m);
	1011	for (i = 0; i < plen; i++)
	1012	n[i] = 0;
	1013	sfree(n);
	1014	}
	1015
	1016	/*
	1017	* Decrement a number.
	1018	*/
	1019	void decbn(Bignum bn)
	1020	{
	1021	int i = 1;
	1022	while (i < (int)bn[0] && bn[i] == 0)
	1023	bn[i++] = BIGNUM_INT_MASK;
	1024	bn[i]--;
	1025	}
	1026
	1027	Bignum bignum_from_bytes(const unsigned char *data, int nbytes)
	1028	{
	1029	Bignum result;
	1030	int w, i;
	1031
	1032	w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
	1033
	1034	result = newbn(w);
	1035	for (i = 1; i <= w; i++)
	1036	result[i] = 0;
	1037	for (i = nbytes; i--;) {
	1038	unsigned char byte = *data++;
	1039	result[1 + i / BIGNUM_INT_BYTES] \|= byte << (8*i % BIGNUM_INT_BITS);
	1040	}
	1041
	1042	while (result[0] > 1 && result[result[0]] == 0)
	1043	result[0]--;
	1044	return result;
	1045	}
	1046
	1047	/*
	1048	* Read an SSH-1-format bignum from a data buffer. Return the number
	1049	* of bytes consumed, or -1 if there wasn't enough data.
	1050	*/
	1051	int ssh1_read_bignum(const unsigned char data, int len, Bignum result)
	1052	{
	1053	const unsigned char *p = data;
	1054	int i;
	1055	int w, b;
	1056
	1057	if (len < 2)
	1058	return -1;
	1059
	1060	w = 0;
	1061	for (i = 0; i < 2; i++)
	1062	w = (w << 8) + *p++;
	1063	b = (w + 7) / 8; /* bits -> bytes */
	1064
	1065	if (len < b+2)
	1066	return -1;
	1067
	1068	if (!result) /* just return length */
	1069	return b + 2;
	1070
	1071	*result = bignum_from_bytes(p, b);
	1072
	1073	return p + b - data;
	1074	}
	1075
	1076	/*
	1077	* Return the bit count of a bignum, for SSH-1 encoding.
	1078	*/
	1079	int bignum_bitcount(Bignum bn)
	1080	{
	1081	int bitcount = bn[0] * BIGNUM_INT_BITS - 1;
	1082	while (bitcount >= 0
	1083	&& (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;
	1084	return bitcount + 1;
	1085	}
	1086
	1087	/*
	1088	* Return the byte length of a bignum when SSH-1 encoded.
	1089	*/
	1090	int ssh1_bignum_length(Bignum bn)
	1091	{
	1092	return 2 + (bignum_bitcount(bn) + 7) / 8;
	1093	}
	1094
	1095	/*
	1096	* Return the byte length of a bignum when SSH-2 encoded.
	1097	*/
	1098	int ssh2_bignum_length(Bignum bn)
	1099	{
	1100	return 4 + (bignum_bitcount(bn) + 8) / 8;
	1101	}
	1102
	1103	/*
	1104	* Return a byte from a bignum; 0 is least significant, etc.
	1105	*/
	1106	int bignum_byte(Bignum bn, int i)
	1107	{
	1108	if (i >= (int)(BIGNUM_INT_BYTES * bn[0]))
	1109	return 0; /* beyond the end */
	1110	else
	1111	return (bn[i / BIGNUM_INT_BYTES + 1] >>
	1112	((i % BIGNUM_INT_BYTES)*8)) & 0xFF;
	1113	}
	1114
	1115	/*
	1116	* Return a bit from a bignum; 0 is least significant, etc.
	1117	*/
	1118	int bignum_bit(Bignum bn, int i)
	1119	{
	1120	if (i >= (int)(BIGNUM_INT_BITS * bn[0]))
	1121	return 0; /* beyond the end */
	1122	else
	1123	return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;
	1124	}
	1125
	1126	/*
	1127	* Set a bit in a bignum; 0 is least significant, etc.
	1128	*/
	1129	void bignum_set_bit(Bignum bn, int bitnum, int value)
	1130	{
	1131	if (bitnum >= (int)(BIGNUM_INT_BITS * bn[0]))
	1132	abort(); /* beyond the end */
	1133	else {
	1134	int v = bitnum / BIGNUM_INT_BITS + 1;
	1135	int mask = 1 << (bitnum % BIGNUM_INT_BITS);
	1136	if (value)
	1137	bn[v] \|= mask;
	1138	else
	1139	bn[v] &= ~mask;
	1140	}
	1141	}
	1142
	1143	/*
	1144	* Write a SSH-1-format bignum into a buffer. It is assumed the
	1145	* buffer is big enough. Returns the number of bytes used.
	1146	*/
	1147	int ssh1_write_bignum(void *data, Bignum bn)
	1148	{
	1149	unsigned char *p = data;
	1150	int len = ssh1_bignum_length(bn);
	1151	int i;
	1152	int bitc = bignum_bitcount(bn);
	1153
	1154	*p++ = (bitc >> 8) & 0xFF;
	1155	*p++ = (bitc) & 0xFF;
	1156	for (i = len - 2; i--;)
	1157	*p++ = bignum_byte(bn, i);
	1158	return len;
	1159	}
	1160
	1161	/*
	1162	* Compare two bignums. Returns like strcmp.
	1163	*/
	1164	int bignum_cmp(Bignum a, Bignum b)
	1165	{
	1166	int amax = a[0], bmax = b[0];
	1167	int i = (amax > bmax ? amax : bmax);
	1168	while (i) {
	1169	BignumInt aval = (i > amax ? 0 : a[i]);
	1170	BignumInt bval = (i > bmax ? 0 : b[i]);
	1171	if (aval < bval)
	1172	return -1;
	1173	if (aval > bval)
	1174	return +1;
	1175	i--;
	1176	}
	1177	return 0;
	1178	}
	1179
	1180	/*
	1181	* Right-shift one bignum to form another.
	1182	*/
	1183	Bignum bignum_rshift(Bignum a, int shift)
	1184	{
	1185	Bignum ret;
	1186	int i, shiftw, shiftb, shiftbb, bits;
	1187	BignumInt ai, ai1;
	1188
	1189	bits = bignum_bitcount(a) - shift;
	1190	ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
	1191
	1192	if (ret) {
	1193	shiftw = shift / BIGNUM_INT_BITS;
	1194	shiftb = shift % BIGNUM_INT_BITS;
	1195	shiftbb = BIGNUM_INT_BITS - shiftb;
	1196
	1197	ai1 = a[shiftw + 1];
	1198	for (i = 1; i <= (int)ret[0]; i++) {
	1199	ai = ai1;
	1200	ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);
	1201	ret[i] = ((ai >> shiftb) \| (ai1 << shiftbb)) & BIGNUM_INT_MASK;
	1202	}
	1203	}
	1204
	1205	return ret;
	1206	}
	1207
	1208	/*
	1209	* Non-modular multiplication and addition.
	1210	*/
	1211	Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
	1212	{
	1213	int alen = a[0], blen = b[0];
	1214	int mlen = (alen > blen ? alen : blen);
	1215	int rlen, i, maxspot;
	1216	BignumInt *workspace;
	1217	Bignum ret;
	1218
	1219	/* mlen space for a, mlen space for b, 2mlen for result /
	1220	workspace = snewn(mlen * 4, BignumInt);
	1221	for (i = 0; i < mlen; i++) {
	1222	workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0);
	1223	workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? b[mlen - i] : 0);
	1224	}
	1225
	1226	internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
	1227	workspace + 2 * mlen, mlen);
	1228
	1229	/* now just copy the result back */
	1230	rlen = alen + blen + 1;
	1231	if (addend && rlen <= (int)addend[0])
	1232	rlen = addend[0] + 1;
	1233	ret = newbn(rlen);
	1234	maxspot = 0;
	1235	for (i = 1; i <= (int)ret[0]; i++) {
	1236	ret[i] = (i <= 2 * mlen ? workspace[4 * mlen - i] : 0);
	1237	if (ret[i] != 0)
	1238	maxspot = i;
	1239	}
	1240	ret[0] = maxspot;
	1241
	1242	/* now add in the addend, if any */
	1243	if (addend) {
	1244	BignumDblInt carry = 0;
	1245	for (i = 1; i <= rlen; i++) {
	1246	carry += (i <= (int)ret[0] ? ret[i] : 0);
	1247	carry += (i <= (int)addend[0] ? addend[i] : 0);
	1248	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1249	carry >>= BIGNUM_INT_BITS;
	1250	if (ret[i] != 0 && i > maxspot)
	1251	maxspot = i;
	1252	}
	1253	}
	1254	ret[0] = maxspot;
	1255
	1256	sfree(workspace);
	1257	return ret;
	1258	}
	1259
	1260	/*
	1261	* Non-modular multiplication.
	1262	*/
	1263	Bignum bigmul(Bignum a, Bignum b)
	1264	{
	1265	return bigmuladd(a, b, NULL);
	1266	}
	1267
	1268	/*
	1269	* Simple addition.
	1270	*/
	1271	Bignum bigadd(Bignum a, Bignum b)
	1272	{
	1273	int alen = a[0], blen = b[0];
	1274	int rlen = (alen > blen ? alen : blen) + 1;
	1275	int i, maxspot;
	1276	Bignum ret;
	1277	BignumDblInt carry;
	1278
	1279	ret = newbn(rlen);
	1280
	1281	carry = 0;
	1282	maxspot = 0;
	1283	for (i = 1; i <= rlen; i++) {
	1284	carry += (i <= (int)a[0] ? a[i] : 0);
	1285	carry += (i <= (int)b[0] ? b[i] : 0);
	1286	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1287	carry >>= BIGNUM_INT_BITS;
	1288	if (ret[i] != 0 && i > maxspot)
	1289	maxspot = i;
	1290	}
	1291	ret[0] = maxspot;
	1292
	1293	return ret;
	1294	}
	1295
	1296	/*
	1297	* Subtraction. Returns a-b, or NULL if the result would come out
	1298	* negative (recall that this entire bignum module only handles
	1299	* positive numbers).
	1300	*/
	1301	Bignum bigsub(Bignum a, Bignum b)
	1302	{
	1303	int alen = a[0], blen = b[0];
	1304	int rlen = (alen > blen ? alen : blen);
	1305	int i, maxspot;
	1306	Bignum ret;
	1307	BignumDblInt carry;
	1308
	1309	ret = newbn(rlen);
	1310
	1311	carry = 1;
	1312	maxspot = 0;
	1313	for (i = 1; i <= rlen; i++) {
	1314	carry += (i <= (int)a[0] ? a[i] : 0);
	1315	carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK);
	1316	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1317	carry >>= BIGNUM_INT_BITS;
	1318	if (ret[i] != 0 && i > maxspot)
	1319	maxspot = i;
	1320	}
	1321	ret[0] = maxspot;
	1322
	1323	if (!carry) {
	1324	freebn(ret);
	1325	return NULL;
	1326	}
	1327
	1328	return ret;
	1329	}
	1330
	1331	/*
	1332	* Create a bignum which is the bitmask covering another one. That
	1333	* is, the smallest integer which is >= N and is also one less than
	1334	* a power of two.
	1335	*/
	1336	Bignum bignum_bitmask(Bignum n)
	1337	{
	1338	Bignum ret = copybn(n);
	1339	int i;
	1340	BignumInt j;
	1341
	1342	i = ret[0];
	1343	while (n[i] == 0 && i > 0)
	1344	i--;
	1345	if (i <= 0)
	1346	return ret; /* input was zero */
	1347	j = 1;
	1348	while (j < n[i])
	1349	j = 2 * j + 1;
	1350	ret[i] = j;
	1351	while (--i > 0)
	1352	ret[i] = BIGNUM_INT_MASK;
	1353	return ret;
	1354	}
	1355
	1356	/*
	1357	* Convert a (max 32-bit) long into a bignum.
	1358	*/
	1359	Bignum bignum_from_long(unsigned long nn)
	1360	{
	1361	Bignum ret;
	1362	BignumDblInt n = nn;
	1363
	1364	ret = newbn(3);
	1365	ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
	1366	ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
	1367	ret[3] = 0;
	1368	ret[0] = (ret[2] ? 2 : 1);
	1369	return ret;
	1370	}
	1371
	1372	/*
	1373	* Add a long to a bignum.
	1374	*/
	1375	Bignum bignum_add_long(Bignum number, unsigned long addendx)
	1376	{
	1377	Bignum ret = newbn(number[0] + 1);
	1378	int i, maxspot = 0;
	1379	BignumDblInt carry = 0, addend = addendx;
	1380
	1381	for (i = 1; i <= (int)ret[0]; i++) {
	1382	carry += addend & BIGNUM_INT_MASK;
	1383	carry += (i <= (int)number[0] ? number[i] : 0);
	1384	addend >>= BIGNUM_INT_BITS;
	1385	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1386	carry >>= BIGNUM_INT_BITS;
	1387	if (ret[i] != 0)
	1388	maxspot = i;
	1389	}
	1390	ret[0] = maxspot;
	1391	return ret;
	1392	}
	1393
	1394	/*
	1395	* Compute the residue of a bignum, modulo a (max 16-bit) short.
	1396	*/
	1397	unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
	1398	{
	1399	BignumDblInt mod, r;
	1400	int i;
	1401
	1402	r = 0;
	1403	mod = modulus;
	1404	for (i = number[0]; i > 0; i--)
	1405	r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
	1406	return (unsigned short) r;
	1407	}
	1408
	1409	#ifdef DEBUG
	1410	void diagbn(char *prefix, Bignum md)
	1411	{
	1412	int i, nibbles, morenibbles;
	1413	static const char hex[] = "0123456789ABCDEF";
	1414
	1415	debug(("%s0x", prefix ? prefix : ""));
	1416
	1417	nibbles = (3 + bignum_bitcount(md)) / 4;
	1418	if (nibbles < 1)
	1419	nibbles = 1;
	1420	morenibbles = 4 * md[0] - nibbles;
	1421	for (i = 0; i < morenibbles; i++)
	1422	debug(("-"));
	1423	for (i = nibbles; i--;)
	1424	debug(("%c",
	1425	hex[(bignum_byte(md, i / 2) >> (4 * (i % 2))) & 0xF]));
	1426
	1427	if (prefix)
	1428	debug(("\n"));
	1429	}
	1430	#endif
	1431
	1432	/*
	1433	* Simple division.
	1434	*/
	1435	Bignum bigdiv(Bignum a, Bignum b)
	1436	{
	1437	Bignum q = newbn(a[0]);
	1438	bigdivmod(a, b, NULL, q);
	1439	return q;
	1440	}
	1441
	1442	/*
	1443	* Simple remainder.
	1444	*/
	1445	Bignum bigmod(Bignum a, Bignum b)
	1446	{
	1447	Bignum r = newbn(b[0]);
	1448	bigdivmod(a, b, r, NULL);
	1449	return r;
	1450	}
	1451
	1452	/*
	1453	* Greatest common divisor.
	1454	*/
	1455	Bignum biggcd(Bignum av, Bignum bv)
	1456	{
	1457	Bignum a = copybn(av);
	1458	Bignum b = copybn(bv);
	1459
	1460	while (bignum_cmp(b, Zero) != 0) {
	1461	Bignum t = newbn(b[0]);
	1462	bigdivmod(a, b, t, NULL);
	1463	while (t[0] > 1 && t[t[0]] == 0)
	1464	t[0]--;
	1465	freebn(a);
	1466	a = b;
	1467	b = t;
	1468	}
	1469
	1470	freebn(b);
	1471	return a;
	1472	}
	1473
	1474	/*
	1475	* Modular inverse, using Euclid's extended algorithm.
	1476	*/
	1477	Bignum modinv(Bignum number, Bignum modulus)
	1478	{
	1479	Bignum a = copybn(modulus);
	1480	Bignum b = copybn(number);
	1481	Bignum xp = copybn(Zero);
	1482	Bignum x = copybn(One);
	1483	int sign = +1;
	1484
	1485	while (bignum_cmp(b, One) != 0) {
	1486	Bignum t = newbn(b[0]);
	1487	Bignum q = newbn(a[0]);
	1488	bigdivmod(a, b, t, q);
	1489	while (t[0] > 1 && t[t[0]] == 0)
	1490	t[0]--;
	1491	freebn(a);
	1492	a = b;
	1493	b = t;
	1494	t = xp;
	1495	xp = x;
	1496	x = bigmuladd(q, xp, t);
	1497	sign = -sign;
	1498	freebn(t);
	1499	freebn(q);
	1500	}
	1501
	1502	freebn(b);
	1503	freebn(a);
	1504	freebn(xp);
	1505
	1506	/* now we know that sign * x == 1, and that x < modulus */
	1507	if (sign < 0) {
	1508	/* set a new x to be modulus - x */
	1509	Bignum newx = newbn(modulus[0]);
	1510	BignumInt carry = 0;
	1511	int maxspot = 1;
	1512	int i;
	1513
	1514	for (i = 1; i <= (int)newx[0]; i++) {
	1515	BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);
	1516	BignumInt bword = (i <= (int)x[0] ? x[i] : 0);
	1517	newx[i] = aword - bword - carry;
	1518	bword = ~bword;
	1519	carry = carry ? (newx[i] >= bword) : (newx[i] > bword);
	1520	if (newx[i] != 0)
	1521	maxspot = i;
	1522	}
	1523	newx[0] = maxspot;
	1524	freebn(x);
	1525	x = newx;
	1526	}
	1527
	1528	/* and return. */
	1529	return x;
	1530	}
	1531
	1532	/*
	1533	* Render a bignum into decimal. Return a malloced string holding
	1534	* the decimal representation.
	1535	*/
	1536	char *bignum_decimal(Bignum x)
	1537	{
	1538	int ndigits, ndigit;
	1539	int i, iszero;
	1540	BignumDblInt carry;
	1541	char *ret;
	1542	BignumInt *workspace;
	1543
	1544	/*
	1545	* First, estimate the number of digits. Since log(10)/log(2)
	1546	* is just greater than 93/28 (the joys of continued fraction
	1547	* approximations...) we know that for every 93 bits, we need
	1548	* at most 28 digits. This will tell us how much to malloc.
	1549	*
	1550	* Formally: if x has i bits, that means x is strictly less
	1551	* than 2^i. Since 2 is less than 10^(28/93), this is less than
	1552	* 10^(28i/93). We need an integer power of ten, so we must
	1553	* round up (rounding down might make it less than x again).
	1554	* Therefore if we multiply the bit count by 28/93, rounding
	1555	* up, we will have enough digits.
	1556	*
	1557	* i=0 (i.e., x=0) is an irritating special case.
	1558	*/
	1559	i = bignum_bitcount(x);
	1560	if (!i)
	1561	ndigits = 1; /* x = 0 */
	1562	else
	1563	ndigits = (28 * i + 92) / 93; /* multiply by 28/93 and round up */
	1564	ndigits++; /* allow for trailing \0 */
	1565	ret = snewn(ndigits, char);
	1566
	1567	/*
	1568	* Now allocate some workspace to hold the binary form as we
	1569	* repeatedly divide it by ten. Initialise this to the
	1570	* big-endian form of the number.
	1571	*/
	1572	workspace = snewn(x[0], BignumInt);
	1573	for (i = 0; i < (int)x[0]; i++)
	1574	workspace[i] = x[x[0] - i];
	1575
	1576	/*
	1577	* Next, write the decimal number starting with the last digit.
	1578	* We use ordinary short division, dividing 10 into the
	1579	* workspace.
	1580	*/
	1581	ndigit = ndigits - 1;
	1582	ret[ndigit] = '\0';
	1583	do {
	1584	iszero = 1;
	1585	carry = 0;
	1586	for (i = 0; i < (int)x[0]; i++) {
	1587	carry = (carry << BIGNUM_INT_BITS) + workspace[i];
	1588	workspace[i] = (BignumInt) (carry / 10);
	1589	if (workspace[i])
	1590	iszero = 0;
	1591	carry %= 10;
	1592	}
	1593	ret[--ndigit] = (char) (carry + '0');
	1594	} while (!iszero);
	1595
	1596	/*
	1597	* There's a chance we've fallen short of the start of the
	1598	* string. Correct if so.
	1599	*/
	1600	if (ndigit > 0)
	1601	memmove(ret, ret + ndigit, ndigits - ndigit);
	1602
	1603	/*
	1604	* Done.
	1605	*/
	1606	sfree(workspace);
	1607	return ret;
	1608	}
	1609
	1610	#ifdef TESTBN
	1611
	1612	#include <stdio.h>
	1613	#include <stdlib.h>
	1614	#include <ctype.h>
	1615
	1616	/*
	1617	* gcc -g -O0 -DTESTBN -o testbn sshbn.c misc.c -I unix -I charset
	1618	*/
	1619
	1620	void modalfatalbox(char *p, ...)
	1621	{
	1622	va_list ap;
	1623	fprintf(stderr, "FATAL ERROR: ");
	1624	va_start(ap, p);
	1625	vfprintf(stderr, p, ap);
	1626	va_end(ap);
	1627	fputc('\n', stderr);
	1628	exit(1);
	1629	}
	1630
	1631	#define fromxdigit(c) ( (c)>'9' ? ((c)&0xDF) - 'A' + 10 : (c) - '0' )
	1632
	1633	int main(int argc, char **argv)
	1634	{
	1635	char *buf;
	1636	int line = 0;
	1637	int passes = 0, fails = 0;
	1638
	1639	while ((buf = fgetline(stdin)) != NULL) {
	1640	int maxlen = strlen(buf);
	1641	unsigned char *data = snewn(maxlen, unsigned char);
	1642	unsigned char ptrs[4], q;
	1643	int ptrnum;
	1644	char *bufp = buf;
	1645
	1646	line++;
	1647
	1648	q = data;
	1649	ptrnum = 0;
	1650
	1651	while (*bufp) {
	1652	char start, end;
	1653	int i;
	1654
	1655	while (bufp && !isxdigit((unsigned char)bufp))
	1656	bufp++;
	1657	start = bufp;
	1658
	1659	if (!*bufp)
	1660	break;
	1661
	1662	while (bufp && isxdigit((unsigned char)bufp))
	1663	bufp++;
	1664	end = bufp;
	1665
	1666	if (ptrnum >= lenof(ptrs))
	1667	break;
	1668	ptrs[ptrnum++] = q;
	1669
	1670	for (i = -((end - start) & 1); i < end-start; i += 2) {
	1671	unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));
	1672	val = val * 16 + fromxdigit(start[i+1]);
	1673	*q++ = val;
	1674	}
	1675
	1676	ptrs[ptrnum] = q;
	1677	}
	1678
	1679	if (ptrnum == 3) {
	1680	Bignum a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	1681	Bignum b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	1682	Bignum c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	1683	Bignum p = bigmul(a, b);
	1684
	1685	if (bignum_cmp(c, p) == 0) {
	1686	passes++;
	1687	} else {
	1688	char *as = bignum_decimal(a);
	1689	char *bs = bignum_decimal(b);
	1690	char *cs = bignum_decimal(c);
	1691	char *ps = bignum_decimal(p);
	1692
	1693	printf("%d: fail: %s * %s gave %s expected %s\n",
	1694	line, as, bs, ps, cs);
	1695	fails++;
	1696
	1697	sfree(as);
	1698	sfree(bs);
	1699	sfree(cs);
	1700	sfree(ps);
	1701	}
	1702	freebn(a);
	1703	freebn(b);
	1704	freebn(c);
	1705	freebn(p);
	1706	}
	1707	sfree(buf);
	1708	sfree(data);
	1709	}
	1710
	1711	printf("passed %d failed %d total %d\n", passes, fails, passes+fails);
	1712	return fails != 0;
	1713	}
	1714
	1715	#endif