git.distorted.org.uk Git - u/mdw/putty/blame

Commit	Line	Data
e5574168	1	/*
	2	* Bignum routines for RSA and DH and stuff.
	3	*/
	4
	5	#include <stdio.h>
ed953b91	6	#include <assert.h>
e5574168	7	#include <stdlib.h>
e5574168	8	#include <string.h>
551a4acb	9	#include <limits.h>
e5574168	10
5c72ca61	11	#include "misc.h"
98ba26b9	12
819a22b3	13	/*
	14	* Usage notes:
	15	* * Do not call the DIVMOD_WORD macro with expressions such as array
	16	* subscripts, as some implementations object to this (see below).
	17	* * Note that none of the division methods below will cope if the
	18	* quotient won't fit into BIGNUM_INT_BITS. Callers should be careful
	19	* to avoid this case.
	20	* If this condition occurs, in the case of the x86 DIV instruction,
	21	* an overflow exception will occur, which (according to a correspondent)
	22	* will manifest on Windows as something like
	23	* 0xC0000095: Integer overflow
	24	* The C variant won't give the right answer, either.
	25	*/
	26
/*
 * Per-platform choice of the bignum word type (BignumInt), the
 * double-width type that holds a word product or a two-word dividend
 * (BignumDblInt), and the word-level primitives built on them.
 *
 * MUL_WORD(w1, w2) yields the double-width product of two words.
 *
 * DIVMOD_WORD(q, r, hi, lo, w) divides the double word hi:lo by w,
 * storing the quotient in q and the remainder in r. Pass plain
 * variables only: the MSVC variant hands its arguments straight to
 * the inline assembler, which misreads array subscripts. None of the
 * variants copes with a quotient that does not fit in
 * BIGNUM_INT_BITS bits.
 */
#if defined __GNUC__ && defined __i386__
typedef unsigned long BignumInt;
typedef unsigned long long BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFUL
#define BIGNUM_TOP_BIT   0x80000000UL
#define BIGNUM_INT_BITS  32
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#define DIVMOD_WORD(q, r, hi, lo, w) \
    __asm__("div %2" : \
            "=d" (r), "=a" (q) : \
            "r" (w), "d" (hi), "a" (lo))
#elif defined _MSC_VER && defined _M_IX86
typedef unsigned __int32 BignumInt;
typedef unsigned __int64 BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFUL
#define BIGNUM_TOP_BIT   0x80000000UL
#define BIGNUM_INT_BITS  32
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
/* Note: MASM interprets array subscripts in the macro arguments as
 * assembler syntax, which gives the wrong answer. Don't supply them.
 * <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    __asm mov edx, hi \
    __asm mov eax, lo \
    __asm div w \
    __asm mov r, edx \
    __asm mov q, eax \
} while(0)
#elif defined _LP64
/* 64-bit architectures can do 32x32->64 chunks at a time */
typedef unsigned int BignumInt;
typedef unsigned long BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFU
#define BIGNUM_TOP_BIT   0x80000000U
#define BIGNUM_INT_BITS  32
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    BignumDblInt divmod_n = (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
    (q) = divmod_n / (w); \
    (r) = divmod_n % (w); \
} while (0)
#elif defined _LLP64
/* 64-bit architectures in which unsigned long is 32 bits, not 64 */
typedef unsigned long BignumInt;
typedef unsigned long long BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFFFFFUL
#define BIGNUM_TOP_BIT   0x80000000UL
#define BIGNUM_INT_BITS  32
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    BignumDblInt divmod_n = (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
    (q) = divmod_n / (w); \
    (r) = divmod_n % (w); \
} while (0)
#else
/* Fallback for all other cases */
typedef unsigned short BignumInt;
typedef unsigned long BignumDblInt;
#define BIGNUM_INT_MASK  0xFFFFU
#define BIGNUM_TOP_BIT   0x8000U
#define BIGNUM_INT_BITS  16
#define MUL_WORD(w1, w2) ((BignumDblInt)(w1) * (w2))
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    BignumDblInt divmod_n = (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
    (q) = divmod_n / (w); \
    (r) = divmod_n % (w); \
} while (0)
#endif
	95
	96	#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
	97
3709bfe9	98	#define BIGNUM_INTERNAL
a3412f52	99	typedef BignumInt *Bignum;
3709bfe9	100
e5574168	101	#include "ssh.h"
e5574168	102
a3412f52	103	BignumInt bnZero[1] = { 0 };
a3412f52	104	BignumInt bnOne[2] = { 1, 1 };
e5574168	105
7d6ee6ff	106	/*
a3412f52	107	* The Bignum format is an array of `BignumInt'. The first
7d6ee6ff	108	* element of the array counts the remaining elements. The
a3412f52	109	* remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_
7d6ee6ff	110	* significant digit first. (So it's trivial to extract the bit
	111	* with value 2^n for any n.)
	112	*
	113	* All Bignums in this module are positive. Negative numbers must
	114	* be dealt with outside it.
	115	*
	116	* INVARIANT: the most significant word of any Bignum must be
	117	* nonzero.
	118	*/
	119
7cca0d81	120	Bignum Zero = bnZero, One = bnOne;
e5574168	121
32874aea	122	static Bignum newbn(int length)
32874aea	123	{
551a4acb	124	Bignum b;
	125
	126	assert(length >= 0 && length < INT_MAX / BIGNUM_INT_BITS);
	127
	128	b = snewn(length + 1, BignumInt);
e5574168	129	if (!b)
e5574168	130	abort(); /* FIXME */
32874aea	131	memset(b, 0, (length + 1) * sizeof(*b));
e5574168	132	b[0] = length;
	133	return b;
	134	}
	135
32874aea	136	void bn_restore_invariant(Bignum b)
	137	{
	138	while (b[0] > 1 && b[b[0]] == 0)
	139	b[0]--;
3709bfe9	140	}
3709bfe9	141
32874aea	142	Bignum copybn(Bignum orig)
32874aea	143	{
a3412f52	144	Bignum b = snewn(orig[0] + 1, BignumInt);
7cca0d81	145	if (!b)
7cca0d81	146	abort(); /* FIXME */
32874aea	147	memcpy(b, orig, (orig[0] + 1) * sizeof(*b));
7cca0d81	148	return b;
	149	}
	150
32874aea	151	void freebn(Bignum b)
32874aea	152	{
e5574168	153	/*
	154	* Burn the evidence, just in case.
	155	*/
dfb88efd	156	smemclr(b, sizeof(b[0]) * (b[0] + 1));
dcbde236	157	sfree(b);
e5574168	158	}
e5574168	159
32874aea	160	Bignum bn_power_2(int n)
32874aea	161	{
551a4acb	162	Bignum ret;
	163
	164	assert(n >= 0);
	165
	166	ret = newbn(n / BIGNUM_INT_BITS + 1);
3709bfe9	167	bignum_set_bit(ret, n, 1);
	168	return ret;
	169	}
	170
e5574168	171	/*
0c431b2f	172	* Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
	173	* big-endian arrays of 'len' BignumInts. Returns a BignumInt carried
	174	* off the top.
	175	*/
	176	static BignumInt internal_add(const BignumInt a, const BignumInt b,
	177	BignumInt *c, int len)
	178	{
	179	int i;
	180	BignumDblInt carry = 0;
	181
	182	for (i = len-1; i >= 0; i--) {
	183	carry += (BignumDblInt)a[i] + b[i];
	184	c[i] = (BignumInt)carry;
	185	carry >>= BIGNUM_INT_BITS;
	186	}
	187
	188	return (BignumInt)carry;
	189	}
	190
	191	/*
	192	* Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
	193	* all big-endian arrays of 'len' BignumInts. Any borrow from the top
	194	* is ignored.
	195	*/
	196	static void internal_sub(const BignumInt a, const BignumInt b,
	197	BignumInt *c, int len)
	198	{
	199	int i;
	200	BignumDblInt carry = 1;
	201
	202	for (i = len-1; i >= 0; i--) {
	203	carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
	204	c[i] = (BignumInt)carry;
	205	carry >>= BIGNUM_INT_BITS;
	206	}
	207	}
	208
	209	/*
e5574168	210	* Compute c = a * b.
	211	* Input is in the first len words of a and b.
	212	* Result is returned in the first 2*len words of c.
5a502a19	213	*
	214	* 'scratch' must point to an array of BignumInt of size at least
	215	* mul_compute_scratch(len). (This covers the needs of internal_mul
	216	* and all its recursive calls to itself.)
e5574168	217	*/
/* Operand length (in words) above which internal_mul uses Karatsuba. */
#define KARATSUBA_THRESHOLD 50

/*
 * Return the number of BignumInt words of scratch space that
 * internal_mul needs to multiply two 'len'-word numbers. Each
 * Karatsuba level uses 4*midlen words, where midlen = botlen + 1 and
 * botlen is the larger half of the current length; the next level is
 * then sized for operands of midlen words. Lengths at or below
 * KARATSUBA_THRESHOLD need no scratch at all.
 */
static int mul_compute_scratch(int len)
{
    int ret = 0;
    while (len > KARATSUBA_THRESHOLD) {
        int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
        int midlen = botlen + 1;
        ret += 4*midlen;
        len = midlen;
    }
    return ret;
}
132c534f	230	static void internal_mul(const BignumInt a, const BignumInt b,
5a502a19	231	BignumInt c, int len, BignumInt scratch)
e5574168	232	{
0c431b2f	233	if (len > KARATSUBA_THRESHOLD) {
757b0110	234	int i;
0c431b2f	235
	236	/*
	237	* Karatsuba divide-and-conquer algorithm. Cut each input in
	238	* half, so that it's expressed as two big 'digits' in a giant
	239	* base D:
	240	*
	241	* a = a_1 D + a_0
	242	* b = b_1 D + b_0
	243	*
	244	* Then the product is of course
	245	*
	246	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	247	*
	248	* and we compute the three coefficients by recursively
	249	* calling ourself to do half-length multiplications.
	250	*
	251	* The clever bit that makes this worth doing is that we only
	252	* need _one_ half-length multiplication for the central
	253	* coefficient rather than the two that it obviouly looks
	254	* like, because we can use a single multiplication to compute
	255	*
	256	* (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
	257	*
	258	* and then we subtract the other two coefficients (a_1 b_1
	259	* and a_0 b_0) which we were computing anyway.
	260	*
	261	* Hence we get to multiply two numbers of length N in about
	262	* three times as much work as it takes to multiply numbers of
	263	* length N/2, which is obviously better than the four times
	264	* as much work it would take if we just did a long
	265	* conventional multiply.
	266	*/
	267
	268	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
	269	int midlen = botlen + 1;
0c431b2f	270	BignumDblInt carry;
f3c29e34	271	#ifdef KARA_DEBUG
	272	int i;
	273	#endif
0c431b2f	274
	275	/*
	276	* The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping
	277	* in the output array, so we can compute them immediately in
	278	* place.
	279	*/
	280
f3c29e34	281	#ifdef KARA_DEBUG
	282	printf("a1,a0 = 0x");
	283	for (i = 0; i < len; i++) {
	284	if (i == toplen) printf(", 0x");
	285	printf("%0*x", BIGNUM_INT_BITS/4, a[i]);
	286	}
	287	printf("\n");
	288	printf("b1,b0 = 0x");
	289	for (i = 0; i < len; i++) {
	290	if (i == toplen) printf(", 0x");
	291	printf("%0*x", BIGNUM_INT_BITS/4, b[i]);
	292	}
	293	printf("\n");
	294	#endif
	295
0c431b2f	296	/* a_1 b_1 */
5a502a19	297	internal_mul(a, b, c, toplen, scratch);
f3c29e34	298	#ifdef KARA_DEBUG
	299	printf("a1b1 = 0x");
	300	for (i = 0; i < 2*toplen; i++) {
	301	printf("%0*x", BIGNUM_INT_BITS/4, c[i]);
	302	}
	303	printf("\n");
	304	#endif
0c431b2f	305
0c431b2f	306	/* a_0 b_0 */
5a502a19	307	internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen, scratch);
f3c29e34	308	#ifdef KARA_DEBUG
	309	printf("a0b0 = 0x");
	310	for (i = 0; i < 2*botlen; i++) {
	311	printf("%0x", BIGNUM_INT_BITS/4, c[2toplen+i]);
	312	}
	313	printf("\n");
	314	#endif
0c431b2f	315
0c431b2f	316	/* Zero padding. midlen exceeds toplen by at most 2, so just
	317	* zero the first two words of each input and the rest will be
	318	* copied over. */
	319	scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0;
	320
757b0110	321	for (i = 0; i < toplen; i++) {
	322	scratch[midlen - toplen + i] = a[i]; /* a_1 */
	323	scratch[2midlen - toplen + i] = b[i]; / b_1 */
0c431b2f	324	}
	325
	326	/* compute a_1 + a_0 */
	327	scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen);
f3c29e34	328	#ifdef KARA_DEBUG
	329	printf("a1plusa0 = 0x");
	330	for (i = 0; i < midlen; i++) {
	331	printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);
	332	}
	333	printf("\n");
	334	#endif
0c431b2f	335	/* compute b_1 + b_0 */
	336	scratch[midlen] = internal_add(scratch+midlen+1, b+toplen,
	337	scratch+midlen+1, botlen);
f3c29e34	338	#ifdef KARA_DEBUG
	339	printf("b1plusb0 = 0x");
	340	for (i = 0; i < midlen; i++) {
	341	printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen+i]);
	342	}
	343	printf("\n");
	344	#endif
0c431b2f	345
	346	/*
	347	* Now we can do the third multiplication.
	348	*/
5a502a19	349	internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen,
5a502a19	350	scratch + 4*midlen);
f3c29e34	351	#ifdef KARA_DEBUG
	352	printf("a1plusa0timesb1plusb0 = 0x");
	353	for (i = 0; i < 2*midlen; i++) {
	354	printf("%0x", BIGNUM_INT_BITS/4, scratch[2midlen+i]);
	355	}
	356	printf("\n");
	357	#endif
0c431b2f	358
	359	/*
	360	* Now we can reuse the first half of 'scratch' to compute the
	361	* sum of the outer two coefficients, to subtract from that
	362	* product to obtain the middle one.
	363	*/
	364	scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0;
757b0110	365	for (i = 0; i < 2*toplen; i++)
757b0110	366	scratch[2midlen - 2toplen + i] = c[i];
0c431b2f	367	scratch[1] = internal_add(scratch+2, c + 2*toplen,
0c431b2f	368	scratch+2, 2*botlen);
f3c29e34	369	#ifdef KARA_DEBUG
	370	printf("a1b1plusa0b0 = 0x");
	371	for (i = 0; i < 2*midlen; i++) {
	372	printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]);
	373	}
	374	printf("\n");
	375	#endif
0c431b2f	376
	377	internal_sub(scratch + 2*midlen, scratch,
	378	scratch + 2midlen, 2midlen);
f3c29e34	379	#ifdef KARA_DEBUG
	380	printf("a1b0plusa0b1 = 0x");
	381	for (i = 0; i < 2*midlen; i++) {
	382	printf("%0x", BIGNUM_INT_BITS/4, scratch[2midlen+i]);
	383	}
	384	printf("\n");
	385	#endif
0c431b2f	386
	387	/*
	388	* And now all we need to do is to add that middle coefficient
	389	* back into the output. We may have to propagate a carry
	390	* further up the output, but we can be sure it won't
	391	* propagate right the way off the top.
	392	*/
	393	carry = internal_add(c + 2len - botlen - 2midlen,
	394	scratch + 2*midlen,
	395	c + 2len - botlen - 2midlen, 2*midlen);
757b0110	396	i = 2len - botlen - 2midlen - 1;
0c431b2f	397	while (carry) {
757b0110	398	assert(i >= 0);
	399	carry += c[i];
	400	c[i] = (BignumInt)carry;
0c431b2f	401	carry >>= BIGNUM_INT_BITS;
757b0110	402	i--;
0c431b2f	403	}
f3c29e34	404	#ifdef KARA_DEBUG
	405	printf("ab = 0x");
	406	for (i = 0; i < 2*len; i++) {
	407	printf("%0*x", BIGNUM_INT_BITS/4, c[i]);
	408	}
	409	printf("\n");
	410	#endif
0c431b2f	411
0c431b2f	412	} else {
757b0110	413	int i;
	414	BignumInt carry;
	415	BignumDblInt t;
	416	const BignumInt ap, bp;
	417	BignumInt cp, cps;
0c431b2f	418
	419	/*
	420	* Multiply in the ordinary O(N^2) way.
	421	*/
	422
757b0110	423	for (i = 0; i < 2 * len; i++)
757b0110	424	c[i] = 0;
0c431b2f	425
757b0110	426	for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) {
	427	carry = 0;
	428	for (cp = cps, bp = b + len; cp--, bp-- > b ;) {
	429	t = (MUL_WORD(ap, bp) + carry) + *cp;
	430	*cp = (BignumInt) t;
08b5c9a2	431	carry = (BignumInt)(t >> BIGNUM_INT_BITS);
0c431b2f	432	}
757b0110	433	*cp = carry;
0c431b2f	434	}
e5574168	435	}
	436	}
	437
132c534f	438	/*
	439	* Variant form of internal_mul used for the initial step of
	440	* Montgomery reduction. Only bothers outputting 'len' words
	441	* (everything above that is thrown away).
	442	*/
	443	static void internal_mul_low(const BignumInt a, const BignumInt b,
5a502a19	444	BignumInt c, int len, BignumInt scratch)
132c534f	445	{
132c534f	446	if (len > KARATSUBA_THRESHOLD) {
757b0110	447	int i;
132c534f	448
	449	/*
	450	* Karatsuba-aware version of internal_mul_low. As before, we
	451	* express each input value as a shifted combination of two
	452	* halves:
	453	*
	454	* a = a_1 D + a_0
	455	* b = b_1 D + b_0
	456	*
	457	* Then the full product is, as before,
	458	*
	459	* ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
	460	*
	461	* Provided we choose D on the large side (so that a_0 and b_0
	462	* are _at least_ as long as a_1 and b_1), we don't need the
	463	* topmost term at all, and we only need half of the middle
	464	* term. So there's no point in doing the proper Karatsuba
	465	* optimisation which computes the middle term using the top
	466	* one, because we'd take as long computing the top one as
	467	* just computing the middle one directly.
	468	*
	469	* So instead, we do a much more obvious thing: we call the
	470	* fully optimised internal_mul to compute a_0 b_0, and we
	471	* recursively call ourself to compute the _bottom halves_ of
	472	* a_1 b_0 and a_0 b_1, each of which we add into the result
	473	* in the obvious way.
	474	*
	475	* In other words, there's no actual Karatsuba _optimisation_
	476	* in this function; the only benefit in doing it this way is
	477	* that we call internal_mul proper for a large part of the
	478	* work, and _that_ can optimise its operation.
	479	*/
	480
	481	int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
132c534f	482
132c534f	483	/*
5a502a19	484	* Scratch space for the various bits and pieces we're going
	485	* to be adding together: we need botlen*2 words for a_0 b_0
	486	* (though we may end up throwing away its topmost word), and
	487	* toplen words for each of a_1 b_0 and a_0 b_1. That adds up
	488	* to exactly 2*len.
132c534f	489	*/
132c534f	490
132c534f	491	/* a_0 b_0 */
5a502a19	492	internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen,
5a502a19	493	scratch + 2*len);
132c534f	494
132c534f	495	/* a_1 b_0 */
5a502a19	496	internal_mul_low(a, b + len - toplen, scratch + toplen, toplen,
5a502a19	497	scratch + 2*len);
132c534f	498
132c534f	499	/* a_0 b_1 */
5a502a19	500	internal_mul_low(a + len - toplen, b, scratch, toplen,
5a502a19	501	scratch + 2*len);
132c534f	502
132c534f	503	/* Copy the bottom half of the big coefficient into place */
757b0110	504	for (i = 0; i < botlen; i++)
757b0110	505	c[toplen + i] = scratch[2*toplen + botlen + i];
132c534f	506
	507	/* Add the two small coefficients, throwing away the returned carry */
	508	internal_add(scratch, scratch + toplen, scratch, toplen);
	509
	510	/* And add that to the large coefficient, leaving the result in c. */
	511	internal_add(scratch, scratch + 2*toplen + botlen - toplen,
	512	c, toplen);
	513
132c534f	514	} else {
757b0110	515	int i;
	516	BignumInt carry;
	517	BignumDblInt t;
	518	const BignumInt ap, bp;
	519	BignumInt cp, cps;
132c534f	520
757b0110	521	/*
	522	* Multiply in the ordinary O(N^2) way.
	523	*/
132c534f	524
757b0110	525	for (i = 0; i < len; i++)
	526	c[i] = 0;
	527
	528	for (cps = c + len, ap = a + len; ap-- > a; cps--) {
	529	carry = 0;
	530	for (cp = cps, bp = b + len; bp--, cp-- > c ;) {
	531	t = (MUL_WORD(ap, bp) + carry) + *cp;
	532	*cp = (BignumInt) t;
08b5c9a2	533	carry = (BignumInt)(t >> BIGNUM_INT_BITS);
132c534f	534	}
132c534f	535	}
132c534f	536	}
	537	}
	538
	539	/*
	540	* Montgomery reduction. Expects x to be a big-endian array of 2*len
	541	* BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
	542	* BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
	543	* a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
	544	* x' < n.
	545	*
	546	* 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts
	547	* each, containing respectively n and the multiplicative inverse of
	548	* -n mod r.
	549	*
5a502a19	550	* 'tmp' is an array of BignumInt used as scratch space, of length at
5a502a19	551	* least 3*len + mul_compute_scratch(len).
132c534f	552	*/
	553	static void monty_reduce(BignumInt x, const BignumInt n,
	554	const BignumInt mninv, BignumInt tmp, int len)
	555	{
	556	int i;
	557	BignumInt carry;
	558
	559	/*
	560	* Multiply x by (-n)^{-1} mod r. This gives us a value m such
	561	* that mn is congruent to -x mod r. Hence, mn+x is an exact
	562	* multiple of r, and is also (obviously) congruent to x mod n.
	563	*/
5a502a19	564	internal_mul_low(x + len, mninv, tmp, len, tmp + 3*len);
132c534f	565
	566	/*
	567	* Compute t = (mn+x)/r in ordinary, non-modular, integer
	568	* arithmetic. By construction this is exact, and is congruent mod
	569	* n to x * r^{-1}, i.e. the answer we want.
	570	*
	571	* The following multiply leaves that answer in the _most_
	572	* significant half of the 'x' array, so then we must shift it
	573	* down.
	574	*/
5a502a19	575	internal_mul(tmp, n, tmp+len, len, tmp + 3*len);
132c534f	576	carry = internal_add(x, tmp+len, x, 2*len);
	577	for (i = 0; i < len; i++)
	578	x[len + i] = x[i], x[i] = 0;
	579
	580	/*
	581	* Reduce t mod n. This doesn't require a full-on division by n,
	582	* but merely a test and single optional subtraction, since we can
	583	* show that 0 <= t < 2n.
	584	*
	585	* Proof:
	586	* + we computed m mod r, so 0 <= m < r.
	587	* + so 0 <= mn < rn, obviously
	588	* + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn
	589	* + yielding 0 <= (mn+x)/r < 2n as required.
	590	*/
	591	if (!carry) {
	592	for (i = 0; i < len; i++)
	593	if (x[len + i] != n[i])
	594	break;
	595	}
	596	if (carry \|\| i >= len \|\| x[len + i] > n[i])
	597	internal_sub(x+len, n, x+len, len);
	598	}
	599
a3412f52	600	static void internal_add_shifted(BignumInt *number,
32874aea	601	unsigned n, int shift)
32874aea	602	{
a3412f52	603	int word = 1 + (shift / BIGNUM_INT_BITS);
	604	int bshift = shift % BIGNUM_INT_BITS;
	605	BignumDblInt addend;
9400cf6f	606
3014da2b	607	addend = (BignumDblInt)n << bshift;
9400cf6f	608
9400cf6f	609	while (addend) {
16bd1b88	610	assert(word <= number[0]);
32874aea	611	addend += number[word];
a3412f52	612	number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
a3412f52	613	addend >>= BIGNUM_INT_BITS;
32874aea	614	word++;
9400cf6f	615	}
	616	}
	617
e5574168	618	/*
e5574168	619	* Compute a = a % m.
9400cf6f	620	* Input in first alen words of a and first mlen words of m.
	621	* Output in first alen words of a
	622	* (of which first alen-mlen words will be zero).
e5574168	623	* The MSW of m MUST have its high bit set.
9400cf6f	624	* Quotient is accumulated in the `quotient' array, which is a Bignum
	625	* rather than the internal bigendian format. Quotient parts are shifted
	626	* left by `qshift' before adding into quot.
e5574168	627	*/
a3412f52	628	static void internal_mod(BignumInt *a, int alen,
	629	BignumInt *m, int mlen,
	630	BignumInt *quot, int qshift)
e5574168	631	{
a3412f52	632	BignumInt m0, m1;
e5574168	633	unsigned int h;
	634	int i, k;
	635
e5574168	636	m0 = m[0];
8bd9144b	637	assert(m0 >> (BIGNUM_INT_BITS-1) == 1);
9400cf6f	638	if (mlen > 1)
32874aea	639	m1 = m[1];
9400cf6f	640	else
32874aea	641	m1 = 0;
e5574168	642
32874aea	643	for (i = 0; i <= alen - mlen; i++) {
a3412f52	644	BignumDblInt t;
9400cf6f	645	unsigned int q, r, c, ai1;
e5574168	646
	647	if (i == 0) {
	648	h = 0;
	649	} else {
32874aea	650	h = a[i - 1];
32874aea	651	a[i - 1] = 0;
e5574168	652	}
e5574168	653
32874aea	654	if (i == alen - 1)
	655	ai1 = 0;
	656	else
	657	ai1 = a[i + 1];
9400cf6f	658
e5574168	659	/* Find q = h:a[i] / m0 */
62ef3d44	660	if (h >= m0) {
	661	/*
	662	* Special case.
	663	*
	664	* To illustrate it, suppose a BignumInt is 8 bits, and
	665	* we are dividing (say) A1:23:45:67 by A1:B2:C3. Then
	666	* our initial division will be 0xA123 / 0xA1, which
	667	* will give a quotient of 0x100 and a divide overflow.
	668	* However, the invariants in this division algorithm
	669	* are not violated, since the full number A1:23:... is
	670	* _less_ than the quotient prefix A1:B2:... and so the
	671	* following correction loop would have sorted it out.
	672	*
	673	* In this situation we set q to be the largest
	674	* quotient we _can_ stomach (0xFF, of course).
	675	*/
	676	q = BIGNUM_INT_MASK;
	677	} else {
819a22b3	678	/* Macro doesn't want an array subscript expression passed
	679	* into it (see definition), so use a temporary. */
	680	BignumInt tmplo = a[i];
	681	DIVMOD_WORD(q, r, h, tmplo, m0);
62ef3d44	682
	683	/* Refine our estimate of q by looking at
	684	h:a[i]:a[i+1] / m0:m1 */
	685	t = MUL_WORD(m1, q);
	686	if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
	687	q--;
	688	t -= m1;
	689	r = (r + m0) & BIGNUM_INT_MASK; /* overflow? */
	690	if (r >= (BignumDblInt) m0 &&
	691	t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
	692	}
e5574168	693	}
e5574168	694
9400cf6f	695	/* Subtract q * m from a[i...] */
e5574168	696	c = 0;
9400cf6f	697	for (k = mlen - 1; k >= 0; k--) {
a47e8bba	698	t = MUL_WORD(q, m[k]);
e5574168	699	t += c;
62ddb51e	700	c = (unsigned)(t >> BIGNUM_INT_BITS);
a3412f52	701	if ((BignumInt) t > a[i + k])
32874aea	702	c++;
a3412f52	703	a[i + k] -= (BignumInt) t;
e5574168	704	}
	705
	706	/* Add back m in case of borrow */
	707	if (c != h) {
	708	t = 0;
9400cf6f	709	for (k = mlen - 1; k >= 0; k--) {
e5574168	710	t += m[k];
32874aea	711	t += a[i + k];
a3412f52	712	a[i + k] = (BignumInt) t;
a3412f52	713	t = t >> BIGNUM_INT_BITS;
e5574168	714	}
32874aea	715	q--;
e5574168	716	}
32874aea	717	if (quot)
a3412f52	718	internal_add_shifted(quot, q, qshift + BIGNUM_INT_BITS * (alen - mlen - i));
e5574168	719	}
	720	}
	721
	722	/*
09095ac5	723	* Compute (base ^ exp) % mod, the pedestrian way.
e5574168	724	*/
09095ac5	725	Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)
e5574168	726	{
5a502a19	727	BignumInt a, b, n, m, *scratch;
09095ac5	728	int mshift;
5a502a19	729	int mlen, scratchlen, i, j;
09095ac5	730	Bignum base, result;
ed953b91	731
	732	/*
	733	* The most significant word of mod needs to be non-zero. It
	734	* should already be, but let's make sure.
	735	*/
	736	assert(mod[mod[0]] != 0);
	737
	738	/*
	739	* Make sure the base is smaller than the modulus, by reducing
	740	* it modulo the modulus if not.
	741	*/
	742	base = bigmod(base_in, mod);
e5574168	743
09095ac5	744	/* Allocate m of size mlen, copy mod to m */
	745	/* We use big endian internally */
	746	mlen = mod[0];
	747	m = snewn(mlen, BignumInt);
	748	for (j = 0; j < mlen; j++)
	749	m[j] = mod[mod[0] - j];
	750
	751	/* Shift m left to make msb bit set */
	752	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
	753	if ((m[0] << mshift) & BIGNUM_TOP_BIT)
	754	break;
	755	if (mshift) {
	756	for (i = 0; i < mlen - 1; i++)
	757	m[i] = (m[i] << mshift) \| (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
	758	m[mlen - 1] = m[mlen - 1] << mshift;
	759	}
	760
	761	/* Allocate n of size mlen, copy base to n */
	762	n = snewn(mlen, BignumInt);
	763	i = mlen - base[0];
	764	for (j = 0; j < i; j++)
	765	n[j] = 0;
	766	for (j = 0; j < (int)base[0]; j++)
	767	n[i + j] = base[base[0] - j];
	768
	769	/* Allocate a and b of size 2mlen. Set a = 1 /
	770	a = snewn(2 * mlen, BignumInt);
	771	b = snewn(2 * mlen, BignumInt);
	772	for (i = 0; i < 2 * mlen; i++)
	773	a[i] = 0;
	774	a[2 * mlen - 1] = 1;
	775
5a502a19	776	/* Scratch space for multiplies */
	777	scratchlen = mul_compute_scratch(mlen);
	778	scratch = snewn(scratchlen, BignumInt);
	779
09095ac5	780	/* Skip leading zero bits of exp. */
	781	i = 0;
	782	j = BIGNUM_INT_BITS-1;
	783	while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
	784	j--;
	785	if (j < 0) {
	786	i++;
	787	j = BIGNUM_INT_BITS-1;
	788	}
	789	}
	790
	791	/* Main computation */
	792	while (i < (int)exp[0]) {
	793	while (j >= 0) {
5a502a19	794	internal_mul(a + mlen, a + mlen, b, mlen, scratch);
09095ac5	795	internal_mod(b, mlen * 2, m, mlen, NULL, 0);
09095ac5	796	if ((exp[exp[0] - i] & (1 << j)) != 0) {
5a502a19	797	internal_mul(b + mlen, n, a, mlen, scratch);
09095ac5	798	internal_mod(a, mlen * 2, m, mlen, NULL, 0);
	799	} else {
	800	BignumInt *t;
	801	t = a;
	802	a = b;
	803	b = t;
	804	}
	805	j--;
	806	}
	807	i++;
	808	j = BIGNUM_INT_BITS-1;
	809	}
	810
	811	/* Fixup result in case the modulus was shifted */
	812	if (mshift) {
	813	for (i = mlen - 1; i < 2 * mlen - 1; i++)
	814	a[i] = (a[i] << mshift) \| (a[i + 1] >> (BIGNUM_INT_BITS - mshift));
	815	a[2 * mlen - 1] = a[2 * mlen - 1] << mshift;
	816	internal_mod(a, mlen * 2, m, mlen, NULL, 0);
	817	for (i = 2 * mlen - 1; i >= mlen; i--)
	818	a[i] = (a[i] >> mshift) \| (a[i - 1] << (BIGNUM_INT_BITS - mshift));
	819	}
	820
	821	/* Copy result to buffer */
	822	result = newbn(mod[0]);
	823	for (i = 0; i < mlen; i++)
	824	result[result[0] - i] = a[i + mlen];
	825	while (result[0] > 1 && result[result[0]] == 0)
	826	result[0]--;
	827
	828	/* Free temporary arrays */
16430000	829	smemclr(a, 2 * mlen * sizeof(*a));
09095ac5	830	sfree(a);
16430000	831	smemclr(scratch, scratchlen * sizeof(*scratch));
5a502a19	832	sfree(scratch);
16430000	833	smemclr(b, 2 * mlen * sizeof(*b));
09095ac5	834	sfree(b);
16430000	835	smemclr(m, mlen * sizeof(*m));
09095ac5	836	sfree(m);
16430000	837	smemclr(n, mlen * sizeof(*n));
09095ac5	838	sfree(n);
	839
	840	freebn(base);
	841
	842	return result;
	843	}
	844
	845	/*
	846	* Compute (base ^ exp) % mod. Uses the Montgomery multiplication
	847	* technique where possible, falling back to modpow_simple otherwise.
	848	*/
	849	Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
	850	{
5a502a19	851	BignumInt a, b, x, n, mninv, scratch;
5a502a19	852	int len, scratchlen, i, j;
09095ac5	853	Bignum base, base2, r, rn, inv, result;
	854
	855	/*
	856	* The most significant word of mod needs to be non-zero. It
	857	* should already be, but let's make sure.
	858	*/
	859	assert(mod[mod[0]] != 0);
	860
132c534f	861	/*
	862	* mod had better be odd, or we can't do Montgomery multiplication
	863	* using a power of two at all.
	864	*/
09095ac5	865	if (!(mod[1] & 1))
	866	return modpow_simple(base_in, exp, mod);
	867
	868	/*
	869	* Make sure the base is smaller than the modulus, by reducing
	870	* it modulo the modulus if not.
	871	*/
	872	base = bigmod(base_in, mod);
e5574168	873
132c534f	874	/*
	875	* Compute the inverse of n mod r, for monty_reduce. (In fact we
	876	* want the inverse of _minus_ n mod r, but we'll sort that out
	877	* below.)
	878	*/
	879	len = mod[0];
	880	r = bn_power_2(BIGNUM_INT_BITS * len);
	881	inv = modinv(mod, r);
de81309d	882	assert(inv); /* cannot fail, since mod is odd and r is a power of 2 */
e5574168	883
132c534f	884	/*
	885	* Multiply the base by r mod n, to get it into Montgomery
	886	* representation.
	887	*/
	888	base2 = modmul(base, r, mod);
	889	freebn(base);
	890	base = base2;
	891
	892	rn = bigmod(r, mod); /* r mod n, i.e. Montgomerified 1 */
	893
	894	freebn(r); /* won't need this any more */
	895
	896	/*
	897	* Set up internal arrays of the right lengths, in big-endian
	898	* format, containing the base, the modulus, and the modulus's
	899	* inverse.
	900	*/
	901	n = snewn(len, BignumInt);
	902	for (j = 0; j < len; j++)
	903	n[len - 1 - j] = mod[j + 1];
	904
	905	mninv = snewn(len, BignumInt);
	906	for (j = 0; j < len; j++)
08b5c9a2	907	mninv[len - 1 - j] = (j < (int)inv[0] ? inv[j + 1] : 0);
132c534f	908	freebn(inv); /* we don't need this copy of it any more */
	909	/* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
	910	x = snewn(len, BignumInt);
	911	for (j = 0; j < len; j++)
	912	x[j] = 0;
	913	internal_sub(x, mninv, mninv, len);
	914
	915	/* x = snewn(len, BignumInt); / / already done above */
	916	for (j = 0; j < len; j++)
08b5c9a2	917	x[len - 1 - j] = (j < (int)base[0] ? base[j + 1] : 0);
132c534f	918	freebn(base); /* we don't need this copy of it any more */
	919
	920	a = snewn(2*len, BignumInt);
	921	b = snewn(2*len, BignumInt);
	922	for (j = 0; j < len; j++)
08b5c9a2	923	a[2*len - 1 - j] = (j < (int)rn[0] ? rn[j + 1] : 0);
132c534f	924	freebn(rn);
132c534f	925
5a502a19	926	/* Scratch space for multiplies */
	927	scratchlen = 3*len + mul_compute_scratch(len);
	928	scratch = snewn(scratchlen, BignumInt);
e5574168	929
e5574168	930	/* Skip leading zero bits of exp. */
32874aea	931	i = 0;
a3412f52	932	j = BIGNUM_INT_BITS-1;
62ddb51e	933	while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
e5574168	934	j--;
32874aea	935	if (j < 0) {
32874aea	936	i++;
a3412f52	937	j = BIGNUM_INT_BITS-1;
32874aea	938	}
e5574168	939	}
	940
	941	/* Main computation */
62ddb51e	942	while (i < (int)exp[0]) {
e5574168	943	while (j >= 0) {
5a502a19	944	internal_mul(a + len, a + len, b, len, scratch);
5a502a19	945	monty_reduce(b, n, mninv, scratch, len);
e5574168	946	if ((exp[exp[0] - i] & (1 << j)) != 0) {
5a502a19	947	internal_mul(b + len, x, a, len, scratch);
5a502a19	948	monty_reduce(a, n, mninv, scratch, len);
e5574168	949	} else {
a3412f52	950	BignumInt *t;
32874aea	951	t = a;
	952	a = b;
	953	b = t;
e5574168	954	}
	955	j--;
	956	}
32874aea	957	i++;
a3412f52	958	j = BIGNUM_INT_BITS-1;
e5574168	959	}
e5574168	960
132c534f	961	/*
	962	* Final monty_reduce to get back from the adjusted Montgomery
	963	* representation.
	964	*/
5a502a19	965	monty_reduce(a, n, mninv, scratch, len);
e5574168	966
e5574168	967	/* Copy result to buffer */
59600f67	968	result = newbn(mod[0]);
132c534f	969	for (i = 0; i < len; i++)
132c534f	970	result[result[0] - i] = a[i + len];
32874aea	971	while (result[0] > 1 && result[result[0]] == 0)
32874aea	972	result[0]--;
e5574168	973
e5574168	974	/* Free temporary arrays */
16430000	975	smemclr(scratch, scratchlen * sizeof(*scratch));
5a502a19	976	sfree(scratch);
16430000	977	smemclr(a, 2 * len * sizeof(*a));
32874aea	978	sfree(a);
16430000	979	smemclr(b, 2 * len * sizeof(*b));
32874aea	980	sfree(b);
16430000	981	smemclr(mninv, len * sizeof(*mninv));
132c534f	982	sfree(mninv);
16430000	983	smemclr(n, len * sizeof(*n));
32874aea	984	sfree(n);
16430000	985	smemclr(x, len * sizeof(*x));
132c534f	986	sfree(x);
ed953b91	987
59600f67	988	return result;
e5574168	989	}
7cca0d81	990
	991	/*
	992	* Compute (p * q) % mod.
	993	* The most significant word of mod MUST be non-zero.
	994	* We assume that the result array is the same size as the mod array.
	995	*/
59600f67	996	Bignum modmul(Bignum p, Bignum q, Bignum mod)
7cca0d81	997	{
5a502a19	998	BignumInt a, n, m, o, *scratch;
5a502a19	999	int mshift, scratchlen;
80b10571	1000	int pqlen, mlen, rlen, i, j;
59600f67	1001	Bignum result;
7cca0d81	1002
8bd9144b	1003	/*
	1004	* The most significant word of mod needs to be non-zero. It
	1005	* should already be, but let's make sure.
	1006	*/
	1007	assert(mod[mod[0]] != 0);
	1008
7cca0d81	1009	/* Allocate m of size mlen, copy mod to m */
	1010	/* We use big endian internally */
	1011	mlen = mod[0];
a3412f52	1012	m = snewn(mlen, BignumInt);
32874aea	1013	for (j = 0; j < mlen; j++)
32874aea	1014	m[j] = mod[mod[0] - j];
7cca0d81	1015
7cca0d81	1016	/* Shift m left to make msb bit set */
a3412f52	1017	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
a3412f52	1018	if ((m[0] << mshift) & BIGNUM_TOP_BIT)
32874aea	1019	break;
7cca0d81	1020	if (mshift) {
7cca0d81	1021	for (i = 0; i < mlen - 1; i++)
a3412f52	1022	m[i] = (m[i] << mshift) \| (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
32874aea	1023	m[mlen - 1] = m[mlen - 1] << mshift;
7cca0d81	1024	}
	1025
	1026	pqlen = (p[0] > q[0] ? p[0] : q[0]);
	1027
5064e5e6	1028	/*
	1029	* Make sure that we're allowing enough space. The shifting below
	1030	* will underflow the vectors we allocate if pqlen is too small.
	1031	*/
	1032	if (2*pqlen <= mlen)
	1033	pqlen = mlen/2 + 1;
	1034
7cca0d81	1035	/* Allocate n of size pqlen, copy p to n */
a3412f52	1036	n = snewn(pqlen, BignumInt);
7cca0d81	1037	i = pqlen - p[0];
32874aea	1038	for (j = 0; j < i; j++)
32874aea	1039	n[j] = 0;
62ddb51e	1040	for (j = 0; j < (int)p[0]; j++)
32874aea	1041	n[i + j] = p[p[0] - j];
7cca0d81	1042
7cca0d81	1043	/* Allocate o of size pqlen, copy q to o */
a3412f52	1044	o = snewn(pqlen, BignumInt);
7cca0d81	1045	i = pqlen - q[0];
32874aea	1046	for (j = 0; j < i; j++)
32874aea	1047	o[j] = 0;
62ddb51e	1048	for (j = 0; j < (int)q[0]; j++)
32874aea	1049	o[i + j] = q[q[0] - j];
7cca0d81	1050
7cca0d81	1051	/* Allocate a of size 2pqlen for result /
a3412f52	1052	a = snewn(2 * pqlen, BignumInt);
7cca0d81	1053
5a502a19	1054	/* Scratch space for multiplies */
	1055	scratchlen = mul_compute_scratch(pqlen);
	1056	scratch = snewn(scratchlen, BignumInt);
	1057
7cca0d81	1058	/* Main computation */
5a502a19	1059	internal_mul(n, o, a, pqlen, scratch);
32874aea	1060	internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
7cca0d81	1061
	1062	/* Fixup result in case the modulus was shifted */
	1063	if (mshift) {
32874aea	1064	for (i = 2 * pqlen - mlen - 1; i < 2 * pqlen - 1; i++)
a3412f52	1065	a[i] = (a[i] << mshift) \| (a[i + 1] >> (BIGNUM_INT_BITS - mshift));
32874aea	1066	a[2 * pqlen - 1] = a[2 * pqlen - 1] << mshift;
	1067	internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
	1068	for (i = 2 * pqlen - 1; i >= 2 * pqlen - mlen; i--)
a3412f52	1069	a[i] = (a[i] >> mshift) \| (a[i - 1] << (BIGNUM_INT_BITS - mshift));
7cca0d81	1070	}
	1071
	1072	/* Copy result to buffer */
32874aea	1073	rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
80b10571	1074	result = newbn(rlen);
80b10571	1075	for (i = 0; i < rlen; i++)
32874aea	1076	result[result[0] - i] = a[i + 2 * pqlen - rlen];
	1077	while (result[0] > 1 && result[result[0]] == 0)
	1078	result[0]--;
7cca0d81	1079
7cca0d81	1080	/* Free temporary arrays */
16430000	1081	smemclr(scratch, scratchlen * sizeof(*scratch));
5a502a19	1082	sfree(scratch);
16430000	1083	smemclr(a, 2 * pqlen * sizeof(*a));
32874aea	1084	sfree(a);
16430000	1085	smemclr(m, mlen * sizeof(*m));
32874aea	1086	sfree(m);
16430000	1087	smemclr(n, pqlen * sizeof(*n));
32874aea	1088	sfree(n);
16430000	1089	smemclr(o, pqlen * sizeof(*o));
32874aea	1090	sfree(o);
59600f67	1091
59600f67	1092	return result;
7cca0d81	1093	}
	1094
	1095	/*
9400cf6f	1096	* Compute p % mod.
	1097	* The most significant word of mod MUST be non-zero.
	1098	* We assume that the result array is the same size as the mod array.
5c72ca61	1099	* We optionally write out a quotient if `quotient' is non-NULL.
5c72ca61	1100	* We can avoid writing out the result if `result' is NULL.
9400cf6f	1101	*/
f28753ab	1102	static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)
9400cf6f	1103	{
a3412f52	1104	BignumInt n, m;
9400cf6f	1105	int mshift;
	1106	int plen, mlen, i, j;
	1107
8bd9144b	1108	/*
	1109	* The most significant word of mod needs to be non-zero. It
	1110	* should already be, but let's make sure.
	1111	*/
	1112	assert(mod[mod[0]] != 0);
	1113
9400cf6f	1114	/* Allocate m of size mlen, copy mod to m */
	1115	/* We use big endian internally */
	1116	mlen = mod[0];
a3412f52	1117	m = snewn(mlen, BignumInt);
32874aea	1118	for (j = 0; j < mlen; j++)
32874aea	1119	m[j] = mod[mod[0] - j];
9400cf6f	1120
9400cf6f	1121	/* Shift m left to make msb bit set */
a3412f52	1122	for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
a3412f52	1123	if ((m[0] << mshift) & BIGNUM_TOP_BIT)
32874aea	1124	break;
9400cf6f	1125	if (mshift) {
9400cf6f	1126	for (i = 0; i < mlen - 1; i++)
a3412f52	1127	m[i] = (m[i] << mshift) \| (m[i + 1] >> (BIGNUM_INT_BITS - mshift));
32874aea	1128	m[mlen - 1] = m[mlen - 1] << mshift;
9400cf6f	1129	}
	1130
	1131	plen = p[0];
	1132	/* Ensure plen > mlen */
32874aea	1133	if (plen <= mlen)
32874aea	1134	plen = mlen + 1;
9400cf6f	1135
9400cf6f	1136	/* Allocate n of size plen, copy p to n */
a3412f52	1137	n = snewn(plen, BignumInt);
32874aea	1138	for (j = 0; j < plen; j++)
32874aea	1139	n[j] = 0;
62ddb51e	1140	for (j = 1; j <= (int)p[0]; j++)
32874aea	1141	n[plen - j] = p[j];
9400cf6f	1142
	1143	/* Main computation */
	1144	internal_mod(n, plen, m, mlen, quotient, mshift);
	1145
	1146	/* Fixup result in case the modulus was shifted */
	1147	if (mshift) {
	1148	for (i = plen - mlen - 1; i < plen - 1; i++)
a3412f52	1149	n[i] = (n[i] << mshift) \| (n[i + 1] >> (BIGNUM_INT_BITS - mshift));
32874aea	1150	n[plen - 1] = n[plen - 1] << mshift;
9400cf6f	1151	internal_mod(n, plen, m, mlen, quotient, 0);
9400cf6f	1152	for (i = plen - 1; i >= plen - mlen; i--)
a3412f52	1153	n[i] = (n[i] >> mshift) \| (n[i - 1] << (BIGNUM_INT_BITS - mshift));
9400cf6f	1154	}
	1155
	1156	/* Copy result to buffer */
5c72ca61	1157	if (result) {
62ddb51e	1158	for (i = 1; i <= (int)result[0]; i++) {
5c72ca61	1159	int j = plen - i;
	1160	result[i] = j >= 0 ? n[j] : 0;
	1161	}
9400cf6f	1162	}
	1163
	1164	/* Free temporary arrays */
16430000	1165	smemclr(m, mlen * sizeof(*m));
32874aea	1166	sfree(m);
16430000	1167	smemclr(n, plen * sizeof(*n));
32874aea	1168	sfree(n);
9400cf6f	1169	}
	1170
	1171	/*
7cca0d81	1172	* Decrement a number.
7cca0d81	1173	*/
32874aea	1174	void decbn(Bignum bn)
32874aea	1175	{
7cca0d81	1176	int i = 1;
62ddb51e	1177	while (i < (int)bn[0] && bn[i] == 0)
a3412f52	1178	bn[i++] = BIGNUM_INT_MASK;
7cca0d81	1179	bn[i]--;
	1180	}
	1181
27cd7fc2	1182	Bignum bignum_from_bytes(const unsigned char *data, int nbytes)
32874aea	1183	{
3709bfe9	1184	Bignum result;
	1185	int w, i;
	1186
551a4acb	1187	assert(nbytes >= 0 && nbytes < INT_MAX/8);
551a4acb	1188
a3412f52	1189	w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
3709bfe9	1190
3709bfe9	1191	result = newbn(w);
32874aea	1192	for (i = 1; i <= w; i++)
	1193	result[i] = 0;
	1194	for (i = nbytes; i--;) {
	1195	unsigned char byte = *data++;
a3412f52	1196	result[1 + i / BIGNUM_INT_BYTES] \|= byte << (8*i % BIGNUM_INT_BITS);
3709bfe9	1197	}
3709bfe9	1198
32874aea	1199	while (result[0] > 1 && result[result[0]] == 0)
32874aea	1200	result[0]--;
3709bfe9	1201	return result;
	1202	}
	1203
7cca0d81	1204	/*
2e85c969	1205	* Read an SSH-1-format bignum from a data buffer. Return the number
0016d70b	1206	* of bytes consumed, or -1 if there wasn't enough data.
7cca0d81	1207	*/
0016d70b	1208	int ssh1_read_bignum(const unsigned char data, int len, Bignum result)
32874aea	1209	{
27cd7fc2	1210	const unsigned char *p = data;
7cca0d81	1211	int i;
	1212	int w, b;
	1213
0016d70b	1214	if (len < 2)
	1215	return -1;
	1216
7cca0d81	1217	w = 0;
32874aea	1218	for (i = 0; i < 2; i++)
	1219	w = (w << 8) + *p++;
	1220	b = (w + 7) / 8; /* bits -> bytes */
7cca0d81	1221
0016d70b	1222	if (len < b+2)
	1223	return -1;
	1224
32874aea	1225	if (!result) /* just return length */
32874aea	1226	return b + 2;
a52f067e	1227
3709bfe9	1228	*result = bignum_from_bytes(p, b);
7cca0d81	1229
3709bfe9	1230	return p + b - data;
7cca0d81	1231	}
5c58ad2d	1232
5c58ad2d	1233	/*
2e85c969	1234	* Return the bit count of a bignum, for SSH-1 encoding.
5c58ad2d	1235	*/
32874aea	1236	int bignum_bitcount(Bignum bn)
32874aea	1237	{
a3412f52	1238	int bitcount = bn[0] * BIGNUM_INT_BITS - 1;
32874aea	1239	while (bitcount >= 0
a3412f52	1240	&& (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;
5c58ad2d	1241	return bitcount + 1;
	1242	}
	1243
	1244	/*
2e85c969	1245	* Return the byte length of a bignum when SSH-1 encoded.
5c58ad2d	1246	*/
32874aea	1247	int ssh1_bignum_length(Bignum bn)
	1248	{
	1249	return 2 + (bignum_bitcount(bn) + 7) / 8;
ddecd643	1250	}
	1251
	1252	/*
2e85c969	1253	* Return the byte length of a bignum when SSH-2 encoded.
ddecd643	1254	*/
32874aea	1255	int ssh2_bignum_length(Bignum bn)
	1256	{
	1257	return 4 + (bignum_bitcount(bn) + 8) / 8;
5c58ad2d	1258	}
	1259
	1260	/*
	1261	* Return a byte from a bignum; 0 is least significant, etc.
	1262	*/
32874aea	1263	int bignum_byte(Bignum bn, int i)
32874aea	1264	{
551a4acb	1265	if (i < 0 \|\| i >= (int)(BIGNUM_INT_BYTES * bn[0]))
32874aea	1266	return 0; /* beyond the end */
5c58ad2d	1267	else
a3412f52	1268	return (bn[i / BIGNUM_INT_BYTES + 1] >>
a3412f52	1269	((i % BIGNUM_INT_BYTES)*8)) & 0xFF;
5c58ad2d	1270	}
	1271
	1272	/*
9400cf6f	1273	* Return a bit from a bignum; 0 is least significant, etc.
9400cf6f	1274	*/
32874aea	1275	int bignum_bit(Bignum bn, int i)
32874aea	1276	{
551a4acb	1277	if (i < 0 \|\| i >= (int)(BIGNUM_INT_BITS * bn[0]))
32874aea	1278	return 0; /* beyond the end */
9400cf6f	1279	else
a3412f52	1280	return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;
9400cf6f	1281	}
	1282
	1283	/*
	1284	* Set a bit in a bignum; 0 is least significant, etc.
	1285	*/
32874aea	1286	void bignum_set_bit(Bignum bn, int bitnum, int value)
32874aea	1287	{
551a4acb	1288	if (bitnum < 0 \|\| bitnum >= (int)(BIGNUM_INT_BITS * bn[0]))
32874aea	1289	abort(); /* beyond the end */
9400cf6f	1290	else {
a3412f52	1291	int v = bitnum / BIGNUM_INT_BITS + 1;
a3412f52	1292	int mask = 1 << (bitnum % BIGNUM_INT_BITS);
32874aea	1293	if (value)
	1294	bn[v] \|= mask;
	1295	else
	1296	bn[v] &= ~mask;
9400cf6f	1297	}
	1298	}
	1299
	1300	/*
2e85c969	1301	* Write a SSH-1-format bignum into a buffer. It is assumed the
5c58ad2d	1302	* buffer is big enough. Returns the number of bytes used.
5c58ad2d	1303	*/
32874aea	1304	int ssh1_write_bignum(void *data, Bignum bn)
32874aea	1305	{
5c58ad2d	1306	unsigned char *p = data;
	1307	int len = ssh1_bignum_length(bn);
	1308	int i;
ddecd643	1309	int bitc = bignum_bitcount(bn);
5c58ad2d	1310
5c58ad2d	1311	*p++ = (bitc >> 8) & 0xFF;
32874aea	1312	*p++ = (bitc) & 0xFF;
	1313	for (i = len - 2; i--;)
	1314	*p++ = bignum_byte(bn, i);
5c58ad2d	1315	return len;
5c58ad2d	1316	}
9400cf6f	1317
	1318	/*
	1319	* Compare two bignums. Returns like strcmp.
	1320	*/
32874aea	1321	int bignum_cmp(Bignum a, Bignum b)
32874aea	1322	{
9400cf6f	1323	int amax = a[0], bmax = b[0];
551a4acb	1324	int i;
551a4acb	1325
434a1d60	1326	/* Annoyingly we have two representations of zero */
	1327	if (amax == 1 && a[amax] == 0)
	1328	amax = 0;
	1329	if (bmax == 1 && b[bmax] == 0)
	1330	bmax = 0;
	1331
551a4acb	1332	assert(amax == 0 \|\| a[amax] != 0);
	1333	assert(bmax == 0 \|\| b[bmax] != 0);
	1334
	1335	i = (amax > bmax ? amax : bmax);
9400cf6f	1336	while (i) {
a3412f52	1337	BignumInt aval = (i > amax ? 0 : a[i]);
a3412f52	1338	BignumInt bval = (i > bmax ? 0 : b[i]);
32874aea	1339	if (aval < bval)
	1340	return -1;
	1341	if (aval > bval)
	1342	return +1;
	1343	i--;
9400cf6f	1344	}
	1345	return 0;
	1346	}
	1347
	1348	/*
	1349	* Right-shift one bignum to form another.
	1350	*/
32874aea	1351	Bignum bignum_rshift(Bignum a, int shift)
32874aea	1352	{
9400cf6f	1353	Bignum ret;
9400cf6f	1354	int i, shiftw, shiftb, shiftbb, bits;
a3412f52	1355	BignumInt ai, ai1;
9400cf6f	1356
551a4acb	1357	assert(shift >= 0);
551a4acb	1358
ddecd643	1359	bits = bignum_bitcount(a) - shift;
a3412f52	1360	ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
9400cf6f	1361
9400cf6f	1362	if (ret) {
a3412f52	1363	shiftw = shift / BIGNUM_INT_BITS;
	1364	shiftb = shift % BIGNUM_INT_BITS;
	1365	shiftbb = BIGNUM_INT_BITS - shiftb;
32874aea	1366
32874aea	1367	ai1 = a[shiftw + 1];
62ddb51e	1368	for (i = 1; i <= (int)ret[0]; i++) {
32874aea	1369	ai = ai1;
62ddb51e	1370	ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);
a3412f52	1371	ret[i] = ((ai >> shiftb) \| (ai1 << shiftbb)) & BIGNUM_INT_MASK;
32874aea	1372	}
9400cf6f	1373	}
	1374
	1375	return ret;
	1376	}
	1377
	1378	/*
	1379	* Non-modular multiplication and addition.
	1380	*/
32874aea	1381	Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
32874aea	1382	{
9400cf6f	1383	int alen = a[0], blen = b[0];
	1384	int mlen = (alen > blen ? alen : blen);
	1385	int rlen, i, maxspot;
5a502a19	1386	int wslen;
a3412f52	1387	BignumInt *workspace;
9400cf6f	1388	Bignum ret;
9400cf6f	1389
5a502a19	1390	/* mlen space for a, mlen space for b, 2*mlen for result,
	1391	* plus scratch space for multiplication */
	1392	wslen = mlen * 4 + mul_compute_scratch(mlen);
	1393	workspace = snewn(wslen, BignumInt);
9400cf6f	1394	for (i = 0; i < mlen; i++) {
62ddb51e	1395	workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0);
62ddb51e	1396	workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? b[mlen - i] : 0);
9400cf6f	1397	}
9400cf6f	1398
32874aea	1399	internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
5a502a19	1400	workspace + 2 * mlen, mlen, workspace + 4 * mlen);
9400cf6f	1401
	1402	/* now just copy the result back */
	1403	rlen = alen + blen + 1;
62ddb51e	1404	if (addend && rlen <= (int)addend[0])
32874aea	1405	rlen = addend[0] + 1;
9400cf6f	1406	ret = newbn(rlen);
9400cf6f	1407	maxspot = 0;
62ddb51e	1408	for (i = 1; i <= (int)ret[0]; i++) {
32874aea	1409	ret[i] = (i <= 2 * mlen ? workspace[4 * mlen - i] : 0);
	1410	if (ret[i] != 0)
	1411	maxspot = i;
9400cf6f	1412	}
	1413	ret[0] = maxspot;
	1414
	1415	/* now add in the addend, if any */
	1416	if (addend) {
a3412f52	1417	BignumDblInt carry = 0;
32874aea	1418	for (i = 1; i <= rlen; i++) {
62ddb51e	1419	carry += (i <= (int)ret[0] ? ret[i] : 0);
62ddb51e	1420	carry += (i <= (int)addend[0] ? addend[i] : 0);
a3412f52	1421	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
a3412f52	1422	carry >>= BIGNUM_INT_BITS;
32874aea	1423	if (ret[i] != 0 && i > maxspot)
	1424	maxspot = i;
	1425	}
9400cf6f	1426	}
	1427	ret[0] = maxspot;
	1428
16430000	1429	smemclr(workspace, wslen * sizeof(*workspace));
c523f55f	1430	sfree(workspace);
9400cf6f	1431	return ret;
	1432	}
	1433
	1434	/*
	1435	* Non-modular multiplication.
	1436	*/
32874aea	1437	Bignum bigmul(Bignum a, Bignum b)
32874aea	1438	{
9400cf6f	1439	return bigmuladd(a, b, NULL);
	1440	}
	1441
	1442	/*
d737853b	1443	* Simple addition.
	1444	*/
	1445	Bignum bigadd(Bignum a, Bignum b)
	1446	{
	1447	int alen = a[0], blen = b[0];
	1448	int rlen = (alen > blen ? alen : blen) + 1;
	1449	int i, maxspot;
	1450	Bignum ret;
	1451	BignumDblInt carry;
	1452
	1453	ret = newbn(rlen);
	1454
	1455	carry = 0;
	1456	maxspot = 0;
	1457	for (i = 1; i <= rlen; i++) {
	1458	carry += (i <= (int)a[0] ? a[i] : 0);
	1459	carry += (i <= (int)b[0] ? b[i] : 0);
	1460	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1461	carry >>= BIGNUM_INT_BITS;
	1462	if (ret[i] != 0 && i > maxspot)
	1463	maxspot = i;
	1464	}
	1465	ret[0] = maxspot;
	1466
	1467	return ret;
	1468	}
	1469
	1470	/*
	1471	* Subtraction. Returns a-b, or NULL if the result would come out
	1472	* negative (recall that this entire bignum module only handles
	1473	* positive numbers).
	1474	*/
	1475	Bignum bigsub(Bignum a, Bignum b)
	1476	{
	1477	int alen = a[0], blen = b[0];
	1478	int rlen = (alen > blen ? alen : blen);
	1479	int i, maxspot;
	1480	Bignum ret;
	1481	BignumDblInt carry;
	1482
	1483	ret = newbn(rlen);
	1484
	1485	carry = 1;
	1486	maxspot = 0;
	1487	for (i = 1; i <= rlen; i++) {
	1488	carry += (i <= (int)a[0] ? a[i] : 0);
	1489	carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK);
	1490	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1491	carry >>= BIGNUM_INT_BITS;
	1492	if (ret[i] != 0 && i > maxspot)
	1493	maxspot = i;
	1494	}
	1495	ret[0] = maxspot;
	1496
	1497	if (!carry) {
	1498	freebn(ret);
	1499	return NULL;
	1500	}
	1501
	1502	return ret;
	1503	}
	1504
	1505	/*
3709bfe9	1506	* Create a bignum which is the bitmask covering another one. That
	1507	* is, the smallest integer which is >= N and is also one less than
	1508	* a power of two.
	1509	*/
32874aea	1510	Bignum bignum_bitmask(Bignum n)
32874aea	1511	{
3709bfe9	1512	Bignum ret = copybn(n);
3709bfe9	1513	int i;
a3412f52	1514	BignumInt j;
3709bfe9	1515
	1516	i = ret[0];
	1517	while (n[i] == 0 && i > 0)
32874aea	1518	i--;
3709bfe9	1519	if (i <= 0)
32874aea	1520	return ret; /* input was zero */
3709bfe9	1521	j = 1;
3709bfe9	1522	while (j < n[i])
32874aea	1523	j = 2 * j + 1;
3709bfe9	1524	ret[i] = j;
3709bfe9	1525	while (--i > 0)
a3412f52	1526	ret[i] = BIGNUM_INT_MASK;
3709bfe9	1527	return ret;
	1528	}
	1529
	1530	/*
5c72ca61	1531	* Convert a (max 32-bit) long into a bignum.
9400cf6f	1532	*/
a3412f52	1533	Bignum bignum_from_long(unsigned long nn)
32874aea	1534	{
9400cf6f	1535	Bignum ret;
a3412f52	1536	BignumDblInt n = nn;
9400cf6f	1537
5c72ca61	1538	ret = newbn(3);
a3412f52	1539	ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
a3412f52	1540	ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
5c72ca61	1541	ret[3] = 0;
5c72ca61	1542	ret[0] = (ret[2] ? 2 : 1);
32874aea	1543	return ret;
9400cf6f	1544	}
	1545
	1546	/*
	1547	* Add a long to a bignum.
	1548	*/
a3412f52	1549	Bignum bignum_add_long(Bignum number, unsigned long addendx)
32874aea	1550	{
32874aea	1551	Bignum ret = newbn(number[0] + 1);
9400cf6f	1552	int i, maxspot = 0;
a3412f52	1553	BignumDblInt carry = 0, addend = addendx;
9400cf6f	1554
62ddb51e	1555	for (i = 1; i <= (int)ret[0]; i++) {
a3412f52	1556	carry += addend & BIGNUM_INT_MASK;
62ddb51e	1557	carry += (i <= (int)number[0] ? number[i] : 0);
a3412f52	1558	addend >>= BIGNUM_INT_BITS;
	1559	ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
	1560	carry >>= BIGNUM_INT_BITS;
32874aea	1561	if (ret[i] != 0)
32874aea	1562	maxspot = i;
9400cf6f	1563	}
	1564	ret[0] = maxspot;
	1565	return ret;
	1566	}
	1567
	1568	/*
	1569	* Compute the residue of a bignum, modulo a (max 16-bit) short.
	1570	*/
32874aea	1571	unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
32874aea	1572	{
a3412f52	1573	BignumDblInt mod, r;
9400cf6f	1574	int i;
	1575
	1576	r = 0;
	1577	mod = modulus;
	1578	for (i = number[0]; i > 0; i--)
736cc6d1	1579	r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
6e522441	1580	return (unsigned short) r;
9400cf6f	1581	}
9400cf6f	1582
a3412f52	1583	#ifdef DEBUG
32874aea	1584	void diagbn(char *prefix, Bignum md)
32874aea	1585	{
9400cf6f	1586	int i, nibbles, morenibbles;
	1587	static const char hex[] = "0123456789ABCDEF";
	1588
5c72ca61	1589	debug(("%s0x", prefix ? prefix : ""));
9400cf6f	1590
32874aea	1591	nibbles = (3 + bignum_bitcount(md)) / 4;
	1592	if (nibbles < 1)
	1593	nibbles = 1;
	1594	morenibbles = 4 * md[0] - nibbles;
	1595	for (i = 0; i < morenibbles; i++)
5c72ca61	1596	debug(("-"));
32874aea	1597	for (i = nibbles; i--;)
5c72ca61	1598	debug(("%c",
5c72ca61	1599	hex[(bignum_byte(md, i / 2) >> (4 * (i % 2))) & 0xF]));
9400cf6f	1600
32874aea	1601	if (prefix)
5c72ca61	1602	debug(("\n"));
5c72ca61	1603	}
f28753ab	1604	#endif
5c72ca61	1605
	1606	/*
	1607	* Simple division.
	1608	*/
	1609	Bignum bigdiv(Bignum a, Bignum b)
	1610	{
	1611	Bignum q = newbn(a[0]);
	1612	bigdivmod(a, b, NULL, q);
	1613	return q;
	1614	}
	1615
	1616	/*
	1617	* Simple remainder.
	1618	*/
	1619	Bignum bigmod(Bignum a, Bignum b)
	1620	{
	1621	Bignum r = newbn(b[0]);
	1622	bigdivmod(a, b, r, NULL);
	1623	return r;
9400cf6f	1624	}
	1625
	1626	/*
	1627	* Greatest common divisor.
	1628	*/
32874aea	1629	Bignum biggcd(Bignum av, Bignum bv)
32874aea	1630	{
9400cf6f	1631	Bignum a = copybn(av);
	1632	Bignum b = copybn(bv);
	1633
9400cf6f	1634	while (bignum_cmp(b, Zero) != 0) {
32874aea	1635	Bignum t = newbn(b[0]);
5c72ca61	1636	bigdivmod(a, b, t, NULL);
32874aea	1637	while (t[0] > 1 && t[t[0]] == 0)
	1638	t[0]--;
	1639	freebn(a);
	1640	a = b;
	1641	b = t;
9400cf6f	1642	}
	1643
	1644	freebn(b);
	1645	return a;
	1646	}
	1647
	1648	/*
	1649	* Modular inverse, using Euclid's extended algorithm.
	1650	*/
32874aea	1651	Bignum modinv(Bignum number, Bignum modulus)
32874aea	1652	{
9400cf6f	1653	Bignum a = copybn(modulus);
	1654	Bignum b = copybn(number);
	1655	Bignum xp = copybn(Zero);
	1656	Bignum x = copybn(One);
	1657	int sign = +1;
	1658
8bd9144b	1659	assert(number[number[0]] != 0);
	1660	assert(modulus[modulus[0]] != 0);
	1661
9400cf6f	1662	while (bignum_cmp(b, One) != 0) {
de81309d	1663	Bignum t, q;
	1664
	1665	if (bignum_cmp(b, Zero) == 0) {
	1666	/*
	1667	* Found a common factor between the inputs, so we cannot
	1668	* return a modular inverse at all.
	1669	*/
c6456dca	1670	freebn(b);
	1671	freebn(a);
	1672	freebn(xp);
	1673	freebn(x);
de81309d	1674	return NULL;
	1675	}
	1676
	1677	t = newbn(b[0]);
	1678	q = newbn(a[0]);
5c72ca61	1679	bigdivmod(a, b, t, q);
32874aea	1680	while (t[0] > 1 && t[t[0]] == 0)
	1681	t[0]--;
	1682	freebn(a);
	1683	a = b;
	1684	b = t;
	1685	t = xp;
	1686	xp = x;
	1687	x = bigmuladd(q, xp, t);
	1688	sign = -sign;
	1689	freebn(t);
75374b2f	1690	freebn(q);
9400cf6f	1691	}
	1692
	1693	freebn(b);
	1694	freebn(a);
	1695	freebn(xp);
	1696
	1697	/* now we know that sign * x == 1, and that x < modulus */
	1698	if (sign < 0) {
32874aea	1699	/* set a new x to be modulus - x */
32874aea	1700	Bignum newx = newbn(modulus[0]);
a3412f52	1701	BignumInt carry = 0;
32874aea	1702	int maxspot = 1;
	1703	int i;
	1704
62ddb51e	1705	for (i = 1; i <= (int)newx[0]; i++) {
	1706	BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);
	1707	BignumInt bword = (i <= (int)x[0] ? x[i] : 0);
32874aea	1708	newx[i] = aword - bword - carry;
	1709	bword = ~bword;
	1710	carry = carry ? (newx[i] >= bword) : (newx[i] > bword);
	1711	if (newx[i] != 0)
	1712	maxspot = i;
	1713	}
	1714	newx[0] = maxspot;
	1715	freebn(x);
	1716	x = newx;
9400cf6f	1717	}
	1718
	1719	/* and return. */
	1720	return x;
	1721	}
6e522441	1722
	1723	/*
	1724	* Render a bignum into decimal. Return a malloced string holding
	1725	* the decimal representation.
	1726	*/
32874aea	1727	char *bignum_decimal(Bignum x)
32874aea	1728	{
6e522441	1729	int ndigits, ndigit;
6e522441	1730	int i, iszero;
a3412f52	1731	BignumDblInt carry;
6e522441	1732	char *ret;
a3412f52	1733	BignumInt *workspace;
6e522441	1734
	1735	/*
	1736	* First, estimate the number of digits. Since log(10)/log(2)
	1737	* is just greater than 93/28 (the joys of continued fraction
	1738	* approximations...) we know that for every 93 bits, we need
	1739	* at most 28 digits. This will tell us how much to malloc.
	1740	*
	1741	* Formally: if x has i bits, that means x is strictly less
	1742	* than 2^i. Since 2 is less than 10^(28/93), this is less than
	1743	* 10^(28i/93). We need an integer power of ten, so we must
	1744	* round up (rounding down might make it less than x again).
	1745	* Therefore if we multiply the bit count by 28/93, rounding
	1746	* up, we will have enough digits.
74c79ce8	1747	*
74c79ce8	1748	* i=0 (i.e., x=0) is an irritating special case.
6e522441	1749	*/
ddecd643	1750	i = bignum_bitcount(x);
74c79ce8	1751	if (!i)
	1752	ndigits = 1; /* x = 0 */
	1753	else
	1754	ndigits = (28 * i + 92) / 93; /* multiply by 28/93 and round up */
32874aea	1755	ndigits++; /* allow for trailing \0 */
3d88e64d	1756	ret = snewn(ndigits, char);
6e522441	1757
	1758	/*
	1759	* Now allocate some workspace to hold the binary form as we
	1760	* repeatedly divide it by ten. Initialise this to the
	1761	* big-endian form of the number.
	1762	*/
a3412f52	1763	workspace = snewn(x[0], BignumInt);
62ddb51e	1764	for (i = 0; i < (int)x[0]; i++)
32874aea	1765	workspace[i] = x[x[0] - i];
6e522441	1766
	1767	/*
	1768	* Next, write the decimal number starting with the last digit.
	1769	* We use ordinary short division, dividing 10 into the
	1770	* workspace.
	1771	*/
32874aea	1772	ndigit = ndigits - 1;
6e522441	1773	ret[ndigit] = '\0';
6e522441	1774	do {
32874aea	1775	iszero = 1;
32874aea	1776	carry = 0;
62ddb51e	1777	for (i = 0; i < (int)x[0]; i++) {
a3412f52	1778	carry = (carry << BIGNUM_INT_BITS) + workspace[i];
a3412f52	1779	workspace[i] = (BignumInt) (carry / 10);
32874aea	1780	if (workspace[i])
	1781	iszero = 0;
	1782	carry %= 10;
	1783	}
	1784	ret[--ndigit] = (char) (carry + '0');
6e522441	1785	} while (!iszero);
	1786
	1787	/*
	1788	* There's a chance we've fallen short of the start of the
	1789	* string. Correct if so.
	1790	*/
	1791	if (ndigit > 0)
32874aea	1792	memmove(ret, ret + ndigit, ndigits - ndigit);
6e522441	1793
	1794	/*
	1795	* Done.
	1796	*/
16430000	1797	smemclr(workspace, x[0] * sizeof(*workspace));
c523f55f	1798	sfree(workspace);
6e522441	1799	return ret;
6e522441	1800	}
f3c29e34	1801
	1802	#ifdef TESTBN
	1803
	1804	#include <stdio.h>
	1805	#include <stdlib.h>
	1806	#include <ctype.h>
	1807
	1808	/*
4800a5e5	1809	* gcc -Wall -g -O0 -DTESTBN -o testbn sshbn.c misc.c conf.c tree234.c unix/uxmisc.c -I. -I unix -I charset
f84f1e46	1810	*
	1811	* Then feed to this program's standard input the output of
	1812	* testdata/bignum.py .
f3c29e34	1813	*/
	1814
	1815	void modalfatalbox(char *p, ...)
	1816	{
	1817	va_list ap;
	1818	fprintf(stderr, "FATAL ERROR: ");
	1819	va_start(ap, p);
	1820	vfprintf(stderr, p, ap);
	1821	va_end(ap);
	1822	fputc('\n', stderr);
	1823	exit(1);
	1824	}
	1825
	1826	#define fromxdigit(c) ( (c)>'9' ? ((c)&0xDF) - 'A' + 10 : (c) - '0' )
	1827
	1828	int main(int argc, char **argv)
	1829	{
	1830	char *buf;
	1831	int line = 0;
	1832	int passes = 0, fails = 0;
	1833
	1834	while ((buf = fgetline(stdin)) != NULL) {
	1835	int maxlen = strlen(buf);
	1836	unsigned char *data = snewn(maxlen, unsigned char);
f84f1e46	1837	unsigned char ptrs[5], q;
f3c29e34	1838	int ptrnum;
	1839	char *bufp = buf;
	1840
	1841	line++;
	1842
	1843	q = data;
	1844	ptrnum = 0;
	1845
f84f1e46	1846	while (bufp && !isspace((unsigned char)bufp))
	1847	bufp++;
	1848	if (bufp)
	1849	*bufp++ = '\0';
	1850
f3c29e34	1851	while (*bufp) {
	1852	char start, end;
	1853	int i;
	1854
	1855	while (bufp && !isxdigit((unsigned char)bufp))
	1856	bufp++;
	1857	start = bufp;
	1858
	1859	if (!*bufp)
	1860	break;
	1861
	1862	while (bufp && isxdigit((unsigned char)bufp))
	1863	bufp++;
	1864	end = bufp;
	1865
	1866	if (ptrnum >= lenof(ptrs))
	1867	break;
	1868	ptrs[ptrnum++] = q;
	1869
	1870	for (i = -((end - start) & 1); i < end-start; i += 2) {
	1871	unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));
	1872	val = val * 16 + fromxdigit(start[i+1]);
	1873	*q++ = val;
	1874	}
	1875
	1876	ptrs[ptrnum] = q;
	1877	}
	1878
f84f1e46	1879	if (!strcmp(buf, "mul")) {
	1880	Bignum a, b, c, p;
	1881
	1882	if (ptrnum != 3) {
f6939e2b	1883	printf("%d: mul with %d parameters, expected 3\n", line, ptrnum);
f84f1e46	1884	exit(1);
	1885	}
	1886	a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	1887	b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	1888	c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	1889	p = bigmul(a, b);
f3c29e34	1890
	1891	if (bignum_cmp(c, p) == 0) {
	1892	passes++;
	1893	} else {
	1894	char *as = bignum_decimal(a);
	1895	char *bs = bignum_decimal(b);
	1896	char *cs = bignum_decimal(c);
	1897	char *ps = bignum_decimal(p);
	1898
	1899	printf("%d: fail: %s * %s gave %s expected %s\n",
	1900	line, as, bs, ps, cs);
	1901	fails++;
	1902
	1903	sfree(as);
	1904	sfree(bs);
	1905	sfree(cs);
	1906	sfree(ps);
	1907	}
	1908	freebn(a);
	1909	freebn(b);
	1910	freebn(c);
	1911	freebn(p);
5064e5e6	1912	} else if (!strcmp(buf, "modmul")) {
	1913	Bignum a, b, m, c, p;
	1914
	1915	if (ptrnum != 4) {
	1916	printf("%d: modmul with %d parameters, expected 4\n",
	1917	line, ptrnum);
	1918	exit(1);
	1919	}
	1920	a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	1921	b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	1922	m = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	1923	c = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
	1924	p = modmul(a, b, m);
	1925
	1926	if (bignum_cmp(c, p) == 0) {
	1927	passes++;
	1928	} else {
	1929	char *as = bignum_decimal(a);
	1930	char *bs = bignum_decimal(b);
	1931	char *ms = bignum_decimal(m);
	1932	char *cs = bignum_decimal(c);
	1933	char *ps = bignum_decimal(p);
	1934
	1935	printf("%d: fail: %s * %s mod %s gave %s expected %s\n",
	1936	line, as, bs, ms, ps, cs);
	1937	fails++;
	1938
	1939	sfree(as);
	1940	sfree(bs);
	1941	sfree(ms);
	1942	sfree(cs);
	1943	sfree(ps);
	1944	}
	1945	freebn(a);
	1946	freebn(b);
	1947	freebn(m);
	1948	freebn(c);
	1949	freebn(p);
f84f1e46	1950	} else if (!strcmp(buf, "pow")) {
	1951	Bignum base, expt, modulus, expected, answer;
	1952
	1953	if (ptrnum != 4) {
f6939e2b	1954	printf("%d: mul with %d parameters, expected 4\n", line, ptrnum);
f84f1e46	1955	exit(1);
	1956	}
	1957
	1958	base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
	1959	expt = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
	1960	modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
	1961	expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
	1962	answer = modpow(base, expt, modulus);
	1963
	1964	if (bignum_cmp(expected, answer) == 0) {
	1965	passes++;
	1966	} else {
	1967	char *as = bignum_decimal(base);
	1968	char *bs = bignum_decimal(expt);
	1969	char *cs = bignum_decimal(modulus);
	1970	char *ds = bignum_decimal(answer);
	1971	char *ps = bignum_decimal(expected);
	1972
	1973	printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n",
	1974	line, as, bs, cs, ds, ps);
	1975	fails++;
	1976
	1977	sfree(as);
	1978	sfree(bs);
	1979	sfree(cs);
	1980	sfree(ds);
	1981	sfree(ps);
	1982	}
	1983	freebn(base);
	1984	freebn(expt);
	1985	freebn(modulus);
	1986	freebn(expected);
	1987	freebn(answer);
	1988	} else {
	1989	printf("%d: unrecognised test keyword: '%s'\n", line, buf);
	1990	exit(1);
f3c29e34	1991	}
f84f1e46	1992
f3c29e34	1993	sfree(buf);
	1994	sfree(data);
	1995	}
	1996
	1997	printf("passed %d failed %d total %d\n", passes, fails, passes+fails);
	1998	return fails != 0;
	1999	}
	2000
	2001	#endif