Joris van Rantwijk's RSA speedup patch
[u/mdw/putty] / sshrsa.c
1 /*
2 * RSA implementation just sufficient for ssh client-side
3 * initialisation step
4 * Modified by Joris, Jun 1999.
5 */
6
7 #define JORIS_RSA
8
9 /*#include <windows.h>
10 #define RSADEBUG
11 #define DLVL 2
12 #include "stel.h"*/
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17
18 #include "ssh.h"
19
20 typedef unsigned short *Bignum;
21
22 static unsigned short Zero[1] = { 0 };
23
/*
 * Debug tracing support, compiled in only for TESTMODE or RSADEBUG builds.
 *
 *   debug(x)  - dump bignum x, labelled with the source text of x
 *   dmsg(x)   - print a message; x is a parenthesised printf argument list
 *   enter(x)  - print a message, then indent subsequent output by 4 columns
 *   leave(x)  - undo one level of indent, then print a message
 *
 * Nothing is printed once the nesting level reaches DLVL.  In all other
 * builds the four macros expand to nothing.
 *
 * dprintf() is not declared in this file; presumably it comes from the
 * project's debug header -- TODO confirm.
 * NOTE(review): dmsg() emits the indent via dprintf() but the message body
 * via printf() -- confirm that this is intended.
 */
#if defined TESTMODE || defined RSADEBUG
#ifndef DLVL
#define DLVL 10000
#endif
#define debug(x) bndebug(#x,x)
static int level = 0;

/*
 * Print `name`, padding, and then the words of b from b[b[0]] down to b[1]
 * as 4-digit hex, all indented by the current nesting level.
 */
static void bndebug(char *name, Bignum b) {
    int i;
    /*
     * strlen() yields size_t; convert it to int so the padding width is
     * computed in signed arithmetic and can go negative (and be clamped
     * below) instead of wrapping around as an unsigned value.
     */
    int w = 50 - level - (int) strlen(name) - 5 * b[0];
    if (level >= DLVL)
        return;
    if (w < 0) w = 0;
    dprintf("%*s%s%*s", level, "", name, w, "");
    for (i=b[0]; i>0; i--)
        dprintf(" %04x", b[i]);
    dprintf("\n");
}
#define dmsg(x) do {if(level<DLVL){dprintf("%*s",level,"");printf x;}} while(0)
#define enter(x) do { dmsg(x); level += 4; } while(0)
#define leave(x) do { level -= 4; dmsg(x); } while(0)
#else
#define debug(x)
#define dmsg(x)
#define enter(x)
#define leave(x)
#endif
50
51 static Bignum newbn(int length) {
52 Bignum b = malloc((length+1)*sizeof(unsigned short));
53 if (!b)
54 abort(); /* FIXME */
55 b[0] = length;
56 return b;
57 }
58
59 static void freebn(Bignum b) {
60 free(b);
61 }
62
63 #ifdef JORIS_RSA
64
/*
 * Schoolbook multiplication: c = a * b.
 *
 * a and b each supply `len` 16-bit words, most significant word first.
 * The 2*len-word product is written to c, also most significant first.
 */
static void bigmul(unsigned short *a, unsigned short *b, unsigned short *c,
                   int len)
{
    int row, col;

    /*
     * Only the low half needs clearing: every word of the high half is
     * assigned (not accumulated into) when its row finishes.
     */
    for (col = 0; col < len; col++)
        c[len + col] = 0;

    for (row = len - 1; row >= 0; row--) {
        unsigned long digit = a[row];
        unsigned long carry = 0;
        unsigned short *out = c + row + 1;   /* out[col] is c[row+col+1] */

        for (col = len - 1; col >= 0; col--) {
            /* 0xFFFF*0xFFFF + 0xFFFF + 0xFFFF still fits in 32 bits */
            carry += digit * (unsigned long) b[col];
            carry += (unsigned long) out[col];
            out[col] = (unsigned short) carry;
            carry >>= 16;
        }
        c[row] = (unsigned short) carry;
    }
}
91
92
/*
 * Compute a = a % m.
 * Input in first 2*len words of a and first len words of m.
 * Output in first 2*len words of a (of which first len words will be zero).
 * The MSW of m MUST have its high bit set.
 *
 * Both arrays are stored most significant word first.  The reduction is
 * a word-at-a-time long division: for each position a quotient word is
 * estimated from the top words of the running remainder and of m, q*m is
 * subtracted, and m is added back once if that over-subtracted.
 *
 * All intermediate arithmetic is done in unsigned long.  The products and
 * the h:a[i] combinations can reach 0xFFFFFFFF, which does not fit in a
 * signed 32-bit long.
 */
static void bigmod(unsigned short *a, unsigned short *m, int len)
{
    unsigned short m0, m1;
    unsigned int h;
    int i, k;

    /* Special case for len == 1 */
    if (len == 1) {
        a[1] = (unsigned short)
            ((((unsigned long) a[0] << 16) + a[1]) % m[0]);
        a[0] = 0;
        return;
    }

    m0 = m[0];
    m1 = m[1];

    for (i = 0; i <= len; i++) {
        unsigned long t;
        unsigned int q, r, c;

        /* h is the word left over above position i by the previous step */
        if (i == 0) {
            h = 0;
        } else {
            h = a[i-1];
            a[i-1] = 0;
        }

        /* Find q = h:a[i] / m0 */
        t = ((unsigned long) h << 16) + a[i];
        q = t / m0;
        r = t % m0;

        /* Refine our estimate of q by looking at
           h:a[i]:a[i+1] / m0:m1 */
        t = (unsigned long) m1 * (unsigned long) q;
        if (t > ((unsigned long) r << 16) + a[i+1]) {
            q--;
            t -= m1;
            r = (r + m0) & 0xffff; /* overflow? */
            /* r >= m0 means the addition above did not wrap */
            if (r >= m0 && t > ((unsigned long) r << 16) + a[i+1])
                q--;
        }

        /* Subtract q * m from a[i...] */
        c = 0;
        for (k = len - 1; k >= 0; k--) {
            t = (unsigned long) q * (unsigned long) m[k];
            t += c;
            c = t >> 16;
            /* a borrow out of this word joins the carry to the next */
            if ((unsigned short) t > a[i+k]) c++;
            a[i+k] -= (unsigned short) t;
        }

        /* Add back m in case of borrow */
        if (c != h) {
            t = 0;
            for (k = len - 1; k >= 0; k--) {
                t += m[k];
                t += a[i+k];
                a[i+k] = (unsigned short) t;
                t = t >> 16;
            }
        }
    }
}
164
165
166 /*
167 * Compute (base ^ exp) % mod.
168 * The base MUST be smaller than the modulus.
169 * The most significant word of mod MUST be non-zero.
170 * We assume that the result array is the same size as the mod array.
171 */
172 static void modpow(Bignum base, Bignum exp, Bignum mod, Bignum result)
173 {
174 unsigned short *a, *b, *n, *m;
175 int mshift;
176 int mlen, i, j;
177
178 /* Allocate m of size mlen, copy mod to m */
179 /* We use big endian internally */
180 mlen = mod[0];
181 m = malloc(mlen * sizeof(unsigned short));
182 for (j = 0; j < mlen; j++) m[j] = mod[mod[0] - j];
183
184 /* Shift m left to make msb bit set */
185 for (mshift = 0; mshift < 15; mshift++)
186 if ((m[0] << mshift) & 0x8000) break;
187 if (mshift) {
188 for (i = 0; i < mlen - 1; i++)
189 m[i] = (m[i] << mshift) | (m[i+1] >> (16-mshift));
190 m[mlen-1] = m[mlen-1] << mshift;
191 }
192
193 /* Allocate n of size mlen, copy base to n */
194 n = malloc(mlen * sizeof(unsigned short));
195 i = mlen - base[0];
196 for (j = 0; j < i; j++) n[j] = 0;
197 for (j = 0; j < base[0]; j++) n[i+j] = base[base[0] - j];
198
199 /* Allocate a and b of size 2*mlen. Set a = 1 */
200 a = malloc(2 * mlen * sizeof(unsigned short));
201 b = malloc(2 * mlen * sizeof(unsigned short));
202 for (i = 0; i < 2*mlen; i++) a[i] = 0;
203 a[2*mlen-1] = 1;
204
205 /* Skip leading zero bits of exp. */
206 i = 0; j = 15;
207 while (i < exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
208 j--;
209 if (j < 0) { i++; j = 15; }
210 }
211
212 /* Main computation */
213 while (i < exp[0]) {
214 while (j >= 0) {
215 bigmul(a + mlen, a + mlen, b, mlen);
216 bigmod(b, m, mlen);
217 if ((exp[exp[0] - i] & (1 << j)) != 0) {
218 bigmul(b + mlen, n, a, mlen);
219 bigmod(a, m, mlen);
220 } else {
221 unsigned short *t;
222 t = a; a = b; b = t;
223 }
224 j--;
225 }
226 i++; j = 15;
227 }
228
229 /* Fixup result in case the modulus was shifted */
230 if (mshift) {
231 for (i = mlen - 1; i < 2*mlen - 1; i++)
232 a[i] = (a[i] << mshift) | (a[i+1] >> (16-mshift));
233 a[2*mlen-1] = a[2*mlen-1] << mshift;
234 bigmod(a, m, mlen);
235 for (i = 2*mlen - 1; i >= mlen; i--)
236 a[i] = (a[i] >> mshift) | (a[i-1] << (16-mshift));
237 }
238
239 /* Copy result to buffer */
240 for (i = 0; i < mlen; i++)
241 result[result[0] - i] = a[i+mlen];
242
243 /* Free temporary arrays */
244 for (i = 0; i < 2*mlen; i++) a[i] = 0; free(a);
245 for (i = 0; i < 2*mlen; i++) b[i] = 0; free(b);
246 for (i = 0; i < mlen; i++) m[i] = 0; free(m);
247 for (i = 0; i < mlen; i++) n[i] = 0; free(n);
248 }
249
250 #else
251
252 static int msb(Bignum r) {
253 int i;
254 int j;
255 unsigned short n;
256
257 for (i=r[0]; i>0; i--)
258 if (r[i])
259 break;
260
261 j = (i-1)*16;
262 n = r[i];
263 if (n & 0xFF00) j += 8, n >>= 8;
264 if (n & 0x00F0) j += 4, n >>= 4;
265 if (n & 0x000C) j += 2, n >>= 2;
266 if (n & 0x0002) j += 1, n >>= 1;
267
268 return j;
269 }
270
271 static void add(Bignum r1, Bignum r2, Bignum result) {
272 int i;
273 long stuff = 0;
274
275 enter((">add\n"));
276 debug(r1);
277 debug(r2);
278
279 for (i = 1 ;; i++) {
280 if (i <= r1[0])
281 stuff += r1[i];
282 if (i <= r2[0])
283 stuff += r2[i];
284 if (i <= result[0])
285 result[i] = stuff & 0xFFFFU;
286 if (i > r1[0] && i > r2[0] && i >= result[0])
287 break;
288 stuff >>= 16;
289 }
290
291 debug(result);
292 leave(("<add\n"));
293 }
294
295 static void sub(Bignum r1, Bignum r2, Bignum result) {
296 int i;
297 long stuff = 0;
298
299 enter((">sub\n"));
300 debug(r1);
301 debug(r2);
302
303 for (i = 1 ;; i++) {
304 if (i <= r1[0])
305 stuff += r1[i];
306 if (i <= r2[0])
307 stuff -= r2[i];
308 if (i <= result[0])
309 result[i] = stuff & 0xFFFFU;
310 if (i > r1[0] && i > r2[0] && i >= result[0])
311 break;
312 stuff = stuff<0 ? -1 : 0;
313 }
314
315 debug(result);
316 leave(("<sub\n"));
317 }
318
319 static int ge(Bignum r1, Bignum r2) {
320 int i;
321
322 enter((">ge\n"));
323 debug(r1);
324 debug(r2);
325
326 if (r1[0] < r2[0])
327 i = r2[0];
328 else
329 i = r1[0];
330
331 while (i > 0) {
332 unsigned short n1 = (i > r1[0] ? 0 : r1[i]);
333 unsigned short n2 = (i > r2[0] ? 0 : r2[i]);
334
335 if (n1 > n2) {
336 dmsg(("greater\n"));
337 leave(("<ge\n"));
338 return 1; /* r1 > r2 */
339 } else if (n1 < n2) {
340 dmsg(("less\n"));
341 leave(("<ge\n"));
342 return 0; /* r1 < r2 */
343 }
344
345 i--;
346 }
347
348 dmsg(("equal\n"));
349 leave(("<ge\n"));
350 return 1; /* r1 = r2 */
351 }
352
353 static void modmult(Bignum r1, Bignum r2, Bignum modulus, Bignum result) {
354 Bignum temp = newbn(modulus[0]+1);
355 Bignum tmp2 = newbn(modulus[0]+1);
356 int i;
357 int bit, bits, digit, smallbit;
358
359 enter((">modmult\n"));
360 debug(r1);
361 debug(r2);
362 debug(modulus);
363
364 for (i=1; i<=result[0]; i++)
365 result[i] = 0; /* result := 0 */
366 for (i=1; i<=temp[0]; i++)
367 temp[i] = (i > r2[0] ? 0 : r2[i]); /* temp := r2 */
368
369 bits = 1+msb(r1);
370
371 for (bit = 0; bit < bits; bit++) {
372 digit = 1 + bit / 16;
373 smallbit = bit % 16;
374
375 debug(temp);
376 if (digit <= r1[0] && (r1[digit] & (1<<smallbit))) {
377 dmsg(("bit %d\n", bit));
378 add(temp, result, tmp2);
379 if (ge(tmp2, modulus))
380 sub(tmp2, modulus, result);
381 else
382 add(tmp2, Zero, result);
383 debug(result);
384 }
385
386 add(temp, temp, tmp2);
387 if (ge(tmp2, modulus))
388 sub(tmp2, modulus, temp);
389 else
390 add(tmp2, Zero, temp);
391 }
392
393 freebn(temp);
394 freebn(tmp2);
395
396 debug(result);
397 leave(("<modmult\n"));
398 }
399
400 static void modpow(Bignum r1, Bignum r2, Bignum modulus, Bignum result) {
401 Bignum temp = newbn(modulus[0]+1);
402 Bignum tmp2 = newbn(modulus[0]+1);
403 int i;
404 int bit, bits, digit, smallbit;
405
406 enter((">modpow\n"));
407 debug(r1);
408 debug(r2);
409 debug(modulus);
410
411 for (i=1; i<=result[0]; i++)
412 result[i] = (i==1); /* result := 1 */
413 for (i=1; i<=temp[0]; i++)
414 temp[i] = (i > r1[0] ? 0 : r1[i]); /* temp := r1 */
415
416 bits = 1+msb(r2);
417
418 for (bit = 0; bit < bits; bit++) {
419 digit = 1 + bit / 16;
420 smallbit = bit % 16;
421
422 debug(temp);
423 if (digit <= r2[0] && (r2[digit] & (1<<smallbit))) {
424 dmsg(("bit %d\n", bit));
425 modmult(temp, result, modulus, tmp2);
426 add(tmp2, Zero, result);
427 debug(result);
428 }
429
430 modmult(temp, temp, modulus, tmp2);
431 add(tmp2, Zero, temp);
432 }
433
434 freebn(temp);
435 freebn(tmp2);
436
437 debug(result);
438 leave(("<modpow\n"));
439 }
440
441 #endif
442
443 int makekey(unsigned char *data, struct RSAKey *result,
444 unsigned char **keystr) {
445 unsigned char *p = data;
446 Bignum bn[2];
447 int i, j;
448 int w, b;
449
450 result->bits = 0;
451 for (i=0; i<4; i++)
452 result->bits = (result->bits << 8) + *p++;
453
454 for (j=0; j<2; j++) {
455
456 w = 0;
457 for (i=0; i<2; i++)
458 w = (w << 8) + *p++;
459
460 result->bytes = b = (w+7)/8; /* bits -> bytes */
461 w = (w+15)/16; /* bits -> words */
462
463 bn[j] = newbn(w);
464
465 if (keystr) *keystr = p; /* point at key string, second time */
466
467 for (i=1; i<=w; i++)
468 bn[j][i] = 0;
469 for (i=0; i<b; i++) {
470 unsigned char byte = *p++;
471 if ((b-i) & 1)
472 bn[j][w-i/2] |= byte;
473 else
474 bn[j][w-i/2] |= byte<<8;
475 }
476
477 debug(bn[j]);
478
479 }
480
481 result->exponent = bn[0];
482 result->modulus = bn[1];
483
484 return p - data;
485 }
486
487 void rsaencrypt(unsigned char *data, int length, struct RSAKey *key) {
488 Bignum b1, b2;
489 int w, i;
490 unsigned char *p;
491
492 debug(key->exponent);
493
494 memmove(data+key->bytes-length, data, length);
495 data[0] = 0;
496 data[1] = 2;
497
498 for (i = 2; i < key->bytes-length-1; i++) {
499 do {
500 data[i] = random_byte();
501 } while (data[i] == 0);
502 }
503 data[key->bytes-length-1] = 0;
504
505 w = (key->bytes+1)/2;
506
507 b1 = newbn(w);
508 b2 = newbn(w);
509
510 p = data;
511 for (i=1; i<=w; i++)
512 b1[i] = 0;
513 for (i=0; i<key->bytes; i++) {
514 unsigned char byte = *p++;
515 if ((key->bytes-i) & 1)
516 b1[w-i/2] |= byte;
517 else
518 b1[w-i/2] |= byte<<8;
519 }
520
521 debug(b1);
522
523 modpow(b1, key->exponent, key->modulus, b2);
524
525 debug(b2);
526
527 p = data;
528 for (i=0; i<key->bytes; i++) {
529 unsigned char b;
530 if (i & 1)
531 b = b2[w-i/2] & 0xFF;
532 else
533 b = b2[w-i/2] >> 8;
534 *p++ = b;
535 }
536
537 freebn(b1);
538 freebn(b2);
539 }
540
541 int rsastr_len(struct RSAKey *key) {
542 Bignum md, ex;
543
544 md = key->modulus;
545 ex = key->exponent;
546 return 4 * (ex[0]+md[0]) + 10;
547 }
548
549 void rsastr_fmt(char *str, struct RSAKey *key) {
550 Bignum md, ex;
551 int len = 0, i;
552
553 md = key->modulus;
554 ex = key->exponent;
555
556 for (i=1; i<=ex[0]; i++) {
557 sprintf(str+len, "%04x", ex[i]);
558 len += strlen(str+len);
559 }
560 str[len++] = '/';
561 for (i=1; i<=md[0]; i++) {
562 sprintf(str+len, "%04x", md[i]);
563 len += strlen(str+len);
564 }
565 str[len] = '\0';
566 }
567
#ifdef TESTMODE

/*
 * Self-test harness.
 *
 * It multiplies three small primes to form a modulus, computes
 * (p1-1)*(p2-1)*(p3-1), and raises 2 to that power modulo the product,
 * dumping every intermediate value through debug().
 *
 * The harness is written against the bit-at-a-time helpers modmult() and
 * sub(), which exist only when JORIS_RSA is not defined, and its global
 * `bigmod` would clash with the bigmod() function of the JORIS_RSA code.
 * Fail early with a clear message instead of a run of unrelated errors.
 */
#ifdef JORIS_RSA
#error "TESTMODE uses modmult()/sub(); build it without JORIS_RSA defined"
#endif

#ifndef NODDY
#define p1 10007
#define p2 10069
#define p3 10177
#else
#define p1 3
#define p2 7
#define p3 13
#endif

/* Bignum literals: element 0 is the word count, then the words LSW first */
unsigned short P1[2] = { 1, p1 };
unsigned short P2[2] = { 1, p2 };
unsigned short P3[2] = { 1, p3 };
/* 2^63: a modulus larger than every product formed below */
unsigned short bigmod[5] = { 4, 0, 0, 0, 32768U };
unsigned short mod[5] = { 4, 0, 0, 0, 0 };
unsigned short a[5] = { 4, 0, 0, 0, 0 };
unsigned short b[5] = { 4, 0, 0, 0, 0 };
unsigned short c[5] = { 4, 0, 0, 0, 0 };
unsigned short One[2] = { 1, 1 };
unsigned short Two[2] = { 1, 2 };

int main(void) {
    /* mod = p1 * p2 * p3 */
    modmult(P1, P2, bigmod, a); debug(a);
    modmult(a, P3, bigmod, mod); debug(mod);

    /* b = (p1-1) * (p2-1) * (p3-1) */
    sub(P1, One, a); debug(a);
    sub(P2, One, b); debug(b);
    modmult(a, b, bigmod, c); debug(c);
    sub(P3, One, a); debug(a);
    modmult(a, c, bigmod, b); debug(b);

    /* a = 2^b mod (p1*p2*p3) */
    modpow(Two, b, mod, a); debug(a);

    return 0;
}

#endif