git.distorted.org.uk Git - u/mdw/putty/blob - sshbn.c

   1 /*
   2  * Bignum routines for RSA and DH and stuff.
   3  */
   4
   5 #include <stdio.h>
   6 #include <assert.h>
   7 #include <stdlib.h>
   8 #include <string.h>
   9
  10 #include "misc.h"
  11 #include "bn-internal.h"
  12 #include "ssh.h"
  13
  14 BignumInt bnZero[1] = { 0 };
  15 BignumInt bnOne[2] = { 1, 1 };
  16
  17 /*
  18  * The Bignum format is an array of `BignumInt'. The first
  19  * element of the array counts the remaining elements. The
  20  * remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_
  21  * significant digit first. (So it's trivial to extract the bit
  22  * with value 2^n for any n.)
  23  *
  24  * All Bignums in this module are positive. Negative numbers must
  25  * be dealt with outside it.
  26  *
  27  * INVARIANT: the most significant word of any Bignum must be
  28  * nonzero.
  29  */
  30
  31 Bignum Zero = bnZero, One = bnOne;
  32
  33 static Bignum newbn(int length)
  34 {
  35     Bignum b = snewn(length + 1, BignumInt);
  36     if (!b)
  37         abort();                       /* FIXME */
  38     memset(b, 0, (length + 1) * sizeof(*b));
  39     b[0] = length;
  40     return b;
  41 }
  42
  43 void bn_restore_invariant(Bignum b)
  44 {
  45     while (b[0] > 1 && b[b[0]] == 0)
  46         b[0]--;
  47 }
  48
  49 Bignum copybn(Bignum orig)
  50 {
  51     Bignum b = snewn(orig[0] + 1, BignumInt);
  52     if (!b)
  53         abort();                       /* FIXME */
  54     memcpy(b, orig, (orig[0] + 1) * sizeof(*b));
  55     return b;
  56 }
  57
  58 void freebn(Bignum b)
  59 {
  60     /*
  61      * Burn the evidence, just in case.
  62      */
  63     smemclr(b, sizeof(b[0]) * (b[0] + 1));
  64     sfree(b);
  65 }
  66
  67 Bignum bn_power_2(int n)
  68 {
  69     Bignum ret = newbn(n / BIGNUM_INT_BITS + 1);
  70     bignum_set_bit(ret, n, 1);
  71     return ret;
  72 }
  73
  74 /*
  75  * Internal addition. Sets c = a - b, where 'a', 'b' and 'c' are all
  76  * little-endian arrays of 'len' BignumInts. Returns a BignumInt carried
  77  * off the top.
  78  */
  79 static BignumInt internal_add(const BignumInt *a, const BignumInt *b,
  80                               BignumInt *c, int len)
  81 {
  82     int i;
  83     BignumDblInt carry = 0;
  84
  85     for (i = 0; i < len; i++) {
  86         carry += (BignumDblInt)a[i] + b[i];
  87         c[i] = (BignumInt)carry;
  88         carry >>= BIGNUM_INT_BITS;
  89     }
  90
  91     return (BignumInt)carry;
  92 }
  93
  94 /*
  95  * Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
  96  * all little-endian arrays of 'len' BignumInts. Any borrow from the top
  97  * is ignored.
  98  */
  99 static void internal_sub(const BignumInt *a, const BignumInt *b,
 100                          BignumInt *c, int len)
 101 {
 102     int i;
 103     BignumDblInt carry = 1;
 104
 105     for (i = 0; i < len; i++) {
 106         carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
 107         c[i] = (BignumInt)carry;
 108         carry >>= BIGNUM_INT_BITS;
 109     }
 110 }
 111
 112 /*
 113  * Compute c = a * b.
 114  * Input is in the first len words of a and b.
 115  * Result is returned in the first 2*len words of c.
 116  *
 117  * 'scratch' must point to an array of BignumInt of size at least
 118  * mul_compute_scratch(len). (This covers the needs of internal_mul
 119  * and all its recursive calls to itself.)
 120  */
 121 #define KARATSUBA_THRESHOLD 50
 122 static int mul_compute_scratch(int len)
 123 {
 124     int ret = 0;
 125     while (len > KARATSUBA_THRESHOLD) {
 126         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
 127         int midlen = botlen + 1;
 128         ret += 4*midlen;
 129         len = midlen;
 130     }
 131     return ret;
 132 }
 133 static void internal_mul(const BignumInt *a, const BignumInt *b,
 134                          BignumInt *c, int len, BignumInt *scratch)
 135 {
 136     if (len > KARATSUBA_THRESHOLD) {
 137         int i;
 138
 139         /*
 140          * Karatsuba divide-and-conquer algorithm. Cut each input in
 141          * half, so that it's expressed as two big 'digits' in a giant
 142          * base D:
 143          *
 144          *   a = a_1 D + a_0
 145          *   b = b_1 D + b_0
 146          *
 147          * Then the product is of course
 148          *
 149          *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
 150          *
 151          * and we compute the three coefficients by recursively
 152          * calling ourself to do half-length multiplications.
 153          *
 154          * The clever bit that makes this worth doing is that we only
 155          * need _one_ half-length multiplication for the central
 156          * coefficient rather than the two that it obviouly looks
 157          * like, because we can use a single multiplication to compute
 158          *
 159          *   (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
 160          *
 161          * and then we subtract the other two coefficients (a_1 b_1
 162          * and a_0 b_0) which we were computing anyway.
 163          *
 164          * Hence we get to multiply two numbers of length N in about
 165          * three times as much work as it takes to multiply numbers of
 166          * length N/2, which is obviously better than the four times
 167          * as much work it would take if we just did a long
 168          * conventional multiply.
 169          */
 170
 171         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
 172         int midlen = botlen + 1;
 173         BignumDblInt carry;
 174
 175         /*
 176          * The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping
 177          * in the output array, so we can compute them immediately in
 178          * place.
 179          */
 180
 181 #ifdef KARA_DEBUG
 182         printf("a1,a0 = 0x");
 183         for (i = 0; i < len; i++) {
 184             if (i == toplen) printf(", 0x");
 185             printf("%0*x", BIGNUM_INT_BITS/4, a[len - 1 - i]);
 186         }
 187         printf("\n");
 188         printf("b1,b0 = 0x");
 189         for (i = 0; i < len; i++) {
 190             if (i == toplen) printf(", 0x");
 191             printf("%0*x", BIGNUM_INT_BITS/4, b[len - 1 - i]);
 192         }
 193         printf("\n");
 194 #endif
 195
 196         /* a_1 b_1 */
 197         internal_mul(a + botlen, b + botlen, c + 2*botlen, toplen, scratch);
 198 #ifdef KARA_DEBUG
 199         printf("a1b1 = 0x");
 200         for (i = 0; i < 2*toplen; i++) {
 201             printf("%0*x", BIGNUM_INT_BITS/4, c[2*len - 1 - i]);
 202         }
 203         printf("\n");
 204 #endif
 205
 206         /* a_0 b_0 */
 207         internal_mul(a, b, c, botlen, scratch);
 208 #ifdef KARA_DEBUG
 209         printf("a0b0 = 0x");
 210         for (i = 0; i < 2*botlen; i++) {
 211             printf("%0*x", BIGNUM_INT_BITS/4, c[2*botlen - 1 - i]);
 212         }
 213         printf("\n");
 214 #endif
 215
 216         /* Zero padding. botlen exceeds toplen by at most 1, and we'll set
 217          * the extra carry explicitly below, so we only need to zero at most
 218          * one of the top words here.
 219          */
 220         scratch[midlen - 2] = scratch[2*midlen - 2] = 0;
 221
 222         for (i = 0; i < toplen; i++) {
 223             scratch[i] = a[i + botlen]; /* a_1 */
 224             scratch[midlen + i] = b[i + botlen]; /* b_1 */
 225         }
 226
 227         /* compute a_1 + a_0 */
 228         scratch[midlen - 1] = internal_add(scratch, a, scratch, botlen);
 229 #ifdef KARA_DEBUG
 230         printf("a1plusa0 = 0x");
 231         for (i = 0; i < midlen; i++) {
 232             printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen - 1 - i]);
 233         }
 234         printf("\n");
 235 #endif
 236         /* compute b_1 + b_0 */
 237         scratch[2*midlen - 1] = internal_add(scratch+midlen, b,
 238                                              scratch+midlen, botlen);
 239 #ifdef KARA_DEBUG
 240         printf("b1plusb0 = 0x");
 241         for (i = 0; i < midlen; i++) {
 242             printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen - 1 - i]);
 243         }
 244         printf("\n");
 245 #endif
 246
 247         /*
 248          * Now we can do the third multiplication.
 249          */
 250         internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen,
 251                      scratch + 4*midlen);
 252 #ifdef KARA_DEBUG
 253         printf("a1plusa0timesb1plusb0 = 0x");
 254         for (i = 0; i < 2*midlen; i++) {
 255             printf("%0*x", BIGNUM_INT_BITS/4, scratch[4*midlen - 1 - i]);
 256         }
 257         printf("\n");
 258 #endif
 259
 260         /*
 261          * Now we can reuse the first half of 'scratch' to compute the
 262          * sum of the outer two coefficients, to subtract from that
 263          * product to obtain the middle one.
 264          */
 265         scratch[2*botlen - 2] = scratch[2*botlen - 1] = 0;
 266         for (i = 0; i < 2*toplen; i++)
 267             scratch[i] = c[2*botlen + i];
 268         scratch[2*botlen] = internal_add(scratch, c, scratch, 2*botlen);
 269         scratch[2*botlen + 1] = 0;
 270 #ifdef KARA_DEBUG
 271         printf("a1b1plusa0b0 = 0x");
 272         for (i = 0; i < 2*midlen; i++) {
 273             printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen - 1 - i]);
 274         }
 275         printf("\n");
 276 #endif
 277
 278         internal_sub(scratch + 2*midlen, scratch, scratch, 2*midlen);
 279 #ifdef KARA_DEBUG
 280         printf("a1b0plusa0b1 = 0x");
 281         for (i = 0; i < 2*midlen; i++) {
 282             printf("%0*x", BIGNUM_INT_BITS/4, scratch[4*midlen - 1 - i]);
 283         }
 284         printf("\n");
 285 #endif
 286
 287         /*
 288          * And now all we need to do is to add that middle coefficient
 289          * back into the output. We may have to propagate a carry
 290          * further up the output, but we can be sure it won't
 291          * propagate right the way off the top.
 292          */
 293         carry = internal_add(c + botlen, scratch, c + botlen, 2*midlen);
 294         i = botlen + 2*midlen;
 295         while (carry) {
 296             assert(i <= 2*len);
 297             carry += c[i];
 298             c[i] = (BignumInt)carry;
 299             carry >>= BIGNUM_INT_BITS;
 300             i++;
 301         }
 302 #ifdef KARA_DEBUG
 303         printf("ab = 0x");
 304         for (i = 0; i < 2*len; i++) {
 305             printf("%0*x", BIGNUM_INT_BITS/4, c[2*len - i]);
 306         }
 307         printf("\n");
 308 #endif
 309
 310     } else {
 311         int i;
 312         BignumInt carry;
 313         BignumDblInt t;
 314         const BignumInt *ap, *alim = a + len, *bp, *blim = b + len;
 315         BignumInt *cp, *cps;
 316
 317         /*
 318          * Multiply in the ordinary O(N^2) way.
 319          */
 320
 321         for (i = 0; i < 2 * len; i++)
 322             c[i] = 0;
 323
 324         for (cps = c, ap = a; ap < alim; ap++, cps++) {
 325             carry = 0;
 326             for (cp = cps, bp = b, i = blim - bp; i--; bp++, cp++) {
 327                 t = (MUL_WORD(*ap, *bp) + carry) + *cp;
 328                 *cp = (BignumInt) t;
 329                 carry = (BignumInt)(t >> BIGNUM_INT_BITS);
 330             }
 331             *cp = carry;
 332         }
 333     }
 334 }
 335
 336 /*
 337  * Variant form of internal_mul used for the initial step of
 338  * Montgomery reduction. Only bothers outputting 'len' words
 339  * (everything above that is thrown away).
 340  */
 341 static void internal_mul_low(const BignumInt *a, const BignumInt *b,
 342                              BignumInt *c, int len, BignumInt *scratch)
 343 {
 344     if (len > KARATSUBA_THRESHOLD) {
 345         int i;
 346
 347         /*
 348          * Karatsuba-aware version of internal_mul_low. As before, we
 349          * express each input value as a shifted combination of two
 350          * halves:
 351          *
 352          *   a = a_1 D + a_0
 353          *   b = b_1 D + b_0
 354          *
 355          * Then the full product is, as before,
 356          *
 357          *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
 358          *
 359          * Provided we choose D on the large side (so that a_0 and b_0
 360          * are _at least_ as long as a_1 and b_1), we don't need the
 361          * topmost term at all, and we only need half of the middle
 362          * term. So there's no point in doing the proper Karatsuba
 363          * optimisation which computes the middle term using the top
 364          * one, because we'd take as long computing the top one as
 365          * just computing the middle one directly.
 366          *
 367          * So instead, we do a much more obvious thing: we call the
 368          * fully optimised internal_mul to compute a_0 b_0, and we
 369          * recursively call ourself to compute the _bottom halves_ of
 370          * a_1 b_0 and a_0 b_1, each of which we add into the result
 371          * in the obvious way.
 372          *
 373          * In other words, there's no actual Karatsuba _optimisation_
 374          * in this function; the only benefit in doing it this way is
 375          * that we call internal_mul proper for a large part of the
 376          * work, and _that_ can optimise its operation.
 377          */
 378
 379         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
 380
 381         /*
 382          * Scratch space for the various bits and pieces we're going
 383          * to be adding together: we need botlen*2 words for a_0 b_0
 384          * (though we may end up throwing away its topmost word), and
 385          * toplen words for each of a_1 b_0 and a_0 b_1. That adds up
 386          * to exactly 2*len.
 387          */
 388
 389         /* a_0 b_0 */
 390         internal_mul(a, b, scratch + 2*toplen, botlen, scratch + 2*len);
 391
 392         /* a_1 b_0 */
 393         internal_mul_low(a + botlen, b, scratch + toplen, toplen,
 394                          scratch + 2*len);
 395
 396         /* a_0 b_1 */
 397         internal_mul_low(a, b + botlen, scratch, toplen, scratch + 2*len);
 398
 399         /* Copy the bottom half of the big coefficient into place */
 400         for (i = 0; i < botlen; i++)
 401             c[i] = scratch[2*toplen + i];
 402
 403         /* Add the two small coefficients, throwing away the returned carry */
 404         internal_add(scratch, scratch + toplen, scratch, toplen);
 405
 406         /* And add that to the large coefficient, leaving the result in c. */
 407         internal_add(scratch, scratch + 2*toplen + botlen,
 408                      c + botlen, toplen);
 409
 410     } else {
 411         int i;
 412         BignumInt carry;
 413         BignumDblInt t;
 414         const BignumInt *ap, *alim = a + len, *bp;
 415         BignumInt *cp, *cps, *clim = c + len;
 416
 417         /*
 418          * Multiply in the ordinary O(N^2) way.
 419          */
 420
 421         for (i = 0; i < len; i++)
 422             c[i] = 0;
 423
 424         for (cps = c, ap = a; ap < alim; ap++, cps++) {
 425             carry = 0;
 426             for (cp = cps, bp = b, i = clim - cp; i--; bp++, cp++) {
 427                 t = (MUL_WORD(*ap, *bp) + carry) + *cp;
 428                 *cp = (BignumInt) t;
 429                 carry = (BignumInt)(t >> BIGNUM_INT_BITS);
 430             }
 431         }
 432     }
 433 }
 434
 435 /*
 436  * Montgomery reduction. Expects x to be a little-endian array of 2*len
 437  * BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
 438  * BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
 439  * a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
 440  * x' < n.
 441  *
 442  * 'n' and 'mninv' should be little-endian arrays of 'len' BignumInts
 443  * each, containing respectively n and the multiplicative inverse of
 444  * -n mod r.
 445  *
 446  * 'tmp' is an array of BignumInt used as scratch space, of length at
 447  * least 3*len + mul_compute_scratch(len).
 448  */
 449 static void monty_reduce(BignumInt *x, const BignumInt *n,
 450                          const BignumInt *mninv, BignumInt *tmp, int len)
 451 {
 452     int i;
 453     BignumInt carry;
 454
 455     /*
 456      * Multiply x by (-n)^{-1} mod r. This gives us a value m such
 457      * that mn is congruent to -x mod r. Hence, mn+x is an exact
 458      * multiple of r, and is also (obviously) congruent to x mod n.
 459      */
 460     internal_mul_low(x, mninv, tmp, len, tmp + 3*len);
 461
 462     /*
 463      * Compute t = (mn+x)/r in ordinary, non-modular, integer
 464      * arithmetic. By construction this is exact, and is congruent mod
 465      * n to x * r^{-1}, i.e. the answer we want.
 466      *
 467      * The following multiply leaves that answer in the _most_
 468      * significant half of the 'x' array, so then we must shift it
 469      * down.
 470      */
 471     internal_mul(tmp, n, tmp+len, len, tmp + 3*len);
 472     carry = internal_add(x, tmp+len, x, 2*len);
 473     for (i = 0; i < len; i++)
 474         x[i] = x[len + i], x[len + i] = 0;
 475
 476     /*
 477      * Reduce t mod n. This doesn't require a full-on division by n,
 478      * but merely a test and single optional subtraction, since we can
 479      * show that 0 <= t < 2n.
 480      *
 481      * Proof:
 482      *  + we computed m mod r, so 0 <= m < r.
 483      *  + so 0 <= mn < rn, obviously
 484      *  + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn
 485      *  + yielding 0 <= (mn+x)/r < 2n as required.
 486      */
 487     if (!carry) {
 488         for (i = len; i-- > 0; )
 489             if (x[i] != n[i])
 490                 break;
 491     }
 492     if (carry || i < 0 || x[i] > n[i])
 493         internal_sub(x, n, x, len);
 494 }
 495
 496 static void internal_add_shifted(BignumInt *number,
 497                                  unsigned n, int shift)
 498 {
 499     int word = 1 + (shift / BIGNUM_INT_BITS);
 500     int bshift = shift % BIGNUM_INT_BITS;
 501     BignumDblInt addend;
 502
 503     addend = (BignumDblInt)n << bshift;
 504
 505     while (addend) {
 506         addend += number[word];
 507         number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
 508         addend >>= BIGNUM_INT_BITS;
 509         word++;
 510     }
 511 }
 512
 513 /*
 514  * Compute a = a % m.
 515  * Input in first alen words of a and first mlen words of m.
 516  * Output in first alen words of a
 517  * (of which last alen-mlen words will be zero).
 518  * The MSW of m MUST have its high bit set.
 519  * Quotient is accumulated in the `quotient' array. Quotient parts
 520  * are shifted left by `qshift' before adding into quot.
 521  */
 522 static void internal_mod(BignumInt *a, int alen,
 523                          BignumInt *m, int mlen,
 524                          BignumInt *quot, int qshift)
 525 {
 526     BignumInt m0, m1;
 527     unsigned int h;
 528     int i, j, k;
 529
 530     m0 = m[mlen - 1];
 531     if (mlen > 1)
 532         m1 = m[mlen - 2];
 533     else
 534         m1 = 0;
 535
 536     for (i = alen, h = 0; i-- >= mlen; ) {
 537         BignumDblInt t;
 538         unsigned int q, r, c, ai1;
 539
 540         if (i)
 541             ai1 = a[i - 1];
 542         else
 543             ai1 = 0;
 544
 545         /* Find q = h:a[i] / m0 */
 546         if (h >= m0) {
 547             /*
 548              * Special case.
 549              *
 550              * To illustrate it, suppose a BignumInt is 8 bits, and
 551              * we are dividing (say) A1:23:45:67 by A1:B2:C3. Then
 552              * our initial division will be 0xA123 / 0xA1, which
 553              * will give a quotient of 0x100 and a divide overflow.
 554              * However, the invariants in this division algorithm
 555              * are not violated, since the full number A1:23:... is
 556              * _less_ than the quotient prefix A1:B2:... and so the
 557              * following correction loop would have sorted it out.
 558              *
 559              * In this situation we set q to be the largest
 560              * quotient we _can_ stomach (0xFF, of course).
 561              */
 562             q = BIGNUM_INT_MASK;
 563         } else {
 564             /* Macro doesn't want an array subscript expression passed
 565              * into it (see definition), so use a temporary. */
 566             BignumInt tmplo = a[i];
 567             DIVMOD_WORD(q, r, h, tmplo, m0);
 568
 569             /* Refine our estimate of q by looking at
 570              h:a[i]:a[i-1] / m0:m1 */
 571             t = MUL_WORD(m1, q);
 572             if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
 573                 q--;
 574                 t -= m1;
 575                 r = (r + m0) & BIGNUM_INT_MASK;     /* overflow? */
 576                 if (r >= (BignumDblInt) m0 &&
 577                     t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
 578             }
 579         }
 580
 581         j = i + 1 - mlen;
 582
 583         /* Subtract q * m from a[i...] */
 584         c = 0;
 585         for (k = 0; k < mlen; k++) {
 586             t = MUL_WORD(q, m[k]);
 587             t += c;
 588             c = (unsigned)(t >> BIGNUM_INT_BITS);
 589             if ((BignumInt) t > a[j + k])
 590                 c++;
 591             a[j + k] -= (BignumInt) t;
 592         }
 593
 594         /* Add back m in case of borrow */
 595         if (c != h) {
 596             t = 0;
 597             for (k = 0; k < mlen; k++) {
 598                 t += m[k];
 599                 t += a[j + k];
 600                 a[j + k] = (BignumInt) t;
 601                 t = t >> BIGNUM_INT_BITS;
 602             }
 603             q--;
 604         }
 605
 606         if (quot)
 607             internal_add_shifted(quot, q,
 608                                  qshift + BIGNUM_INT_BITS * (i + 1 - mlen));
 609
 610         if (i >= mlen) {
 611             h = a[i];
 612             a[i] = 0;
 613         }
 614     }
 615 }
 616
 617 static void shift_left(BignumInt *x, int xlen, int shift)
 618 {
 619     int i;
 620
 621     if (!shift)
 622         return;
 623     for (i = xlen; --i > 0; )
 624         x[i] = (x[i] << shift) | (x[i - 1] >> (BIGNUM_INT_BITS - shift));
 625     x[0] = x[0] << shift;
 626 }
 627
 628 static void shift_right(BignumInt *x, int xlen, int shift)
 629 {
 630     int i;
 631
 632     if (!shift || !xlen)
 633         return;
 634     xlen--;
 635     for (i = 0; i < xlen; i++)
 636         x[i] = (x[i] >> shift) | (x[i + 1] << (BIGNUM_INT_BITS - shift));
 637     x[i] = x[i] >> shift;
 638 }
 639
 640 /*
 641  * Compute (base ^ exp) % mod, the pedestrian way.
 642  */
 643 Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)
 644 {
 645     BignumInt *a, *b, *n, *m, *scratch;
 646     int mshift;
 647     int mlen, scratchlen, i, j;
 648     Bignum base, result;
 649
 650     /*
 651      * The most significant word of mod needs to be non-zero. It
 652      * should already be, but let's make sure.
 653      */
 654     assert(mod[mod[0]] != 0);
 655
 656     /*
 657      * Make sure the base is smaller than the modulus, by reducing
 658      * it modulo the modulus if not.
 659      */
 660     base = bigmod(base_in, mod);
 661
 662     /* Allocate m of size mlen, copy mod to m */
 663     mlen = mod[0];
 664     m = snewn(mlen, BignumInt);
 665     for (j = 0; j < mlen; j++)
 666         m[j] = mod[j + 1];
 667
 668     /* Shift m left to make msb bit set */
 669     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
 670         if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
 671             break;
 672     if (mshift)
 673         shift_left(m, mlen, mshift);
 674
 675     /* Allocate n of size mlen, copy base to n */
 676     n = snewn(mlen, BignumInt);
 677     for (i = 0; i < (int)base[0]; i++)
 678         n[i] = base[i + 1];
 679     for (; i < mlen; i++)
 680         n[i] = 0;
 681
 682     /* Allocate a and b of size 2*mlen. Set a = 1 */
 683     a = snewn(2 * mlen, BignumInt);
 684     b = snewn(2 * mlen, BignumInt);
 685     a[0] = 1;
 686     for (i = 1; i < 2 * mlen; i++)
 687         a[i] = 0;
 688
 689     /* Scratch space for multiplies */
 690     scratchlen = mul_compute_scratch(mlen);
 691     scratch = snewn(scratchlen, BignumInt);
 692
 693     /* Skip leading zero bits of exp. */
 694     i = 0;
 695     j = BIGNUM_INT_BITS-1;
 696     while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
 697         j--;
 698         if (j < 0) {
 699             i++;
 700             j = BIGNUM_INT_BITS-1;
 701         }
 702     }
 703
 704     /* Main computation */
 705     while (i < (int)exp[0]) {
 706         while (j >= 0) {
 707             internal_mul(a, a, b, mlen, scratch);
 708             internal_mod(b, mlen * 2, m, mlen, NULL, 0);
 709             if ((exp[exp[0] - i] & (1 << j)) != 0) {
 710                 internal_mul(b, n, a, mlen, scratch);
 711                 internal_mod(a, mlen * 2, m, mlen, NULL, 0);
 712             } else {
 713                 BignumInt *t;
 714                 t = a;
 715                 a = b;
 716                 b = t;
 717             }
 718             j--;
 719         }
 720         i++;
 721         j = BIGNUM_INT_BITS-1;
 722     }
 723
 724     /* Fixup result in case the modulus was shifted */
 725     if (mshift) {
 726         shift_left(a, mlen + 1, mshift);
 727         internal_mod(a, mlen + 1, m, mlen, NULL, 0);
 728         shift_right(a, mlen, mshift);
 729     }
 730
 731     /* Copy result to buffer */
 732     result = newbn(mod[0]);
 733     for (i = 0; i < mlen; i++)
 734         result[i + 1] = a[i];
 735     while (result[0] > 1 && result[result[0]] == 0)
 736         result[0]--;
 737
 738     /* Free temporary arrays */
 739     for (i = 0; i < 2 * mlen; i++)
 740         a[i] = 0;
 741     sfree(a);
 742     for (i = 0; i < scratchlen; i++)
 743         scratch[i] = 0;
 744     sfree(scratch);
 745     for (i = 0; i < 2 * mlen; i++)
 746         b[i] = 0;
 747     sfree(b);
 748     for (i = 0; i < mlen; i++)
 749         m[i] = 0;
 750     sfree(m);
 751     for (i = 0; i < mlen; i++)
 752         n[i] = 0;
 753     sfree(n);
 754
 755     freebn(base);
 756
 757     return result;
 758 }
 759
 760 /*
 761  * Compute (base ^ exp) % mod. Uses the Montgomery multiplication
 762  * technique where possible, falling back to modpow_simple otherwise.
 763  */
 764 Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
 765 {
 766     BignumInt *a, *b, *x, *n, *mninv, *scratch;
 767     int len, scratchlen, i, j;
 768     Bignum base, base2, r, rn, inv, result;
 769
 770     /*
 771      * The most significant word of mod needs to be non-zero. It
 772      * should already be, but let's make sure.
 773      */
 774     assert(mod[mod[0]] != 0);
 775
 776     /*
 777      * mod had better be odd, or we can't do Montgomery multiplication
 778      * using a power of two at all.
 779      */
 780     if (!(mod[1] & 1))
 781         return modpow_simple(base_in, exp, mod);
 782
 783     /*
 784      * Make sure the base is smaller than the modulus, by reducing
 785      * it modulo the modulus if not.
 786      */
 787     base = bigmod(base_in, mod);
 788
 789     /*
 790      * Compute the inverse of n mod r, for monty_reduce. (In fact we
 791      * want the inverse of _minus_ n mod r, but we'll sort that out
 792      * below.)
 793      */
 794     len = mod[0];
 795     r = bn_power_2(BIGNUM_INT_BITS * len);
 796     inv = modinv(mod, r);
 797
 798     /*
 799      * Multiply the base by r mod n, to get it into Montgomery
 800      * representation.
 801      */
 802     base2 = modmul(base, r, mod);
 803     freebn(base);
 804     base = base2;
 805
 806     rn = bigmod(r, mod);               /* r mod n, i.e. Montgomerified 1 */
 807
 808     freebn(r);                         /* won't need this any more */
 809
 810     /*
 811      * Set up internal arrays of the right lengths containing the base,
 812      * the modulus, and the modulus's inverse.
 813      */
 814     n = snewn(len, BignumInt);
 815     for (j = 0; j < len; j++)
 816         n[j] = mod[j + 1];
 817
 818     mninv = snewn(len, BignumInt);
 819     for (j = 0; j < len; j++)
 820         mninv[j] = (j < (int)inv[0] ? inv[j + 1] : 0);
 821     freebn(inv);         /* we don't need this copy of it any more */
 822     /* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
 823     x = snewn(len, BignumInt);
 824     for (j = 0; j < len; j++)
 825         x[j] = 0;
 826     internal_sub(x, mninv, mninv, len);
 827
 828     /* x = snewn(len, BignumInt); */ /* already done above */
 829     for (j = 0; j < len; j++)
 830         x[j] = (j < (int)base[0] ? base[j + 1] : 0);
 831     freebn(base);        /* we don't need this copy of it any more */
 832
 833     a = snewn(2*len, BignumInt);
 834     b = snewn(2*len, BignumInt);
 835     for (j = 0; j < len; j++)
 836         a[j] = (j < (int)rn[0] ? rn[j + 1] : 0);
 837     freebn(rn);
 838
 839     /* Scratch space for multiplies */
 840     scratchlen = 3*len + mul_compute_scratch(len);
 841     scratch = snewn(scratchlen, BignumInt);
 842
 843     /* Skip leading zero bits of exp. */
 844     i = 0;
 845     j = BIGNUM_INT_BITS-1;
 846     while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
 847         j--;
 848         if (j < 0) {
 849             i++;
 850             j = BIGNUM_INT_BITS-1;
 851         }
 852     }
 853
 854     /* Main computation */
 855     while (i < (int)exp[0]) {
 856         while (j >= 0) {
 857             internal_mul(a, a, b, len, scratch);
 858             monty_reduce(b, n, mninv, scratch, len);
 859             if ((exp[exp[0] - i] & (1 << j)) != 0) {
 860                 internal_mul(b, x, a, len,  scratch);
 861                 monty_reduce(a, n, mninv, scratch, len);
 862             } else {
 863                 BignumInt *t;
 864                 t = a;
 865                 a = b;
 866                 b = t;
 867             }
 868             j--;
 869         }
 870         i++;
 871         j = BIGNUM_INT_BITS-1;
 872     }
 873
 874     /*
 875      * Final monty_reduce to get back from the adjusted Montgomery
 876      * representation.
 877      */
 878     monty_reduce(a, n, mninv, scratch, len);
 879
 880     /* Copy result to buffer */
 881     result = newbn(mod[0]);
 882     for (i = 0; i < len; i++)
 883         result[i + 1] = a[i];
 884     while (result[0] > 1 && result[result[0]] == 0)
 885         result[0]--;
 886
 887     /* Free temporary arrays */
 888     for (i = 0; i < scratchlen; i++)
 889         scratch[i] = 0;
 890     sfree(scratch);
 891     for (i = 0; i < 2 * len; i++)
 892         a[i] = 0;
 893     sfree(a);
 894     for (i = 0; i < 2 * len; i++)
 895         b[i] = 0;
 896     sfree(b);
 897     for (i = 0; i < len; i++)
 898         mninv[i] = 0;
 899     sfree(mninv);
 900     for (i = 0; i < len; i++)
 901         n[i] = 0;
 902     sfree(n);
 903     for (i = 0; i < len; i++)
 904         x[i] = 0;
 905     sfree(x);
 906
 907     return result;
 908 }
 909
 910 /*
 911  * Compute (p * q) % mod.
 912  * The most significant word of mod MUST be non-zero.
 913  * We assume that the result array is the same size as the mod array.
 914  */
 915 Bignum modmul(Bignum p, Bignum q, Bignum mod)
 916 {
 917     BignumInt *a, *n, *m, *o, *scratch;
 918     int mshift, scratchlen;
 919     int pqlen, mlen, rlen, i, j;
 920     Bignum result;
 921
 922     /* Allocate m of size mlen, copy mod to m */
 923     mlen = mod[0];
 924     m = snewn(mlen, BignumInt);
 925     for (j = 0; j < mlen; j++)
 926         m[j] = mod[j + 1];
 927
 928     /* Shift m left to make msb bit set */
 929     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
 930         if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
 931             break;
 932     if (mshift)
 933         shift_left(m, mlen, mshift);
 934
 935     pqlen = (p[0] > q[0] ? p[0] : q[0]);
 936
 937     /* Make sure that we're allowing enough space.  The shifting below will
 938      * underflow the vectors we allocate if `pqlen' is too small.
 939      */
 940     if (2*pqlen <= mlen)
 941         pqlen = mlen/2 + 1;
 942
 943     /* Allocate n of size pqlen, copy p to n */
 944     n = snewn(pqlen, BignumInt);
 945     for (i = 0; i < (int)p[0]; i++)
 946         n[i] = p[i + 1];
 947     for (; i < pqlen; i++)
 948         n[i] = 0;
 949
 950     /* Allocate o of size pqlen, copy q to o */
 951     o = snewn(pqlen, BignumInt);
 952     for (i = 0; i < (int)q[0]; i++)
 953         o[i] = q[i + 1];
 954     for (; i < pqlen; i++)
 955         o[i] = 0;
 956
 957     /* Allocate a of size 2*pqlen for result */
 958     a = snewn(2 * pqlen, BignumInt);
 959
 960     /* Scratch space for multiplies */
 961     scratchlen = mul_compute_scratch(pqlen);
 962     scratch = snewn(scratchlen, BignumInt);
 963
 964     /* Main computation */
 965     internal_mul(n, o, a, pqlen, scratch);
 966     internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
 967
 968     /* Fixup result in case the modulus was shifted */
 969     if (mshift) {
 970         shift_left(a, mlen + 1, mshift);
 971         internal_mod(a, mlen + 1, m, mlen, NULL, 0);
 972         shift_right(a, mlen, mshift);
 973     }
 974
 975     /* Copy result to buffer */
 976     rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
 977     result = newbn(rlen);
 978     for (i = 0; i < rlen; i++)
 979         result[i + 1] = a[i];
 980     while (result[0] > 1 && result[result[0]] == 0)
 981         result[0]--;
 982
 983     /* Free temporary arrays */
 984     for (i = 0; i < scratchlen; i++)
 985         scratch[i] = 0;
 986     sfree(scratch);
 987     for (i = 0; i < 2 * pqlen; i++)
 988         a[i] = 0;
 989     sfree(a);
 990     for (i = 0; i < mlen; i++)
 991         m[i] = 0;
 992     sfree(m);
 993     for (i = 0; i < pqlen; i++)
 994         n[i] = 0;
 995     sfree(n);
 996     for (i = 0; i < pqlen; i++)
 997         o[i] = 0;
 998     sfree(o);
 999
1000     return result;
1001 }
1002
1003 /*
1004  * Compute p % mod.
1005  * The most significant word of mod MUST be non-zero.
1006  * We assume that the result array is the same size as the mod array.
1007  * We optionally write out a quotient if `quotient' is non-NULL.
1008  * We can avoid writing out the result if `result' is NULL.
1009  */
1010 static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)
1011 {
1012     BignumInt *n, *m;
1013     int mshift;
1014     int plen, mlen, i, j;
1015
1016     /* Allocate m of size mlen, copy mod to m */
1017     mlen = mod[0];
1018     m = snewn(mlen, BignumInt);
1019     for (j = 0; j < mlen; j++)
1020         m[j] = mod[j + 1];
1021
1022     /* Shift m left to make msb bit set */
1023     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
1024         if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
1025             break;
1026     if (mshift)
1027         shift_left(m, mlen, mshift);
1028
1029     plen = p[0];
1030     /* Ensure plen > mlen */
1031     if (plen <= mlen)
1032         plen = mlen + 1;
1033
1034     /* Allocate n of size plen, copy p to n */
1035     n = snewn(plen, BignumInt);
1036     for (i = 0; i < (int)p[0]; i++)
1037         n[i] = p[i + 1];
1038     for (; i < plen; i++)
1039         n[i] = 0;
1040
1041     /* Main computation */
1042     internal_mod(n, plen, m, mlen, quotient, mshift);
1043
1044     /* Fixup result in case the modulus was shifted */
1045     if (mshift) {
1046         shift_left(n, mlen + 1, mshift);
1047         internal_mod(n, plen, m, mlen, quotient, 0);
1048         shift_right(n, mlen, mshift);
1049     }
1050
1051     /* Copy result to buffer */
1052     if (result) {
1053         for (i = 0; i < (int)result[0]; i++)
1054             result[i + 1] = i < plen ? n[i] : 0;
1055         bn_restore_invariant(result);
1056     }
1057
1058     /* Free temporary arrays */
1059     for (i = 0; i < mlen; i++)
1060         m[i] = 0;
1061     sfree(m);
1062     for (i = 0; i < plen; i++)
1063         n[i] = 0;
1064     sfree(n);
1065 }
1066
1067 /*
1068  * Decrement a number.
1069  */
1070 void decbn(Bignum bn)
1071 {
1072     int i = 1;
1073     while (i < (int)bn[0] && bn[i] == 0)
1074         bn[i++] = BIGNUM_INT_MASK;
1075     bn[i]--;
1076 }
1077
1078 Bignum bignum_from_bytes(const unsigned char *data, int nbytes)
1079 {
1080     Bignum result;
1081     int w, i;
1082
1083     w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
1084
1085     result = newbn(w);
1086     for (i = 1; i <= w; i++)
1087         result[i] = 0;
1088     for (i = nbytes; i--;) {
1089         unsigned char byte = *data++;
1090         result[1 + i / BIGNUM_INT_BYTES] |= byte << (8*i % BIGNUM_INT_BITS);
1091     }
1092
1093     while (result[0] > 1 && result[result[0]] == 0)
1094         result[0]--;
1095     return result;
1096 }
1097
1098 /*
1099  * Read an SSH-1-format bignum from a data buffer. Return the number
1100  * of bytes consumed, or -1 if there wasn't enough data.
1101  */
1102 int ssh1_read_bignum(const unsigned char *data, int len, Bignum * result)
1103 {
1104     const unsigned char *p = data;
1105     int i;
1106     int w, b;
1107
1108     if (len < 2)
1109         return -1;
1110
1111     w = 0;
1112     for (i = 0; i < 2; i++)
1113         w = (w << 8) + *p++;
1114     b = (w + 7) / 8;                   /* bits -> bytes */
1115
1116     if (len < b+2)
1117         return -1;
1118
1119     if (!result)                       /* just return length */
1120         return b + 2;
1121
1122     *result = bignum_from_bytes(p, b);
1123
1124     return p + b - data;
1125 }
1126
1127 /*
1128  * Return the bit count of a bignum, for SSH-1 encoding.
1129  */
1130 int bignum_bitcount(Bignum bn)
1131 {
1132     int bitcount = bn[0] * BIGNUM_INT_BITS - 1;
1133     while (bitcount >= 0
1134            && (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;
1135     return bitcount + 1;
1136 }
1137
1138 /*
1139  * Return the byte length of a bignum when SSH-1 encoded.
1140  */
1141 int ssh1_bignum_length(Bignum bn)
1142 {
1143     return 2 + (bignum_bitcount(bn) + 7) / 8;
1144 }
1145
1146 /*
1147  * Return the byte length of a bignum when SSH-2 encoded.
1148  */
1149 int ssh2_bignum_length(Bignum bn)
1150 {
1151     return 4 + (bignum_bitcount(bn) + 8) / 8;
1152 }
1153
1154 /*
1155  * Return a byte from a bignum; 0 is least significant, etc.
1156  */
1157 int bignum_byte(Bignum bn, int i)
1158 {
1159     if (i >= (int)(BIGNUM_INT_BYTES * bn[0]))
1160         return 0;                      /* beyond the end */
1161     else
1162         return (bn[i / BIGNUM_INT_BYTES + 1] >>
1163                 ((i % BIGNUM_INT_BYTES)*8)) & 0xFF;
1164 }
1165
1166 /*
1167  * Return a bit from a bignum; 0 is least significant, etc.
1168  */
1169 int bignum_bit(Bignum bn, int i)
1170 {
1171     if (i >= (int)(BIGNUM_INT_BITS * bn[0]))
1172         return 0;                      /* beyond the end */
1173     else
1174         return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;
1175 }
1176
1177 /*
1178  * Set a bit in a bignum; 0 is least significant, etc.
1179  */
1180 void bignum_set_bit(Bignum bn, int bitnum, int value)
1181 {
1182     if (bitnum >= (int)(BIGNUM_INT_BITS * bn[0]))
1183         abort();                       /* beyond the end */
1184     else {
1185         int v = bitnum / BIGNUM_INT_BITS + 1;
1186         int mask = 1 << (bitnum % BIGNUM_INT_BITS);
1187         if (value)
1188             bn[v] |= mask;
1189         else
1190             bn[v] &= ~mask;
1191     }
1192 }
1193
1194 /*
1195  * Write a SSH-1-format bignum into a buffer. It is assumed the
1196  * buffer is big enough. Returns the number of bytes used.
1197  */
1198 int ssh1_write_bignum(void *data, Bignum bn)
1199 {
1200     unsigned char *p = data;
1201     int len = ssh1_bignum_length(bn);
1202     int i;
1203     int bitc = bignum_bitcount(bn);
1204
1205     *p++ = (bitc >> 8) & 0xFF;
1206     *p++ = (bitc) & 0xFF;
1207     for (i = len - 2; i--;)
1208         *p++ = bignum_byte(bn, i);
1209     return len;
1210 }
1211
1212 /*
1213  * Compare two bignums. Returns like strcmp.
1214  */
1215 int bignum_cmp(Bignum a, Bignum b)
1216 {
1217     int amax = a[0], bmax = b[0];
1218     int i = (amax > bmax ? amax : bmax);
1219     while (i) {
1220         BignumInt aval = (i > amax ? 0 : a[i]);
1221         BignumInt bval = (i > bmax ? 0 : b[i]);
1222         if (aval < bval)
1223             return -1;
1224         if (aval > bval)
1225             return +1;
1226         i--;
1227     }
1228     return 0;
1229 }
1230
1231 /*
1232  * Right-shift one bignum to form another.
1233  */
1234 Bignum bignum_rshift(Bignum a, int shift)
1235 {
1236     Bignum ret;
1237     int i, shiftw, shiftb, shiftbb, bits;
1238     BignumInt ai, ai1;
1239
1240     bits = bignum_bitcount(a) - shift;
1241     ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
1242
1243     if (ret) {
1244         shiftw = shift / BIGNUM_INT_BITS;
1245         shiftb = shift % BIGNUM_INT_BITS;
1246         shiftbb = BIGNUM_INT_BITS - shiftb;
1247
1248         ai1 = a[shiftw + 1];
1249         for (i = 1; i <= (int)ret[0]; i++) {
1250             ai = ai1;
1251             ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);
1252             ret[i] = ((ai >> shiftb) | (ai1 << shiftbb)) & BIGNUM_INT_MASK;
1253         }
1254     }
1255
1256     return ret;
1257 }
1258
1259 /*
1260  * Non-modular multiplication and addition.
1261  */
1262 Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
1263 {
1264     int alen = a[0], blen = b[0];
1265     int mlen = (alen > blen ? alen : blen);
1266     int rlen, i, maxspot;
1267     int wslen;
1268     BignumInt *workspace;
1269     Bignum ret;
1270
1271     /* mlen space for a, mlen space for b, 2*mlen for result,
1272      * plus scratch space for multiplication */
1273     wslen = mlen * 4 + mul_compute_scratch(mlen);
1274     workspace = snewn(wslen, BignumInt);
1275     for (i = 0; i < mlen; i++) {
1276         workspace[0 * mlen + i] = i < (int)a[0] ? a[i + 1] : 0;
1277         workspace[1 * mlen + i] = i < (int)b[0] ? b[i + 1] : 0;
1278     }
1279
1280     internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
1281                  workspace + 2 * mlen, mlen, workspace + 4 * mlen);
1282
1283     /* now just copy the result back */
1284     rlen = alen + blen + 1;
1285     if (addend && rlen <= (int)addend[0])
1286         rlen = addend[0] + 1;
1287     ret = newbn(rlen);
1288     maxspot = 0;
1289     for (i = 0; i < (int)ret[0]; i++) {
1290         ret[i + 1] = (i < 2 * mlen ? workspace[2 * mlen + i] : 0);
1291         if (ret[i + 1] != 0)
1292             maxspot = i + 1;
1293     }
1294     ret[0] = maxspot;
1295
1296     /* now add in the addend, if any */
1297     if (addend) {
1298         BignumDblInt carry = 0;
1299         for (i = 1; i <= rlen; i++) {
1300             carry += (i <= (int)ret[0] ? ret[i] : 0);
1301             carry += (i <= (int)addend[0] ? addend[i] : 0);
1302             ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
1303             carry >>= BIGNUM_INT_BITS;
1304             if (ret[i] != 0 && i > maxspot)
1305                 maxspot = i;
1306         }
1307     }
1308     ret[0] = maxspot;
1309
1310     for (i = 0; i < wslen; i++)
1311         workspace[i] = 0;
1312     sfree(workspace);
1313     return ret;
1314 }
1315
1316 /*
1317  * Non-modular multiplication.
1318  */
1319 Bignum bigmul(Bignum a, Bignum b)
1320 {
1321     return bigmuladd(a, b, NULL);
1322 }
1323
1324 /*
1325  * Simple addition.
1326  */
1327 Bignum bigadd(Bignum a, Bignum b)
1328 {
1329     int alen = a[0], blen = b[0];
1330     int rlen = (alen > blen ? alen : blen) + 1;
1331     int i, maxspot;
1332     Bignum ret;
1333     BignumDblInt carry;
1334
1335     ret = newbn(rlen);
1336
1337     carry = 0;
1338     maxspot = 0;
1339     for (i = 1; i <= rlen; i++) {
1340         carry += (i <= (int)a[0] ? a[i] : 0);
1341         carry += (i <= (int)b[0] ? b[i] : 0);
1342         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
1343         carry >>= BIGNUM_INT_BITS;
1344         if (ret[i] != 0 && i > maxspot)
1345             maxspot = i;
1346     }
1347     ret[0] = maxspot;
1348
1349     return ret;
1350 }
1351
1352 /*
1353  * Subtraction. Returns a-b, or NULL if the result would come out
1354  * negative (recall that this entire bignum module only handles
1355  * positive numbers).
1356  */
1357 Bignum bigsub(Bignum a, Bignum b)
1358 {
1359     int alen = a[0], blen = b[0];
1360     int rlen = (alen > blen ? alen : blen);
1361     int i, maxspot;
1362     Bignum ret;
1363     BignumDblInt carry;
1364
1365     ret = newbn(rlen);
1366
1367     carry = 1;
1368     maxspot = 0;
1369     for (i = 1; i <= rlen; i++) {
1370         carry += (i <= (int)a[0] ? a[i] : 0);
1371         carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK);
1372         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
1373         carry >>= BIGNUM_INT_BITS;
1374         if (ret[i] != 0 && i > maxspot)
1375             maxspot = i;
1376     }
1377     ret[0] = maxspot;
1378
1379     if (!carry) {
1380         freebn(ret);
1381         return NULL;
1382     }
1383
1384     return ret;
1385 }
1386
1387 /*
1388  * Return a bignum which is the result of shifting another left by N bits.
1389  * If N is negative then you get a right shift instead.
1390  */
1391 Bignum biglsl(Bignum x, int n)
1392 {
1393     Bignum d;
1394     unsigned o, i;
1395
1396     if (!n || !x[0])
1397         return copybn(x);
1398     else if (n < 0)
1399         return biglsr(x, -n);
1400
1401     o = n/BIGNUM_INT_BITS;
1402     n %= BIGNUM_INT_BITS;
1403     d = newbn(x[0] + o + !!n);
1404
1405     for (i = 1; i <= o; i++)
1406         d[i] = 0;
1407
1408     if (!n) {
1409         for (i = 1; i <= x[0]; i++)
1410             d[o + i] = x[i];
1411     } else {
1412         d[o + 1] = x[1] << n;
1413         for (i = 2; i <= x[0]; i--)
1414             d[o + i] = (x[i] << n) | (x[i - 1] >> (BIGNUM_INT_BITS - n));
1415         d[o + x[0] + 1] = x[x[0]] >> (BIGNUM_INT_BITS - n);
1416     }
1417
1418     bn_restore_invariant(d);
1419     return d;
1420 }
1421
1422 /*
1423  * Return a bignum which is the result of shifting another right by N bits
1424  * (discarding the least significant N bits, and shifting zeroes in at the
1425  * most significant end).  If N is negative then you get a left shift
1426  * instead.
1427  */
1428 Bignum biglsr(Bignum x, int n)
1429 {
1430     Bignum d;
1431     unsigned o, i;
1432
1433     if (!n || !x[0])
1434         return copybn(x);
1435     else if (n < 0)
1436         return biglsl(x, -n);
1437
1438     o = n/BIGNUM_INT_BITS;
1439     n %= BIGNUM_INT_BITS;
1440     d = newbn(x[0]);
1441
1442     if (!n) {
1443         for (i = o + 1; i <= x[0]; i++)
1444             d[i - o] = x[i];
1445     } else {
1446         d[1] = x[o + 1] >> n;
1447         for (i = o + 2; i < x[0]; i++)
1448             d[i - o] = x[
1449         d[o + x[0] + 1] = x[x[0]] >> (BIGNUM_INT_BITS - n);
1450         for (i = x[0]; i > 1; i--)
1451             d[o + i] = (x[i] << n) | (x[i - 1] >> (BIGNUM_INT_BITS - n));
1452         d[o + 1] = x[1] << n;
1453     }
1454
1455     bn_restore_invariant(d);
1456     return d;
1457 }
1458
1459 /*
1460  * Create a bignum which is the bitmask covering another one. That
1461  * is, the smallest integer which is >= N and is also one less than
1462  * a power of two.
1463  */
1464 Bignum bignum_bitmask(Bignum n)
1465 {
1466     Bignum ret = copybn(n);
1467     int i;
1468     BignumInt j;
1469
1470     i = ret[0];
1471     while (n[i] == 0 && i > 0)
1472         i--;
1473     if (i <= 0)
1474         return ret;                    /* input was zero */
1475     j = 1;
1476     while (j < n[i])
1477         j = 2 * j + 1;
1478     ret[i] = j;
1479     while (--i > 0)
1480         ret[i] = BIGNUM_INT_MASK;
1481     return ret;
1482 }
1483
1484 /*
1485  * Convert a (max 32-bit) long into a bignum.
1486  */
1487 Bignum bignum_from_long(unsigned long nn)
1488 {
1489     Bignum ret;
1490     BignumDblInt n = nn;
1491
1492     ret = newbn(3);
1493     ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
1494     ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
1495     ret[3] = 0;
1496     ret[0] = (ret[2]  ? 2 : 1);
1497     return ret;
1498 }
1499
1500 /*
1501  * Add a long to a bignum.
1502  */
1503 Bignum bignum_add_long(Bignum number, unsigned long addendx)
1504 {
1505     Bignum ret = newbn(number[0] + 1);
1506     int i, maxspot = 0;
1507     BignumDblInt carry = 0, addend = addendx;
1508
1509     for (i = 1; i <= (int)ret[0]; i++) {
1510         carry += addend & BIGNUM_INT_MASK;
1511         carry += (i <= (int)number[0] ? number[i] : 0);
1512         addend >>= BIGNUM_INT_BITS;
1513         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
1514         carry >>= BIGNUM_INT_BITS;
1515         if (ret[i] != 0)
1516             maxspot = i;
1517     }
1518     ret[0] = maxspot;
1519     return ret;
1520 }
1521
1522 /*
1523  * Compute the residue of a bignum, modulo a (max 16-bit) short.
1524  */
1525 unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
1526 {
1527     BignumDblInt mod, r;
1528     int i;
1529
1530     r = 0;
1531     mod = modulus;
1532     for (i = number[0]; i > 0; i--)
1533         r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
1534     return (unsigned short) r;
1535 }
1536
#ifdef DEBUG
/*
 * Debugging aid: print a bignum in hex via debug(), after an optional
 * prefix.  One `-' is emitted for each padding position counted by
 * `morenibbles', then the significant hex digits, most significant
 * first.  A newline is added only when a prefix was supplied.
 */
void diagbn(char *prefix, Bignum md)
{
    static const char hex[] = "0123456789ABCDEF";
    int i, nibbles, morenibbles;

    debug(("%s0x", prefix ? prefix : ""));

    nibbles = (3 + bignum_bitcount(md)) / 4;
    if (nibbles < 1)
        nibbles = 1;                   /* always show at least one digit */
    morenibbles = 4 * md[0] - nibbles;

    i = 0;
    while (i < morenibbles) {
        debug(("-"));
        i++;
    }

    for (i = nibbles - 1; i >= 0; i--) {
        int byte = bignum_byte(md, i / 2);
        int shift = 4 * (i % 2);

        debug(("%c", hex[(byte >> shift) & 0xF]));
    }

    if (prefix)
        debug(("\n"));
}
#endif
1559
1560 /*
1561  * Simple division.
1562  */
1563 Bignum bigdiv(Bignum a, Bignum b)
1564 {
1565     Bignum q = newbn(a[0]);
1566     bigdivmod(a, b, NULL, q);
1567     return q;
1568 }
1569
1570 /*
1571  * Simple remainder.
1572  */
1573 Bignum bigmod(Bignum a, Bignum b)
1574 {
1575     Bignum r = newbn(b[0]);
1576     bigdivmod(a, b, r, NULL);
1577     return r;
1578 }
1579
1580 /*
1581  * Greatest common divisor.
1582  */
1583 Bignum biggcd(Bignum av, Bignum bv)
1584 {
1585     Bignum a = copybn(av);
1586     Bignum b = copybn(bv);
1587
1588     while (bignum_cmp(b, Zero) != 0) {
1589         Bignum t = newbn(b[0]);
1590         bigdivmod(a, b, t, NULL);
1591         while (t[0] > 1 && t[t[0]] == 0)
1592             t[0]--;
1593         freebn(a);
1594         a = b;
1595         b = t;
1596     }
1597
1598     freebn(b);
1599     return a;
1600 }
1601
1602 /*
1603  * Modular inverse, using Euclid's extended algorithm.
1604  */
1605 Bignum modinv(Bignum number, Bignum modulus)
1606 {
1607     Bignum a = copybn(modulus);
1608     Bignum b = copybn(number);
1609     Bignum xp = copybn(Zero);
1610     Bignum x = copybn(One);
1611     int sign = +1;
1612
1613     while (bignum_cmp(b, One) != 0) {
1614         Bignum t = newbn(b[0]);
1615         Bignum q = newbn(a[0]);
1616         bigdivmod(a, b, t, q);
1617         while (t[0] > 1 && t[t[0]] == 0)
1618             t[0]--;
1619         freebn(a);
1620         a = b;
1621         b = t;
1622         t = xp;
1623         xp = x;
1624         x = bigmuladd(q, xp, t);
1625         sign = -sign;
1626         freebn(t);
1627         freebn(q);
1628     }
1629
1630     freebn(b);
1631     freebn(a);
1632     freebn(xp);
1633
1634     /* now we know that sign * x == 1, and that x < modulus */
1635     if (sign < 0) {
1636         /* set a new x to be modulus - x */
1637         Bignum newx = newbn(modulus[0]);
1638         BignumInt carry = 0;
1639         int maxspot = 1;
1640         int i;
1641
1642         for (i = 1; i <= (int)newx[0]; i++) {
1643             BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);
1644             BignumInt bword = (i <= (int)x[0] ? x[i] : 0);
1645             newx[i] = aword - bword - carry;
1646             bword = ~bword;
1647             carry = carry ? (newx[i] >= bword) : (newx[i] > bword);
1648             if (newx[i] != 0)
1649                 maxspot = i;
1650         }
1651         newx[0] = maxspot;
1652         freebn(x);
1653         x = newx;
1654     }
1655
1656     /* and return. */
1657     return x;
1658 }
1659
1660 /*
1661  * Extract the largest power of 2 dividing x, storing it in p2, and returning
1662  * the product of the remaining factors.
1663  */
1664 static Bignum extract_p2(Bignum x, unsigned *p2)
1665 {
1666     unsigned i, j, k, n;
1667     Bignum y;
1668
1669     /* If x is zero then the following won't work.  And if x is odd then
1670      * there's nothing very useful to do.
1671      */
1672     if (!x[0] || (x[1] & 1)) {
1673         *p2 = 0;
1674         return copybn(x);
1675     }
1676
1677     /* Find the power of two. */
1678     for (i = 0; !x[i + 1]; i++);
1679     for (j = 0; !((x[i + 1] >> j) & 1); j++);
1680     *p2 = i*BIGNUM_INT_BITS + j;
1681
1682     /* Work out how big the copy should be. */
1683     n = x[0] - i - 1;
1684     if (x[x[0]] >> j) n++;
1685
1686     /* Copy and shift down. */
1687     y = newbn(n);
1688     for (k = 1; k <= n; k++) {
1689         y[k] = x[k + i] >> j;
1690         if (j && k < x[0]) y[k] |= x[k + i + 1] << (BIGNUM_INT_BITS - j);
1691     }
1692
1693     /* Done. */
1694     return y;
1695 }
1696
1697 /*
1698  * Kronecker symbol (a|n).  The result is always in { -1, 0, +1 }, and is
1699  * zero if and only if a and n have a nontrivial common factor.  Most
1700  * usefully, if n is prime, this is the Legendre symbol, taking the value +1
1701  * if a is a quadratic residue mod n, and -1 otherwise; i.e., (a|p) ==
1702  * a^{(p-1)/2} (mod p).
1703  */
1704 int kronecker(Bignum a, Bignum n)
1705 {
1706     unsigned s, nn;
1707     int r = +1;
1708     Bignum t;
1709
1710     /* Special case for n = 0.  This is the same convention PARI uses,
1711      * except that we can't represent negative numbers.
1712      */
1713     if (bignum_cmp(n, Zero) == 0) {
1714         if (bignum_cmp(a, One) == 0) return +1;
1715         else return 0;
1716     }
1717
1718     /* Write n = 2^s t, with t odd.  If s > 0 and a is even, then the answer
1719      * is zero; otherwise throw in a factor of (-1)^s if a == 3 or 5 (mod 8).
1720      *
1721      * At this point, we have a copy of n, and must remember to free it when
1722      * we're done.  It's convenient to take a copy of a at the same time.
1723      */
1724     a = copybn(a);
1725     n = extract_p2(n, &s);
1726
1727     if (s && (!a[0] || !(a[1] & 1))) { r = 0; goto done; }
1728     else if ((s & 1) && ((a[1] & 7) == 3 || (a[1] & 7) == 5)) r = -r;
1729
1730     /* If n is (now) a unit then we're done. */
1731     if (bignum_cmp(n, One) == 0) goto done;
1732
1733     /* Reduce a modulo n before we go any further. */
1734     if (bignum_cmp(a, n) >= 0) { t = bigmod(a, n); freebn(a); a = t; }
1735
1736     /* Main loop. */
1737     for (;;) {
1738         if (bignum_cmp(a, Zero) == 0) { r = 0; goto done; }
1739
1740         /* Strip out and handle powers of two from a. */
1741         t = extract_p2(a, &s); freebn(a); a = t;
1742         nn = n[1] & 7;
1743         if ((s & 1) && (nn == 3 || nn == 5)) r = -r;
1744         if (bignum_cmp(a, One) == 0) break;
1745
1746         /* Swap, applying quadratic reciprocity. */
1747         if ((nn & 3) == 3 && (a[1] & 3) == 3) r = -r;
1748         t = bigmod(n, a); freebn(n); n = a; a = t;
1749     }
1750
1751     /* Tidy up: we're done. */
1752 done:
1753     freebn(a); freebn(n);
1754     return r;
1755 }
1756
1757 /*
1758  * Modular square root.  We must have p prime: extracting square roots modulo
1759  * composites is equivalent to factoring (but we don't check: you'll just get
1760  * the wrong answer).  Returns NULL if x is not a quadratic residue mod p.
1761  */
1762 Bignum modsqrt(Bignum x, Bignum p)
1763 {
1764     Bignum xinv, b, c, r, t, z, X, mone;
1765     unsigned i, j, s;
1766
1767     /* If x is not a quadratic residue then we will not go to space today. */
1768     if (kronecker(x, p) != +1) return NULL;
1769
1770     /* We need a quadratic nonresidue from somewhere.  Exactly half of all
1771      * units mod p are quadratic residues, but no efficient deterministic
1772      * algorithm for finding one is known.  So pick at random: we don't
1773      * expect this to take long.
1774      */
1775     z = newbn(p[0]);
1776     do {
1777         for (i = 1; i <= p[0]; i++) z[i] = rand();
1778         z[0] = p[0]; bn_restore_invariant(z);
1779     } while (kronecker(z, p) != -1);
1780     b = bigmod(z, p); freebn(z);
1781
1782     /* We need to compute a few things before we really get started. */
1783     xinv = modinv(x, p);                /* x^{-1} mod p */
1784     mone = bigsub(p, One);              /* p - 1 == -1 (mod p) */
1785     t = extract_p2(mone, &s);           /* 2^s t = p - 1 */
1786     c = modpow(b, t, p);                /* b^t (mod p) */
1787     z = bigadd(t, One); freebn(t); t = z; /* (t + 1) */
1788     shift_right(t + 1, t[0], 1); if (!t[t[0]]) t[0]--;
1789     r = modpow(x, t, p);                /* x^{(t+1)/2} (mod p) */
1790     freebn(b); freebn(mone); freebn(t);
1791
1792     /* OK, so how does this work anyway?
1793      *
1794      * We know that x^t is somewhere in the order-2^s subgroup of GF(p)^*;
1795      * and g = c^{-1} is a generator for this subgroup (since we know that
1796      * g^{2^{s-1}} = b^{(p-1)/2} = (b|p) = -1); so x^t = g^m for some m.  In
1797      * fact, we know that m is even because x is a square.  Suppose we can
1798      * determine m; then we know that x^t/g^m = 1, so x^{t+1}/c^m = x -- but
1799      * both t + 1 and m are even, so x^{(t+1)/2}/g^{m/2} is a square root of
1800      * x.
1801      *
1802      * Conveniently, finding the discrete log of an element X in a group of
1803      * order 2^s is easy.  Write X = g^m = g^{m_0+2k'}; then X^{2^{s-1}} =
1804      * g^{m_0 2^{s-1}} c^{m' 2^s} = g^{m_0 2^{s-1}} is either -1 or +1,
1805      * telling us that m_0 is 1 or 0 respectively.  Then X/g^{m_0} =
1806      * (g^2)^{m'} has order 2^{s-1} so we can continue inductively.  What we
1807      * end up with at the end is X/g^m.
1808      *
1809      * There are a few wrinkles.  As we proceed through the induction, the
1810      * generator for the subgroup will be c^{-2}, since we know that m is
1811      * even.  While we want the discrete log of X = x^t, we're actually going
1812      * to keep track of r, which will eventually be x^{(t+1)/2}/g^{m/2} =
1813      * x^{(t+1)/2} c^m, recovering X/g^m = r^2/x as we go.  We don't actually
1814      * form the discrete log explicitly, because the final result will
1815      * actually be the square root we want.
1816      */
1817     for (i = 1; i < s; i++) {
1818
1819         /* Determine X.  We could optimize this, only recomputing it when
1820          * it's been invalidated, but that's fiddlier and this isn't
1821          * performance critical.
1822          */
1823         z = modmul(r, r, p);
1824         X = modmul(z, xinv, p);
1825         freebn(z);
1826
1827         /* Determine X^{2^{s-1-i}}. */
1828         for (j = i + 1; j < s; j++)
1829             z = modmul(X, X, p), freebn(X), X = z;
1830
1831         /* Maybe accumulate a factor of c. */
1832         if (bignum_cmp(X, One) != 0)
1833             z = modmul(r, c, p), freebn(r), r = z;
1834
1835         /* Move on to the next smaller subgroup. */
1836         z = modmul(c, c, p), freebn(c), c = z;
1837         freebn(X);
1838     }
1839
1840     /* Of course, there are two square roots of x.  For predictability's sake
1841      * we'll always return the one in [1..(p - 1)/2].  The other is, of
1842      * course, p - r.
1843      */
1844     z = bigsub(p, r);
1845     if (bignum_cmp(r, z) < 0)
1846         freebn(z);
1847     else {
1848         freebn(r);
1849         r = z;
1850     }
1851
1852     /* We're done. */
1853     freebn(xinv); freebn(c);
1854     return r;
1855 }
1856
1857 /*
1858  * Render a bignum into decimal. Return a malloced string holding
1859  * the decimal representation.
1860  */
1861 char *bignum_decimal(Bignum x)
1862 {
1863     int ndigits, ndigit;
1864     int i, iszero;
1865     BignumDblInt carry;
1866     char *ret;
1867     BignumInt *workspace;
1868
1869     /*
1870      * First, estimate the number of digits. Since log(10)/log(2)
1871      * is just greater than 93/28 (the joys of continued fraction
1872      * approximations...) we know that for every 93 bits, we need
1873      * at most 28 digits. This will tell us how much to malloc.
1874      *
1875      * Formally: if x has i bits, that means x is strictly less
1876      * than 2^i. Since 2 is less than 10^(28/93), this is less than
1877      * 10^(28i/93). We need an integer power of ten, so we must
1878      * round up (rounding down might make it less than x again).
1879      * Therefore if we multiply the bit count by 28/93, rounding
1880      * up, we will have enough digits.
1881      *
1882      * i=0 (i.e., x=0) is an irritating special case.
1883      */
1884     i = bignum_bitcount(x);
1885     if (!i)
1886         ndigits = 1;                   /* x = 0 */
1887     else
1888         ndigits = (28 * i + 92) / 93;  /* multiply by 28/93 and round up */
1889     ndigits++;                         /* allow for trailing \0 */
1890     ret = snewn(ndigits, char);
1891
1892     /*
1893      * Now allocate some workspace to hold the binary form as we
1894      * repeatedly divide it by ten. Initialise this to the
1895      * big-endian form of the number.
1896      */
1897     workspace = snewn(x[0], BignumInt);
1898     for (i = 0; i < (int)x[0]; i++)
1899         workspace[i] = x[x[0] - i];
1900
1901     /*
1902      * Next, write the decimal number starting with the last digit.
1903      * We use ordinary short division, dividing 10 into the
1904      * workspace.
1905      */
1906     ndigit = ndigits - 1;
1907     ret[ndigit] = '\0';
1908     do {
1909         iszero = 1;
1910         carry = 0;
1911         for (i = 0; i < (int)x[0]; i++) {
1912             carry = (carry << BIGNUM_INT_BITS) + workspace[i];
1913             workspace[i] = (BignumInt) (carry / 10);
1914             if (workspace[i])
1915                 iszero = 0;
1916             carry %= 10;
1917         }
1918         ret[--ndigit] = (char) (carry + '0');
1919     } while (!iszero);
1920
1921     /*
1922      * There's a chance we've fallen short of the start of the
1923      * string. Correct if so.
1924      */
1925     if (ndigit > 0)
1926         memmove(ret, ret + ndigit, ndigits - ndigit);
1927
1928     /*
1929      * Done.
1930      */
1931     sfree(workspace);
1932     return ret;
1933 }
1934
1935 #ifdef TESTBN
1936
1937 #include <stdio.h>
1938 #include <stdlib.h>
1939 #include <ctype.h>
1940
1941 /*
1942  * gcc -Wall -g -O0 -DTESTBN -o testbn sshbn.c misc.c conf.c tree234.c unix/uxmisc.c -I. -I unix -I charset
1943  *
1944  * Then feed to this program's standard input the output of
1945  * testdata/bignum.py .
1946  */
1947
/*
 * Fatal-error reporter for the standalone TESTBN build.
 *
 * Treats 'p' as a printf-style format string for the variadic
 * arguments that follow it, writes the formatted message to stderr
 * prefixed with "FATAL ERROR: " and followed by a newline, and then
 * terminates the process with exit status 1. Never returns.
 */
void modalfatalbox(char *p, ...)
{
    va_list args;

    fputs("FATAL ERROR: ", stderr);

    va_start(args, p);
    vfprintf(stderr, p, args);
    va_end(args);

    fputs("\n", stderr);
    exit(1);
}
1958
/*
 * Convert a single hexadecimal digit character to its value (0..15).
 * Anything not above '9' is treated as a decimal digit; anything else
 * has bit 0x20 cleared (folding lower case to upper case in ASCII) and
 * is measured from 'A'. The argument is evaluated more than once and
 * is not validated, so pass only side-effect-free expressions that
 * are already known to be hex digits.
 */
#define fromxdigit(c) ( (c) <= '9' ? (c) - '0' : ((c) & 0xDF) - 'A' + 10 )
1960
1961 int main(int argc, char **argv)
1962 {
1963     char *buf;
1964     int line = 0;
1965     int passes = 0, fails = 0;
1966
1967     while ((buf = fgetline(stdin)) != NULL) {
1968         int maxlen = strlen(buf);
1969         unsigned char *data = snewn(maxlen, unsigned char);
1970         unsigned char *ptrs[5], *q;
1971         int ptrnum;
1972         char *bufp = buf;
1973
1974         line++;
1975
1976         q = data;
1977         ptrnum = 0;
1978
1979         while (*bufp && !isspace((unsigned char)*bufp))
1980             bufp++;
1981         if (bufp)
1982             *bufp++ = '\0';
1983
1984         while (*bufp) {
1985             char *start, *end;
1986             int i;
1987
1988             while (*bufp && !isxdigit((unsigned char)*bufp))
1989                 bufp++;
1990             start = bufp;
1991
1992             if (!*bufp)
1993                 break;
1994
1995             while (*bufp && isxdigit((unsigned char)*bufp))
1996                 bufp++;
1997             end = bufp;
1998
1999             if (ptrnum >= lenof(ptrs))
2000                 break;
2001             ptrs[ptrnum++] = q;
2002
2003             for (i = -((end - start) & 1); i < end-start; i += 2) {
2004                 unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));
2005                 val = val * 16 + fromxdigit(start[i+1]);
2006                 *q++ = val;
2007             }
2008
2009             ptrs[ptrnum] = q;
2010         }
2011
2012         if (!strcmp(buf, "mul")) {
2013             Bignum a, b, c, p;
2014
2015             if (ptrnum != 3) {
2016                 printf("%d: mul with %d parameters, expected 3\n", line, ptrnum);
2017                 exit(1);
2018             }
2019             a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
2020             b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
2021             c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
2022             p = bigmul(a, b);
2023
2024             if (bignum_cmp(c, p) == 0) {
2025                 passes++;
2026             } else {
2027                 char *as = bignum_decimal(a);
2028                 char *bs = bignum_decimal(b);
2029                 char *cs = bignum_decimal(c);
2030                 char *ps = bignum_decimal(p);
2031
2032                 printf("%d: fail: %s * %s gave %s expected %s\n",
2033                        line, as, bs, ps, cs);
2034                 fails++;
2035
2036                 sfree(as);
2037                 sfree(bs);
2038                 sfree(cs);
2039                 sfree(ps);
2040             }
2041             freebn(a);
2042             freebn(b);
2043             freebn(c);
2044             freebn(p);
2045         } else if (!strcmp(buf, "pow")) {
2046             Bignum base, expt, modulus, expected, answer;
2047
2048             if (ptrnum != 4) {
2049                 printf("%d: mul with %d parameters, expected 4\n", line, ptrnum);
2050                 exit(1);
2051             }
2052
2053             base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
2054             expt = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
2055             modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
2056             expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
2057             answer = modpow(base, expt, modulus);
2058
2059             if (bignum_cmp(expected, answer) == 0) {
2060                 passes++;
2061             } else {
2062                 char *as = bignum_decimal(base);
2063                 char *bs = bignum_decimal(expt);
2064                 char *cs = bignum_decimal(modulus);
2065                 char *ds = bignum_decimal(answer);
2066                 char *ps = bignum_decimal(expected);
2067
2068                 printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n",
2069                        line, as, bs, cs, ds, ps);
2070                 fails++;
2071
2072                 sfree(as);
2073                 sfree(bs);
2074                 sfree(cs);
2075                 sfree(ds);
2076                 sfree(ps);
2077             }
2078             freebn(base);
2079             freebn(expt);
2080             freebn(modulus);
2081             freebn(expected);
2082             freebn(answer);
2083         } else if (!strcmp(buf, "modsqrt")) {
2084             Bignum x, p, expected, answer;
2085
2086             if (ptrnum != 3) {
2087                 printf("%d: modsqrt with %d parameters, expected 3\n", line, ptrnum);
2088                 exit(1);
2089             }
2090
2091             x = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
2092             p = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
2093             expected = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
2094             answer = modsqrt(x, p);
2095             if (!answer)
2096                 answer = copybn(Zero);
2097
2098             if (bignum_cmp(expected, answer) == 0) {
2099                 passes++;
2100             } else {
2101                 char *xs = bignum_decimal(x);
2102                 char *ps = bignum_decimal(p);
2103                 char *qs = bignum_decimal(answer);
2104                 char *ws = bignum_decimal(expected);
2105
2106                 printf("%d: fail: sqrt(%s) mod %s gave %s expected %s\n",
2107                        line, xs, ps, qs, ws);
2108                 fails++;
2109
2110                 sfree(xs);
2111                 sfree(ps);
2112                 sfree(qs);
2113                 sfree(ws);
2114             }
2115             freebn(p);
2116             freebn(x);
2117             freebn(expected);
2118             freebn(answer);
2119         } else {
2120             printf("%d: unrecognised test keyword: '%s'\n", line, buf);
2121             exit(1);
2122         }
2123
2124         sfree(buf);
2125         sfree(data);
2126     }
2127
2128     printf("passed %d failed %d total %d\n", passes, fails, passes+fails);
2129     return fails != 0;
2130 }
2131
2132 #endif