git.distorted.org.uk Git - u/mdw/putty/blob - sshbn.c

   1 /*
   2  * Bignum routines for RSA and DH and stuff.
   3  */
   4
   5 #include <stdio.h>
   6 #include <assert.h>
   7 #include <stdlib.h>
   8 #include <string.h>
   9
  10 #include "misc.h"
  11 #include "bn-internal.h"
  12 #include "ssh.h"
  13
  14 BignumInt bnZero[1] = { 0 };        /* zero: count word 0, so no digits */
  15 BignumInt bnOne[2] = { 1, 1 };      /* one: count word 1, then the digit 1 */
  16
  17 /*
  18  * The Bignum format is an array of `BignumInt'. The first
  19  * element of the array counts the remaining elements. The
  20  * remaining elements express the actual number, base 2^BIGNUM_INT_BITS, _least_
  21  * significant digit first. (So it's trivial to extract the bit
  22  * with value 2^n for any n.)
  23  *
  24  * All Bignums in this module are positive. Negative numbers must
  25  * be dealt with outside it.
  26  *
  27  * INVARIANT: the most significant word of any Bignum must be
  28  * nonzero.
  29  */
  30
  31 Bignum Zero = bnZero, One = bnOne;  /* Bignum handles onto the static arrays bnZero and bnOne; presumably must not be given to freebn(), which sfree()s its argument */
  32
  33 static Bignum newbn(int length)
  34 {
  35     Bignum b = snewn(length + 1, BignumInt);
  36     if (!b)
  37         abort();                       /* FIXME */
  38     memset(b, 0, (length + 1) * sizeof(*b));
  39     b[0] = length;
  40     return b;
  41 }
  42
  43 void bn_restore_invariant(Bignum b)
  44 {
  45     while (b[0] > 1 && b[b[0]] == 0)
  46         b[0]--;
  47 }
  48
  49 Bignum copybn(Bignum orig)
  50 {
  51     Bignum b = snewn(orig[0] + 1, BignumInt);
  52     if (!b)
  53         abort();                       /* FIXME */
  54     memcpy(b, orig, (orig[0] + 1) * sizeof(*b));
  55     return b;
  56 }
  57
  58 void freebn(Bignum b)
  59 {
  60     /*
  61      * Burn the evidence, just in case.
  62      */
  63     smemclr(b, sizeof(b[0]) * (b[0] + 1));
  64     sfree(b);
  65 }
  66
  67 Bignum bn_power_2(int n)
  68 {
  69     Bignum ret = newbn(n / BIGNUM_INT_BITS + 1);
  70     bignum_set_bit(ret, n, 1);
  71     return ret;
  72 }
  73
  74 /*
  75  * Internal addition. Sets c = a + b, where 'a', 'b' and 'c' are all
  76  * little-endian arrays of 'len' BignumInts. Returns a BignumInt carried
  77  * off the top.
  78  */
  79 static BignumInt internal_add(const BignumInt *a, const BignumInt *b,
  80                               BignumInt *c, int len)
  81 {
  82     int i;
  83     BignumDblInt carry = 0;
  84
  85     for (i = 0; i < len; i++) {
  86         carry += (BignumDblInt)a[i] + b[i];
  87         c[i] = (BignumInt)carry;
  88         carry >>= BIGNUM_INT_BITS;
  89     }
  90
  91     return (BignumInt)carry;
  92 }
  93
  94 /*
  95  * Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are
  96  * all little-endian arrays of 'len' BignumInts. Any borrow from the top
  97  * is ignored.
  98  */
  99 static void internal_sub(const BignumInt *a, const BignumInt *b,
 100                          BignumInt *c, int len)
 101 {
 102     int i;
 103     BignumDblInt carry = 1;
 104
 105     for (i = 0; i < len; i++) {
 106         carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK);
 107         c[i] = (BignumInt)carry;
 108         carry >>= BIGNUM_INT_BITS;
 109     }
 110 }
 111
 112 /*
 113  * Compute c = a * b.
 114  * Input is in the first len words of a and b.
 115  * Result is returned in the first 2*len words of c.
 116  *
 117  * 'scratch' must point to an array of BignumInt of size at least
 118  * mul_compute_scratch(len). (This covers the needs of internal_mul
 119  * and all its recursive calls to itself.)
 120  */
 121 #define KARATSUBA_THRESHOLD 50
 122 static int mul_compute_scratch(int len)
 123 {
 124     int ret = 0;
 125     while (len > KARATSUBA_THRESHOLD) {
 126         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
 127         int midlen = botlen + 1;
 128         ret += 4*midlen;
 129         len = midlen;
 130     }
 131     return ret;
 132 }
 133 static void internal_mul(const BignumInt *a, const BignumInt *b,
 134                          BignumInt *c, int len, BignumInt *scratch)
 135 {
 136     if (len > KARATSUBA_THRESHOLD) {
 137         int i;
 138
 139         /*
 140          * Karatsuba divide-and-conquer algorithm. Cut each input in
 141          * half, so that it's expressed as two big 'digits' in a giant
 142          * base D:
 143          *
 144          *   a = a_1 D + a_0
 145          *   b = b_1 D + b_0
 146          *
 147          * Then the product is of course
 148          *
 149          *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
 150          *
 151          * and we compute the three coefficients by recursively
 152          * calling ourself to do half-length multiplications.
 153          *
 154          * The clever bit that makes this worth doing is that we only
 155          * need _one_ half-length multiplication for the central
 156          * coefficient rather than the two that it obviouly looks
 157          * like, because we can use a single multiplication to compute
 158          *
 159          *   (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0
 160          *
 161          * and then we subtract the other two coefficients (a_1 b_1
 162          * and a_0 b_0) which we were computing anyway.
 163          *
 164          * Hence we get to multiply two numbers of length N in about
 165          * three times as much work as it takes to multiply numbers of
 166          * length N/2, which is obviously better than the four times
 167          * as much work it would take if we just did a long
 168          * conventional multiply.
 169          */
 170
 171         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
 172         int midlen = botlen + 1;
 173         BignumDblInt carry;
 174
 175         /*
 176          * The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping
 177          * in the output array, so we can compute them immediately in
 178          * place.
 179          */
 180
 181 #ifdef KARA_DEBUG
 182         printf("a1,a0 = 0x");
 183         for (i = 0; i < len; i++) {
 184             if (i == toplen) printf(", 0x");
 185             printf("%0*x", BIGNUM_INT_BITS/4, a[len - 1 - i]);
 186         }
 187         printf("\n");
 188         printf("b1,b0 = 0x");
 189         for (i = 0; i < len; i++) {
 190             if (i == toplen) printf(", 0x");
 191             printf("%0*x", BIGNUM_INT_BITS/4, b[len - 1 - i]);
 192         }
 193         printf("\n");
 194 #endif
 195
 196         /* a_1 b_1 */
 197         internal_mul(a + botlen, b + botlen, c + 2*botlen, toplen, scratch);
 198 #ifdef KARA_DEBUG
 199         printf("a1b1 = 0x");
 200         for (i = 0; i < 2*toplen; i++) {
 201             printf("%0*x", BIGNUM_INT_BITS/4, c[2*len - 1 - i]);
 202         }
 203         printf("\n");
 204 #endif
 205
 206         /* a_0 b_0 */
 207         internal_mul(a, b, c, botlen, scratch);
 208 #ifdef KARA_DEBUG
 209         printf("a0b0 = 0x");
 210         for (i = 0; i < 2*botlen; i++) {
 211             printf("%0*x", BIGNUM_INT_BITS/4, c[2*botlen - 1 - i]);
 212         }
 213         printf("\n");
 214 #endif
 215
 216         /* Zero padding. botlen exceeds toplen by at most 1, and we'll set
 217          * the extra carry explicitly below, so we only need to zero at most
 218          * one of the top words here.
 219          */
 220         scratch[midlen - 2] = scratch[2*midlen - 2] = 0;
 221
 222         for (i = 0; i < toplen; i++) {
 223             scratch[i] = a[i + botlen]; /* a_1 */
 224             scratch[midlen + i] = b[i + botlen]; /* b_1 */
 225         }
 226
 227         /* compute a_1 + a_0 */
 228         scratch[midlen - 1] = internal_add(scratch, a, scratch, botlen);
 229 #ifdef KARA_DEBUG
 230         printf("a1plusa0 = 0x");
 231         for (i = 0; i < midlen; i++) {
 232             printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen - 1 - i]);
 233         }
 234         printf("\n");
 235 #endif
 236         /* compute b_1 + b_0 */
 237         scratch[2*midlen - 1] = internal_add(scratch+midlen, b,
 238                                              scratch+midlen, botlen);
 239 #ifdef KARA_DEBUG
 240         printf("b1plusb0 = 0x");
 241         for (i = 0; i < midlen; i++) {
 242             printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen - 1 - i]);
 243         }
 244         printf("\n");
 245 #endif
 246
 247         /*
 248          * Now we can do the third multiplication.
 249          */
 250         internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen,
 251                      scratch + 4*midlen);
 252 #ifdef KARA_DEBUG
 253         printf("a1plusa0timesb1plusb0 = 0x");
 254         for (i = 0; i < 2*midlen; i++) {
 255             printf("%0*x", BIGNUM_INT_BITS/4, scratch[4*midlen - 1 - i]);
 256         }
 257         printf("\n");
 258 #endif
 259
 260         /*
 261          * Now we can reuse the first half of 'scratch' to compute the
 262          * sum of the outer two coefficients, to subtract from that
 263          * product to obtain the middle one.
 264          */
 265         scratch[2*botlen - 2] = scratch[2*botlen - 1] = 0;
 266         for (i = 0; i < 2*toplen; i++)
 267             scratch[i] = c[2*botlen + i];
 268         scratch[2*botlen] = internal_add(scratch, c, scratch, 2*botlen);
 269         scratch[2*botlen + 1] = 0;
 270 #ifdef KARA_DEBUG
 271         printf("a1b1plusa0b0 = 0x");
 272         for (i = 0; i < 2*midlen; i++) {
 273             printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen - 1 - i]);
 274         }
 275         printf("\n");
 276 #endif
 277
 278         internal_sub(scratch + 2*midlen, scratch, scratch, 2*midlen);
 279 #ifdef KARA_DEBUG
 280         printf("a1b0plusa0b1 = 0x");
 281         for (i = 0; i < 2*midlen; i++) {
 282             printf("%0*x", BIGNUM_INT_BITS/4, scratch[4*midlen - 1 - i]);
 283         }
 284         printf("\n");
 285 #endif
 286
 287         /*
 288          * And now all we need to do is to add that middle coefficient
 289          * back into the output. We may have to propagate a carry
 290          * further up the output, but we can be sure it won't
 291          * propagate right the way off the top.
 292          */
 293         carry = internal_add(c + botlen, scratch, c + botlen, 2*midlen);
 294         i = botlen + 2*midlen;
 295         while (carry) {
 296             assert(i <= 2*len);
 297             carry += c[i];
 298             c[i] = (BignumInt)carry;
 299             carry >>= BIGNUM_INT_BITS;
 300             i++;
 301         }
 302 #ifdef KARA_DEBUG
 303         printf("ab = 0x");
 304         for (i = 0; i < 2*len; i++) {
 305             printf("%0*x", BIGNUM_INT_BITS/4, c[2*len - i]);
 306         }
 307         printf("\n");
 308 #endif
 309
 310     } else {
 311         int i;
 312         BignumInt carry;
 313         BignumDblInt t;
 314         const BignumInt *ap, *alim = a + len, *bp, *blim = b + len;
 315         BignumInt *cp, *cps;
 316
 317         /*
 318          * Multiply in the ordinary O(N^2) way.
 319          */
 320
 321         for (i = 0; i < 2 * len; i++)
 322             c[i] = 0;
 323
 324         for (cps = c, ap = a; ap < alim; ap++, cps++) {
 325             carry = 0;
 326             for (cp = cps, bp = b, i = blim - bp; i--; bp++, cp++) {
 327                 t = (MUL_WORD(*ap, *bp) + carry) + *cp;
 328                 *cp = (BignumInt) t;
 329                 carry = (BignumInt)(t >> BIGNUM_INT_BITS);
 330             }
 331             *cp = carry;
 332         }
 333     }
 334 }
 335
 336 /*
 337  * Variant form of internal_mul used for the initial step of
 338  * Montgomery reduction. Only bothers outputting 'len' words
 339  * (everything above that is thrown away).
 340  */
 341 static void internal_mul_low(const BignumInt *a, const BignumInt *b,
 342                              BignumInt *c, int len, BignumInt *scratch)
 343 {
 344     if (len > KARATSUBA_THRESHOLD) {
 345         int i;
 346
 347         /*
 348          * Karatsuba-aware version of internal_mul_low. As before, we
 349          * express each input value as a shifted combination of two
 350          * halves:
 351          *
 352          *   a = a_1 D + a_0
 353          *   b = b_1 D + b_0
 354          *
 355          * Then the full product is, as before,
 356          *
 357          *  ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0
 358          *
 359          * Provided we choose D on the large side (so that a_0 and b_0
 360          * are _at least_ as long as a_1 and b_1), we don't need the
 361          * topmost term at all, and we only need half of the middle
 362          * term. So there's no point in doing the proper Karatsuba
 363          * optimisation which computes the middle term using the top
 364          * one, because we'd take as long computing the top one as
 365          * just computing the middle one directly.
 366          *
 367          * So instead, we do a much more obvious thing: we call the
 368          * fully optimised internal_mul to compute a_0 b_0, and we
 369          * recursively call ourself to compute the _bottom halves_ of
 370          * a_1 b_0 and a_0 b_1, each of which we add into the result
 371          * in the obvious way.
 372          *
 373          * In other words, there's no actual Karatsuba _optimisation_
 374          * in this function; the only benefit in doing it this way is
 375          * that we call internal_mul proper for a large part of the
 376          * work, and _that_ can optimise its operation.
 377          */
 378
 379         int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */
 380
 381         /*
 382          * Scratch space for the various bits and pieces we're going
 383          * to be adding together: we need botlen*2 words for a_0 b_0
 384          * (though we may end up throwing away its topmost word), and
 385          * toplen words for each of a_1 b_0 and a_0 b_1. That adds up
 386          * to exactly 2*len.
 387          */
 388
 389         /* a_0 b_0 */
 390         internal_mul(a, b, scratch + 2*toplen, botlen, scratch + 2*len);
 391
 392         /* a_1 b_0 */
 393         internal_mul_low(a + botlen, b, scratch + toplen, toplen,
 394                          scratch + 2*len);
 395
 396         /* a_0 b_1 */
 397         internal_mul_low(a, b + botlen, scratch, toplen, scratch + 2*len);
 398
 399         /* Copy the bottom half of the big coefficient into place */
 400         for (i = 0; i < botlen; i++)
 401             c[i] = scratch[2*toplen + i];
 402
 403         /* Add the two small coefficients, throwing away the returned carry */
 404         internal_add(scratch, scratch + toplen, scratch, toplen);
 405
 406         /* And add that to the large coefficient, leaving the result in c. */
 407         internal_add(scratch, scratch + 2*toplen + botlen,
 408                      c + botlen, toplen);
 409
 410     } else {
 411         int i;
 412         BignumInt carry;
 413         BignumDblInt t;
 414         const BignumInt *ap, *alim = a + len, *bp;
 415         BignumInt *cp, *cps, *clim = c + len;
 416
 417         /*
 418          * Multiply in the ordinary O(N^2) way.
 419          */
 420
 421         for (i = 0; i < len; i++)
 422             c[i] = 0;
 423
 424         for (cps = c, ap = a; ap < alim; ap++, cps++) {
 425             carry = 0;
 426             for (cp = cps, bp = b, i = clim - cp; i--; bp++, cp++) {
 427                 t = (MUL_WORD(*ap, *bp) + carry) + *cp;
 428                 *cp = (BignumInt) t;
 429                 carry = (BignumInt)(t >> BIGNUM_INT_BITS);
 430             }
 431         }
 432     }
 433 }
 434
 435 /*
 436  * Montgomery reduction. Expects x to be a little-endian array of 2*len
 437  * BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len *
 438  * BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array
 439  * a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <=
 440  * x' < n.
 441  *
 442  * 'n' and 'mninv' should be little-endian arrays of 'len' BignumInts
 443  * each, containing respectively n and the multiplicative inverse of
 444  * -n mod r.
 445  *
 446  * 'tmp' is an array of BignumInt used as scratch space, of length at
 447  * least 3*len + mul_compute_scratch(len).
 448  */
 449 static void monty_reduce(BignumInt *x, const BignumInt *n,
 450                          const BignumInt *mninv, BignumInt *tmp, int len)
 451 {
 452     int i;
 453     BignumInt carry;
 454
 455     /*
 456      * Multiply x by (-n)^{-1} mod r. This gives us a value m such
 457      * that mn is congruent to -x mod r. Hence, mn+x is an exact
 458      * multiple of r, and is also (obviously) congruent to x mod n.
 459      */
 460     internal_mul_low(x, mninv, tmp, len, tmp + 3*len);
 461
 462     /*
 463      * Compute t = (mn+x)/r in ordinary, non-modular, integer
 464      * arithmetic. By construction this is exact, and is congruent mod
 465      * n to x * r^{-1}, i.e. the answer we want.
 466      *
 467      * The following multiply leaves that answer in the _most_
 468      * significant half of the 'x' array, so then we must shift it
 469      * down.
 470      */
 471     internal_mul(tmp, n, tmp+len, len, tmp + 3*len);
 472     carry = internal_add(x, tmp+len, x, 2*len);
 473     for (i = 0; i < len; i++)
 474         x[i] = x[len + i], x[len + i] = 0;
 475
 476     /*
 477      * Reduce t mod n. This doesn't require a full-on division by n,
 478      * but merely a test and single optional subtraction, since we can
 479      * show that 0 <= t < 2n.
 480      *
 481      * Proof:
 482      *  + we computed m mod r, so 0 <= m < r.
 483      *  + so 0 <= mn < rn, obviously
 484      *  + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn
 485      *  + yielding 0 <= (mn+x)/r < 2n as required.
 486      */
 487     if (!carry) {
 488         for (i = len; i-- > 0; )
 489             if (x[i] != n[i])
 490                 break;
 491     }
 492     if (carry || i < 0 || x[i] > n[i])
 493         internal_sub(x, n, x, len);
 494 }
 495
 496 static void internal_add_shifted(BignumInt *number,
 497                                  unsigned n, int shift)
 498 {
 499     int word = 1 + (shift / BIGNUM_INT_BITS);
 500     int bshift = shift % BIGNUM_INT_BITS;
 501     BignumDblInt addend;
 502
 503     addend = (BignumDblInt)n << bshift;
 504
 505     while (addend) {
 506         addend += number[word];
 507         number[word] = (BignumInt) addend & BIGNUM_INT_MASK;
 508         addend >>= BIGNUM_INT_BITS;
 509         word++;
 510     }
 511 }
 512
 513 /*
 514  * Compute a = a % m.
 515  * Input in first alen words of a and first mlen words of m.
 516  * Output in first alen words of a
 517  * (of which last alen-mlen words will be zero).
 518  * The MSW of m MUST have its high bit set.
 519  * Quotient is accumulated in the `quotient' array. Quotient parts
 520  * are shifted left by `qshift' before adding into quot.
 521  */
 522 static void internal_mod(BignumInt *a, int alen,
 523                          BignumInt *m, int mlen,
 524                          BignumInt *quot, int qshift)
 525 {
 526     BignumInt m0, m1;
 527     unsigned int h;
 528     int i, j, k;
 529
 530     m0 = m[mlen - 1];
 531     if (mlen > 1)
 532         m1 = m[mlen - 2];
 533     else
 534         m1 = 0;
 535
 536     for (i = alen, h = 0; i-- >= mlen; ) {
 537         BignumDblInt t;
 538         unsigned int q, r, c, ai1;
 539
 540         if (i)
 541             ai1 = a[i - 1];
 542         else
 543             ai1 = 0;
 544
 545         /* Find q = h:a[i] / m0 */
 546         if (h >= m0) {
 547             /*
 548              * Special case.
 549              *
 550              * To illustrate it, suppose a BignumInt is 8 bits, and
 551              * we are dividing (say) A1:23:45:67 by A1:B2:C3. Then
 552              * our initial division will be 0xA123 / 0xA1, which
 553              * will give a quotient of 0x100 and a divide overflow.
 554              * However, the invariants in this division algorithm
 555              * are not violated, since the full number A1:23:... is
 556              * _less_ than the quotient prefix A1:B2:... and so the
 557              * following correction loop would have sorted it out.
 558              *
 559              * In this situation we set q to be the largest
 560              * quotient we _can_ stomach (0xFF, of course).
 561              */
 562             q = BIGNUM_INT_MASK;
 563         } else {
 564             /* Macro doesn't want an array subscript expression passed
 565              * into it (see definition), so use a temporary. */
 566             BignumInt tmplo = a[i];
 567             DIVMOD_WORD(q, r, h, tmplo, m0);
 568
 569             /* Refine our estimate of q by looking at
 570              h:a[i]:a[i-1] / m0:m1 */
 571             t = MUL_WORD(m1, q);
 572             if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
 573                 q--;
 574                 t -= m1;
 575                 r = (r + m0) & BIGNUM_INT_MASK;     /* overflow? */
 576                 if (r >= (BignumDblInt) m0 &&
 577                     t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
 578             }
 579         }
 580
 581         j = i + 1 - mlen;
 582
 583         /* Subtract q * m from a[i...] */
 584         c = 0;
 585         for (k = 0; k < mlen; k++) {
 586             t = MUL_WORD(q, m[k]);
 587             t += c;
 588             c = (unsigned)(t >> BIGNUM_INT_BITS);
 589             if ((BignumInt) t > a[j + k])
 590                 c++;
 591             a[j + k] -= (BignumInt) t;
 592         }
 593
 594         /* Add back m in case of borrow */
 595         if (c != h) {
 596             t = 0;
 597             for (k = 0; k < mlen; k++) {
 598                 t += m[k];
 599                 t += a[j + k];
 600                 a[j + k] = (BignumInt) t;
 601                 t = t >> BIGNUM_INT_BITS;
 602             }
 603             q--;
 604         }
 605
 606         if (quot)
 607             internal_add_shifted(quot, q,
 608                                  qshift + BIGNUM_INT_BITS * (i + 1 - mlen));
 609
 610         if (i >= mlen) {
 611             h = a[i];
 612             a[i] = 0;
 613         }
 614     }
 615 }
 616
 617 static void shift_left(BignumInt *x, int xlen, int shift)
 618 {
 619     int i;
 620
 621     if (!shift)
 622         return;
 623     for (i = xlen; --i > 0; )
 624         x[i] = (x[i] << shift) | (x[i - 1] >> (BIGNUM_INT_BITS - shift));
 625     x[0] = x[0] << shift;
 626 }
 627
 628 static void shift_right(BignumInt *x, int xlen, int shift)
 629 {
 630     int i;
 631
 632     if (!shift || !xlen)
 633         return;
 634     xlen--;
 635     for (i = 0; i < xlen; i++)
 636         x[i] = (x[i] >> shift) | (x[i + 1] << (BIGNUM_INT_BITS - shift));
 637     x[i] = x[i] >> shift;
 638 }
 639
 640 /*
 641  * Compute (base ^ exp) % mod, the pedestrian way.
 642  */
 643 Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod)
 644 {
 645     BignumInt *a, *b, *n, *m, *scratch;
 646     int mshift;
 647     int mlen, scratchlen, i, j;
 648     Bignum base, result;
 649
 650     /*
 651      * The most significant word of mod needs to be non-zero. It
 652      * should already be, but let's make sure.
 653      */
 654     assert(mod[mod[0]] != 0);
 655
 656     /*
 657      * Make sure the base is smaller than the modulus, by reducing
 658      * it modulo the modulus if not.
 659      */
 660     base = bigmod(base_in, mod);
 661
 662     /* Allocate m of size mlen, copy mod to m */
 663     mlen = mod[0];
 664     m = snewn(mlen, BignumInt);
 665     for (j = 0; j < mlen; j++)
 666         m[j] = mod[j + 1];
 667
 668     /* Shift m left to make msb bit set */
 669     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
 670         if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
 671             break;
 672     if (mshift)
 673         shift_left(m, mlen, mshift);
 674
 675     /* Allocate n of size mlen, copy base to n */
 676     n = snewn(mlen, BignumInt);
 677     for (i = 0; i < (int)base[0]; i++)
 678         n[i] = base[i + 1];
 679     for (; i < mlen; i++)
 680         n[i] = 0;
 681
 682     /* Allocate a and b of size 2*mlen. Set a = 1 */
 683     a = snewn(2 * mlen, BignumInt);
 684     b = snewn(2 * mlen, BignumInt);
 685     a[0] = 1;
 686     for (i = 1; i < 2 * mlen; i++)
 687         a[i] = 0;
 688
 689     /* Scratch space for multiplies */
 690     scratchlen = mul_compute_scratch(mlen);
 691     scratch = snewn(scratchlen, BignumInt);
 692
 693     /* Skip leading zero bits of exp. */
 694     i = 0;
 695     j = BIGNUM_INT_BITS-1;
 696     while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
 697         j--;
 698         if (j < 0) {
 699             i++;
 700             j = BIGNUM_INT_BITS-1;
 701         }
 702     }
 703
 704     /* Main computation */
 705     while (i < (int)exp[0]) {
 706         while (j >= 0) {
 707             internal_mul(a, a, b, mlen, scratch);
 708             internal_mod(b, mlen * 2, m, mlen, NULL, 0);
 709             if ((exp[exp[0] - i] & (1 << j)) != 0) {
 710                 internal_mul(b, n, a, mlen, scratch);
 711                 internal_mod(a, mlen * 2, m, mlen, NULL, 0);
 712             } else {
 713                 BignumInt *t;
 714                 t = a;
 715                 a = b;
 716                 b = t;
 717             }
 718             j--;
 719         }
 720         i++;
 721         j = BIGNUM_INT_BITS-1;
 722     }
 723
 724     /* Fixup result in case the modulus was shifted */
 725     if (mshift) {
 726         shift_left(a, mlen + 1, mshift);
 727         internal_mod(a, mlen + 1, m, mlen, NULL, 0);
 728         shift_right(a, mlen, mshift);
 729     }
 730
 731     /* Copy result to buffer */
 732     result = newbn(mod[0]);
 733     for (i = 0; i < mlen; i++)
 734         result[i + 1] = a[i];
 735     while (result[0] > 1 && result[result[0]] == 0)
 736         result[0]--;
 737
 738     /* Free temporary arrays */
 739     for (i = 0; i < 2 * mlen; i++)
 740         a[i] = 0;
 741     sfree(a);
 742     for (i = 0; i < scratchlen; i++)
 743         scratch[i] = 0;
 744     sfree(scratch);
 745     for (i = 0; i < 2 * mlen; i++)
 746         b[i] = 0;
 747     sfree(b);
 748     for (i = 0; i < mlen; i++)
 749         m[i] = 0;
 750     sfree(m);
 751     for (i = 0; i < mlen; i++)
 752         n[i] = 0;
 753     sfree(n);
 754
 755     freebn(base);
 756
 757     return result;
 758 }
 759
 760 /*
 761  * Compute (base ^ exp) % mod. Uses the Montgomery multiplication
 762  * technique where possible, falling back to modpow_simple otherwise.
 763  */
 764 Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
 765 {
 766     BignumInt *a, *b, *x, *n, *mninv, *scratch;
 767     int len, scratchlen, i, j;
 768     Bignum base, base2, r, rn, inv, result;
 769
 770     /*
 771      * The most significant word of mod needs to be non-zero. It
 772      * should already be, but let's make sure.
 773      */
 774     assert(mod[mod[0]] != 0);
 775
 776     /*
 777      * mod had better be odd, or we can't do Montgomery multiplication
 778      * using a power of two at all.
 779      */
 780     if (!(mod[1] & 1))
 781         return modpow_simple(base_in, exp, mod);
 782
 783     /*
 784      * Make sure the base is smaller than the modulus, by reducing
 785      * it modulo the modulus if not.
 786      */
 787     base = bigmod(base_in, mod);
 788
 789     /*
 790      * Compute the inverse of n mod r, for monty_reduce. (In fact we
 791      * want the inverse of _minus_ n mod r, but we'll sort that out
 792      * below.)
 793      */
 794     len = mod[0];
 795     r = bn_power_2(BIGNUM_INT_BITS * len);
 796     inv = modinv(mod, r);
 797
 798     /*
 799      * Multiply the base by r mod n, to get it into Montgomery
 800      * representation.
 801      */
 802     base2 = modmul(base, r, mod);
 803     freebn(base);
 804     base = base2;
 805
 806     rn = bigmod(r, mod);               /* r mod n, i.e. Montgomerified 1 */
 807
 808     freebn(r);                         /* won't need this any more */
 809
 810     /*
 811      * Set up internal arrays of the right lengths containing the base,
 812      * the modulus, and the modulus's inverse.
 813      */
 814     n = snewn(len, BignumInt);
 815     for (j = 0; j < len; j++)
 816         n[j] = mod[j + 1];
 817
 818     mninv = snewn(len, BignumInt);
 819     for (j = 0; j < len; j++)
 820         mninv[j] = (j < (int)inv[0] ? inv[j + 1] : 0);
 821     freebn(inv);         /* we don't need this copy of it any more */
 822     /* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
 823     x = snewn(len, BignumInt);
 824     for (j = 0; j < len; j++)
 825         x[j] = 0;
 826     internal_sub(x, mninv, mninv, len);
 827
 828     /* x = snewn(len, BignumInt); */ /* already done above */
 829     for (j = 0; j < len; j++)
 830         x[j] = (j < (int)base[0] ? base[j + 1] : 0);
 831     freebn(base);        /* we don't need this copy of it any more */
 832
 833     a = snewn(2*len, BignumInt);
 834     b = snewn(2*len, BignumInt);
 835     for (j = 0; j < len; j++)
 836         a[j] = (j < (int)rn[0] ? rn[j + 1] : 0);
 837     freebn(rn);
 838
 839     /* Scratch space for multiplies */
 840     scratchlen = 3*len + mul_compute_scratch(len);
 841     scratch = snewn(scratchlen, BignumInt);
 842
 843     /* Skip leading zero bits of exp. */
 844     i = 0;
 845     j = BIGNUM_INT_BITS-1;
 846     while (i < (int)exp[0] && (exp[exp[0] - i] & (1 << j)) == 0) {
 847         j--;
 848         if (j < 0) {
 849             i++;
 850             j = BIGNUM_INT_BITS-1;
 851         }
 852     }
 853
 854     /* Main computation */
 855     while (i < (int)exp[0]) {
 856         while (j >= 0) {
 857             internal_mul(a, a, b, len, scratch);
 858             monty_reduce(b, n, mninv, scratch, len);
 859             if ((exp[exp[0] - i] & (1 << j)) != 0) {
 860                 internal_mul(b, x, a, len,  scratch);
 861                 monty_reduce(a, n, mninv, scratch, len);
 862             } else {
 863                 BignumInt *t;
 864                 t = a;
 865                 a = b;
 866                 b = t;
 867             }
 868             j--;
 869         }
 870         i++;
 871         j = BIGNUM_INT_BITS-1;
 872     }
 873
 874     /*
 875      * Final monty_reduce to get back from the adjusted Montgomery
 876      * representation.
 877      */
 878     monty_reduce(a, n, mninv, scratch, len);
 879
 880     /* Copy result to buffer */
 881     result = newbn(mod[0]);
 882     for (i = 0; i < len; i++)
 883         result[i + 1] = a[i];
 884     while (result[0] > 1 && result[result[0]] == 0)
 885         result[0]--;
 886
 887     /* Free temporary arrays */
 888     for (i = 0; i < scratchlen; i++)
 889         scratch[i] = 0;
 890     sfree(scratch);
 891     for (i = 0; i < 2 * len; i++)
 892         a[i] = 0;
 893     sfree(a);
 894     for (i = 0; i < 2 * len; i++)
 895         b[i] = 0;
 896     sfree(b);
 897     for (i = 0; i < len; i++)
 898         mninv[i] = 0;
 899     sfree(mninv);
 900     for (i = 0; i < len; i++)
 901         n[i] = 0;
 902     sfree(n);
 903     for (i = 0; i < len; i++)
 904         x[i] = 0;
 905     sfree(x);
 906
 907     return result;
 908 }
 909
 910 /*
 911  * Compute (p * q) % mod.
 912  * The most significant word of mod MUST be non-zero.
 913  * We assume that the result array is the same size as the mod array.
 914  */
 915 Bignum modmul(Bignum p, Bignum q, Bignum mod)
 916 {
 917     BignumInt *a, *n, *m, *o, *scratch;
 918     int mshift, scratchlen;
 919     int pqlen, mlen, rlen, i, j;
 920     Bignum result;
 921
 922     /* Allocate m of size mlen, copy mod to m */
 923     mlen = mod[0];
 924     m = snewn(mlen, BignumInt);
 925     for (j = 0; j < mlen; j++)
 926         m[j] = mod[j + 1];
 927
 928     /* Shift m left to make msb bit set */
 929     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
 930         if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
 931             break;
 932     if (mshift)
 933         shift_left(m, mlen, mshift);
 934
 935     pqlen = (p[0] > q[0] ? p[0] : q[0]);
 936
 937     /* Make sure that we're allowing enough space.  The shifting below will
 938      * underflow the vectors we allocate if `pqlen' is too small.
 939      */
 940     if (2*pqlen <= mlen)
 941         pqlen = mlen/2 + 1;
 942
 943     /* Allocate n of size pqlen, copy p to n */
 944     n = snewn(pqlen, BignumInt);
 945     for (i = 0; i < (int)p[0]; i++)
 946         n[i] = p[i + 1];
 947     for (; i < pqlen; i++)
 948         n[i] = 0;
 949
 950     /* Allocate o of size pqlen, copy q to o */
 951     o = snewn(pqlen, BignumInt);
 952     for (i = 0; i < (int)q[0]; i++)
 953         o[i] = q[i + 1];
 954     for (; i < pqlen; i++)
 955         o[i] = 0;
 956
 957     /* Allocate a of size 2*pqlen for result */
 958     a = snewn(2 * pqlen, BignumInt);
 959
 960     /* Scratch space for multiplies */
 961     scratchlen = mul_compute_scratch(pqlen);
 962     scratch = snewn(scratchlen, BignumInt);
 963
 964     /* Main computation */
 965     internal_mul(n, o, a, pqlen, scratch);
 966     internal_mod(a, pqlen * 2, m, mlen, NULL, 0);
 967
 968     /* Fixup result in case the modulus was shifted */
 969     if (mshift) {
 970         shift_left(a, mlen + 1, mshift);
 971         internal_mod(a, mlen + 1, m, mlen, NULL, 0);
 972         shift_right(a, mlen, mshift);
 973     }
 974
 975     /* Copy result to buffer */
 976     rlen = (mlen < pqlen * 2 ? mlen : pqlen * 2);
 977     result = newbn(rlen);
 978     for (i = 0; i < rlen; i++)
 979         result[i + 1] = a[i];
 980     while (result[0] > 1 && result[result[0]] == 0)
 981         result[0]--;
 982
 983     /* Free temporary arrays */
 984     for (i = 0; i < scratchlen; i++)
 985         scratch[i] = 0;
 986     sfree(scratch);
 987     for (i = 0; i < 2 * pqlen; i++)
 988         a[i] = 0;
 989     sfree(a);
 990     for (i = 0; i < mlen; i++)
 991         m[i] = 0;
 992     sfree(m);
 993     for (i = 0; i < pqlen; i++)
 994         n[i] = 0;
 995     sfree(n);
 996     for (i = 0; i < pqlen; i++)
 997         o[i] = 0;
 998     sfree(o);
 999
1000     return result;
1001 }
1002
1003 /*
1004  * Compute p % mod.
1005  * The most significant word of mod MUST be non-zero.
1006  * We assume that the result array is the same size as the mod array.
1007  * We optionally write out a quotient if `quotient' is non-NULL.
1008  * We can avoid writing out the result if `result' is NULL.
1009  */
1010 static void bigdivmod(Bignum p, Bignum mod, Bignum result, Bignum quotient)
1011 {
1012     BignumInt *n, *m;
1013     int mshift;
1014     int plen, mlen, i, j;
1015
1016     /* Allocate m of size mlen, copy mod to m */
1017     mlen = mod[0];
1018     m = snewn(mlen, BignumInt);
1019     for (j = 0; j < mlen; j++)
1020         m[j] = mod[j + 1];
1021
1022     /* Shift m left to make msb bit set */
1023     for (mshift = 0; mshift < BIGNUM_INT_BITS-1; mshift++)
1024         if ((m[mlen - 1] << mshift) & BIGNUM_TOP_BIT)
1025             break;
1026     if (mshift)
1027         shift_left(m, mlen, mshift);
1028
1029     plen = p[0];
1030     /* Ensure plen > mlen */
1031     if (plen <= mlen)
1032         plen = mlen + 1;
1033
1034     /* Allocate n of size plen, copy p to n */
1035     n = snewn(plen, BignumInt);
1036     for (i = 0; i < (int)p[0]; i++)
1037         n[i] = p[i + 1];
1038     for (; i < plen; i++)
1039         n[i] = 0;
1040
1041     /* Main computation */
1042     internal_mod(n, plen, m, mlen, quotient, mshift);
1043
1044     /* Fixup result in case the modulus was shifted */
1045     if (mshift) {
1046         shift_left(n, mlen + 1, mshift);
1047         internal_mod(n, plen, m, mlen, quotient, 0);
1048         shift_right(n, mlen, mshift);
1049     }
1050
1051     /* Copy result to buffer */
1052     if (result) {
1053         for (i = 0; i < (int)result[0]; i++)
1054             result[i + 1] = i < plen ? n[i] : 0;
1055         bn_restore_invariant(result);
1056     }
1057
1058     /* Free temporary arrays */
1059     for (i = 0; i < mlen; i++)
1060         m[i] = 0;
1061     sfree(m);
1062     for (i = 0; i < plen; i++)
1063         n[i] = 0;
1064     sfree(n);
1065 }
1066
1067 /*
1068  * Decrement a number.
1069  */
1070 void decbn(Bignum bn)
1071 {
1072     int i = 1;
1073     while (i < (int)bn[0] && bn[i] == 0)
1074         bn[i++] = BIGNUM_INT_MASK;
1075     bn[i]--;
1076 }
1077
1078 Bignum bignum_from_bytes(const unsigned char *data, int nbytes)
1079 {
1080     Bignum result;
1081     int w, i;
1082
1083     w = (nbytes + BIGNUM_INT_BYTES - 1) / BIGNUM_INT_BYTES; /* bytes->words */
1084
1085     result = newbn(w);
1086     for (i = 1; i <= w; i++)
1087         result[i] = 0;
1088     for (i = nbytes; i--;) {
1089         unsigned char byte = *data++;
1090         result[1 + i / BIGNUM_INT_BYTES] |= byte << (8*i % BIGNUM_INT_BITS);
1091     }
1092
1093     while (result[0] > 1 && result[result[0]] == 0)
1094         result[0]--;
1095     return result;
1096 }
1097
1098 /*
1099  * Read an SSH-1-format bignum from a data buffer. Return the number
1100  * of bytes consumed, or -1 if there wasn't enough data.
1101  */
1102 int ssh1_read_bignum(const unsigned char *data, int len, Bignum * result)
1103 {
1104     const unsigned char *p = data;
1105     int i;
1106     int w, b;
1107
1108     if (len < 2)
1109         return -1;
1110
1111     w = 0;
1112     for (i = 0; i < 2; i++)
1113         w = (w << 8) + *p++;
1114     b = (w + 7) / 8;                   /* bits -> bytes */
1115
1116     if (len < b+2)
1117         return -1;
1118
1119     if (!result)                       /* just return length */
1120         return b + 2;
1121
1122     *result = bignum_from_bytes(p, b);
1123
1124     return p + b - data;
1125 }
1126
1127 /*
1128  * Return the bit count of a bignum, for SSH-1 encoding.
1129  */
1130 int bignum_bitcount(Bignum bn)
1131 {
1132     int bitcount = bn[0] * BIGNUM_INT_BITS - 1;
1133     while (bitcount >= 0
1134            && (bn[bitcount / BIGNUM_INT_BITS + 1] >> (bitcount % BIGNUM_INT_BITS)) == 0) bitcount--;
1135     return bitcount + 1;
1136 }
1137
1138 /*
1139  * Return the byte length of a bignum when SSH-1 encoded.
1140  */
1141 int ssh1_bignum_length(Bignum bn)
1142 {
1143     return 2 + (bignum_bitcount(bn) + 7) / 8;
1144 }
1145
1146 /*
1147  * Return the byte length of a bignum when SSH-2 encoded.
1148  */
1149 int ssh2_bignum_length(Bignum bn)
1150 {
1151     return 4 + (bignum_bitcount(bn) + 8) / 8;
1152 }
1153
1154 /*
1155  * Return a byte from a bignum; 0 is least significant, etc.
1156  */
1157 int bignum_byte(Bignum bn, int i)
1158 {
1159     if (i >= (int)(BIGNUM_INT_BYTES * bn[0]))
1160         return 0;                      /* beyond the end */
1161     else
1162         return (bn[i / BIGNUM_INT_BYTES + 1] >>
1163                 ((i % BIGNUM_INT_BYTES)*8)) & 0xFF;
1164 }
1165
1166 /*
1167  * Return a bit from a bignum; 0 is least significant, etc.
1168  */
1169 int bignum_bit(Bignum bn, int i)
1170 {
1171     if (i >= (int)(BIGNUM_INT_BITS * bn[0]))
1172         return 0;                      /* beyond the end */
1173     else
1174         return (bn[i / BIGNUM_INT_BITS + 1] >> (i % BIGNUM_INT_BITS)) & 1;
1175 }
1176
1177 /*
1178  * Set a bit in a bignum; 0 is least significant, etc.
1179  */
1180 void bignum_set_bit(Bignum bn, int bitnum, int value)
1181 {
1182     if (bitnum >= (int)(BIGNUM_INT_BITS * bn[0]))
1183         abort();                       /* beyond the end */
1184     else {
1185         int v = bitnum / BIGNUM_INT_BITS + 1;
1186         int mask = 1 << (bitnum % BIGNUM_INT_BITS);
1187         if (value)
1188             bn[v] |= mask;
1189         else
1190             bn[v] &= ~mask;
1191     }
1192 }
1193
1194 /*
1195  * Write a SSH-1-format bignum into a buffer. It is assumed the
1196  * buffer is big enough. Returns the number of bytes used.
1197  */
1198 int ssh1_write_bignum(void *data, Bignum bn)
1199 {
1200     unsigned char *p = data;
1201     int len = ssh1_bignum_length(bn);
1202     int i;
1203     int bitc = bignum_bitcount(bn);
1204
1205     *p++ = (bitc >> 8) & 0xFF;
1206     *p++ = (bitc) & 0xFF;
1207     for (i = len - 2; i--;)
1208         *p++ = bignum_byte(bn, i);
1209     return len;
1210 }
1211
1212 /*
1213  * Compare two bignums. Returns like strcmp.
1214  */
1215 int bignum_cmp(Bignum a, Bignum b)
1216 {
1217     int amax = a[0], bmax = b[0];
1218     int i = (amax > bmax ? amax : bmax);
1219     while (i) {
1220         BignumInt aval = (i > amax ? 0 : a[i]);
1221         BignumInt bval = (i > bmax ? 0 : b[i]);
1222         if (aval < bval)
1223             return -1;
1224         if (aval > bval)
1225             return +1;
1226         i--;
1227     }
1228     return 0;
1229 }
1230
1231 /*
1232  * Right-shift one bignum to form another.
1233  */
1234 Bignum bignum_rshift(Bignum a, int shift)
1235 {
1236     Bignum ret;
1237     int i, shiftw, shiftb, shiftbb, bits;
1238     BignumInt ai, ai1;
1239
1240     bits = bignum_bitcount(a) - shift;
1241     ret = newbn((bits + BIGNUM_INT_BITS - 1) / BIGNUM_INT_BITS);
1242
1243     if (ret) {
1244         shiftw = shift / BIGNUM_INT_BITS;
1245         shiftb = shift % BIGNUM_INT_BITS;
1246         shiftbb = BIGNUM_INT_BITS - shiftb;
1247
1248         ai1 = a[shiftw + 1];
1249         for (i = 1; i <= (int)ret[0]; i++) {
1250             ai = ai1;
1251             ai1 = (i + shiftw + 1 <= (int)a[0] ? a[i + shiftw + 1] : 0);
1252             ret[i] = ((ai >> shiftb) | (ai1 << shiftbb)) & BIGNUM_INT_MASK;
1253         }
1254     }
1255
1256     return ret;
1257 }
1258
1259 /*
1260  * Non-modular multiplication and addition.
1261  */
1262 Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
1263 {
1264     int alen = a[0], blen = b[0];
1265     int mlen = (alen > blen ? alen : blen);
1266     int rlen, i, maxspot;
1267     int wslen;
1268     BignumInt *workspace;
1269     Bignum ret;
1270
1271     /* mlen space for a, mlen space for b, 2*mlen for result,
1272      * plus scratch space for multiplication */
1273     wslen = mlen * 4 + mul_compute_scratch(mlen);
1274     workspace = snewn(wslen, BignumInt);
1275     for (i = 0; i < mlen; i++) {
1276         workspace[0 * mlen + i] = i < (int)a[0] ? a[i + 1] : 0;
1277         workspace[1 * mlen + i] = i < (int)b[0] ? b[i + 1] : 0;
1278     }
1279
1280     internal_mul(workspace + 0 * mlen, workspace + 1 * mlen,
1281                  workspace + 2 * mlen, mlen, workspace + 4 * mlen);
1282
1283     /* now just copy the result back */
1284     rlen = alen + blen + 1;
1285     if (addend && rlen <= (int)addend[0])
1286         rlen = addend[0] + 1;
1287     ret = newbn(rlen);
1288     maxspot = 0;
1289     for (i = 0; i < (int)ret[0]; i++) {
1290         ret[i + 1] = (i < 2 * mlen ? workspace[2 * mlen + i] : 0);
1291         if (ret[i + 1] != 0)
1292             maxspot = i + 1;
1293     }
1294     ret[0] = maxspot;
1295
1296     /* now add in the addend, if any */
1297     if (addend) {
1298         BignumDblInt carry = 0;
1299         for (i = 1; i <= rlen; i++) {
1300             carry += (i <= (int)ret[0] ? ret[i] : 0);
1301             carry += (i <= (int)addend[0] ? addend[i] : 0);
1302             ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
1303             carry >>= BIGNUM_INT_BITS;
1304             if (ret[i] != 0 && i > maxspot)
1305                 maxspot = i;
1306         }
1307     }
1308     ret[0] = maxspot;
1309
1310     for (i = 0; i < wslen; i++)
1311         workspace[i] = 0;
1312     sfree(workspace);
1313     return ret;
1314 }
1315
1316 /*
1317  * Non-modular multiplication.
1318  */
1319 Bignum bigmul(Bignum a, Bignum b)
1320 {
1321     return bigmuladd(a, b, NULL);
1322 }
1323
1324 /*
1325  * Simple addition.
1326  */
1327 Bignum bigadd(Bignum a, Bignum b)
1328 {
1329     int alen = a[0], blen = b[0];
1330     int rlen = (alen > blen ? alen : blen) + 1;
1331     int i, maxspot;
1332     Bignum ret;
1333     BignumDblInt carry;
1334
1335     ret = newbn(rlen);
1336
1337     carry = 0;
1338     maxspot = 0;
1339     for (i = 1; i <= rlen; i++) {
1340         carry += (i <= (int)a[0] ? a[i] : 0);
1341         carry += (i <= (int)b[0] ? b[i] : 0);
1342         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
1343         carry >>= BIGNUM_INT_BITS;
1344         if (ret[i] != 0 && i > maxspot)
1345             maxspot = i;
1346     }
1347     ret[0] = maxspot;
1348
1349     return ret;
1350 }
1351
1352 /*
1353  * Subtraction. Returns a-b, or NULL if the result would come out
1354  * negative (recall that this entire bignum module only handles
1355  * positive numbers).
1356  */
1357 Bignum bigsub(Bignum a, Bignum b)
1358 {
1359     int alen = a[0], blen = b[0];
1360     int rlen = (alen > blen ? alen : blen);
1361     int i, maxspot;
1362     Bignum ret;
1363     BignumDblInt carry;
1364
1365     ret = newbn(rlen);
1366
1367     carry = 1;
1368     maxspot = 0;
1369     for (i = 1; i <= rlen; i++) {
1370         carry += (i <= (int)a[0] ? a[i] : 0);
1371         carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK);
1372         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
1373         carry >>= BIGNUM_INT_BITS;
1374         if (ret[i] != 0 && i > maxspot)
1375             maxspot = i;
1376     }
1377     ret[0] = maxspot;
1378
1379     if (!carry) {
1380         freebn(ret);
1381         return NULL;
1382     }
1383
1384     return ret;
1385 }
1386
1387 /*
1388  * Return a bignum which is the result of shifting another left by N bits.
1389  * If N is negative then you get a right shift instead.
1390  */
1391 Bignum biglsl(Bignum x, int n)
1392 {
1393     Bignum d;
1394     unsigned o, i;
1395
1396     /* Eliminate some simple special cases. */
1397     if (!n || !x[0]) return copybn(x);
1398     else if (n < 0) return biglsr(x, -n);
1399
1400     /* Some initial setup. */
1401     o = n/BIGNUM_INT_BITS;
1402     n %= BIGNUM_INT_BITS;
1403     d = newbn(x[0] + o + !!n);
1404
1405     /* Clear the low-significant words of d. */
1406     for (i = 1; i <= o; i++) d[i] = 0;
1407
1408     if (!n) {
1409         /* Easy case: we're shifting by a multiple of the word size, so we
1410          * can just copy whole words.
1411          */
1412         for (i = 1; i <= x[0]; i++) d[o + i] = x[i];
1413     } else {
1414         /* Hard case: destination words can be a combination of two source
1415          * words.
1416          */
1417
1418         /* Take the low bits from the least significant source word. */
1419         d[o + 1] = x[1] << n;
1420
1421         /* The intermediate words really are a combination of two source
1422          * words.
1423          */
1424         for (i = 2; i <= x[0]; i++)
1425             d[o + i] = (x[i] << n) | (x[i - 1] >> (BIGNUM_INT_BITS - n));
1426
1427         /* Finally, the high bits of the most significant input word. */
1428         d[o + i + 1] = x[i] >> (BIGNUM_INT_BITS - n);
1429     }
1430
1431     /* The destination length is a conservative estimate, so we'll need to
1432      * sort that out.
1433      */
1434     bn_restore_invariant(d);
1435
1436     /* We're done. */
1437     return d;
1438 }
1439
1440 /*
1441  * Return a bignum which is the result of shifting another right by N bits
1442  * (discarding the least significant N bits, and shifting zeroes in at the
1443  * most significant end).  If N is negative then you get a left shift
1444  * instead.
1445  */
1446 Bignum biglsr(Bignum x, int n)
1447 {
1448     Bignum d;
1449     unsigned o, i;
1450
1451     /* Eliminate some simple special cases. */
1452     if (!n || !x[0]) return copybn(x);
1453     else if (n < 0) return biglsl(x, -n);
1454
1455     /* Some initial setup. */
1456     o = n/BIGNUM_INT_BITS;
1457     n %= BIGNUM_INT_BITS;
1458     d = newbn(x[0] - o);
1459
1460     if (!n) {
1461         /* Simple case: we're shifting by a multiple of the word size, so we
1462          * can just copy whole words across.
1463          */
1464         for (i = o + 1; i <= x[0]; i++) d[i - o] = x[i];
1465     } else {
1466         /* Hard case: some destination words will be a combination of two
1467          * source words.  We get to discard some of the input words.
1468          */
1469
1470         /* The intermediate words are combinations of two input words. */
1471         for (i = o + 1; i < x[0]; i++)
1472             d[i - o] = (x[i] >> n) | (x[i + 1] << (BIGNUM_INT_BITS - n));
1473
1474         /* And finally the high-significance bits of the top source word. */
1475         d[i - o + 1] = x[i] << (BIGNUM_INT_BITS - n);
1476     }
1477
1478     /* The destination length is a conservative estimate, so we'll need to
1479      * sort that out.
1480      */
1481     bn_restore_invariant(d);
1482
1483     /* And we're done. */
1484     return d;
1485 }
1486
1487 /*
1488  * Create a bignum which is the bitmask covering another one. That
1489  * is, the smallest integer which is >= N and is also one less than
1490  * a power of two.
1491  */
1492 Bignum bignum_bitmask(Bignum n)
1493 {
1494     Bignum ret = copybn(n);
1495     int i;
1496     BignumInt j;
1497
1498     i = ret[0];
1499     while (n[i] == 0 && i > 0)
1500         i--;
1501     if (i <= 0)
1502         return ret;                    /* input was zero */
1503     j = 1;
1504     while (j < n[i])
1505         j = 2 * j + 1;
1506     ret[i] = j;
1507     while (--i > 0)
1508         ret[i] = BIGNUM_INT_MASK;
1509     return ret;
1510 }
1511
1512 /*
1513  * Convert a (max 32-bit) long into a bignum.
1514  */
1515 Bignum bignum_from_long(unsigned long nn)
1516 {
1517     Bignum ret;
1518     BignumDblInt n = nn;
1519
1520     ret = newbn(3);
1521     ret[1] = (BignumInt)(n & BIGNUM_INT_MASK);
1522     ret[2] = (BignumInt)((n >> BIGNUM_INT_BITS) & BIGNUM_INT_MASK);
1523     ret[3] = 0;
1524     ret[0] = (ret[2]  ? 2 : 1);
1525     return ret;
1526 }
1527
1528 /*
1529  * Add a long to a bignum.
1530  */
1531 Bignum bignum_add_long(Bignum number, unsigned long addendx)
1532 {
1533     Bignum ret = newbn(number[0] + 1);
1534     int i, maxspot = 0;
1535     BignumDblInt carry = 0, addend = addendx;
1536
1537     for (i = 1; i <= (int)ret[0]; i++) {
1538         carry += addend & BIGNUM_INT_MASK;
1539         carry += (i <= (int)number[0] ? number[i] : 0);
1540         addend >>= BIGNUM_INT_BITS;
1541         ret[i] = (BignumInt) carry & BIGNUM_INT_MASK;
1542         carry >>= BIGNUM_INT_BITS;
1543         if (ret[i] != 0)
1544             maxspot = i;
1545     }
1546     ret[0] = maxspot;
1547     return ret;
1548 }
1549
1550 /*
1551  * Compute the residue of a bignum, modulo a (max 16-bit) short.
1552  */
1553 unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
1554 {
1555     BignumDblInt mod, r;
1556     int i;
1557
1558     r = 0;
1559     mod = modulus;
1560     for (i = number[0]; i > 0; i--)
1561         r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
1562     return (unsigned short) r;
1563 }
1564
#ifdef DEBUG
/*
 * Debugging aid: print `prefix' (if any), then "0x" and the bignum in
 * upper-case hex via debug().  Leading `-' characters pad the output;
 * a newline is appended only when a prefix was given.
 *
 * NOTE(review): the padding width uses 4 nibbles per word, which
 * matches 16-bit words only -- confirm against BIGNUM_INT_BITS.
 */
void diagbn(char *prefix, Bignum md)
{
    static const char hex[] = "0123456789ABCDEF";
    int k, ndigits, padding;

    debug(("%s0x", prefix ? prefix : ""));

    ndigits = (3 + bignum_bitcount(md)) / 4;
    if (ndigits < 1)
        ndigits = 1;

    padding = 4 * md[0] - ndigits;
    for (k = 0; k < padding; k++)
        debug(("-"));

    /* most significant nibble first */
    for (k = ndigits - 1; k >= 0; k--) {
        int byte = bignum_byte(md, k / 2);
        debug(("%c", hex[(byte >> (4 * (k % 2))) & 0xF]));
    }

    if (prefix)
        debug(("\n"));
}
#endif
1587
1588 /*
1589  * Simple division.
1590  */
1591 Bignum bigdiv(Bignum a, Bignum b)
1592 {
1593     Bignum q = newbn(a[0]);
1594     bigdivmod(a, b, NULL, q);
1595     return q;
1596 }
1597
1598 /*
1599  * Simple remainder.
1600  */
1601 Bignum bigmod(Bignum a, Bignum b)
1602 {
1603     Bignum r = newbn(b[0]);
1604     bigdivmod(a, b, r, NULL);
1605     return r;
1606 }
1607
1608 /*
1609  * Greatest common divisor.
1610  */
1611 Bignum biggcd(Bignum av, Bignum bv)
1612 {
1613     Bignum a = copybn(av);
1614     Bignum b = copybn(bv);
1615
1616     while (bignum_cmp(b, Zero) != 0) {
1617         Bignum t = newbn(b[0]);
1618         bigdivmod(a, b, t, NULL);
1619         while (t[0] > 1 && t[t[0]] == 0)
1620             t[0]--;
1621         freebn(a);
1622         a = b;
1623         b = t;
1624     }
1625
1626     freebn(b);
1627     return a;
1628 }
1629
1630 /*
1631  * Modular inverse, using Euclid's extended algorithm.
1632  */
1633 Bignum modinv(Bignum number, Bignum modulus)
1634 {
1635     Bignum a = copybn(modulus);
1636     Bignum b = copybn(number);
1637     Bignum xp = copybn(Zero);
1638     Bignum x = copybn(One);
1639     int sign = +1;
1640
1641     while (bignum_cmp(b, One) != 0) {
1642         Bignum t = newbn(b[0]);
1643         Bignum q = newbn(a[0]);
1644         bigdivmod(a, b, t, q);
1645         while (t[0] > 1 && t[t[0]] == 0)
1646             t[0]--;
1647         freebn(a);
1648         a = b;
1649         b = t;
1650         t = xp;
1651         xp = x;
1652         x = bigmuladd(q, xp, t);
1653         sign = -sign;
1654         freebn(t);
1655         freebn(q);
1656     }
1657
1658     freebn(b);
1659     freebn(a);
1660     freebn(xp);
1661
1662     /* now we know that sign * x == 1, and that x < modulus */
1663     if (sign < 0) {
1664         /* set a new x to be modulus - x */
1665         Bignum newx = newbn(modulus[0]);
1666         BignumInt carry = 0;
1667         int maxspot = 1;
1668         int i;
1669
1670         for (i = 1; i <= (int)newx[0]; i++) {
1671             BignumInt aword = (i <= (int)modulus[0] ? modulus[i] : 0);
1672             BignumInt bword = (i <= (int)x[0] ? x[i] : 0);
1673             newx[i] = aword - bword - carry;
1674             bword = ~bword;
1675             carry = carry ? (newx[i] >= bword) : (newx[i] > bword);
1676             if (newx[i] != 0)
1677                 maxspot = i;
1678         }
1679         newx[0] = maxspot;
1680         freebn(x);
1681         x = newx;
1682     }
1683
1684     /* and return. */
1685     return x;
1686 }
1687
1688 /*
1689  * Extract the largest power of 2 dividing x, storing it in p2, and returning
1690  * the product of the remaining factors.
1691  */
1692 static Bignum extract_p2(Bignum x, unsigned *p2)
1693 {
1694     unsigned i, j, k, n;
1695     Bignum y;
1696
1697     /* If x is zero then the following won't work.  And if x is odd then
1698      * there's nothing very useful to do.
1699      */
1700     if (!x[0] || (x[1] & 1)) {
1701         *p2 = 0;
1702         return copybn(x);
1703     }
1704
1705     /* Find the power of two. */
1706     for (i = 0; !x[i + 1]; i++);
1707     for (j = 0; !((x[i + 1] >> j) & 1); j++);
1708     *p2 = i*BIGNUM_INT_BITS + j;
1709
1710     /* Work out how big the copy should be. */
1711     n = x[0] - i - 1;
1712     if (x[x[0]] >> j) n++;
1713
1714     /* Copy and shift down. */
1715     y = newbn(n);
1716     for (k = 1; k <= n; k++) {
1717         y[k] = x[k + i] >> j;
1718         if (j && k < x[0]) y[k] |= x[k + i + 1] << (BIGNUM_INT_BITS - j);
1719     }
1720
1721     /* Done. */
1722     return y;
1723 }
1724
1725 /*
1726  * Kronecker symbol (a|n).  The result is always in { -1, 0, +1 }, and is
1727  * zero if and only if a and n have a nontrivial common factor.  Most
1728  * usefully, if n is prime, this is the Legendre symbol, taking the value +1
1729  * if a is a quadratic residue mod n, and -1 otherwise; i.e., (a|p) ==
1730  * a^{(p-1)/2} (mod p).
1731  */
1732 int kronecker(Bignum a, Bignum n)
1733 {
1734     unsigned s, nn;
1735     int r = +1;
1736     Bignum t;
1737
1738     /* Special case for n = 0.  This is the same convention PARI uses,
1739      * except that we can't represent negative numbers.
1740      */
1741     if (bignum_cmp(n, Zero) == 0) {
1742         if (bignum_cmp(a, One) == 0) return +1;
1743         else return 0;
1744     }
1745
1746     /* Write n = 2^s t, with t odd.  If s > 0 and a is even, then the answer
1747      * is zero; otherwise throw in a factor of (-1)^s if a == 3 or 5 (mod 8).
1748      *
1749      * At this point, we have a copy of n, and must remember to free it when
1750      * we're done.  It's convenient to take a copy of a at the same time.
1751      */
1752     a = copybn(a);
1753     n = extract_p2(n, &s);
1754
1755     if (s && (!a[0] || !(a[1] & 1))) { r = 0; goto done; }
1756     else if ((s & 1) && ((a[1] & 7) == 3 || (a[1] & 7) == 5)) r = -r;
1757
1758     /* If n is (now) a unit then we're done. */
1759     if (bignum_cmp(n, One) == 0) goto done;
1760
1761     /* Reduce a modulo n before we go any further. */
1762     if (bignum_cmp(a, n) >= 0) { t = bigmod(a, n); freebn(a); a = t; }
1763
1764     /* Main loop. */
1765     for (;;) {
1766         if (bignum_cmp(a, Zero) == 0) { r = 0; goto done; }
1767
1768         /* Strip out and handle powers of two from a. */
1769         t = extract_p2(a, &s); freebn(a); a = t;
1770         nn = n[1] & 7;
1771         if ((s & 1) && (nn == 3 || nn == 5)) r = -r;
1772         if (bignum_cmp(a, One) == 0) break;
1773
1774         /* Swap, applying quadratic reciprocity. */
1775         if ((nn & 3) == 3 && (a[1] & 3) == 3) r = -r;
1776         t = bigmod(n, a); freebn(n); n = a; a = t;
1777     }
1778
1779     /* Tidy up: we're done. */
1780 done:
1781     freebn(a); freebn(n);
1782     return r;
1783 }
1784
1785 /*
1786  * Modular square root.  We must have p prime: extracting square roots modulo
1787  * composites is equivalent to factoring (but we don't check: you'll just get
1788  * the wrong answer).  Returns NULL if x is not a quadratic residue mod p.
1789  */
1790 Bignum modsqrt(Bignum x, Bignum p)
1791 {
1792     Bignum xinv, b, c, r, t, z, X, mone;
1793     unsigned i, j, s;
1794
1795     /* If x is not a quadratic residue then we will not go to space today. */
1796     if (kronecker(x, p) != +1) return NULL;
1797
1798     /* We need a quadratic nonresidue from somewhere.  Exactly half of all
1799      * units mod p are quadratic residues, but no efficient deterministic
1800      * algorithm for finding one is known.  So pick at random: we don't
1801      * expect this to take long.
1802      */
1803     z = newbn(p[0]);
1804     do {
1805         for (i = 1; i <= p[0]; i++) z[i] = rand();
1806         z[0] = p[0]; bn_restore_invariant(z);
1807     } while (kronecker(z, p) != -1);
1808     b = bigmod(z, p); freebn(z);
1809
1810     /* We need to compute a few things before we really get started. */
1811     xinv = modinv(x, p);                /* x^{-1} mod p */
1812     mone = bigsub(p, One);              /* p - 1 == -1 (mod p) */
1813     t = extract_p2(mone, &s);           /* 2^s t = p - 1 */
1814     c = modpow(b, t, p);                /* b^t (mod p) */
1815     z = bigadd(t, One); freebn(t); t = z; /* (t + 1) */
1816     shift_right(t + 1, t[0], 1); if (!t[t[0]]) t[0]--;
1817     r = modpow(x, t, p);                /* x^{(t+1)/2} (mod p) */
1818     freebn(b); freebn(mone); freebn(t);
1819
1820     /* OK, so how does this work anyway?
1821      *
1822      * We know that x^t is somewhere in the order-2^s subgroup of GF(p)^*;
1823      * and g = c^{-1} is a generator for this subgroup (since we know that
1824      * g^{2^{s-1}} = b^{(p-1)/2} = (b|p) = -1); so x^t = g^m for some m.  In
1825      * fact, we know that m is even because x is a square.  Suppose we can
1826      * determine m; then we know that x^t/g^m = 1, so x^{t+1}/c^m = x -- but
1827      * both t + 1 and m are even, so x^{(t+1)/2}/g^{m/2} is a square root of
1828      * x.
1829      *
1830      * Conveniently, finding the discrete log of an element X in a group of
1831      * order 2^s is easy.  Write X = g^m = g^{m_0+2k'}; then X^{2^{s-1}} =
1832      * g^{m_0 2^{s-1}} c^{m' 2^s} = g^{m_0 2^{s-1}} is either -1 or +1,
1833      * telling us that m_0 is 1 or 0 respectively.  Then X/g^{m_0} =
1834      * (g^2)^{m'} has order 2^{s-1} so we can continue inductively.  What we
1835      * end up with at the end is X/g^m.
1836      *
1837      * There are a few wrinkles.  As we proceed through the induction, the
1838      * generator for the subgroup will be c^{-2}, since we know that m is
1839      * even.  While we want the discrete log of X = x^t, we're actually going
1840      * to keep track of r, which will eventually be x^{(t+1)/2}/g^{m/2} =
1841      * x^{(t+1)/2} c^m, recovering X/g^m = r^2/x as we go.  We don't actually
1842      * form the discrete log explicitly, because the final result will
1843      * actually be the square root we want.
1844      */
1845     for (i = 1; i < s; i++) {
1846
1847         /* Determine X.  We could optimize this, only recomputing it when
1848          * it's been invalidated, but that's fiddlier and this isn't
1849          * performance critical.
1850          */
1851         z = modmul(r, r, p);
1852         X = modmul(z, xinv, p);
1853         freebn(z);
1854
1855         /* Determine X^{2^{s-1-i}}. */
1856         for (j = i + 1; j < s; j++)
1857             z = modmul(X, X, p), freebn(X), X = z;
1858
1859         /* Maybe accumulate a factor of c. */
1860         if (bignum_cmp(X, One) != 0)
1861             z = modmul(r, c, p), freebn(r), r = z;
1862
1863         /* Move on to the next smaller subgroup. */
1864         z = modmul(c, c, p), freebn(c), c = z;
1865         freebn(X);
1866     }
1867
1868     /* Of course, there are two square roots of x.  For predictability's sake
1869      * we'll always return the one in [1..(p - 1)/2].  The other is, of
1870      * course, p - r.
1871      */
1872     z = bigsub(p, r);
1873     if (bignum_cmp(r, z) < 0)
1874         freebn(z);
1875     else {
1876         freebn(r);
1877         r = z;
1878     }
1879
1880     /* We're done. */
1881     freebn(xinv); freebn(c);
1882     return r;
1883 }
1884
1885 /*
1886  * Render a bignum into decimal. Return a malloced string holding
1887  * the decimal representation.
1888  */
1889 char *bignum_decimal(Bignum x)
1890 {
1891     int ndigits, ndigit;
1892     int i, iszero;
1893     BignumDblInt carry;
1894     char *ret;
1895     BignumInt *workspace;
1896
1897     /*
1898      * First, estimate the number of digits. Since log(10)/log(2)
1899      * is just greater than 93/28 (the joys of continued fraction
1900      * approximations...) we know that for every 93 bits, we need
1901      * at most 28 digits. This will tell us how much to malloc.
1902      *
1903      * Formally: if x has i bits, that means x is strictly less
1904      * than 2^i. Since 2 is less than 10^(28/93), this is less than
1905      * 10^(28i/93). We need an integer power of ten, so we must
1906      * round up (rounding down might make it less than x again).
1907      * Therefore if we multiply the bit count by 28/93, rounding
1908      * up, we will have enough digits.
1909      *
1910      * i=0 (i.e., x=0) is an irritating special case.
1911      */
1912     i = bignum_bitcount(x);
1913     if (!i)
1914         ndigits = 1;                   /* x = 0 */
1915     else
1916         ndigits = (28 * i + 92) / 93;  /* multiply by 28/93 and round up */
1917     ndigits++;                         /* allow for trailing \0 */
1918     ret = snewn(ndigits, char);
1919
1920     /*
1921      * Now allocate some workspace to hold the binary form as we
1922      * repeatedly divide it by ten. Initialise this to the
1923      * big-endian form of the number.
1924      */
1925     workspace = snewn(x[0], BignumInt);
1926     for (i = 0; i < (int)x[0]; i++)
1927         workspace[i] = x[x[0] - i];
1928
1929     /*
1930      * Next, write the decimal number starting with the last digit.
1931      * We use ordinary short division, dividing 10 into the
1932      * workspace.
1933      */
1934     ndigit = ndigits - 1;
1935     ret[ndigit] = '\0';
1936     do {
1937         iszero = 1;
1938         carry = 0;
1939         for (i = 0; i < (int)x[0]; i++) {
1940             carry = (carry << BIGNUM_INT_BITS) + workspace[i];
1941             workspace[i] = (BignumInt) (carry / 10);
1942             if (workspace[i])
1943                 iszero = 0;
1944             carry %= 10;
1945         }
1946         ret[--ndigit] = (char) (carry + '0');
1947     } while (!iszero);
1948
1949     /*
1950      * There's a chance we've fallen short of the start of the
1951      * string. Correct if so.
1952      */
1953     if (ndigit > 0)
1954         memmove(ret, ret + ndigit, ndigits - ndigit);
1955
1956     /*
1957      * Done.
1958      */
1959     sfree(workspace);
1960     return ret;
1961 }
1962
1963 #ifdef TESTBN
1964
1965 #include <stdio.h>
1966 #include <stdlib.h>
1967 #include <ctype.h>
1968
1969 /*
1970  * gcc -Wall -g -O0 -DTESTBN -o testbn sshbn.c misc.c conf.c tree234.c unix/uxmisc.c -I. -I unix -I charset
1971  *
1972  * Then feed to this program's standard input the output of
1973  * testdata/bignum.py .
1974  */
1975
/*
 * Fatal-error hook for the standalone test build: print the
 * printf-style message 'p' (with its arguments) to stderr, prefixed
 * by "FATAL ERROR: " and followed by a newline, then exit with
 * status 1. Does not return.
 */
void modalfatalbox(char *p, ...)
{
    va_list args;

    fputs("FATAL ERROR: ", stderr);

    va_start(args, p);
    vfprintf(stderr, p, args);
    va_end(args);

    putc('\n', stderr);
    exit(1);
}
1986
/*
 * Value of the hex digit character c. Decimal digits map to 0..9;
 * anything above '9' is treated as a letter, folded to upper case by
 * masking with 0xDF, and mapped so that 'A' gives 10. The argument is
 * evaluated more than once, and no validation is done: callers pass
 * only characters already checked with isxdigit().
 */
#define fromxdigit(c) ( (c) <= '9' ? (c) - '0' : ((c) & 0xDF) - 'A' + 10 )
1988
1989 int main(int argc, char **argv)
1990 {
1991     char *buf;
1992     int line = 0;
1993     int passes = 0, fails = 0;
1994
1995     while ((buf = fgetline(stdin)) != NULL) {
1996         int maxlen = strlen(buf);
1997         unsigned char *data = snewn(maxlen, unsigned char);
1998         unsigned char *ptrs[5], *q;
1999         int ptrnum;
2000         char *bufp = buf;
2001
2002         line++;
2003
2004         q = data;
2005         ptrnum = 0;
2006
2007         while (*bufp && !isspace((unsigned char)*bufp))
2008             bufp++;
2009         if (bufp)
2010             *bufp++ = '\0';
2011
2012         while (*bufp) {
2013             char *start, *end;
2014             int i;
2015
2016             while (*bufp && !isxdigit((unsigned char)*bufp))
2017                 bufp++;
2018             start = bufp;
2019
2020             if (!*bufp)
2021                 break;
2022
2023             while (*bufp && isxdigit((unsigned char)*bufp))
2024                 bufp++;
2025             end = bufp;
2026
2027             if (ptrnum >= lenof(ptrs))
2028                 break;
2029             ptrs[ptrnum++] = q;
2030
2031             for (i = -((end - start) & 1); i < end-start; i += 2) {
2032                 unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));
2033                 val = val * 16 + fromxdigit(start[i+1]);
2034                 *q++ = val;
2035             }
2036
2037             ptrs[ptrnum] = q;
2038         }
2039
2040         if (!strcmp(buf, "mul")) {
2041             Bignum a, b, c, p;
2042
2043             if (ptrnum != 3) {
2044                 printf("%d: mul with %d parameters, expected 3\n", line, ptrnum);
2045                 exit(1);
2046             }
2047             a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
2048             b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
2049             c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
2050             p = bigmul(a, b);
2051
2052             if (bignum_cmp(c, p) == 0) {
2053                 passes++;
2054             } else {
2055                 char *as = bignum_decimal(a);
2056                 char *bs = bignum_decimal(b);
2057                 char *cs = bignum_decimal(c);
2058                 char *ps = bignum_decimal(p);
2059
2060                 printf("%d: fail: %s * %s gave %s expected %s\n",
2061                        line, as, bs, ps, cs);
2062                 fails++;
2063
2064                 sfree(as);
2065                 sfree(bs);
2066                 sfree(cs);
2067                 sfree(ps);
2068             }
2069             freebn(a);
2070             freebn(b);
2071             freebn(c);
2072             freebn(p);
2073         } else if (!strcmp(buf, "pow")) {
2074             Bignum base, expt, modulus, expected, answer;
2075
2076             if (ptrnum != 4) {
2077                 printf("%d: mul with %d parameters, expected 4\n", line, ptrnum);
2078                 exit(1);
2079             }
2080
2081             base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
2082             expt = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
2083             modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
2084             expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
2085             answer = modpow(base, expt, modulus);
2086
2087             if (bignum_cmp(expected, answer) == 0) {
2088                 passes++;
2089             } else {
2090                 char *as = bignum_decimal(base);
2091                 char *bs = bignum_decimal(expt);
2092                 char *cs = bignum_decimal(modulus);
2093                 char *ds = bignum_decimal(answer);
2094                 char *ps = bignum_decimal(expected);
2095
2096                 printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n",
2097                        line, as, bs, cs, ds, ps);
2098                 fails++;
2099
2100                 sfree(as);
2101                 sfree(bs);
2102                 sfree(cs);
2103                 sfree(ds);
2104                 sfree(ps);
2105             }
2106             freebn(base);
2107             freebn(expt);
2108             freebn(modulus);
2109             freebn(expected);
2110             freebn(answer);
2111         } else if (!strcmp(buf, "modsqrt")) {
2112             Bignum x, p, expected, answer;
2113
2114             if (ptrnum != 3) {
2115                 printf("%d: modsqrt with %d parameters, expected 3\n", line, ptrnum);
2116                 exit(1);
2117             }
2118
2119             x = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
2120             p = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
2121             expected = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
2122             answer = modsqrt(x, p);
2123             if (!answer)
2124                 answer = copybn(Zero);
2125
2126             if (bignum_cmp(expected, answer) == 0) {
2127                 passes++;
2128             } else {
2129                 char *xs = bignum_decimal(x);
2130                 char *ps = bignum_decimal(p);
2131                 char *qs = bignum_decimal(answer);
2132                 char *ws = bignum_decimal(expected);
2133
2134                 printf("%d: fail: sqrt(%s) mod %s gave %s expected %s\n",
2135                        line, xs, ps, qs, ws);
2136                 fails++;
2137
2138                 sfree(xs);
2139                 sfree(ps);
2140                 sfree(qs);
2141                 sfree(ws);
2142             }
2143             freebn(p);
2144             freebn(x);
2145             freebn(expected);
2146             freebn(answer);
2147         } else {
2148             printf("%d: unrecognised test keyword: '%s'\n", line, buf);
2149             exit(1);
2150         }
2151
2152         sfree(buf);
2153         sfree(data);
2154     }
2155
2156     printf("passed %d failed %d total %d\n", passes, fails, passes+fails);
2157     return fails != 0;
2158 }
2159
2160 #endif