Improvements from Spyros Blanas to the MSVC optimisations of r6469:

[u/mdw/putty] / sshbn.c
diff --git a/sshbn.c b/sshbn.c

index 3742227..728b6fa 100644 (file)
--- a/sshbn.c
+++ b/sshbn.c
@@ -3,11 +3,26 @@
   */
  
  #include <stdio.h>
+#include <assert.h>
  #include <stdlib.h>
  #include <string.h>
  
  #include "misc.h"
  
+/*
+ * Usage notes:
+ *  * Do not call the DIVMOD_WORD macro with expressions such as array
+ *    subscripts, as some implementations object to this (see below).
+ *  * Note that none of the division methods below will cope if the
+ *    quotient won't fit into BIGNUM_INT_BITS bits. Callers should be careful
+ *    to avoid this case.
+ *    If this condition occurs, in the case of the x86 DIV instruction,
+ *    an overflow exception will occur, which (according to a correspondent)
+ *    will manifest on Windows as something like
+ *      0xC0000095: Integer overflow
+ *    The C variant won't give the right answer, either.
+ */
+
  #if defined __GNUC__ && defined __i386__
  typedef unsigned long BignumInt;
  typedef unsigned long long BignumDblInt;
@@ -19,6 +34,23 @@ typedef unsigned long long BignumDblInt;
      __asm__("div %2" : \
             "=d" (r), "=a" (q) : \
             "r" (w), "d" (hi), "a" (lo))
+#elif defined _MSC_VER && defined _M_IX86
+typedef unsigned __int32 BignumInt;
+typedef unsigned __int64 BignumDblInt;
+#define BIGNUM_INT_MASK  0xFFFFFFFFUL
+#define BIGNUM_TOP_BIT   0x80000000UL
+#define BIGNUM_INT_BITS  32
+#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
+/* Note: MASM interprets array subscripts in the macro arguments as
+ * assembler syntax, which gives the wrong answer. Don't supply them.
+ * <http://msdn2.microsoft.com/en-us/library/bf1dw62z.aspx> */
+#define DIVMOD_WORD(q, r, hi, lo, w) do { \
+    __asm mov edx, hi \
+    __asm mov eax, lo \
+    __asm div w \
+    __asm mov r, edx \
+    __asm mov q, eax \
+} while(0)
  #else
  typedef unsigned short BignumInt;
  typedef unsigned long BignumDblInt;
@@ -133,7 +165,7 @@ static void internal_add_shifted(BignumInt *number,
      int bshift = shift % BIGNUM_INT_BITS;
      BignumDblInt addend;
  
-    addend = n << bshift;
+    addend = (BignumDblInt)n << bshift;
  
      while (addend) {
         addend += number[word];
@@ -184,17 +216,39 @@ static void internal_mod(BignumInt *a, int alen,
             ai1 = a[i + 1];
  
         /* Find q = h:a[i] / m0 */
-       DIVMOD_WORD(q, r, h, a[i], m0);
-
-       /* Refine our estimate of q by looking at
-          h:a[i]:a[i+1] / m0:m1 */
-       t = MUL_WORD(m1, q);
-       if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
-           q--;
-           t -= m1;
-           r = (r + m0) & BIGNUM_INT_MASK;     /* overflow? */
-           if (r >= (BignumDblInt) m0 &&
-               t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
+       if (h >= m0) {
+           /*
+            * Special case.
+            * 
+            * To illustrate it, suppose a BignumInt is 8 bits, and
+            * we are dividing (say) A1:23:45:67 by A1:B2:C3. Then
+            * our initial division will be 0xA123 / 0xA1, which
+            * will give a quotient of 0x100 and a divide overflow.
+            * However, the invariants in this division algorithm
+            * are not violated, since the full number A1:23:... is
+            * _less_ than the divisor prefix A1:B2:... and so the
+            * following correction loop would have sorted it out.
+            * 
+            * In this situation we set q to be the largest
+            * quotient we _can_ stomach (0xFF, of course).
+            */
+           q = BIGNUM_INT_MASK;
+       } else {
+           /* Macro doesn't want an array subscript expression passed
+            * into it (see definition), so use a temporary. */
+           BignumInt tmplo = a[i];
+           DIVMOD_WORD(q, r, h, tmplo, m0);
+
+           /* Refine our estimate of q by looking at
+            h:a[i]:a[i+1] / m0:m1 */
+           t = MUL_WORD(m1, q);
+           if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
+               q--;
+               t -= m1;
+               r = (r + m0) & BIGNUM_INT_MASK;     /* overflow? */
+               if (r >= (BignumDblInt) m0 &&
+                   t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) q--;
+           }
         }
  
         /* Subtract q * m from a[i...] */
@@ -226,16 +280,25 @@ static void internal_mod(BignumInt *a, int alen,
  
  /*
   * Compute (base ^ exp) % mod.
- * The base MUST be smaller than the modulus.
- * The most significant word of mod MUST be non-zero.
- * We assume that the result array is the same size as the mod array.
   */
-Bignum modpow(Bignum base, Bignum exp, Bignum mod)
+Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
  {
      BignumInt *a, *b, *n, *m;
      int mshift;
      int mlen, i, j;
-    Bignum result;
+    Bignum base, result;
+
+    /*
+     * The most significant word of mod needs to be non-zero. It
+     * should already be, but let's make sure.
+     */
+    assert(mod[mod[0]] != 0);
+
+    /*
+     * Make sure the base is smaller than the modulus, by reducing
+     * it modulo the modulus if not.
+     */
+    base = bigmod(base_in, mod);
  
      /* Allocate m of size mlen, copy mod to m */
      /* We use big endian internally */
@@ -331,6 +394,8 @@ Bignum modpow(Bignum base, Bignum exp, Bignum mod)
         n[i] = 0;
      sfree(n);
  
+    freebn(base);
+
      return result;
  }
  
@@ -527,20 +592,26 @@ Bignum bignum_from_bytes(const unsigned char *data, int nbytes)
  }
  
  /*
- * Read an ssh1-format bignum from a data buffer. Return the number
- * of bytes consumed.
+ * Read an SSH-1-format bignum from a data buffer. Return the number
+ * of bytes consumed, or -1 if there wasn't enough data.
   */
-int ssh1_read_bignum(const unsigned char *data, Bignum * result)
+int ssh1_read_bignum(const unsigned char *data, int len, Bignum * result)
  {
      const unsigned char *p = data;
      int i;
      int w, b;
  
+    if (len < 2)
+       return -1;
+
      w = 0;
      for (i = 0; i < 2; i++)
         w = (w << 8) + *p++;
      b = (w + 7) / 8;                  /* bits -> bytes */
  
+    if (len < b+2)
+       return -1;
+
      if (!result)                      /* just return length */
         return b + 2;
  
@@ -550,7 +621,7 @@ int ssh1_read_bignum(const unsigned char *data, Bignum * result)
  }
  
  /*
- * Return the bit count of a bignum, for ssh1 encoding.
+ * Return the bit count of a bignum, for SSH-1 encoding.
   */
  int bignum_bitcount(Bignum bn)
  {
@@ -561,7 +632,7 @@ int bignum_bitcount(Bignum bn)
  }
  
  /*
- * Return the byte length of a bignum when ssh1 encoded.
+ * Return the byte length of a bignum when SSH-1 encoded.
   */
  int ssh1_bignum_length(Bignum bn)
  {
@@ -569,7 +640,7 @@ int ssh1_bignum_length(Bignum bn)
  }
  
  /*
- * Return the byte length of a bignum when ssh2 encoded.
+ * Return the byte length of a bignum when SSH-2 encoded.
   */
  int ssh2_bignum_length(Bignum bn)
  {
@@ -617,7 +688,7 @@ void bignum_set_bit(Bignum bn, int bitnum, int value)
  }
  
  /*
- * Write a ssh1-format bignum into a buffer. It is assumed the
+ * Write an SSH-1-format bignum into a buffer. It is assumed the
   * buffer is big enough. Returns the number of bytes used.
   */
  int ssh1_write_bignum(void *data, Bignum bn)
@@ -729,6 +800,7 @@ Bignum bigmuladd(Bignum a, Bignum b, Bignum addend)
      }
      ret[0] = maxspot;
  
+    sfree(workspace);
      return ret;
  }
  
@@ -814,7 +886,7 @@ unsigned short bignum_mod_short(Bignum number, unsigned short modulus)
      r = 0;
      mod = modulus;
      for (i = number[0]; i > 0; i--)
-       r = (r * 65536 + number[i]) % mod;
+       r = (r * (BIGNUM_TOP_BIT % mod) * 2 + number[i] % mod) % mod;
      return (unsigned short) r;
  }
  
@@ -908,6 +980,7 @@ Bignum modinv(Bignum number, Bignum modulus)
         x = bigmuladd(q, xp, t);
         sign = -sign;
         freebn(t);
+       freebn(q);
      }
  
      freebn(b);
@@ -1009,5 +1082,6 @@ char *bignum_decimal(Bignum x)
      /*
       * Done.
       */
+    sfree(workspace);
      return ret;
  }