X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/e2f967aeff8b2c0b42d0b0a209f47543d61e48b1..ac4a43c1da1d1554247212289cf483934b9e6d1c:/symm/gcm.c

diff --git a/symm/gcm.c b/symm/gcm.c
index 73b28517..25be7482 100644
--- a/symm/gcm.c
+++ b/symm/gcm.c
@@ -121,7 +121,7 @@
 
 /*----- Low-level utilities -----------------------------------------------*/
 
-/* --- @mult@ --- *
+/* --- @mult@, @divt@ --- *
  *
  * Arguments:	@const gcm_params *p@ = pointer to the parameters
  *		@uint32 *z@ = where to write the result
@@ -129,11 +129,11 @@
  *
  * Returns:	---
  *
- * Use:		Multiply the input field element by %$t$%, and write the
- *		product to @z@.  It's safe for @x@ and @z@ to be equal, but
- *		they should not otherwise overlap.  Both input and output are
- *		in big-endian form, i.e., with the lowest-degree coefficients
- *		in the most significant bits.
+ * Use:		Multiply or divide the input field element by %$t$%, and
+ *		write the product or quotient to @z@.  It's safe for @x@ and
+ *		@z@ to be equal, but they should not otherwise overlap.  Both
+ *		input and output are in big-endian form, i.e., with the
+ *		lowest-degree coefficients in the most significant bits.
  */
 
 static void mult(const gcm_params *p, uint32 *z, const uint32 *x)
@@ -145,6 +145,18 @@ static void mult(const gcm_params *p, uint32 *z, const uint32 *x)
   for (i = 0; i < p->n; i++) { t = x[i]; z[i] = (t >> 1) ^ c; c = t << 31; }
 }
 
+#if CPUFAM_X86 || CPUFAM_AMD64 || CPUFAM_ARMEL
+static void divt(const gcm_params *p, uint32 *z, const uint32 *x)
+{
+  uint32 m, c, t;  /* reduction mask, carry bit, current word */
+  unsigned i;
+
+  t = x[0]; m = -((t >> 31)&1u); c = m&1u;  /* m = ~0 iff x[0] bit 31 set */
+  for (i = p->n - 1; i; i--) { t = x[i]; z[i] = (t << 1) | c; c = t >> 31; }
+  t = x[0]; z[0] = ((t ^ (m&p->poly)) << 1) | c;  /* last, so z == x is ok */
+}
+#endif
+
 /* --- @mul@ --- *
  *
  * Arguments:	@const gcm_params *p@ = pointer to the parameters
@@ -238,22 +250,26 @@ static void simple_mktable(const gcm_params *p,
 static void pclmul_mktable(const gcm_params *p,
 			   uint32 *ktab, const uint32 *k)
 {
-  unsigned n = p->n;
+  unsigned i, n = p->n;
   unsigned nz;
-  uint32 *t;
+  uint32 k_over_t[GCM_NMAX], *t;
 
-  /* We just need to store the value in a way which is convenient for the
-   * assembler code to read back.  That involves reordering the words, and,
-   * in the case of 96-bit blocks, padding with zeroes to fill out a 128-bit
-   * chunk.
+  /* We need to divide the value by t (to compensate for the one-bit shift
+   * resulting from GCM's backwards bit ordering) and store the value in a
+   * way which is convenient for the assembler code to read back.  That
+   * involves reordering the words, and, in the case of 96-bit blocks,
+   * padding with zeroes to fill out a 128-bit chunk.
    */
 
+  if (!(p->f&GCMF_SWAP)) divt(p, k_over_t, k);
+  else {
+    for (i = 0; i < n; i++) k_over_t[i] = ENDSWAP32(k[i]);
+    divt(p, k_over_t, k_over_t);
+  }
+
   if (n == 3) nz = 1;
   else nz = 0;
-  t = ktab + n + nz;
-
-  if (p->f&GCMF_SWAP) while (n--) { *--t = ENDSWAP32(*k); k++; }
-  else while (n--) *--t = *k++;
+  k = k_over_t; t = ktab + n + nz; while (n--) *--t = *k++;
   while (nz--) *--t = 0;
 }
 #endif
@@ -262,28 +278,27 @@ static void pclmul_mktable(const gcm_params *p,
 static void arm_crypto_mktable(const gcm_params *p,
 			       uint32 *ktab, const uint32 *k)
 {
-  unsigned n = p->n;
-  uint32 *t;
+  unsigned i, n = p->n;
+  uint32 k_over_t[GCM_NMAX], *t;
 
-  /* We just need to store the value in a way which is convenient for the
-   * assembler code to read back.  That involves swapping the bytes in each
-   * 64-bit lane.
+  /* We need to divide the value by t (to compensate for the one-bit shift
+   * resulting from GCM's backwards bit ordering) and store the value in a
+   * way which is convenient for the assembler code to read back.  That
+   * involves swapping the bytes in each 64-bit lane.
    */
 
-  t = ktab;
-  if (p->f&GCMF_SWAP) {
-    while (n >= 2) {
-      t[1] = ENDSWAP32(k[0]); t[0] = ENDSWAP32(k[1]);
-      t += 2; k += 2; n -= 2;
-    }
-    if (n) { t[1] = ENDSWAP32(k[0]); t[0] = 0; }
-  } else {
-    while (n >= 2) {
-      t[1] = k[0]; t[0] = k[1];
-      t += 2; k += 2; n -= 2;
-    }
-    if (n) { t[1] = k[0]; t[0] = 0; }
+  if (!(p->f&GCMF_SWAP)) divt(p, k_over_t, k);
+  else {
+    for (i = 0; i < n; i++) k_over_t[i] = ENDSWAP32(k[i]);
+    divt(p, k_over_t, k_over_t);
+  }
+
+  t = ktab; k = k_over_t;
+  while (n >= 2) {
+    t[1] = k[0]; t[0] = k[1];
+    t += 2; k += 2; n -= 2;
   }
+  if (n) { t[1] = k[0]; t[0] = 0; }
 }
 #endif
 
@@ -407,12 +422,14 @@ static void pclmul_recover_k(const gcm_params *p,
   const uint32 *t;
 
   /* The representation is already independent of the blockcipher endianness.
-   * We need to compensate for padding, and reorder the words.
+   * We need to compensate for padding, reorder the words, and multiply by t
+   * to compensate for the factor of t we divided out earlier.
    */
 
   if (n == 3) nz = 1; else nz = 0;
   t = ktab + n + nz;
   while (n--) *k++ = *--t;
+  mult(p, k - p->n, k - p->n);
 }
 #endif
 
@@ -424,12 +441,14 @@ static void arm_crypto_recover_k(const gcm_params *p,
   const uint32 *t;
 
   /* The representation is already independent of the blockcipher endianness.
-   * We only need to reorder the words.
+   * We only need to reorder the words, and multiply by t to compensate for
+   * the factor of t we divided out earlier.
    */
 
   t = ktab;
   while (n >= 2) { k[1] = t[0]; k[0] = t[1]; t += 2; k += 2; n -= 2; }
-  if (n) k[0] = t[1];
+  if (n) { k[0] = t[1]; k++; n--; }
+  mult(p, k - p->n, k - p->n);
 }
 #endif
 
@@ -778,9 +797,14 @@ void gcm_concat(const gcm_params *p, uint32 *z, const uint32 *x,
 
 #ifdef TEST_RIG
 
+#include <mLib/macros.h>
 #include <mLib/quis.h>
 #include <mLib/testrig.h>
 
+#ifdef ENABLE_ASM_DEBUG
+#  include "regdump.h"
+#endif
+
 static void report_failure(const char *test, unsigned nbits,
 			   const char *ref, dstr v[], dstr *d)
 {
@@ -820,7 +844,7 @@ static int test_mul(uint32 poly, dstr v[])
 
 #define CHECK(E, what, ref) do {					\
   for (i = 0; i < nbits/32; i++) STORE32_##E(d.buf + 4*i, z[i]);	\
-  if (memcmp(d.buf, v[I_##ref].buf, nbits/8) != 0)			\
+  if (MEMCMP(d.buf, !=, v[I_##ref].buf, nbits/8))			\
     { ok = 0; report_failure(what, nbits, #ref, v, &d); }		\
 } while (0)
 
@@ -873,6 +897,9 @@ GCM_WIDTHS(TEST)
 int main(int argc,  char *argv[])
 {
   ego(argv[0]);
+#ifdef ENABLE_ASM_DEBUG
+  regdump_init();
+#endif
   test_run(argc, argv, defs, SRCDIR"/t/gcm");
   return (0);
 }