X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/e2f967aeff8b2c0b42d0b0a209f47543d61e48b1..ac4a43c1da1d1554247212289cf483934b9e6d1c:/symm/gcm.c diff --git a/symm/gcm.c b/symm/gcm.c index 73b28517..25be7482 100644 --- a/symm/gcm.c +++ b/symm/gcm.c @@ -121,7 +121,7 @@ /*----- Low-level utilities -----------------------------------------------*/ -/* --- @mult@ --- * +/* --- @mult@, @divt@ --- * * * Arguments: @const gcm_params *p@ = pointer to the parameters * @uint32 *z@ = where to write the result @@ -129,11 +129,11 @@ * * Returns: --- * - * Use: Multiply the input field element by %$t$%, and write the - * product to @z@. It's safe for @x@ and @z@ to be equal, but - * they should not otherwise overlap. Both input and output are - * in big-endian form, i.e., with the lowest-degree coefficients - * in the most significant bits. + * Use: Multiply or divide the input field element by %$t$%, and + * write the product or quotient to @z@. It's safe for @x@ and + * @z@ to be equal, but they should not otherwise overlap. Both + * input and output are in big-endian form, i.e., with the + * lowest-degree coefficients in the most significant bits. */ static void mult(const gcm_params *p, uint32 *z, const uint32 *x) @@ -145,6 +145,18 @@ static void mult(const gcm_params *p, uint32 *z, const uint32 *x) for (i = 0; i < p->n; i++) { t = x[i]; z[i] = (t >> 1) ^ c; c = t << 31; } } +#if CPUFAM_X86 || CPUFAM_AMD64 || CPUFAM_ARMEL +static void divt(const gcm_params *p, uint32 *z, const uint32 *x) +{ + uint32 m, c, t; + unsigned i; + + t = x[0]; m = -((t >> 31)&1u); c = m&1u; + for (i = p->n - 1; i; i--) { t = x[i]; z[i] = (t << 1) | c; c = t >> 31; } + t = x[0]; z[0] = ((t ^ (m&p->poly)) << 1) | c; +} +#endif + /* --- @mul@ --- * * * Arguments: @const gcm_params *p@ = pointer to the parameters @@ -238,22 +250,26 @@ static void simple_mktable(const gcm_params *p, static void pclmul_mktable(const gcm_params *p, uint32 *ktab, const uint32 *k) { - unsigned n = p->n; + unsigned i, n = p->n; unsigned nz; - uint32 *t; + uint32 k_over_t[GCM_NMAX], *t; - /* We just need to store the value in a way which is convenient for the - * assembler code to read back. That involves reordering the words, and, - * in the case of 96-bit blocks, padding with zeroes to fill out a 128-bit - * chunk. + /* We need to divide the value by t (to compensate for the one-bit shift + * resulting from GCM's backwards bit ordering) and store the value in a + * way which is convenient for the assembler code to read back. That + * involves reordering the words, and, in the case of 96-bit blocks, + * padding with zeroes to fill out a 128-bit chunk. */ + if (!(p->f&GCMF_SWAP)) divt(p, k_over_t, k); + else { + for (i = 0; i < n; i++) k_over_t[i] = ENDSWAP32(k[i]); + divt(p, k_over_t, k_over_t); + } + if (n == 3) nz = 1; else nz = 0; - t = ktab + n + nz; - - if (p->f&GCMF_SWAP) while (n--) { *--t = ENDSWAP32(*k); k++; } - else while (n--) *--t = *k++; + k = k_over_t; t = ktab + n + nz; while (n--) *--t = *k++; while (nz--) *--t = 0; } #endif @@ -262,28 +278,27 @@ static void pclmul_mktable(const gcm_params *p, static void arm_crypto_mktable(const gcm_params *p, uint32 *ktab, const uint32 *k) { - unsigned n = p->n; - uint32 *t; + unsigned i, n = p->n; + uint32 k_over_t[GCM_NMAX], *t; - /* We just need to store the value in a way which is convenient for the - * assembler code to read back. That involves swapping the bytes in each - * 64-bit lane. + /* We need to divide the value by t (to compensate for the one-bit shift + * resulting from GCM's backwards bit ordering) and store the value in a + * way which is convenient for the assembler code to read back. That + * involves swapping the bytes in each 64-bit lane. */ - t = ktab; - if (p->f&GCMF_SWAP) { - while (n >= 2) { - t[1] = ENDSWAP32(k[0]); t[0] = ENDSWAP32(k[1]); - t += 2; k += 2; n -= 2; - } - if (n) { t[1] = ENDSWAP32(k[0]); t[0] = 0; } - } else { - while (n >= 2) { - t[1] = k[0]; t[0] = k[1]; - t += 2; k += 2; n -= 2; - } - if (n) { t[1] = k[0]; t[0] = 0; } + if (!(p->f&GCMF_SWAP)) divt(p, k_over_t, k); + else { + for (i = 0; i < n; i++) k_over_t[i] = ENDSWAP32(k[i]); + divt(p, k_over_t, k_over_t); + } + + t = ktab; k = k_over_t; + while (n >= 2) { + t[1] = k[0]; t[0] = k[1]; + t += 2; k += 2; n -= 2; } + if (n) { t[1] = k[0]; t[0] = 0; } } #endif @@ -407,12 +422,14 @@ static void pclmul_recover_k(const gcm_params *p, const uint32 *t; /* The representation is already independent of the blockcipher endianness. - * We need to compensate for padding, and reorder the words. + * We need to compensate for padding, reorder the words, and multiply by t + * to compensate for the factor of t we divided out earlier. */ if (n == 3) nz = 1; else nz = 0; t = ktab + n + nz; while (n--) *k++ = *--t; + mult(p, k - p->n, k - p->n); } #endif @@ -424,12 +441,14 @@ static void arm_crypto_recover_k(const gcm_params *p, const uint32 *t; /* The representation is already independent of the blockcipher endianness. - * We only need to reorder the words. + * We only need to reorder the words, and multiply by t to compensate for + * the factor of t we divided out earlier. */ t = ktab; while (n >= 2) { k[1] = t[0]; k[0] = t[1]; t += 2; k += 2; n -= 2; } - if (n) k[0] = t[1]; + if (n) { k[0] = t[1]; k++; n--; } + mult(p, k - p->n, k - p->n); } #endif @@ -778,9 +797,14 @@ void gcm_concat(const gcm_params *p, uint32 *z, const uint32 *x, #ifdef TEST_RIG +#include #include #include +#ifdef ENABLE_ASM_DEBUG +# include "regdump.h" +#endif + static void report_failure(const char *test, unsigned nbits, const char *ref, dstr v[], dstr *d) { @@ -820,7 +844,7 @@ static int test_mul(uint32 poly, dstr v[]) #define CHECK(E, what, ref) do { \ for (i = 0; i < nbits/32; i++) STORE32_##E(d.buf + 4*i, z[i]); \ - if (memcmp(d.buf, v[I_##ref].buf, nbits/8) != 0) \ + if (MEMCMP(d.buf, !=, v[I_##ref].buf, nbits/8)) \ { ok = 0; report_failure(what, nbits, #ref, v, &d); } \ } while (0) @@ -873,6 +897,9 @@ GCM_WIDTHS(TEST) int main(int argc, char *argv[]) { ego(argv[0]); +#ifdef ENABLE_ASM_DEBUG + regdump_init(); +#endif test_run(argc, argv, defs, SRCDIR"/t/gcm"); return (0); }