/*----- Low-level utilities -----------------------------------------------*/
-/* --- @mult@ --- *
+/* --- @mult@, @divt@ --- *
*
* Arguments: @const gcm_params *p@ = pointer to the parameters
* @uint32 *z@ = where to write the result
*
* Returns: ---
*
- * Use: Multiply the input field element by %$t$%, and write the
- * product to @z@. It's safe for @x@ and @z@ to be equal, but
- * they should not otherwise overlap. Both input and output are
- * in big-endian form, i.e., with the lowest-degree coefficients
- * in the most significant bits.
+ * Use: Multiply or divide the input field element by %$t$%, and
+ * write the product or quotient to @z@. It's safe for @x@ and
+ * @z@ to be equal, but they should not otherwise overlap. Both
+ * input and output are in big-endian form, i.e., with the
+ * lowest-degree coefficients in the most significant bits.
*/
static void mult(const gcm_params *p, uint32 *z, const uint32 *x)
for (i = 0; i < p->n; i++) { t = x[i]; z[i] = (t >> 1) ^ c; c = t << 31; }
}
+#if CPUFAM_X86 || CPUFAM_AMD64 || CPUFAM_ARMEL
+static void divt(const gcm_params *p, uint32 *z, const uint32 *x)
+{
+ uint32 m, c, t;
+ unsigned i;
+
+ t = x[0]; m = -((t >> 31)&1u); c = m&1u;
+ for (i = p->n - 1; i; i--) { t = x[i]; z[i] = (t << 1) | c; c = t >> 31; }
+ t = x[0]; z[0] = ((t ^ (m&p->poly)) << 1) | c;
+}
+#endif
+
/* --- @mul@ --- *
*
* Arguments: @const gcm_params *p@ = pointer to the parameters
static void pclmul_mktable(const gcm_params *p,
uint32 *ktab, const uint32 *k)
{
- unsigned n = p->n;
+ unsigned i, n = p->n;
unsigned nz;
- uint32 *t;
+ uint32 k_over_t[GCM_NMAX], *t;
- /* We just need to store the value in a way which is convenient for the
- * assembler code to read back. That involves reordering the words, and,
- * in the case of 96-bit blocks, padding with zeroes to fill out a 128-bit
- * chunk.
+ /* We need to divide the value by t (to compensate for the one-bit shift
+ * resulting from GCM's backwards bit ordering) and store the value in a
+ * way which is convenient for the assembler code to read back. That
+ * involves reordering the words, and, in the case of 96-bit blocks,
+ * padding with zeroes to fill out a 128-bit chunk.
*/
+ if (!(p->f&GCMF_SWAP)) divt(p, k_over_t, k);
+ else {
+ for (i = 0; i < n; i++) k_over_t[i] = ENDSWAP32(k[i]);
+ divt(p, k_over_t, k_over_t);
+ }
+
if (n == 3) nz = 1;
else nz = 0;
- t = ktab + n + nz;
-
- if (p->f&GCMF_SWAP) while (n--) { *--t = ENDSWAP32(*k); k++; }
- else while (n--) *--t = *k++;
+ k = k_over_t; t = ktab + n + nz; while (n--) *--t = *k++;
while (nz--) *--t = 0;
}
#endif
static void arm_crypto_mktable(const gcm_params *p,
uint32 *ktab, const uint32 *k)
{
- unsigned n = p->n;
- uint32 *t;
+ unsigned i, n = p->n;
+ uint32 k_over_t[GCM_NMAX], *t;
- /* We just need to store the value in a way which is convenient for the
- * assembler code to read back. That involves swapping the bytes in each
- * 64-bit lane.
+ /* We need to divide the value by t (to compensate for the one-bit shift
+ * resulting from GCM's backwards bit ordering) and store the value in a
+ * way which is convenient for the assembler code to read back. That
+ * involves swapping the bytes in each 64-bit lane.
*/
- t = ktab;
- if (p->f&GCMF_SWAP) {
- while (n >= 2) {
- t[1] = ENDSWAP32(k[0]); t[0] = ENDSWAP32(k[1]);
- t += 2; k += 2; n -= 2;
- }
- if (n) { t[1] = ENDSWAP32(k[0]); t[0] = 0; }
- } else {
- while (n >= 2) {
- t[1] = k[0]; t[0] = k[1];
- t += 2; k += 2; n -= 2;
- }
- if (n) { t[1] = k[0]; t[0] = 0; }
+ if (!(p->f&GCMF_SWAP)) divt(p, k_over_t, k);
+ else {
+ for (i = 0; i < n; i++) k_over_t[i] = ENDSWAP32(k[i]);
+ divt(p, k_over_t, k_over_t);
+ }
+
+ t = ktab; k = k_over_t;
+ while (n >= 2) {
+ t[1] = k[0]; t[0] = k[1];
+ t += 2; k += 2; n -= 2;
}
+ if (n) { t[1] = k[0]; t[0] = 0; }
}
#endif
const uint32 *t;
/* The representation is already independent of the blockcipher endianness.
- * We need to compensate for padding, and reorder the words.
+ * We need to compensate for padding, reorder the words, and multiply by t
+ * to compensate for the factor of t we divided out earlier.
*/
if (n == 3) nz = 1; else nz = 0;
t = ktab + n + nz;
while (n--) *k++ = *--t;
+ mult(p, k - p->n, k - p->n);
}
#endif
const uint32 *t;
/* The representation is already independent of the blockcipher endianness.
- * We only need to reorder the words.
+ * We only need to reorder the words, and multiply by t to compensate for
+ * the factor of t we divided out earlier.
*/
t = ktab;
while (n >= 2) { k[1] = t[0]; k[0] = t[1]; t += 2; k += 2; n -= 2; }
- if (n) k[0] = t[1];
+ if (n) { k[0] = t[1]; k++; n--; }
+ mult(p, k - p->n, k - p->n);
}
#endif