extern void gcm_mktable(const gcm_params */*p*/,
uint32 */*ktab*/, const uint32 */*k*/);
-/* --- @gcm_mulk_N@ --- *
+/* --- @gcm_mulk_N{b,l}@ --- *
*
* Arguments: @uint32 *a@ = accumulator to multiply
* @const uint32 *ktab@ = table constructed by @gcm_mktable@
*
* Use: Multiply @a@ by @k@ (implicitly represented in @ktab@),
* updating @a@ in-place. There are separate functions for each
- * supported block size because this is the function whose
- * performance actually matters.
+ * supported block size and endianness because this is the
+ * function whose performance actually matters.
+ *
+ * The declarations are produced by handing @GCM_DECL_MULK@
+ * to @GCM_WIDTHS@ (presumably once per supported width);
+ * each expansion declares a @b@ and an @l@ variant. The
+ * @GCM_MULK@ macro selects between them using
+ * @BLKC_ENDIAN@, so the suffixes presumably stand for big-
+ * and little-endian -- to be confirmed against the
+ * implementations.
*/
#define GCM_DECL_MULK(nbits) \
- extern void gcm_mulk_##nbits(uint32 */*a*/, const uint32 */*ktab*/);
+ extern void gcm_mulk_##nbits##b(uint32 */*a*/, const uint32 */*ktab*/); \
+ extern void gcm_mulk_##nbits##l(uint32 */*a*/, const uint32 */*ktab*/);
GCM_WIDTHS(GCM_DECL_MULK)
#undef GCM_DECL_MULK
/* Dispatch to the appropriate variant of @gcm_mulk@. */
-#define GCM_MULK(PRE, a, ktab) BLKC_GLUE(gcm_mulk_, BLKC_BITS(PRE))(a, ktab)
+#define GCM_MULK(PRE, a, ktab) /* helper chosen via @BLKC_ENDIAN(PRE)@ */ \
+ BLKC_GLUE(GCM_MULK_, BLKC_ENDIAN(PRE))(BLKC_BITS(PRE), a, ktab)
+#define GCM_MULK_B(nbits, a, ktab) /* @gcm_mulk_<nbits>b@, if @BLKC_GLUE@ pastes */ \
+ BLKC_GLUE(BLKC_GLUE(gcm_mulk_, nbits), b)(a, ktab)
+#define GCM_MULK_L(nbits, a, ktab) /* @gcm_mulk_<nbits>l@, if @BLKC_GLUE@ pastes */ \
+ BLKC_GLUE(BLKC_GLUE(gcm_mulk_, nbits), l)(a, ktab)
/* --- @gcm_ghashdone@ --- *
*