symm/gcm.c: Make `gcm_mktable' and `gcm_mulk_...' be CPU-dependent.
[catacomb] / symm / gcm-def.h
index f8688c4..34f95aa 100644 (file)
@@ -107,7 +107,7 @@ typedef struct gcm_params {
 extern void gcm_mktable(const gcm_params */*p*/,
                        uint32 */*ktab*/, const uint32 */*k*/);
 
-/* --- @gcm_mulk_N@ --- *
+/* --- @gcm_mulk_N{b,l}@ --- *
  *
  * Arguments:  @uint32 *a@ = accumulator to multiply
  *             @const uint32 *ktab@ = table constructed by @gcm_mktable@
@@ -116,17 +116,23 @@ extern void gcm_mktable(const gcm_params */*p*/,
  *
  * Use:                Multiply @a@ by @k@ (implicitly represented in @ktab@),
  *             updating @a@ in-place.  There are separate functions for each
- *             supported block size because this is the function whose
- *             performance actually matters.
+ *             supported block size and endianness because this is the
+ *             function whose performance actually matters.
  */
 
 #define GCM_DECL_MULK(nbits)                                           \
-  extern void gcm_mulk_##nbits(uint32 */*a*/, const uint32 */*ktab*/);
+  extern void gcm_mulk_##nbits##b(uint32 */*a*/, const uint32 */*ktab*/); \
+  extern void gcm_mulk_##nbits##l(uint32 */*a*/, const uint32 */*ktab*/);
 GCM_WIDTHS(GCM_DECL_MULK)
 #undef GCM_DECL_MULK
 
 /* Dispatch to the appropriate variant of @gcm_mulk@. */
-#define GCM_MULK(PRE, a, ktab) BLKC_GLUE(gcm_mulk_, BLKC_BITS(PRE))(a, ktab)
+#define GCM_MULK(PRE, a, ktab)                                         \
+  BLKC_GLUE(GCM_MULK_, BLKC_ENDIAN(PRE))(BLKC_BITS(PRE), a, ktab)
+#define GCM_MULK_B(nbits, a, ktab)                                     \
+  BLKC_GLUE(BLKC_GLUE(gcm_mulk_, nbits), b)(a, ktab)
+#define GCM_MULK_L(nbits, a, ktab)                                     \
+  BLKC_GLUE(BLKC_GLUE(gcm_mulk_, nbits), l)(a, ktab)
 
 /* --- @gcm_ghashdone@ --- *
  *