symm/gcm.h, symm/gcm-def.h: Implement the GCM authenticated encryption mode.
[catacomb] / symm / gcm-def.h
diff --git a/symm/gcm-def.h b/symm/gcm-def.h
new file mode 100644 (file)
index 0000000..f8688c4
--- /dev/null
@@ -0,0 +1,959 @@
+/* -*-c-*-
+ *
+ * The GCM authenticated encryption mode
+ *
+ * (c) 2018 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of Catacomb.
+ *
+ * Catacomb is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Library General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Catacomb is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with Catacomb.  If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#ifndef CATACOMB_GCM_DEF_H
+#define CATACOMB_GCM_DEF_H
+
+#ifdef __cplusplus
+  extern "C" {
+#endif
+
+/*----- Header files ------------------------------------------------------*/
+
+#include <string.h>
+
+#include <mLib/bits.h>
+#include <mLib/sub.h>
+
+#ifndef CATACOMB_ARENA_H
+#  include "arena.h"
+#endif
+
+#ifndef CATACOMB_BLKC_H
+#  include "blkc.h"
+#endif
+
+#ifndef CATACOMB_CT_H
+#  include "ct.h"
+#endif
+
+#ifndef CATACOMB_KEYSZ_H
+#  include "keysz.h"
+#endif
+
+#ifndef CATACOMB_PARANOIA_H
+#  include "paranoia.h"
+#endif
+
+#ifndef CATACOMB_RSVR_H
+#  include "rsvr.h"
+#endif
+
+/*----- Type definitions --------------------------------------------------*/
+
+typedef struct gcm_params {           /* per-cipher GHASH parameters */
+  unsigned f;                          /* flags: mask of @GCMF_...@ bits */
+#define GCMF_SWAP 1u                   /*   swap byte order? */
+  unsigned n;                          /* number of 32-bit words in block */
+  uint32 poly;                         /* selected polynomial mask: one of
+                                        * the @GCM_POLY_...@ constants */
+} gcm_params;
+
+/*----- Utilities ---------------------------------------------------------*/
+
+/* Supported block sizes, in bits.  @GCM_WIDTHS@ is a list macro: it applies
+ * its argument to each supported width in turn (see @GCM_DECL_MULK@).
+ * @GCM_NMAX@ is presumably the largest block size in 32-bit words (256/32);
+ * its users aren't visible in this file, so confirm before relying on that.
+ */
+#define GCM_WIDTHS(_) _(64) _(96) _(128) _(192) _(256)
+#define GCM_NMAX 8
+
+/* Polynomial tails for the supported block sizes.  Each name ends with the
+ * block size in bits, so that @GCM_DEFX@ can select the right one by gluing
+ * @GCM_POLY_@ onto @BLKC_BITS(PRE)@.
+ */
+#define GCM_POLY_64  0xd8000000
+#define GCM_POLY_96  0x82600000
+#define GCM_POLY_128 0xe1000000
+#define GCM_POLY_192 0xe1000000
+#define GCM_POLY_256 0xa4200000
+
+/* Determine whether to set the @GCMF_SWAP@ flag.  The suffix is whatever
+ * @BLKC_ENDIAN(PRE)@ produces for the cipher: `L' ciphers get the flag and
+ * `B' ciphers don't.
+ */
+#define GCM_SWAP_L GCMF_SWAP
+#define GCM_SWAP_B 0
+
+/* --- @gcm_mktable@ --- *
+ *
+ * Arguments:  @const gcm_params *p@ = pointer to the parameters
+ *             @uint32 *ktab@ = where to write the table; there must be
+ *                     space for %$32 n$% %$n$%-word entries, i.e.,
+ *                     %$32 n^2$% 32-bit words in total, where %$n$% is
+ *                     @p->n@, the block size in words
+ *             @const uint32 *k@ = input field element
+ *
+ * Returns:    ---
+ *
+ * Use:        Construct a table for use by @gcm_mulk_...@ below, to
+ *             multiply (vaguely) efficiently by @k@.
+ */
+
+extern void gcm_mktable(const gcm_params */*p*/,
+                       uint32 */*ktab*/, const uint32 */*k*/);
+
+/* --- @gcm_mulk_N@ --- *
+ *
+ * Arguments:  @uint32 *a@ = accumulator to multiply
+ *             @const uint32 *ktab@ = table constructed by @gcm_mktable@
+ *
+ * Returns:    ---
+ *
+ * Use:        Multiply @a@ by @k@ (implicitly represented in @ktab@),
+ *             updating @a@ in-place.  There are separate functions for each
+ *             supported block size because this is the function whose
+ *             performance actually matters.  One function is declared
+ *             for each width listed in @GCM_WIDTHS@, with the width in
+ *             bits replacing the @N@ in the name.
+ */
+
+#define GCM_DECL_MULK(nbits)                                           \
+  extern void gcm_mulk_##nbits(uint32 */*a*/, const uint32 */*ktab*/);
+GCM_WIDTHS(GCM_DECL_MULK)
+#undef GCM_DECL_MULK
+
+/* Dispatch to the appropriate variant of @gcm_mulk@.  The function name is
+ * formed by gluing @gcm_mulk_@ onto @BLKC_BITS(PRE)@, where @PRE@ is the
+ * block cipher's prefix; @a@ and @ktab@ are passed through unchanged.
+ */
+#define GCM_MULK(PRE, a, ktab) BLKC_GLUE(gcm_mulk_, BLKC_BITS(PRE))(a, ktab)
+
+/* --- @gcm_ghashdone@ --- *
+ *
+ * Arguments:  @const gcm_params *p@ = pointer to the parameters
+ *             @uint32 *a@ = GHASH accumulator
+ *             @const uint32 *ktab@ = multiplication table, built by
+ *                     @gcm_mktable@
+ *             @unsigned long xblocks, yblocks@ = number of whole blocks in
+ *                     the two inputs
+ *             @unsigned xbytes, ybytes@ = number of trailing bytes in the
+ *                     two inputs
+ *
+ * Returns:    ---
+ *
+ * Use:        Finishes a GHASH operation by appending the appropriately
+ *             encoded lengths of the two constituent messages.  Note
+ *             the argument order: each input's block count is followed
+ *             immediately by its trailing byte count.  Either input may
+ *             be empty: nonce hashing in @GCM_DEFX@ passes zeros for the
+ *             first input and the nonce's length as the second.
+ */
+
+extern void gcm_ghashdone(const gcm_params */*p*/,
+                         uint32 */*a*/, const uint32 */*ktab*/,
+                         unsigned long /*xblocks*/, unsigned /*xbytes*/,
+                         unsigned long /*yblocks*/, unsigned /*ybytes*/);
+
+/* --- @gcm_concat@ --- *
+ *
+ * Arguments:  @const gcm_params *p@ = pointer to the parameters
+ *             @uint32 *z@ = GHASH accumulator for suffix, updated
+ *             @const uint32 *x@ = GHASH accumulator for prefix (read
+ *                     only)
+ *             @const uint32 *ktab@ = multiplication table, built by
+ *                     @gcm_mktable@
+ *             @unsigned long n@ = length of suffix in whole blocks
+ *
+ * Returns:    ---
+ *
+ * Use:        On entry, @x@ and @z@ are the results of hashing two strings
+ *             %$a$% and %$b$%, each a whole number of blocks long; in
+ *             particular, %$b$% is @n@ blocks long.  On exit, @z@ is
+ *             updated to be the hash of %$a \cat b$%.
+ */
+
+extern void gcm_concat(const gcm_params */*p*/,
+                      uint32 */*z*/, const uint32 */*x*/,
+                      const uint32 */*ktab*/, unsigned long /*n*/);
+
+/* Step the counter using GCM's strange only-the-last-32-bits convention.
+ * Only the final word of the block @w@ is touched, so there's no carry into
+ * the earlier words.  The variant is selected by @BLKC_ENDIAN(PRE)@: for `B'
+ * ciphers the word is incremented as it stands; for `L' ciphers it's
+ * byte-swapped with @ENDSWAP32@, incremented, and swapped back again.
+ */
+#define GCM_STEP(PRE, w) BLKC_GLUE(GCM_STEP_, BLKC_ENDIAN(PRE))(PRE, w)
+#define GCM_STEP_B(PRE, w) GCM_STEP_X(PRE, BLKC_ID, w)
+#define GCM_STEP_L(PRE, w) GCM_STEP_X(PRE, ENDSWAP32, w)
+#define GCM_STEP_X(PRE, op, w) do {                                    \
+  BLKC_W(w);                                                           \
+  _w[PRE##_BLKSZ/4 - 1] = op(op(_w[PRE##_BLKSZ/4 - 1]) + 1);           \
+} while (0)
+
+/*----- Macros ------------------------------------------------------------*/
+
+/* --- @GCM_DEF@ --- *
+ *
+ * Arguments:  @PRE@, @pre@ = prefixes for the underlying block cipher
+ *
+ * Use:        Creates an implementation for the GCM authenticated-
+ *             encryption mode.  This is a thin wrapper: it expands
+ *             @GCM_DEFX@, passing the stringified @pre@ as both the
+ *             @name@ and @fname@ arguments.
+ */
+
+#define GCM_DEF(PRE, pre) GCM_DEFX(PRE, pre, #pre, #pre)
+
+#define GCM_DEFX(PRE, pre, name, fname)                                        \
+                                                                       \
+static const gcm_params pre##_gcmparams = {                            \
+  BLKC_GLUE(GCM_SWAP_, BLKC_ENDIAN(PRE)), /* f: byte-swap flag */      \
+  PRE##_BLKSZ/4,                          /* n: words per block */     \
+  BLKC_GLUE(GCM_POLY_, BLKC_BITS(PRE))    /* poly: polynomial tail */  \
+};                                                                     \
+                                                                       \
+const octet                                                            \
+  pre##_gcmnoncesz[] = { KSZ_ANY, PRE##_BLKSZ - 4 },                   \
+  pre##_gcmtagsz[] = { KSZ_RANGE, PRE##_BLKSZ, 0, PRE##_BLKSZ, 1 };    \
+                                                                       \
+static const rsvr_policy pre##_gcmpolicy = { 0, PRE##_BLKSZ, PRE##_BLKSZ }; \
+                                                                       \
+/* --- @pre_gcmsetkey@ --- *                                           \
+ *                                                                     \
+ * Arguments:  @pre_gcmkey *key@ = pointer to key block to fill in     \
+ *             @const void *k@ = pointer to key material               \
+ *             @size_t ksz@ = size of key material                     \
+ *                                                                     \
+ * Returns:    ---                                                     \
+ *                                                                     \
+ * Use:                Initializes an GCM key block.                           \
+ */                                                                    \
+                                                                       \
+void pre##_gcmsetkey(pre##_gcmkey *key, const void *k, size_t ksz)     \
+{                                                                      \
+  uint32 t[PRE##_BLKSZ/4];                                             \
+                                                                       \
+  /* Initialize the block cipher. */                                   \
+  pre##_init(&key->ctx, k, ksz);                                       \
+                                                                       \
+  /* Set up the GHASH multiplication table. */                         \
+  BLKC_ZERO(PRE, t); pre##_eblk(&key->ctx, t, t);                      \
+  gcm_mktable(&pre##_gcmparams, key->ktab, t);                         \
+}                                                                      \
+                                                                       \
+/* --- @pre_gcmaadinit@ --- *                                          \
+ *                                                                     \
+ * Arguments:  @pre_gcmaadctx *aad@ = pointer to AAD context           \
+ *             @const pre_gcmkey *key@ = pointer to key block          \
+ *                                                                     \
+ * Returns:    ---                                                     \
+ *                                                                     \
+ * Use:                Initializes an GCM AAD (`additional authenticated       \
+ *             data') context associated with a given key.  AAD        \
+ *             contexts can be copied and/or reused, saving time if    \
+ *             the AAD for a number of messages has a common prefix.   \
+ *                                                                     \
+ *             The @key@ doesn't need to be kept around, though        \
+ *             usually there'll at least be another copy in some GCM   \
+ *             operation context because the AAD on its own isn't much \
+ *             good.                                                   \
+ */                                                                    \
+                                                                       \
+void pre##_gcmaadinit(pre##_gcmaadctx *aad, const pre##_gcmkey *key)   \
+  { aad->k = *key; aad->off = 0; aad->len = 0; BLKC_ZERO(PRE, aad->a); } \
+                                                                       \
+/* --- @pre_gcmaadhash@ --- *                                          \
+ *                                                                     \
+ * Arguments:  @pre_gcmaadctx *aad@ = pointer to AAD context           \
+ *             @const void *p@ = pointer to AAD material               \
+ *             @size_t sz@ = length of AAD material                    \
+ *                                                                     \
+ * Returns:    ---                                                     \
+ *                                                                     \
+ * Use:        Feeds AAD into the context.  Each whole block that the  \
+ *             reservoir yields is folded into the GHASH accumulator   \
+ *             @aad->a@ and counted in @aad->len@.  Presumably the     \
+ *             reservoir (@aad->b@, @aad->off@) retains any trailing   \
+ *             partial block for later -- see @rsvr.h@ to confirm.     \
+ */                                                                    \
+                                                                       \
+void pre##_gcmaadhash(pre##_gcmaadctx *aad, const void *p, size_t sz)  \
+{                                                                      \
+  rsvr_state st;                                                       \
+  const octet *q;                                                      \
+                                                                       \
+  rsvr_setup(&st, &pre##_gcmpolicy, aad->b, &aad->off, p, sz);         \
+  RSVR_DO(&st) while ((q = RSVR_NEXT(&st, PRE##_BLKSZ)) != 0) {                \
+    BLKC_XLOAD(PRE, aad->a, q); GCM_MULK(PRE, aad->a, aad->k.ktab);    \
+    aad->len++;                                                                \
+  }                                                                    \
+}                                                                      \
+                                                                       \
+/* --- @pre_gcminit@ --- *                                             \
+ *                                                                     \
+ * Arguments:  @pre_gcmctx *ctx@ = pointer to GCM context              \
+ *             @const pre_gcmkey *key@ = pointer to key block          \
+ *             @const void *n@ = pointer to nonce                      \
+ *             @size_t nsz@ = size of nonce                            \
+ *                                                                     \
+ * Returns:    ---                                                     \
+ *                                                                     \
+ * Use:                Initialize an GCM operation context with a given key.   \
+ *                                                                     \
+ *             The original key needn't be kept around any more.       \
+ */                                                                    \
+                                                                       \
+void pre##_gcminit(pre##_gcmctx *ctx, const pre##_gcmkey *k,           \
+                  const void *n, size_t nsz)                           \
+  { ctx->k = *k; pre##_gcmreinit(ctx, n, nsz); }                       \
+                                                                       \
+/* --- @pre_gcmreinit@ --- *                                           \
+ *                                                                     \
+ * Arguments:  @pre_gcmctx *ctx@ = pointer to GCM context              \
+ *             @const void *n@ = pointer to nonce                      \
+ *             @size_t nsz@ = size of nonce                            \
+ *                                                                     \
+ * Returns:    ---                                                     \
+ *                                                                     \
+ * Use:        Reinitialize a GCM operation context, changing the      \
+ *             nonce.  The buffer offset, block count and GHASH        \
+ *             accumulator are reset; the key is left alone.  A nonce  \
+ *             exactly four bytes shorter than a block is used         \
+ *             directly as the initial counter; any other length is    \
+ *             hashed down with GHASH.                                 \
+ */                                                                    \
+                                                                       \
+void pre##_gcmreinit(pre##_gcmctx *ctx, const void *n, size_t nsz)     \
+{                                                                      \
+  octet b[PRE##_BLKSZ];                                                        \
+  const octet *q = n;                                                  \
+  size_t nblocks;                                                      \
+  unsigned i;                                                          \
+                                                                       \
+  /* Zero the counters. */                                             \
+  ctx->off = 0; ctx->len = 0;                                          \
+  BLKC_ZERO(PRE, ctx->a);                                              \
+                                                                       \
+  /* Calculate the initial counter from the nonce. */                  \
+  if (nsz == PRE##_BLKSZ - 4) {                                                \
+    /* Easy version: initialize the final word to 1 and copy the       \
+     * remaining words from the nonce.  (The spec shows the nonce and  \
+     * counter the other way around for 64-bit block ciphers, but I'm  \
+     * sure this is just a mistake.)                                   \
+     */                                                                        \
+                                                                       \
+    for (i = 0; i < PRE##_BLKSZ/4 - 1; i++)                            \
+      { ctx->c0[i] = BLKC_LOAD_E(PRE)(q); q += 4; }                    \
+    ctx->c0[PRE##_BLKSZ/4 - 1] = BLKC_BWORD(PRE, 1);                   \
+  } else {                                                             \
+    /* Harder version: hash the nonce down with GHASH.  Whole blocks   \
+     * are absorbed directly, and a trailing partial block is padded   \
+     * with zeros in @b@ first.  After the loop, @nsz@ is just the     \
+     * number of leftover bytes.  Finally the nonce's length is mixed  \
+     * in as the second of @gcm_ghashdone@'s two inputs, the first     \
+     * being empty.                                                    \
+     */                                                                        \
+                                                                       \
+    BLKC_ZERO(PRE, ctx->c0); nblocks = 0;                              \
+    while (nsz >= PRE##_BLKSZ) {                                       \
+      BLKC_XLOAD(PRE, ctx->c0, q); q += PRE##_BLKSZ;                   \
+      GCM_MULK(PRE, ctx->c0, ctx->k.ktab);                             \
+      nsz -= PRE##_BLKSZ; nblocks++;                                   \
+    }                                                                  \
+    if (nsz) {                                                         \
+      memcpy(b, q, nsz); memset(b + nsz, 0, PRE##_BLKSZ - nsz);                \
+      BLKC_XLOAD(PRE, ctx->c0, b);                                     \
+      GCM_MULK(PRE, ctx->c0, ctx->k.ktab);                             \
+    }                                                                  \
+    gcm_ghashdone(&pre##_gcmparams, ctx->c0, ctx->k.ktab,              \
+                 0, 0, nblocks, nsz);                                  \
+  }                                                                    \
+                                                                       \
+  /* We must remember the initial counter for the final tag            \
+   * calculation.  (I conjecture that storing the final counter instead        \
+   * would be just as secure, and require less state, but I've not     \
+   * proven this, and anyway it wouldn't interoperate.)  Copy it to    \
+   * make the working counter.                                         \
+   */                                                                  \
+  BLKC_MOVE(PRE, ctx->c, ctx->c0);                                     \
+}                                                                      \
+                                                                       \
+/* --- @pre_gcmencrypt@ --- *                                          \
+ *                                                                     \
+ * Arguments:  @pre_gcmctx *ctx@ = pointer to GCM operation context    \
+ *             @const void *src@ = pointer to plaintext message chunk  \
+ *             @size_t sz@ = size of the plaintext                     \
+ *             @buf *dst@ = a buffer to write the ciphertext to        \
+ *                                                                     \
+ * Returns:    Zero on success; @-1@ on failure, which here means      \
+ *             that @buf_get@ couldn't provide @sz@ bytes in @dst@;    \
+ *             the context is left untouched in that case.             \
+ *                                                                     \
+ * Use:        Encrypts a chunk of a plaintext message, writing a      \
+ *             chunk of ciphertext to the output buffer and updating   \
+ *             the operation state.                                    \
+ *                                                                     \
+ *             For GCM, we always write a ciphertext chunk the same    \
+ *             size as the plaintext.  The messing about with @buf@    \
+ *             objects makes the interface consistent with other AEAD  \
+ *             schemes which can't do this.                            \
+ */                                                                    \
+                                                                       \
+int pre##_gcmencrypt(pre##_gcmctx *ctx,                                        \
+                    const void *src, size_t sz, buf *dst)              \
+{                                                                      \
+  rsvr_plan plan;                                                      \
+  uint32 t[PRE##_BLKSZ/4];                                             \
+  const octet *p = src;                                                        \
+  octet *q, *r, y;                                                     \
+                                                                       \
+  /* Allocate space for the ciphertext. */                             \
+  if (sz) { q = buf_get(dst, sz); if (!q) return (-1); }               \
+  else q = 0;                                                          \
+                                                                       \
+  /* Determine the buffering plan.  Our buffer is going to do double-  \
+   * duty here.  The end portion is going to contain mask from the     \
+   * encrypted counter which we mix into the plaintext to encrypt it;  \
+   * the start portion, which originally held mask bytes we've already \
+   * used, will hold the output ciphertext, which will eventually be   \
+   * collected into the GHASH state.                                   \
+   */                                                                  \
+  rsvr_mkplan(&plan, &pre##_gcmpolicy, ctx->off, sz);                  \
+                                                                       \
+  /* Initial portion, fulfilled from the buffer.  If the buffer is     \
+   * empty, then that means that we haven't yet encrypted the current  \
+   * counter, so we should do that and advance it.  Each mask byte we  \
+   * use is overwritten in the buffer by the ciphertext byte it made.  \
+   */                                                                  \
+  if (plan.head) {                                                     \
+    if (!ctx->off) {                                                   \
+      GCM_STEP(PRE, ctx->c); pre##_eblk(&ctx->k.ctx, ctx->c, t);       \
+      BLKC_STORE(PRE, ctx->b, t);                                      \
+    }                                                                  \
+    r = ctx->b + ctx->off; ctx->off += plan.head;                      \
+    while (plan.head--) { y = *p++ ^ *r; *r++ = *q++ = y; }            \
+  }                                                                    \
+                                                                       \
+  /* If we've filled up the buffer then we need to cycle the MAC and   \
+   * reset the offset.                                                 \
+   */                                                                  \
+  if (plan.from_rsvr) {                                                        \
+    BLKC_XLOAD(PRE, ctx->a, ctx->b); GCM_MULK(PRE, ctx->a, ctx->k.ktab); \
+    ctx->len++; ctx->off = 0;                                          \
+  }                                                                    \
+                                                                       \
+  /* Now to process the main body of the input.  Whole blocks go       \
+   * straight from input to output without visiting the buffer.        \
+   */                                                                  \
+  while (plan.from_input) {                                            \
+    GCM_STEP(PRE, ctx->c); pre##_eblk(&ctx->k.ctx, ctx->c, t);         \
+    BLKC_XLOAD(PRE, t, p); p += PRE##_BLKSZ;                           \
+    BLKC_STORE(PRE, q, t); q += PRE##_BLKSZ;                           \
+    BLKC_XMOVE(PRE, ctx->a, t); GCM_MULK(PRE, ctx->a, ctx->k.ktab);    \
+    plan.from_input -= PRE##_BLKSZ; ctx->len++;                                \
+  }                                                                    \
+                                                                       \
+  /* Finally, deal with any final portion.  If there is one, we know   \
+   * that the buffer is empty: we must have filled it above, or this   \
+   * would all count as `initial' data.                                        \
+   */                                                                  \
+  if (plan.tail) {                                                     \
+    GCM_STEP(PRE, ctx->c); pre##_eblk(&ctx->k.ctx, ctx->c, t);         \
+    BLKC_STORE(PRE, ctx->b, t);                                                \
+    r = ctx->b; ctx->off += plan.tail;                                 \
+    while (plan.tail--) { y = *p++ ^ *r; *r++ = *q++ = y; }            \
+  }                                                                    \
+                                                                       \
+  /* And we're done. */                                                        \
+  return (0);                                                          \
+}                                                                      \
+                                                                       \
+/* --- @pre_gcmdecrypt@ --- *                                          \
+ *                                                                     \
+ * Arguments:  @pre_gcmctx *ctx@ = pointer to GCM operation context    \
+ *             @const void *src@ = pointer to ciphertext message chunk \
+ *             @size_t sz@ = size of the ciphertext                    \
+ *             @buf *dst@ = a buffer to write the plaintext to         \
+ *                                                                     \
+ * Returns:    Zero on success; @-1@ on failure.                       \
+ *                                                                     \
+ * Use:                Decrypts a chunk of a ciphertext message, writing a     \
+ *             chunk of plaintext to the output buffer and updating    \
+ *             the operation state.                                    \
+ *                                                                     \
+ *             For GCM, we always write a plaintext chunk the same     \
+ *             size as the ciphertext.  The messing about with @buf@   \
+ *             objects makes the interface consistent with other AEAD  \
+ *             schemes which can't do this.                            \
+ */                                                                    \
+                                                                       \
+int pre##_gcmdecrypt(pre##_gcmctx *ctx,                                        \
+                    const void *src, size_t sz, buf *dst)              \
+{                                                                      \
+  rsvr_plan plan;                                                      \
+  uint32 t[PRE##_BLKSZ/4], u[PRE##_BLKSZ];                             \
+  const octet *p = src;                                                        \
+  octet *q, *r, y;                                                     \
+                                                                       \
+  /* Allocate space for the plaintext. */                              \
+  if (sz) { q = buf_get(dst, sz); if (!q) return (-1); }               \
+  else q = 0;                                                          \
+                                                                       \
+  /* Determine the buffering plan.  Our buffer is going to do double-  \
+   * duty here.  The end portion is going to contain mask from the     \
+   * encrypted counter which we mix into the plaintext to encrypt it;  \
+   * the start portion, which originally mask bytes we've already used,        \
+   * will hold the input ciphertext, which will eventually be          \
+   * collected into the GHASH state.                                   \
+   */                                                                  \
+  rsvr_mkplan(&plan, &pre##_gcmpolicy, ctx->off, sz);                  \
+                                                                       \
+  /* Initial portion, fulfilled from the buffer.  If the buffer is     \
+   * empty, then that means that we haven't yet encrypted the current  \
+   * counter, so we should do that and advance it.                     \
+   */                                                                  \
+  if (plan.head) {                                                     \
+    if (!ctx->off) {                                                   \
+      GCM_STEP(PRE, ctx->c); pre##_eblk(&ctx->k.ctx, ctx->c, t);       \
+      BLKC_STORE(PRE, ctx->b, t);                                      \
+    }                                                                  \
+    r = ctx->b + ctx->off; ctx->off += plan.head;                      \
+    while (plan.head--) { y = *p++; *q++ = y ^ *r; *r++ = y; }         \
+  }                                                                    \
+                                                                       \
+  /* If we've filled up the buffer then we need to cycle the MAC and   \
+   * reset the offset.                                                 \
+   */                                                                  \
+  if (plan.from_rsvr) {                                                        \
+    BLKC_XLOAD(PRE, ctx->a, ctx->b); GCM_MULK(PRE, ctx->a, ctx->k.ktab); \
+    ctx->len++; ctx->off = 0;                                          \
+  }                                                                    \
+                                                                       \
+  /* Now to process the main body of the input. */                     \
+  while (plan.from_input) {                                            \
+    GCM_STEP(PRE, ctx->c); pre##_eblk(&ctx->k.ctx, ctx->c, t);         \
+    BLKC_LOAD(PRE, u, p); p += PRE##_BLKSZ;                            \
+    BLKC_XSTORE(PRE, q, t, u); q += PRE##_BLKSZ;                       \
+    BLKC_XMOVE(PRE, ctx->a, u); GCM_MULK(PRE, ctx->a, ctx->k.ktab);    \
+    plan.from_input -= PRE##_BLKSZ; ctx->len++;                                \
+  }                                                                    \
+                                                                       \
+  /* Finally, deal with any final portion.  If there is one, we know   \
+   * that the buffer is empty: we must have filled it above, or this   \
+   * would all count as `initial' data.                                        \
+   */                                                                  \
+  if (plan.tail) {                                                     \
+    GCM_STEP(PRE, ctx->c); pre##_eblk(&ctx->k.ctx, ctx->c, t);         \
+    BLKC_STORE(PRE, ctx->b, t);                                                \
+    r = ctx->b; ctx->off += plan.tail;                                 \
+    while (plan.tail--) { y = *p++; *q++ = y ^ *r; *r++ = y; }         \
+  }                                                                    \
+                                                                       \
+  /* And we're done. */                                                        \
+  return (0);                                                          \
+}                                                                      \
+                                                                       \
+/* --- @pre_gcmtag@ --- *                                              \
+ *                                                                     \
+ * Arguments:  @pre_gcmctx *ctx@ = pointer to a GCM context            \
+ *             @const pre_gcmaadctx *aad@ = pointer to AAD context, or \
+ *                     null                                            \
+ *             @octet *t@ = where to write a (full-length) tag         \
+ *                                                                     \
+ * Returns:    ---                                                     \
+ *                                                                     \
+ * Use:        Finishes a GCM operation, by calculating the tag.       \
+ */                                                                    \
+                                                                       \
+static void pre##_gcmtag(pre##_gcmctx *ctx,                            \
+                        const pre##_gcmaadctx *aad, octet *t)          \
+{                                                                      \
+  octet b[PRE##_BLKSZ];                                                        \
+  uint32 u[PRE##_BLKSZ/4];                                             \
+  unsigned long n;                                                     \
+                                                                       \
+  /* Finish tagging the ciphertext. */                                 \
+  if (ctx->off) {                                                      \
+    memcpy(b, ctx->b, ctx->off);                                       \
+    memset(b + ctx->off, 0, PRE##_BLKSZ - ctx->off);                   \
+    BLKC_XLOAD(PRE, ctx->a, b); GCM_MULK(PRE, ctx->a, ctx->k.ktab);    \
+  }                                                                    \
+                                                                       \
+  /* If there's no AAD, because the pointer is null or no data was     \
+   * supplied, then apply that to the GHASH state.  (Otherwise there's \
+   * nothing to do here.)                                              \
+   */                                                                  \
+  if (aad && (aad->len || aad->off)) {                                 \
+    BLKC_MOVE(PRE, u, aad->a);                                         \
+    if (aad->off) {                                                    \
+      memcpy(b, aad->b, aad->off);                                     \
+      memset(b + aad->off, 0, PRE##_BLKSZ - aad->off);                 \
+      BLKC_XLOAD(PRE, u, b); GCM_MULK(PRE, u, ctx->k.ktab);            \
+    }                                                                  \
+    n = ctx->len; if (ctx->off) n++;                                   \
+    gcm_concat(&pre##_gcmparams, ctx->a, u, ctx->k.ktab, n);           \
+  }                                                                    \
+                                                                       \
+  /* Finish off the hash by appending the length. */                   \
+  gcm_ghashdone(&pre##_gcmparams, ctx->a, ctx->k.ktab,                 \
+               aad ? aad->len : 0, aad ? aad->off : 0,                 \
+               ctx->len, ctx->off);                                    \
+                                                                       \
+  /* Mask the hash and store. */                                       \
+  pre##_eblk(&ctx->k.ctx, ctx->c0, u);                                 \
+  BLKC_XSTORE(PRE, t, ctx->a, u);                                      \
+}                                                                      \
+                                                                       \
+/* --- @pre_gcmencryptdone@ --- *                                      \
+ *                                                                     \
+ * Arguments:  @pre_gcmctx *ctx@ = pointer to an GCM context           \
+ *             @const pre_gcmaadctx *aad@ = pointer to AAD context, or \
+ *                     null                                            \
+ *             @buf *dst@ = buffer for remaining ciphertext            \
+ *             @void *tag@ = where to write the tag                    \
+ *             @size_t tsz@ = length of tag to store                   \
+ *                                                                     \
+ * Returns:    Zero on success; @-1@ on failure.                       \
+ *                                                                     \
+ * Use:                Completes an GCM encryption operation.  The @aad@       \
+ *             pointer may be null if there is no additional           \
+ *             authenticated data.  GCM doesn't buffer ciphertext, but \
+ *             the output buffer is provided anyway for consistency    \
+ *             with other AEAD schemes which don't have this property; \
+ *             the function will fail if the output buffer is broken.  \
+ */                                                                    \
+                                                                       \
+int pre##_gcmencryptdone(pre##_gcmctx *ctx,                            \
+                        const pre##_gcmaadctx *aad, buf *dst,          \
+                        void *tag, size_t tsz)                         \
+{                                                                      \
+  octet t[PRE##_BLKSZ];                                                        \
+                                                                       \
+  if (tsz > PRE##_BLKSZ) return (-1);                                  \
+  if (!BOK(dst)) return (-1);                                          \
+  pre##_gcmtag(ctx, aad, t); memcpy(tag, t, tsz);                      \
+  return (0);                                                          \
+}                                                                      \
+                                                                       \
+/* --- @pre_gcmdecryptdone@ --- *                                      \
+ *                                                                     \
+ * Arguments:  @pre_gcmctx *ctx@ = pointer to an GCM context           \
+ *             @const pre_gcmaadctx *aad@ = pointer to AAD context, or \
+ *                     null                                            \
+ *             @buf *dst@ = buffer for remaining plaintext             \
+ *             @const void *tag@ = tag to verify                       \
+ *             @size_t tsz@ = length of tag                            \
+ *                                                                     \
+ * Returns:    @+1@ for complete success; @0@ if tag verification      \
+ *             failed; @-1@ for other kinds of errors.                 \
+ *                                                                     \
+ * Use:                Completes an GCM decryption operation.  The @aad@       \
+ *             pointer may be null if there is no additional           \
+ *             authenticated data.  GCM doesn't buffer plaintext, but  \
+ *             the output buffer is provided anyway for consistency    \
+ *             with other AEAD schemes which don't have this property; \
+ *             the function will fail if the output buffer is broken.  \
+ */                                                                    \
+                                                                       \
+int pre##_gcmdecryptdone(pre##_gcmctx *ctx,                            \
+                        const pre##_gcmaadctx *aad, buf *dst,          \
+                        const void *tag, size_t tsz)                   \
+{                                                                      \
+  octet t[PRE##_BLKSZ];                                                        \
+                                                                       \
+  if (tsz > PRE##_BLKSZ) return (-1);                                  \
+  if (!BOK(dst)) return (-1);                                          \
+  pre##_gcmtag(ctx, aad, t);                                           \
+  if (!ct_memeq(tag, t, tsz)) return (0);                              \
+  else return (+1);                                                    \
+}                                                                      \
+                                                                       \
+/* --- Generic AEAD interface --- */                                   \
+                                                                       \
+typedef struct gactx {                                                 \
+  gaead_aad a;                                                         \
+  pre##_gcmaadctx aad;                                                 \
+} gactx;                                                               \
+                                                                       \
+static gaead_aad *gadup(const gaead_aad *a)                            \
+  { gactx *aad = S_CREATE(gactx); *aad = *(gactx *)a; return (&aad->a); } \
+                                                                       \
+static void gahash(gaead_aad *a, const void *h, size_t hsz)            \
+  { gactx *aad = (gactx *)a; pre##_gcmaadhash(&aad->aad, h, hsz); }    \
+                                                                       \
+static void gadestroy(gaead_aad *a)                                    \
+  { gactx *aad = (gactx *)a; BURN(*aad); S_DESTROY(aad); }             \
+                                                                       \
+static const gaead_aadops gaops =                                      \
+  { &pre##_gcm, gadup, gahash, gadestroy };                            \
+                                                                       \
+static gaead_aad *gaad(const pre##_gcmkey *k)                          \
+{                                                                      \
+  gactx *aad = S_CREATE(gactx);                                                \
+  aad->a.ops = &gaops;                                                 \
+  pre##_gcmaadinit(&aad->aad, k);                                      \
+  return (&aad->a);                                                    \
+}                                                                      \
+                                                                       \
+typedef struct gectx {                                                 \
+  gaead_enc e;                                                         \
+  pre##_gcmctx ctx;                                                    \
+} gectx;                                                               \
+                                                                       \
+static gaead_aad *geaad(gaead_enc *e)                                  \
+  { gectx *enc = (gectx *)e; return (gaad(&enc->ctx.k)); }             \
+                                                                       \
+static int gereinit(gaead_enc *e, const void *n, size_t nsz,           \
+                   size_t hsz, size_t msz, size_t tsz)                 \
+{                                                                      \
+  gectx *enc = (gectx *)e;                                             \
+                                                                       \
+  if (tsz > PRE##_BLKSZ) return (-1);                                  \
+  pre##_gcmreinit(&enc->ctx, n, nsz);                                  \
+  return (0);                                                          \
+}                                                                      \
+                                                                       \
+static int geenc(gaead_enc *e, const void *m, size_t msz, buf *b)      \
+{                                                                      \
+  gectx *enc = (gectx *)e;                                             \
+  return (pre##_gcmencrypt(&enc->ctx, m, msz, b));                     \
+}                                                                      \
+                                                                       \
+static int gedone(gaead_enc *e, const gaead_aad *a,                    \
+                 buf *b, void *t, size_t tsz)                          \
+{                                                                      \
+  gectx *enc = (gectx *)e; gactx *aad = (gactx *)a;                    \
+  assert(!a || a->ops == &gaops);                                      \
+  return (pre##_gcmencryptdone(&enc->ctx, a ? &aad->aad : 0, b, t, tsz)); \
+}                                                                      \
+                                                                       \
+static void gedestroy(gaead_enc *e)                                    \
+  { gectx *enc = (gectx *)e; BURN(*enc); S_DESTROY(enc); }             \
+                                                                       \
+static const gaead_encops geops =                                      \
+  { &pre##_gcm, geaad, gereinit, geenc, gedone, gedestroy };           \
+                                                                       \
+typedef struct gdctx {                                                 \
+  gaead_dec d;                                                         \
+  pre##_gcmctx ctx;                                                    \
+} gdctx;                                                               \
+                                                                       \
+static gaead_aad *gdaad(gaead_dec *d)                                  \
+  { gdctx *dec = (gdctx *)d; return (gaad(&dec->ctx.k)); }             \
+                                                                       \
+static int gdreinit(gaead_dec *d, const void *n, size_t nsz,           \
+                    size_t hsz, size_t csz, size_t tsz)                \
+{                                                                      \
+  gdctx *dec = (gdctx *)d;                                             \
+                                                                       \
+  if (tsz > PRE##_BLKSZ) return (-1);                                  \
+  pre##_gcmreinit(&dec->ctx, n, nsz);                                  \
+  return (0);                                                          \
+}                                                                      \
+                                                                       \
+static int gddec(gaead_dec *d, const void *c, size_t csz, buf *b)      \
+{                                                                      \
+  gdctx *dec = (gdctx *)d;                                             \
+  return (pre##_gcmdecrypt(&dec->ctx, c, csz, b));                     \
+}                                                                      \
+                                                                       \
+static int gddone(gaead_dec *d, const gaead_aad *a,                    \
+                 buf *b, const void *t, size_t tsz)                    \
+{                                                                      \
+  gdctx *dec = (gdctx *)d; gactx *aad = (gactx *)a;                    \
+  assert(!a || a->ops == &gaops);                                      \
+  return (pre##_gcmdecryptdone(&dec->ctx, a ? &aad->aad : 0, b, t, tsz)); \
+}                                                                      \
+                                                                       \
+static void gddestroy(gaead_dec *d)                                    \
+  { gdctx *dec = (gdctx *)d; BURN(*dec); S_DESTROY(dec); }             \
+                                                                       \
+static const gaead_decops gdops =                                      \
+  { &pre##_gcm, gdaad, gdreinit, gddec, gddone, gddestroy };           \
+                                                                       \
+typedef struct gkctx {                                                 \
+  gaead_key k;                                                         \
+  pre##_gcmkey key;                                                    \
+} gkctx;                                                               \
+                                                                       \
+static gaead_aad *gkaad(const gaead_key *k)                            \
+  { gkctx *key = (gkctx *)k; return (gaad(&key->key)); }               \
+                                                                       \
+static gaead_enc *gkenc(const gaead_key *k, const void *n, size_t nsz, \
+                       size_t hsz, size_t msz, size_t tsz)             \
+{                                                                      \
+  gkctx *key = (gkctx *)k;                                             \
+  gectx *enc = S_CREATE(gectx);                                                \
+                                                                       \
+  enc->e.ops = &geops;                                                 \
+  pre##_gcminit(&enc->ctx, &key->key, n, nsz);                         \
+  return (&enc->e);                                                    \
+}                                                                      \
+                                                                       \
+static gaead_dec *gkdec(const gaead_key *k, const void *n, size_t nsz, \
+                       size_t hsz, size_t csz, size_t tsz)             \
+{                                                                      \
+  gkctx *key = (gkctx *)k;                                             \
+  gdctx *dec = S_CREATE(gdctx);                                                \
+                                                                       \
+  dec->d.ops = &gdops;                                                 \
+  pre##_gcminit(&dec->ctx, &key->key, n, nsz);                         \
+  return (&dec->d);                                                    \
+}                                                                      \
+                                                                       \
+static void gkdestroy(gaead_key *k)                                    \
+  { gkctx *key = (gkctx *)k; BURN(*key); S_DESTROY(key); }             \
+                                                                       \
+static const gaead_keyops gkops =                                      \
+  { &pre##_gcm, gkaad, gkenc, gkdec, gkdestroy };                      \
+                                                                       \
+static gaead_key *gckey(const void *k, size_t ksz)                     \
+{                                                                      \
+  gkctx *key = S_CREATE(gkctx);                                                \
+  key->k.ops = &gkops;                                                 \
+  pre##_gcmsetkey(&key->key, k, ksz);                                  \
+  return (&key->k);                                                    \
+}                                                                      \
+                                                                       \
+/* The generic AEAD class descriptor for this cipher in GCM mode.  Its \
+ * name is the cipher name with `-gcm' appended, and @gckey@ is the    \
+ * function which makes key objects.                                   \
+ *                                                                     \
+ * NOTE(review): the middle entries look like the key-size,            \
+ * nonce-size and tag-size tables, then the block size and three       \
+ * zero-valued fields; the layout of @gcaead@ isn't visible here --    \
+ * confirm against its declaration.                                    \
+ */                                                                    \
+const gcaead pre##_gcm = {                                             \
+  name "-gcm",                                                         \
+  pre##_keysz, pre##_gcmnoncesz, pre##_gcmtagsz,                       \
+  PRE##_BLKSZ, 0, 0, 0,                                                        \
+  gckey                                                                        \
+};                                                                     \
+                                                                       \
+/* Finally, the test rig: this expands to nothing unless @TEST_RIG@ is \
+ * defined.                                                            \
+ */                                                                    \
+GCM_TESTX(PRE, pre, name, fname)
+
+/*----- Test rig ----------------------------------------------------------*/
+
+#define GCM_TEST(PRE, pre) GCM_TESTX(PRE, pre, #pre, #pre)
+
+/* --- @GCM_TEST@ --- *
+ *
+ * Arguments:  @PRE, pre@ = prefixes for the underlying block cipher
+ *
+ * Use:        Standard test rig for GCM functions.  This is shorthand
+ *             for @GCM_TESTX@, passing the stringified lowercase
+ *             prefix as both the cipher name and the test-vector file
+ *             name.  It expands to nothing unless @TEST_RIG@ is
+ *             defined.
+ */
+
+#ifdef TEST_RIG
+
+#include <stdio.h>
+
+#include <mLib/dstr.h>
+#include <mLib/quis.h>
+#include <mLib/testrig.h>
+
+#define GCM_TESTX(PRE, pre, name, fname)                               \
+                                                                       \
+static int gcmverify(dstr *v)                                          \
+{                                                                      \
+  pre##_gcmkey key;                                                    \
+  pre##_gcmaadctx aad;                                                 \
+  pre##_gcmctx ctx;                                                    \
+  int ok = 1, win;                                                     \
+  int i;                                                               \
+  octet *p;                                                            \
+  int szs[] = { 1, 7, 192, -1, 0 }, *ip;                               \
+  size_t hsz, msz;                                                     \
+  dstr d = DSTR_INIT, t = DSTR_INIT;                                   \
+  buf b;                                                               \
+                                                                       \
+  dstr_ensure(&d, v[4].len > v[3].len ? v[4].len : v[3].len);          \
+  dstr_ensure(&t, v[5].len); t.len = v[5].len;                         \
+                                                                       \
+  pre##_gcmsetkey(&key, v[0].buf, v[0].len);                           \
+                                                                       \
+  for (ip = szs; *ip; ip++) {                                          \
+                                                                       \
+    pre##_gcminit(&ctx, &key, (octet *)v[1].buf, v[1].len);            \
+                                                                       \
+    i = *ip;                                                           \
+    hsz = v[2].len;                                                    \
+    if (i == -1) i = hsz;                                              \
+    if (i > hsz) continue;                                             \
+    p = (octet *)v[2].buf;                                             \
+    pre##_gcmaadinit(&aad, &key);                                      \
+    while (hsz) {                                                      \
+      if (i > hsz) i = hsz;                                            \
+      pre##_gcmaadhash(&aad, p, i);                                    \
+      p += i; hsz -= i;                                                        \
+    }                                                                  \
+                                                                       \
+    buf_init(&b, d.buf, d.sz);                                         \
+    i = *ip;                                                           \
+    msz = v[3].len;                                                    \
+    if (i == -1) i = msz;                                              \
+    if (i > msz) continue;                                             \
+    p = (octet *)v[3].buf;                                             \
+    while (msz) {                                                      \
+      if (i > msz) i = msz;                                            \
+      if (pre##_gcmencrypt(&ctx, p, i, &b)) {                          \
+       puts("!! gcmencrypt reports failure");                          \
+       goto fail_enc;                                                  \
+      }                                                                        \
+      p += i; msz -= i;                                                        \
+    }                                                                  \
+                                                                       \
+    if (pre##_gcmencryptdone(&ctx, &aad, &b, (octet *)t.buf, t.len)) { \
+      puts("!! gcmencryptdone reports failure");                       \
+      goto fail_enc;                                                   \
+    }                                                                  \
+    d.len = BLEN(&b);                                                  \
+                                                                       \
+    if (d.len != v[4].len ||                                           \
+       memcmp(d.buf, v[4].buf, v[4].len) != 0 ||                       \
+       memcmp(t.buf, v[5].buf, v[5].len) != 0) {                       \
+    fail_enc:                                                          \
+      printf("\nfail encrypt:\n\tstep = %i", *ip);                     \
+      fputs("\n\tkey = ", stdout); type_hex.dump(&v[0], stdout);       \
+      fputs("\n\tnonce = ", stdout); type_hex.dump(&v[1], stdout);     \
+      fputs("\n\theader = ", stdout); type_hex.dump(&v[2], stdout);    \
+      fputs("\n\tmessage = ", stdout); type_hex.dump(&v[3], stdout);   \
+      fputs("\n\texp ct = ", stdout); type_hex.dump(&v[4], stdout);    \
+      fputs("\n\tcalc ct = ", stdout); type_hex.dump(&d, stdout);      \
+      fputs("\n\texp tag = ", stdout); type_hex.dump(&v[5], stdout);   \
+      fputs("\n\tcalc tag = ", stdout); type_hex.dump(&t, stdout);     \
+      putchar('\n');                                                   \
+      ok = 0;                                                          \
+    }                                                                  \
+                                                                       \
+    pre##_gcminit(&ctx, &key, (octet *)v[1].buf, v[1].len);            \
+                                                                       \
+    buf_init(&b, d.buf, d.sz);                                         \
+    i = *ip;                                                           \
+    msz = v[4].len;                                                    \
+    if (i == -1) i = msz;                                              \
+    if (i > msz) continue;                                             \
+    p = (octet *)v[4].buf;                                             \
+    while (msz) {                                                      \
+      if (i > msz) i = msz;                                            \
+      if (pre##_gcmdecrypt(&ctx, p, i, &b)) {                          \
+       puts("!! gcmdecrypt reports failure");                          \
+       win = 0; goto fail_dec;                                         \
+      }                                                                        \
+      p += i; msz -= i;                                                        \
+    }                                                                  \
+                                                                       \
+    win = pre##_gcmdecryptdone(&ctx, &aad, &b,                         \
+                              (octet *)v[5].buf, v[5].len);            \
+    if (win < 0) {                                                     \
+      puts("!! gcmdecryptdone reports failure");                       \
+      goto fail_dec;                                                   \
+    }                                                                  \
+    d.len = BLEN(&b);                                                  \
+                                                                       \
+    if (d.len != v[3].len || !win ||                                   \
+       memcmp(d.buf, v[3].buf, v[3].len) != 0) {                       \
+    fail_dec:                                                          \
+      printf("\nfail decrypt:\n\tstep = %i", *ip);                     \
+      fputs("\n\tkey = ", stdout); type_hex.dump(&v[0], stdout);       \
+      fputs("\n\tnonce = ", stdout); type_hex.dump(&v[1], stdout);     \
+      fputs("\n\theader = ", stdout); type_hex.dump(&v[2], stdout);    \
+      fputs("\n\tciphertext = ", stdout); type_hex.dump(&v[4], stdout);        \
+      fputs("\n\texp pt = ", stdout); type_hex.dump(&v[3], stdout);    \
+      fputs("\n\tcalc pt = ", stdout); type_hex.dump(&d, stdout);      \
+      fputs("\n\ttag = ", stdout); type_hex.dump(&v[5], stdout);       \
+      printf("\n\tverify %s", win ? "ok" : "FAILED");                  \
+      putchar('\n');                                                   \
+      ok = 0;                                                          \
+    }                                                                  \
+  }                                                                    \
+                                                                       \
+  dstr_destroy(&d); dstr_destroy(&t);                                  \
+  return (ok);                                                         \
+}                                                                      \
+                                                                       \
+/* The table of test chunks: a single chunk, named after the cipher    \
+ * with `-gcm' appended and checked by @gcmverify@.  Each vector has   \
+ * six hex fields, which @gcmverify@ reads as key, nonce, header,      \
+ * message, ciphertext and tag.  The all-zero entry ends the table.    \
+ */                                                                    \
+static test_chunk aeaddefs[] = {                                       \
+  { name "-gcm", gcmverify,                                            \
+    { &type_hex, &type_hex, &type_hex, &type_hex,                      \
+      &type_hex, &type_hex, 0 } },                                     \
+  { 0, 0, { 0 } }                                                      \
+};                                                                     \
+                                                                       \
+/* Entry point for the test program: run the vectors from the file     \
+ * @fname@ under @SRCDIR "/t/"@.  The value of @test_run@ is not       \
+ * examined here, and @main@ always returns zero.                      \
+ * NOTE(review): presumably @test_run@ reports failures and sets the   \
+ * exit status itself -- confirm.                                      \
+ */                                                                    \
+int main(int argc, char *argv[])                                       \
+{                                                                      \
+  ego(argv[0]);                                                                \
+  test_run(argc, argv, aeaddefs, SRCDIR"/t/" fname);                   \
+  return (0);                                                          \
+}
+
+#else
+/* Without @TEST_RIG@, the test rig expands to nothing at all. */
+#  define GCM_TESTX(PRE, pre, name, fname)
+#endif
+
+/*----- That's all, folks -------------------------------------------------*/
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif