mdw@git.distorted.org.uk Git - doc/ips/blob - auth-mac.tex

   1 \xcalways\section{Message authentication codes}\x
   2
   3 \xcalways\subsection{Definitions and security notions}\x
   4
   5 \begin{slide}
   6   \head{Definition of a MAC}
   7
   8   A MAC is a pair of algorithms $\mathcal{M} = (T, V)$:
   9   \begin{itemize}
  10   \item The \emph{tagging} algorithm $T\colon \{0, 1\}^k \times \{0, 1\}^*
  11     \to \{0, 1\}^L$ is a probabilistic algorithm which, given a key and a
  12     string, returns a \emph{tag}.  We write $\tau \in T_K(m)$.
  13   \item The \emph{verification} algorithm $V\colon \{0, 1\}^k \times \{0,
  14     1\}^* \times \{0, 1\}^L \to \{0, 1\}$ is a deterministic algorithm which,
  15     given a key, a message and a tag, returns $1$ if the tag is valid, or $0$
  16     otherwise; i.e., we require that $V_K(m, \tau) = 1 \iff \tau \in T_K(m)$.
  17   \end{itemize}
  18   The basic idea is that it's hard for an adversary to \emph{forge} a tag for
  19   a message it's not seen before.
  20 \end{slide}
  21
  22 \begin{slide}
  23   \topic{informal security notion}
  24   \resetseq
  25   \head{Strong MACs, \seq: informal security notion}
  26
  27   Our standard notion of security for MACs is \emph{strong unforgeability
  28     against chosen message attack}, or SUF-CMA, for short
  29   \cite{Abdalla:2001:DHIES, Bellare:2000:AER}.  Let $A$ be an adversary which
  30   is attempting to attack the MAC $\mathcal{M} = (T, V)$.
  31
  32   We play a game with the adversary.  We invent a key $K \inr \{0, 1\}^k$.
  33   The adversary \emph{wins} if, after requesting tags for some messages of
  34   its choice, and checking some guesses, it can return a pair $(m, \tau)$
  35   such that:
  36   \begin{itemize}
  37   \item the tag is correct, i.e., $V_K(m, \tau) = 1$; and
  38   \item the tag is not one returned by the adversary's tagging oracle for
  39     that message.
  40   \end{itemize}
  41 \end{slide}
  42
  43 \begin{slide}
  44   \topic{strong MACs}
  45   \head{Strong MACs, \seq: the experiment}
  46
  47   We perform the following experiment with the adversary.
  48   \begin{program}
  49     Experiment $\Expt{suf-cma}{\mathcal{M}}(A)$: \+ \\
  50       $K \getsr \{0, 1\}^k$; \\
  51       $\Xid{T}{list} \gets \emptyset$; \\
  52       $(m, \tau) \gets A^{\id{tag}(\cdot), V_K(\cdot, \cdot)}$; \\
  53       \IF $V_K(m, \tau) \land (m, \tau) \notin \Xid{T}{list}$
  54       \THEN \RETURN $1$; \\
  55       \ELSE \RETURN $0$; \- \\[\smallskipamount]
  56     Oracle $\id{tag}(m)$: \+ \\
  57       $\tau \gets T_K(m)$; \\
  58       $\Xid{T}{list} \gets \Xid{T}{list} \cup \{(m, \tau)\}$; \\
  59       \RETURN $\tau$;
  60   \end{program}
  61 \end{slide}
  62
  63 \begin{slide}
  64   \head{Strong MACs, \seq: wrapping up the notation}
  65
  66   The \emph{success probability} of an adversary $A$ against the MAC
  67   $\mathcal{M}$ in the sense of SUF-CMA is
  68   \[ \Succ{suf-cma}{\mathcal{M}}(A) =
  69        \Pr[\Expt{suf-cma}{\mathcal{M}}(A) = 1]. \]%
  70   The \emph{insecurity} of a MAC $\mathcal{M}$ in the SUF-CMA sense is then
  71   \[ \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V) =
  72        \max_A \Succ{suf-cma}{\mathcal{M}}(A) \]%
  73   where the maximum is taken over all adversaries $A$ running in time $t$ and
  74   making $q_T$ tagging and $q_V$ verification queries.
  75
  76   If $\InSec{suf-cma}(\mathcal{M}; t, q_T, q_V) \le \epsilon$ then we say
  77   that $\mathcal{M}$ is a \emph{$(t, q_T, q_V, \epsilon)$-secure MAC in the
  78   SUF-CMA sense}.
  79 \end{slide}
  80
  81 \begin{slide}
  82   \topic{other notions}
  83   \head{Other security notions for MACs}
  84
  85   There are a number of weaker security notions in use:
  86   \begin{itemize}
  87   \item The definition of a \emph{weak MAC} restricts the adversary from
  88     returning any message with which it queried its tagging oracle.  The
  89     strong MAC definition considers this OK, as long as the tag is different
  90     from any returned by the oracle for that message.
  91   \item Some definitions of MACs don't equip the adversary with a
  92     verification oracle.  Our definition considers these to be $(t, q_T, 0,
  93     \epsilon)$-secure.
  94   \item You can have a MAC with a bounded domain $\{0, 1\}^L$ rather than
  95     $\{0, 1\}^*$ as shown previously.
  96   \item Further quantification is possible, e.g., counting the total number
  97     of bytes queried, or the maximum size of a tagging query.
  98   \end{itemize}
  99 \end{slide}
 100
 101 \xcalways\subsection{Basic results}\x
 102
 103 \begin{slide}
 104   \topic{PRFs are MACs}
 105   \resetseq
 106   \head{PRFs are MACs, \seq}
 107
 108   If $F_K\colon \{0, 1\}^* \to \{0, 1\}^L$ is a PRF then it's also a MAC.
 109   Quantitatively:
 110   \[ \InSec{suf-cma}(F; t, q_T, q_V) \le
 111        \InSec{prf}(F; t + O(q), q_T + q_V) +
 112        \frac(q_V + 1}{2^L}. \]
 113   The constant hidden by the $O(\cdot)$ is small and depends on the model of
 114   computation.
 115
 116   Suppose $A$ can break $F$ used as a MAC in time $t$ and with $q_T$ and
 117   $q_V$ queries to its tagging and verification oracles respectively.
 118
 119   If we can construct an adversary which distinguishes $F_K$ from a random
 120   function using $A$ as an essential component, then we can prove the
 121   result.
 122 \end{slide}
 123
 124 \begin{slide}
 125   \head{PRFs are MACs, \seq: the distinguisher}
 126
 127   \begin{program}
 128     Distinguisher $D^{F(\cdot)}$: \+ \\
 129       $\Xid{T}{list} \gets \emptyset$; \\
 130       $(m, \tau) \gets A^{T_F(\cdot), V_F(\cdot, \cdot)}$; \\
 131       \IF $m \notin \Xid{T}{list} \land \tau = F(m)$
 132       \THEN \RETURN $1$; \\
 133       \ELSE \RETURN $0$; \- \\[\smallskipamount]
 134     Oracle $T_F(m)$: \+ \\
 135       $\Xid{T}{list} \gets \Xid{T}{list} \cup \{m\}$; \\
 136       \RETURN $F(m)$; \- \\[\smallskipamount]
 137     Oracle $V_F(m, \tau)$: \+ \\
 138       \IF $\tau = F(m)$ \THEN \RETURN $1$; \\
 139       \ELSE \RETURN $0$;
 140   \end{program}
 141 \end{slide}
 142
 143 \begin{slide}
 144   \head{PRFs are MACs, \seq: wrapping up}
 145
 146   The distinguisher simulates the tagging and verification oracles for the
 147   MAC forger, using its supplied oracle.  If the forger succeeds, then the
 148   distinguisher returns 1; otherwise it returns zero.
 149
 150   The probability that the distinguisher returns 1 given an instance $F_K$ of
 151   the PRF is precisely $\Succ{suf-cma}{F}(A)$.
 152
 153   The probability that it returns 0 given a random function depends on what
 154   $A$ does when it's given a random function.  But we know that the
 155   probability of it successfully guessing the MAC for a message for which it
 156   didn't query $T$ can be at most $(q_V + 1) 2^{-L}$.  So
 157   \[ \Adv{prf}{F}(D) \ge \Succ{suf-cma}{F}(A) - (q_V + 1) 2^{-L}. \]
 158   Let $q = q_T + q_V + 1$; then counting, rearranging, maximizing yields
 159   \[ \InSec{suf-cma}(F; t, q_T, q_V) \le
 160      \InSec{prf}(F; t + O(q), q) + (q_V + 1)2^{-L}. \]%
 161 \end{slide}
 162
 163 \begin{slide}
 164   \head{PRFs are MACs, \seq: MACs aren't PRFs}
 165
 166   The converse of our result is not true.  Suppose $\mathcal{M} = (T, V)$ is
 167   a deterministic MAC.  Choose some integer $n$.  Then define $\mathcal{M}' =
 168   (T', V')$, as follows:
 169   \[ T'_K(x) = 0^n \cat T_K(x); \qquad
 170      V'_K(x, \tau) = \begin{cases}
 171        1 & if $T'_K(x) = \tau$ \\
 172        0 & otherwise
 173      \end{cases}.
 174   \]
 175   $T'$ is obviously not a PRF: an adversary checking for the string of $n$
 176   zero bits on the output will succeed with advantage $1 - 2^{-qn}$.
 177
 178   However, $\mathcal{M}'$ is a secure MAC.  Suppose $A'$ attacks
 179   $\mathcal{M}'$.
 180   \begin{program}
 181     Adversary $A^{T(\cdot), V(\cdot)}$: \+ \\
 182       $(m, \tau') \gets A'^{\id{tag}(\cdot), \id{verify}(\cdot)}$; \\
 183       \PARSE $\tau'$ \AS $n\colon z, \tau$; \\
 184       \RETURN $(m, \tau)$;
 185   \next
 186     Oracle $\id{tag}(m)$: \+ \\
 187       \RETURN $0^n \cat T(m)$; \- \\[\smallskipamount]
 188     Oracle $\id{verify}(m, \tau')$: \+ \\
 189       \PARSE $\tau'$ \AS $n\colon z, \tau$; \\
 190       \IF $z \ne 0^n$ \THEN \RETURN $0$; \\
 191       \ELSE \RETURN $V(m, \tau)$;
 192   \end{program}
 193 \end{slide}
 194
 195 \begin{exercise}
 196   \begin{parenum}
 197   \item Suppose that $F\colon \{0, 1\}^k \times \{0, 1\}^* \to \{0, 1\}^L$ is
 198     a $(t, q, \epsilon)$-secure PRF.  Let $T^{(\ell)}_K(x)$ be the leftmost
 199     $\ell$~bits of $F_K(x)$ for $\ell \le L$.  Demonstrate the security of
 200     $T^{(\ell)}(\cdot)$ as a MAC.
 201   \item Discuss the merits of truncating MAC tags in practical situations.
 202   \end{parenum}
 203   \answer%
 204   \begin{parenum}
 205   \item The follows exactly the same pattern as the `PRFs are MACs' proof in
 206     the slides: $T^{(\ell)}$ is a $(t, q_T, q_V, \epsilon + (q_V +
 207     1)2^{-\ell})$-secure MAC, where $q_T + q_V = q$.
 208   \item Obviously, truncating tags saves bandwidth.  There is a trade-off
 209     between tag size and security, as the $2^{-\ell}$ term shows.  Note that
 210     this term represents the probability of the adversary guessing the
 211     correct tag when it's actually attacking a random function, and
 212     therefore, when this occurs, the adversary has one `by accident'.
 213     Including sequence numbers in packets ensures that replay of accidental
 214     forgery (or honest messages) will be detected.  Hence, for some
 215     applications, setting $\ell = 32$ or even lower is of no particular harm.
 216     Perhaps more significantly, if the PRF isn't actually as good as it ought
 217     to be, and (say) leaks key material very slowly, then truncating its
 218     output can actually improve security.
 219   \end{parenum}
 220 \end{exercise}
 221
 222 \begin{exercise}
 223   A traditional MAC construction is the \emph{CBC-MAC}: it works like this.
 224   Suppose $F\colon \{0, 1\}^k \times \{0, 1\}^l \to \{0, 1\}^l$ is a PRF.
 225   Split a message~$x$ into $l$-bit blocks $x_0, x_1, \ldots, x_{n-1}$
 226   (applying some sort of padding if you need to).  Then we define the CBC-MAC
 227   as $F^{(n)}_K(x)$, where
 228   \[ F^{(1)}_K(x) = F_K(x);
 229      \qquad F^{(i+i)}(x) = F_K(x_i \xor F^{(i)}_K(x)). \]%
 230   In \cite{Bellare:1994:SCB}, Mihir Bellare, Joe Kilian and Phil Rogaway
 231   introduced the world to the concrete-security approach and, almost
 232   incidentally, proved that the CBC-MAC is a PRF (and therefore a MAC) for
 233   any \emph{fixed sized} input.
 234
 235   Show that the CBC-MAC is easily broken if you're allowed to choose messages
 236   of different lengths.
 237   \answer%
 238   Request tags $\tau$ for the message $x = x_0, x_1, \ldots, x_{n-1}$ and
 239   $\tau'$ for $x' = x'_0 \xor \tau, x'_1, \ldots, x'_{n'-1}$.  Let $y = x_0,
 240   x_1, \ldots, x_{n-1}, x'_0 , x'_1, \ldots, x'_{n'-1}$.  Note that
 241   $F^{(n)}_K(y) = \tau$, and $F^{(n+1)}_K(y) = F_K(x'_0 \xor F^{(n)}_K(x)) =
 242   F^{(1)}_K(x')$.  Hence, $F^{(n+n')}_K(y) = \tau'$, and we have a correct
 243   forgery.
 244 \end{exercise}
 245
 246 \begin{slide}
 247   \topic{verification oracles}
 248   \head{Verification oracles}
 249
 250   We can leave verification oracles out of our analysis.  This simplifies
 251   analysis, but produces slightly less satisfactory quantitative results.
 252
 253   Suppose that $\mathcal{M} = (T, V)$ is a $(t, q_T, 0, \epsilon)$-secure
 254   MAC.  Then, for any $q_V$,
 255   \[ \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V) \le
 256      (q_V + 1)\InSec{suf-cma}(\mathcal{M}; t, q_T, 0). \]%
 257   This bound is \emph{tight}: it's not possible for a general result like
 258   this to do better.
 259 \end{slide}
 260
 261 \begin{proof}
 262   Consider an adversary $A$ which uses $q_V$ verification queries.  We assume
 263   the following properties of $A$'s behaviour:
 264   \begin{itemize}
 265   \item No verification query contains a message and a tag for that message
 266     received from the tagging oracle.
 267   \item If a verification query succeeds, the message is not given in a query
 268     to the tagging oracle.
 269   \item Once a verification query succeeds, all subsequent verification
 270     queries also succeed and the adversary returns a correct forgery (e.g.,
 271     by simply repeating the successful query).
 272   \end{itemize}
 273   It's clear that any adversary can be transformed into one which has these
 274   properties and succeeds with probability at least as high.
 275
 276   Let $V$ be the event that at least one verification query succeeds, and let
 277   $S$ be the event that $A$ succeeds.  Then
 278   \begin{eqnarray*}[rl]
 279     \Succ{suf-cma}{\mathcal{M}}(A)
 280       &= \Pr[S \mid V] \Pr[V] + \Pr[S \mid \lnot V] \Pr[\lnot V] \\
 281       &= \Pr[V] + \Pr[S \mid \lnot V] \Pr[\lnot V].
 282   \end{eqnarray*}
 283   Now consider these two adversaries:
 284   \begin{program}
 285     Adversary $A'^{T(\cdot), V(\cdot, \cdot)}$: \+ \\
 286       $i \gets 0$; \\
 287       $(m, \tau) \gets A^{T(\cdot), \Xid{V}{sim}(\cdot, \cdot)}$; \\
 288       \RETURN $(m^*, \tau^*)$; \- \\[\smallskipamount]
 289     Oracle $\Xid{V}{sim}(m, \tau)$: \+ \\
 290       $i \gets i + 1$; \\
 291       \IF $i < q_V$ \THEN \RETURN $V(m, \tau)$; \\
 292       $(m^*, \tau^*) \gets (m, \tau)$; \\
 293       \RETURN $1$;
 294   \next
 295     Adversary $Z^{T(\cdot), V(\cdot, \cdot)}$: \+ \\
 296       \\
 297       $(m, \tau) \gets A^{T(\cdot), \Xid{V}{zero}(\cdot, \cdot)}$; \\
 298       \RETURN $(m, \tau)$; \- \\[\smallskipamount]
 299     Oracle $\Xid{V}{zero}(m, \tau)$: \+ \\
 300       \RETURN $0$;
 301   \end{program}
 302   The adversary $A'$ uses $q_V - 1$ verification queries.  It ignores the
 303   output of $A$, returning instead $A$'s $q_V$-th verification query.  Thus,
 304   by our assumptions on the behaviour of $A$, we have that $A'$ succeeds
 305   precisely whenever one of $A$'s verification queries succeeds.  Thus:
 306   \[ \Pr[V] = \Succ{suf-cma}{\mathcal{M}}(A')
 307             \le \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V - 1). \]%
 308   Similarly, the adversary $Z$ succeeds with precisely the same probability
 309   as $A$, given that all of its verification queries failed; i.e.,
 310   \[ \Pr[S \mid \lnot V] \Pr[\lnot V] = \Succ{suf-cma}{\mathcal{M}}(Z)
 311                          \le \InSec{suf-cma}(\mathcal{M}; t, q_T, 0). \]%
 312   Because $A$ was chosen arbitrarily, we can maximize:
 313   \begin{eqnarray*}[rl]
 314     \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V)
 315     & \le \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V - 1) +
 316           \InSec{suf-cma}(\mathcal{M}; t, q_T, 0) \\
 317     & \le (q_V + 1)\InSec{suf-cma}(\mathcal{M}; t, q_T, 0)
 318   \end{eqnarray*}
 319   as required.
 320
 321   To show that the bound is tight, consider a random function $F$ used as a
 322   MAC, with $L$-bit output.  Then we claim that $\InSec{suf-cma}(F; t, q_T,
 323   q_V) = (q_V + 1)/2^L$.  To save typing, let $e_q = \InSec{suf-cma}(F; t,
 324   q_T, q)$.  We prove the claim by induction on $q$.  Firstly, note that if
 325   $q = 0$ then necessarily $e_q = 1/2^L$.  Now suppose that $e_{q-1} =
 326   q/2^L$.  We're now allowed an extra query, so rather than returning the
 327   result, we feed it to the verification oracle.  If it answers `yes' then we
 328   return it; otherwise we guess randomly from the remaining $2^L - q$
 329   possibilities.  Now
 330   \begin{eqnarray*}[rl]
 331     e_q &= e_{q-1} + \frac{1 - e_{q-1}}{2^L - q} \\
 332         &= \frac{q}{2^L} + \frac{2^L - q}{2^L} \cdot \frac{1}{2^L - q} \\
 333         &= \frac{q + 1}{2^L}
 334   \end{eqnarray*}
 335   as claimed.
 336 \end{proof}
 337
 338 \xcalways\subsection{The HMAC construction}\x
 339
 340 \begin{slide}
 341   \resetseq
 342   \head{The HMAC construction \cite{Bellare:1996:KHF}, \seq: motivation}
 343
 344   It ought to be possible to construct a decent MAC using a hash function.
 345   Many attempts have failed, however.  For example, these constructions are
 346   weak if used with standard one-pass Merkle-Damg\aa{}rd iterated hashes.
 347   \begin{itemize}
 348   \item Secret prefix: $T_K(m) = H(K \cat m)$.  Given $H(K \cat m)$, it's
 349     easy to compute $H(K \cat m \cat p \cat m')$ for a padding string $p$ and
 350     arbitrary suffix $m'$.
 351   \item Secret suffix: $T_K(m) = H(m \cat K)$.  Finding a collision $H(m) =
 352     H(m')$ yields $H(m \cat K) = H(m' \cat K)$.  We saw earlier that
 353     adversaries which know collisions \emph{exist} even if we don't know how
 354     to describe them.
 355   \end{itemize}
 356
 357   It would be nice to have a construction whose security was provably related
 358   to some plausible property of the underlying hash function.
 359 \end{slide}
 360
 361 \begin{slide}
 362   \head{The HMAC construction, \seq: definition of NMAC}
 363
 364   Let $H\colon \{0, 1\}^* \to \{0, 1\}^k$ be an iterated hash, constructed
 365   from the compression function $F\colon \{0, 1\}^k \times \{0, 1\}^L \to
 366   \{0, 1\}^k$.  We define a keyed version of $H$.  Let $K \in \{0, 1\}^k$;
 367   then we compute $H_K(x)$ as follows:
 368   \begin{enumerate}
 369   \item Pad and split $x$ into the $L$-bit blocks $x_0$, $x_1$, \ldots,
 370     $x_{n-1}$ as before.
 371   \item Set $I_0 = K$.  Let $I_{i+1} = F(I_i \cat x_i)$ for $0 \le i < n$.
 372   \item The result $H_K(x) = I_n$.
 373   \end{enumerate}
 374   The NMAC (nested-MAC) construction requires two independent $k$-bit keys
 375   $K_0$ and $K_1$.  The construction itself is simply:
 376   \[ \Xid{T}{NMAC}^H_{K_0, K_1}(x) = H_{K_0}(H_{K_1}(x)). \]
 377   NMAC is deterministic, so verification consists of computing the tag and
 378   comparing.
 379 \end{slide}
 380
 381 \begin{slide}
 382   \head{The HMAC construction, \seq: security of NMAC}
 383
 384   Consider a function $F\colon \{0, 1\}^k \times \{0, 1\}^* \to \{0, 1\}^k$.
 385   We say that $F$ is \emph{$(t, q, \epsilon)$-weakly collision resistant} if,
 386   for any adversary $A$ constrained to run in time $t$ and permitted $q$
 387   oracle queries,
 388   \[ \Pr[K \getsr \{0, 1\}^k;
 389          (x, y) \gets A^{F_K(\cdot)} :
 390          x \ne y \land F_K(x) = F_K(y)] \le \epsilon \]%
 391
 392   If $H_K$ is a $(t, q_T, q_V, \epsilon)$-secure MAC on $k$-bit messages, and
 393   moreover $(t, q_T + q_V, \epsilon')$-weakly collision resistant, then
 394   $\Xid{T}{NMAC}^H$ is a $(t, q_T, q_V, \epsilon + \epsilon')$-secure MAC.
 395 \end{slide}
 396
 397 \begin{slide}
 398   \head{The HMAC construction, \seq: NMAC security proof}
 399
 400   Let $A$ be an adversary which forges a $\Xid{T}{NMAC}^H$ tag in time $t$,
 401   using $q_T$ tagging queries and $q_V$ verification queries with probability
 402   $\epsilon$.  We construct an adversary $A'$ which forges an $H$-tag for a
 403   $k$-bit message in essentially the same time.
 404   \begin{program}
 405     Adversary $A'^{T(\cdot), V(\cdot, \cdot)}$ \+ \\
 406       $K \getsr \{0, 1\}^k$; \\
 407       $(m, \tau) \gets A^{T(H_K(\cdot)), V(H_K(\cdot), \cdot)}$; \\
 408       \RETURN $(H_K(m), \tau)$;
 409   \end{program}
 410   $A'$ might fail even though $A$ succeeded only if the message it returns,
 411   $H_K(m)$, collides with one of its tagging queries.  But since $H_K$ is
 412   $(t, q_T + q_V, \epsilon')$-weakly collision resistant, this happens with
 413   at most probability $\epsilon'$.  Hence, $A'$ succeeds with probability at
 414   least $\epsilon - \epsilon'$.  Rearrangement yields the required result.
 415 \end{slide}
 416
 417 \begin{slide}
 418   \head{The HMAC construction, \seq: from NMAC to HMAC}
 419
 420   Implementing NMAC involves using strange initialization vectors and
 421   generally messing about with your hash function.  HMAC is an attempt to
 422   capture the provable security properties using a plain ol' hash function.
 423
 424   Suppose $H$ is an iterated hash function with a $k$-bit output and $L$-bit
 425   blocks (with $L \ge k$).  We set $\id{ipad}$ to be the byte $\hex{36}$
 426   repeated $L/8$ times, and $\id{opad}$ to be the byte $\hex{5C}$ repeated
 427   $L/8$ times.  Select a key $K$ of $L$ bits: if your key is shorter, pad it
 428   by appending zero bits; if it's longer, hash it with $H$ and then pad.
 429
 430   The HMAC tagging function is then defined as
 431   \[ \Xid{T}{HMAC}^H_K(m) =
 432      H(K \xor \id{opad} \cat H(K \xor \id{ipad} \cat m)). \]%
 433 \end{slide}
 434
 435 \begin{slide}
 436   \head{The HMAC construction, \seq: comparison with NMAC}
 437
 438   Comparing the two constructions, we see that
 439   \[ \Xid{T}{HMAC}^H_K =
 440      \Xid{T}{NMAC}^{H'}_{F(I \cat K \xor \id{opad}),
 441                          F(I \cat K \xor \id{ipad})}. \]%
 442   Here, $I$ is $H$'s initialization vector, $F$ is the compression function;
 443   $H'$ denotes a keyed hash function that is `like' $H$ but performs padding
 444   as if there were an extra initial block of message data for each message.
 445
 446   The proof of NMAC assumes that the two keys are random and independent.
 447   This isn't the case in HMAC, but a little handwaving about pseudorandomness
 448   of the compression function makes the problem go away.
 449 \end{slide}
 450
 451 \begin{exercise}
 452   Suppose that $F\colon \{0, 1\}^k \times \{0, 1\}^{t+\ell} \to \{0,
 453   1\}^t$ is a PRF.  Let $x \in \{0, 1\}^*$ be a message.  We define the
 454   function $H_K(x)$ as follows:
 455   \begin{itemize}
 456   \item Pad $x$ to a multiple of $\ell$ bits using some injective
 457     mapping.  Break the image of $x$ under this mapping into $\ell$-bit
 458     blocks $x_0, x_1, \ldots, x_{n-1}$.
 459   \item For $0 \le i \le n$, define $H^{(i)}_K(x)$ by
 460     \[ H^{(0)}_K(x) = I; \qquad
 461        H^{(i+1)}_K(x) = F_K(H^{(i)}(x) \cat x_i) \]%
 462     where $I$ is some fixed $t$-bit string (e.g., $I = 0^t$).
 463   \item Then set $H_K(x) = H^{(n)}_K(x)$.
 464   \end{itemize}
 465   We define two (deterministic) MACs $\mathcal{M}^i = (T^i, V^i)$ (for
 466   $i \in \{0, 1\}$) using the $H_K$ construction.  Verification in each
 467   case consists of computing the tag and comparing to the one offered.
 468   \begin{eqlines*}
 469     T^0_K(x) = H_K(x); \qquad T^1_K(x) = H_K(x \cat K); \\
 470     V^i_K(x, \tau) = \begin{cases}
 471       1 & if $\tau = T^i_K(x)$ \\
 472       0 & otherwise
 473     \end{cases}.
 474   \end{eqlines*}
 475   Decide whether each of these constructions is secure.  A full proof is
 476   rather hard: an informal justification would be good.
 477   \answer%
 478   $\mathcal{M}^0$ is secure; $\mathcal{M}^1$ isn't, under the sole
 479   assumption that $F$ is a PRF.
 480
 481   To see that $\mathcal{M}^0$ is secure, it suffices to show that $T^0$
 482   is a PRF.  This is actually quite involved.  Given an adversary $A$
 483   attacking $T^1$ as a PRF, we construct an adversary $B$ attacking $F$,
 484   which simply computes $H$ as required, using the oracle supplied.  To
 485   complete the proof, we need to show a bound on the
 486   information-theoretic security of $H$ when instantiated using a random
 487   function $F$.  For the sake of simplicity, we allow the adversary $A$
 488   to query on \emph{padded} messages, rather than the raw unpadded
 489   messages.  We count the number $q'$ of individual message blocks.
 490
 491   As the game with $A$ progresses, we can construct a directed \emph{graph}
 492   of the query results so far.  We start with a node labelled $I$.  When
 493   processing an $H$-query, each time we compute $t' = F(t \cat x_i)$, we add
 494   a node $t'$, and an edge $x_i$ from $t$ to $t'$.  The `bad' event occurs
 495   whenever we add an edge to a previously existing node.  We must show,
 496   firstly, that the adversary cannot distinguish $H$ from a random function
 497   unless the bad event occurs; and, secondly, that the bad event doesn't
 498   occur very often.
 499
 500   The latter is easier: our standard collision bound shows that the bad
 501   event occurs during the game with probability at most $q'(q' - 1)/2^{t+1}$.
 502
 503   The former is trickier.  This needs a lot more work to make it really
 504   rigorous, but we show the idea.  Assume that the bad event has not
 505   occurred. Consider a query $x_0, x_1, \ldots, x_{n-1}$.  If it's the same
 506   as an earlier query, then $A$ learns nothing (because it could have
 507   remembered the answer from last time).  If it's a \emph{prefix} of some
 508   earlier query, then the answer is the value of some internal node which
 509   hasn't been revealed before; however, the value of that internal node was
 510   chosen uniformly at random (we claim).  Finally, if the query is not a
 511   prefix of any previous query, then we add a new edge to our graph.  If the
 512   bad event doesn't occur, we must add a new node for the result, and the
 513   value at that node will be uniformly random, because $F$ is a random
 514   function being evaluated at a new point -- this is the only time we add new
 515   nodes to the graph, justifying the claim made earlier.
 516
 517   At the end of all of this, we see that
 518   \[ \InSec{prf}(T^0; t, q) \le
 519        \InSec{prf}(F; t, q') + \frac{q'(q' - 1)}{2^{t+1}} \]%
 520   and hence
 521   \[ \InSec{suf-cma}(\mathcal{M}^0; t, q) \le
 522        \InSec{prf}(F; t, q') + \frac{2 q'(q' - 1) + 1}{2^{t+1}}. \]%
 523
 524   Now we turn our attention to $T^1$.  It's clear that we can't simulate
 525   $T^1$ very easily using an oracle for $F$, since we don't know $K$
 526   (and indeed there might not be a key $K$).  The intuitive reason why
 527   $T^1$ is insecure is that $F$ might leak useful information if  its input
 528   matches its key.  This doesn't affect the strength of $F$ as a PRF
 529   because you have to know the key before you can exploit this leakage; but
 530   $T^1$ already knows the key, and this can be exploited to break the MAC.
 531
 532   To show that this is insecure formally, let $F'$ be defined as
 533   follows:
 534   \[ F'_K(x) = \begin{cases}
 535        K      & if $x = p \cat K \cat q$ where
 536                 $|p| = t$ and $|q| = \ell - k$ \\
 537        F_K(x) & otherwise
 538      \end{cases}. \]%
 539   We choose a simple injective padding scheme: if $x$ is a message then
 540   we form $x' = x \cat 1 \cat 0^n$, where $0 \le n < \ell$ and $|x'|$ is
 541   a multiple of $\ell$.  If $T^1$ is instantiated with this PRF then it
 542   is insecure as a MAC: submitting a tagging query for the empty string
 543   $\emptystring$ reveals the key $K$, which can be used to construct a
 544   forgery.
 545
 546   To complete the proof, we must show that $F'$ is a PRF.  Let $A$ be an
 547   adversary attacking $F'$.  We consider a series of games; for each
 548   $\G{i}$, let $S_i$ be the event that $A$ returns $1$ in that game.
 549   Game~$\G0$ is the standard attack game with $A$ given an oracle for a
 550   random function; game~$\G1$ is the same, except that $A$ is given an
 551   oracle for $F'_K$ for some $K \inr \{0, 1\}^k$.  Then
 552   $\Adv{prf}{F'}(A) = \Pr[S_1] - \Pr[S_0]$.  Let game~$\G2$ be the same
 553   as $\G1$, except that if $A$ makes any query of the form $p \cat K
 554   \cat q$ with $|p| = t$ and $|q| = \ell - k$ then the game halts
 555   immediately, and let $F_2$ be the event that this occurs.  By
 556   Lemma~\ref{lem:shoup} (slide~\pageref{lem:shoup}), then, $|{\Pr[S_2]} -
 557   \Pr[S_1]| \le \Pr[F_2]$.  Let game~$\G3$ be the same as $\G2$ except
 558   that we give $A$ an oracle for $F_K$ rather than $F'_K$.  Since $F$
 559   and $F'$ differ only on queries of the form $p \cat K \cat q$, we have
 560   $\Pr[S_3] = \Pr[S_2]$.  But $\Pr[S_3] - \Pr[S_0] = \Adv{prf}{F}(A) \le
 561   \InSec{prf}(F; t, q)$.  Hence, $\Adv{prf}{F'}(A) \le \InSec{prf}{F}(A)
 562   - \Pr[F_2]$.
 563
 564   Finally, we bound the probability of $F_2$.  Fix an integer $n$.
 565   Consider an adversary $B$ attacking $F$ which runs as follows.  It
 566   initially requests $F(0), F(1), \ldots, F(n - 1)$ from its oracle.  It
 567   then runs $A$, except that, for each oracle query $x$, it parses $x$
 568   as $p \cat K' \cat q$ with $|p| = t$, $|K'| = k$ and $|q| = \ell - k$;
 569   then, if $F_{K'}(0) = F(0) \land F_{K'}(1) = F(1) \land \cdots \land
 570   F_{K'}(n - 1) = F(n - 1)$, $B$ immediately returns $1$, claiming that
 571   its oracle $F$ is the function $F_{K'}$; if this never occurs, $B$
 572   returns $0$.  Clearly, if $B$ is given an instance $F_K$ of $F$ then
 573   it succeeds with probability $\Pr[F_2]$; however, if $F$ is a random
 574   function then $B$ returns $1$ with probability at most $q 2^{-nk}$.
 575   Hence, $\Adv{prf}{F}(B) \le \Pr[F_2] - q 2^{-nk}$.  $B$ issues $q + n$
 576   queries, and takes time $t + O(n q)$.  Wrapping everything up, we get
 577   \[ \InSec{prf}(F'; t, q) \le
 578      2\cdot\InSec{prf}(F; t + O(q n), q + n) + \frac{q}{2^{nk}}. \]%
 579   This completes the proof of generic insecurity for $\mathcal{M}^1$.
 580 \end{exercise}
 581
 582 \xcalways\subsection{Universal hashing}\x
 583
 584 \begin{slide}
 585   \topic{almost-universal hash functions}
 586   \resetseq
 587   \head{Universal hashing, \seq: definition}
 588
 589   Consider a family of hash functions $H\colon \keys H \times \dom H \to
 590   \ran H$.  We define
 591   \[ \InSec{uh}(H) =
 592      \max_{x \ne y} \Pr[K \getsr \keys H : H_K(x) = H_K(y)]. \]%
 593   If $\InSec{uh}(H) \le \epsilon$ then we say that $H$ is
 594   \emph{$\epsilon$-almost universal}.  Note that the concept of
 595   almost-universality is not quantified by running times.
 596
 597   Such families definitely exist: we don't need to make intractability
 598   assumptions.  We'll see some examples later.
 599
 600   If $H$ is $1/|{\ran H}|$-almost universal, then we say that $H$ is
 601   \emph{universal}.  Sometimes it's said that this is the best possible
 602   insecurity: this isn't true.
 603 \end{slide}
 604
 605 \begin{proof}[Counterexample]
 606   Here's a function $H\colon \{0, 1, 2\} \times \{0, 1, 2, 3\} \to \{0, 1\}$
 607   which is $\frac{1}{3}$-almost universal, though $|{\ran H}| = 2$:
 608   \begin{quote} \item
 609     \begin{tabular}[C]{c|cccc}
 610       & 0 & 1 & 2 & 3 \\ \hlx{vhv}
 611     0 & 0 & 1 & 0 & 1 \\
 612     1 & 0 & 0 & 1 & 1 \\
 613     2 & 0 & 1 & 1 & 0
 614     \end{tabular}
 615   \end{quote}
 616 \end{proof}
 617
 618 \begin{slide}
 619   \topic{dynamic view}
 620   \head{Universal hashing, \seq: a dynamic view}
 621
 622   Suppose that $H$ is $\epsilon$-almost universal.  Consider this experiment:
 623   \begin{program}
 624     Experiment $\Expt{uh}{H}(A)$: \+ \\
 625       $(x, y) \gets A$; \\
 626       $K \getsr \keys H$; \\
 627       \IF $x \ne y \land H_K(x) = H_K(y)$ \THEN \RETURN $1$; \\
 628       \ELSE \RETURN $0$;
 629   \end{program}
 630   The adversary may make random decisions before outputting its selection
 631   $x$, $y$.  We show that $\Pr[\Expt{uh}{H}(A) = 1] \le \InSec{uh}(H) =
 632   \epsilon$.
 633
 634   Let $\rho \in \{0, 1\}^*$ be $A$'s coin tosses: $A$ chooses $x$ and $y$ as
 635   functions of $\rho$.  For some \emph{fixed} $\rho$,
 636   \[ \Pr[K \getsr \keys H : H_K(x(\rho)) = H_K(y(\rho))] \le
 637      \InSec{uh}(H). \]%
 638 \end{slide}
 639
 640 \begin{slide}
 641   \head{Universal hashing, \seq: the dynamic view (cont.)}
 642
 643   Now we treat $\rho$ as a random variable, selected from some distribution
 644   $P$ on the set $\{0, 1\}^*$.  We see that
 645   \begin{eqnarray*}[Ll]
 646     \Pr[\rho \getsr P; K \getsr \keys H : H_K(x(\rho)) = H_K(y(\rho))] \\
 647     &= \sum_{\rho \in \{0, 1\}^*}
 648        P(\rho) \cdot \Pr[K \getsr \keys H : H_K(x(\rho)) = H_K(y(\rho))] \\
 649     &\le \sum_{\rho \in \{0, 1\}^*} P(\rho) \cdot \InSec{uh}(H)
 650      = \InSec{uh}(H).
 651   \end{eqnarray*}
 652   Thus, no adversary can succeed in producing a collision in an
 653   $\epsilon$-almost universal hash with probability better than $\epsilon$.
 654   But obviously the adversary can ignore its coin tosses and simply return
 655   the best colliding pair.  Hence the two notions are completely equivalent.
 656 \end{slide}
 657
 658 \begin{slide}
 659   \topic{composition}
 660   \head{Universal hashing, \seq: composition}
 661
 662   Suppose that $G$ is $\epsilon$-almost universal, and $G'$ is
 663   $\epsilon'$-almost universal, and $\dom G = \ran G'$.  We define the
 664   composition $G \compose G'$ to be the family $H\colon (\keys G \times
 665   \keys G') \times \dom G' \to \ran G$ by $H_{k, k'}(m) =
 666   G_k(G'_{k'}(m))$.
 667
 668   Then $H$ is $(\epsilon + \epsilon')$-almost universal.  To see this, fix $x
 669   \ne y$, and choose $K = (k, k') \inr \keys G \times \keys G'$.  Let $x' =
 670   G'_{k'}(x)$ and $y' = G'_{k'}(y)$.  Following our previous result, we see:
 671   \begin{eqnarray*}[rl]
 672     \Pr[H_K(x) = H_K(y)]
 673     &= \Pr[G_k(G'_{k'}(x)) = G_k(G'_{k'}(y))] \\
 674     &= \Pr[G_k(x') = G_k(y')] \\
 675     &= \Pr[G_k(x') = G_k(y') \mid x' \ne y'] \Pr[G'_{k'}(x) \ne G'_{k'}(y)]\\
 676     &\le \epsilon + \epsilon'.
 677   \end{eqnarray*}
 678 \end{slide}
 679
 680 \begin{slide}
 681   \topic{domain and range extension of a PRF}
 682   \head{Universal hashing, \seq: PRF domain extension}
 683
 684   Suppose that $F\colon \keys F \times \{0, 1\}^L \to \{0, 1\}^l$ is a PRF.
 685   We'd like to have a PRF for a bigger domain $\{0, 1\}^n$.
 686
 687   If $H\colon \{0, 1\}^k \times \{0, 1\}^n \to \{0, 1\}^L$ is an
 688   almost-universal hash function.  Then we can define $F'$ by
 689   \[ F'_{K, h}(x) = F_K(H_h(x)). \]
 690   Now we can prove that
 691   \[ \InSec{prf}(F'; t, q) \le
 692        \InSec{prf}(F; t, q) +
 693        \frac{q(q - 1)}{2} \InSec{ah}(H). \]
 694   Immediately this tells us that $F'$ is a MAC for $n$-bit messages.
 695 \end{slide}
 696
 697 \begin{proof}
 698   Suppose $A$ attacks $F'$.  Consider the distinguisher $B$ which attacks
 699   $F$:
 700   \begin{program}
 701     Distinguisher $B^{F(\cdot)}$: \+ \\
 702       $h \getsr \{0, 1\}^k$; \\
 703       \RETURN $A^{F(H_h(\cdot))}()$;
 704   \end{program}
 705   Suppose $F$ is an oracle for $F_K(\cdot)$.  Then $A$ plays its standard
 706   attack game and all is well.  Conversely, suppose $F$ is a random
 707   function.  Then the only way $A$ can distinguish its oracle $F(H_h(\cdot))$
 708   from a random function is if it issues two queries $x_i \ne x_j$ such that
 709   $H_h(x_i) = H_h(x_j)$.  But for each pair $(i, j)$ this happens with
 710   probability at most $\InSec{ah}(H)$.  The stated bound follows.
 711 \end{proof}
 712
 713 \begin{exercise}
 714   Show how to extend the \emph{range} of a PRF.  Specifically, suppose you're
 715   given a PRF $F\colon \keys{F} \times \{0, 1\}^L \to \{0, 1\}^t$, but want
 716   an $l$-bit output.  Show how to achieve this.
 717 \answer
 718   There are two obvious techniques.
 719   \begin{enumerate}
 720   \item Suppose we have a PRG $G\colon \{0, 1\}^t \to \{0, 1\}^l$.  Then we
 721     can easily use $G \compose F$; it's easy to see how this is secure.
 722     Indeed, we can use $F$ itself as a PRG, by defining $G(x) = F_x(0) \cat
 723     F_x(1) \cat \cdots$.
 724   \item Let $m = \lceil \log_2 (l/t) \rceil$.  Use the construction from the
 725     slide to build a PRF $F'$ with domain $\{0, 1\}^{L+m}$.  Then we define
 726     $F''_K(x) = F'_K(x \cat 0) \cat F'_K(x \cat 1) \cat \cdots \cat F'_K(x
 727     \cat m - 1)$.
 728   \end{enumerate}
 729 \end{exercise}
 730
 731 \begin{slide}
 732   \topic{almost XOR-universality}
 733   \resetseq
 734   \head{Almost XOR-universality, \seq: definition}
 735
 736   Consider a family of hash functions $H\colon \keys H \times \dom H \to
 737   \{0, 1\}^L$.  Define
 738   \[ \InSec{xuh}(H) =
 739      \max_{x \ne y, \delta}
 740        \Pr[K \getsr \keys H : H_K(x) \xor H_K(y) = \delta]. \]%
 741   If $\InSec{xuh}(H) < \epsilon$ then we say that $H$ is
 742   \emph{$\epsilon$-almost XOR-universal}, or \emph{AXU}.  Setting $\delta =
 743   0$ shows that
 744   \begin{eqnarray*}[rl]
 745      \InSec{xuh}(H)
 746      & \ge \max_{x \ne y} \Pr[K \getsr \keys H : H_K(x) \xor H_K(y) = 0] \\
 747      & = \InSec{uh}(H).
 748   \end{eqnarray*}
 749
 750   We can take a dynamic view of almost XOR-universality using the same
 751   technique as for almost universal functions.
 752
 753   If $H$ is $2^{-L}$-almost XOR universal then we say that $H$ is
 754   \emph{XOR-universal}.  This is the best achievable.
 755 \end{slide}
 756
 757 \begin{proof}
 758   Fix some pair $x \ne y$.  Choose $\delta \inr \{0, 1\}^L$.  Then for any
 759   \emph{fixed} $K \in \keys H$, $H_K(x)$ and $H_K(y)$ are fixed, so $H_K(x)
 760   \xor H_K(y)$ is fixed, and $\Pr[H_K(x) \xor H_K(y) = \delta] = 2^{-L}$.
 761   Using the same trick as for proving the dynamic view:
 762   \begin{eqnarray*}[Ll]
 763     \Pr[\delta \getsr \{0, 1\}^L; K \getsr \keys H :
 764         H_K(x) \xor H_K(y) = \delta] \\
 765     & = \sum_{K \in \keys H} \frac{1}{|{\keys H}|}
 766           \Pr[\delta \getsr \{0, 1\}^L; K \getsr \keys H :
 767               H_K(x) \xor H_K(y) = \delta] \\
 768     & = \sum_{K \in \keys H} \frac{1}{|{\keys H}|} 2^{-L} = 2^{-L}.
 769   \end{eqnarray*}
 770   Since $H$ is arbitrary, this proves the lower bound on the almost
 771   XOR-universality.
 772 \end{proof}
 773
 774 \begin{slide}
 775   \topic{composition}
 776   \head{Almost XOR-universality, \seq: composition}
 777
 778   We extend our result about composition of almost-universal functions.
 779   Suppose that $G$ is $\epsilon$-almost XOR universal, and $G'$ is
 780   $\epsilon'$-almost universal (it doesn't have to be almost XOR-universal),
 781   and $\dom G = \ran G'$.
 782
 783   Then the composition $H = G \compose G'$ is $(\epsilon + \epsilon')$-almost
 784   XOR-universal.  The proof is simple, and very similar to the
 785   almost-universal case.
 786 \end{slide}
 787
 788 \begin{slide}
 789   \head{Almost XOR-universality, \seq: examples of AXU hash functions}
 790
 791   Let $\F = \gf{2^l}$ be a finite field.  Given a message $m = (m_0, \ldots,
 792   m_{n-1} \in \F^n$, we can hash it in several ways.
 793   \begin{itemize}
 794   \item Inner-product: choose $k = (k_0, \ldots, k_{n-1}) \in \F^n$.
 795     \[ \Xid{H}{ip}_{k}(m) = m \cdot k = \sum_{0\le i<n} k_i m_i. \]
 796     This hash is XOR-universal: $\InSec{axu}(\Xid{H}{ip}) = 1/2^l$.
 797   \item Polynomial evaluation: choose $x \in \F$.
 798     \[ \Xid{H}{pe}_{x}(m) = \sum_{0\le i<n} m_i x^{i+1}. \]
 799     Here we have only $\InSec{axu}(\Xid{H}{pe}) = n/2^l$.
 800   \end{itemize}
 801 \end{slide}
 802
 803 \begin{proof}
 804   \begin{itemize}
 805   \item Suppose we have messages $m \ne m'$ and $\delta \in \F$.  Then $m
 806     \cdot k + m' \cdot k = \delta$.  We must have some $m_j \ne m'_j$.  It
 807     follows that
 808     \[ k_j = \biggl( \delta + \sum_{\begin{script}
 809                0\le i<n \\
 810                i \ne j
 811              \end{script}
 812              k_i (m_i + m'_i)
 813              \biggr) \biggm/ (m'_i - m_j) \]
 814     But we choose $k$ uniformly at random, so the probability that this holds
 815     is $2^{-l}$.
 816   \item Again, we have distinct messages and some $\delta \in \F$.  We have a
 817     polynomial equation
 818     \[ \delta + \sum_{0\le i <n} (m_i - m'_i) x^{i+1} = 0 \]
 819     Since there is some $m_j \ne m'_j$, this has degree at most $n$, so
 820     there are at most $n$ distinct roots $x \in \F$.  But we choose $x$
 821     uniformly at random, so $x$ is one of these roots with probability at
 822     most $n/2^l$.
 823   \end{itemize}
 824 \end{proof}
 825
 826 \begin{exercise}
 827   Suppose $H$ is an $\epsilon$-AXU hash function with $l$-bit output.  Let
 828   $G_h(x)$ be the first $t$ bits of $H_h(x)$.  Show that $G$ is
 829   $2^{l-t}\epsilon$-AXU.
 830 \answer
 831   Let $m \ne m'$ be distinct messages and let $\delta \in \{0, 1\}^t$ be some
 832   XOR difference.  Then
 833   \begin{eqnarray*}
 834     \Pr[h \gets \keys{G} : G_h(m) \xor G_h(m') = \delta]
 835     &= \sum_{\delta' \in \{0, 1\}^{l-t}}
 836          \Pr[h \gets \keys{H} : H_h(m) \xor H_h(m') = \delta \cat \delta']
 837     &\le \sum_{\delta' \in \{0, 1\}^{l-t}} \InSec{axu}(H)
 838     &= 2^{l-t} \InSec{axu}(H).
 839   \end{eqnarray*}
 840 \end{exercise}
 841
 842 \begin{slide}
 843   \topic{a better MAC}
 844   \head{Almost XOR-universality, \seq: a better MAC}
 845
 846   The security result for the UH-based MAC contains a factor $q_T$, which
 847   it'd be nice to remove.  Our new scheme uses an AXU hash $H\colon \keys H
 848   \times \{0, 1\}^* \to \{0, 1\}^l$ and a PRF $F\colon \keys F \times \{0,
 849   1\}^l \to \{0, 1\}^L$.
 850
 851   We first present a stateful version $\Xid{\mathcal{M}}{XUH}^{H, F}$.
 852   Choose $(K, K') \inr \keys H \times \keys F$, and initialize a counter $i
 853   \gets 0$.  The tagging and verification algorithms are then:
 854   \begin{program}
 855     Algorithm $\Xid{T}{XUH}^{H, F}_{K, K'}(m)$: \+ \\
 856       $\tau \gets (i, H_K(m) \xor F_{K'}(i))$; \\
 857       $i \gets i + 1$; \\
 858       \RETURN $\tau$;
 859   \next
 860     Algorithm $\Xid{V}{XUH}^{H, F}_{K, K'}(m, \tau)$: \+ \\
 861       $(s, \sigma) \gets \tau$; \\
 862       \IF $\sigma = H_K(m) \xor F_{K'}(s)$ \THEN \RETURN $1$; \\
 863       \ELSE \RETURN $0$;
 864   \end{program}
 865   Note that verification is stateless.
 866 \end{slide}
 867
 868 \begin{slide}
 869   \head{Almost XOR-universality, \seq: security of AXU-based MACs}
 870
 871   For the stateful scheme presented earlier, provided $q_T \le 2^l$, we have
 872   \begin{eqnarray*}[Ll]
 873      \InSec{suf-cma}(\Xid{\mathcal{M}}{XUH}^{H, F}; t, q_T, q_V) \\
 874      & \le (q_V + 1)(\InSec{prf}(F; t, q_T + 1) + \InSec{xuh}(H) + 2^{-L}).
 875   \end{eqnarray*}
 876 \end{slide}
 877
 878 \begin{slide}
 879   \head{Almost XOR-universality, \seq: randomized AXU-based MAC}
 880
 881   We can avoid statefulness by using randomization.  This new scheme is
 882   $\Xid{\mathcal{M}}{XUH$\$$}^{H, F} = (\Xid{T}{XUH$\$$}^{H, F},
 883   \Xid{V}{XUH$\$$}^{H, F})$:
 884   \begin{program}
 885     Algorithm $\Xid{T}{XUH$\$$}^{H, F}_{K, K'}(m)$: \+ \\
 886       $s \getsr \{0, 1\}^l$; \\
 887       $\tau \gets (s, H_K(m) \xor F_{K'}(s))$; \\
 888       \RETURN $\tau$;
 889   \next
 890     Algorithm $\Xid{V}{XUH$\$$}^{H, F}_{K, K'}(m, \tau)$: \+ \\
 891       $(s, \sigma) \gets \tau$; \\
 892       \IF $\sigma = H_K(m) \xor F_{K'}(s)$ \THEN \RETURN $1$; \\
 893       \ELSE \RETURN $0$;
 894   \end{program}
 895   \begin{eqnarray*}[Ll]
 896      \InSec{suf-cma}(\Xid{\mathcal{M}}{XUH$\$$}^{H, F}; t, q_T, q_V) \\
 897      & \le (q_V + 1)
 898          \Bigl(\InSec{prf}(F; t, q_T + 1) + \InSec{xuh}(H) + 2^{-L} +
 899                \frac{q_T(q_T - 1)}{2^{l+1}}\Bigr).
 900   \end{eqnarray*}
 901 \end{slide}
 902
 903 \begin{proof}
 904   We prove the result with $q_V = 0$ and $q_T = q$, and appeal to the result
 905   on verification oracles.  Let $m_i$ be the message specified in the $i$-th
 906   tagging query ($0 \le i < q$), and let $(s_i, \sigma_i) = (s_i, H_K(m) \xor
 907   F_{K'}(s_i))$ be the tag returned.  We call the $s_i$ the \emph{nonce}.
 908
 909   We prove the result for the stateless scheme.  The bound $q \le 2^l$
 910   ensures that the nonces are all distinct (we have $s_i = i$).  The security
 911   bound for the randomized version merely has as an extra term upper bound
 912   for the probability of a nonce collision.
 913
 914   Let $A$ be an adversary attacking the MAC in time $t$, and using $q$
 915   tagging queries.  Then we present the following pair of adversaries:
 916   \begin{program}
 917     Distinguisher $D^{F(\cdot)}$: \+ \\
 918       $i \gets 0$; \\
 919       $\Xid{T}{list} \gets \emptyset$; \\
 920       $K \getsr \keys H$; \\
 921       $(m, \tau) \gets A^{\id{tag}}$; \\
 922       $(s, \sigma) \gets \tau$; \\
 923       \IF $(m, \tau) \notin \Xid{T}{list} \land
 924            \sigma = H_K(m) \xor F(s)$
 925       \THEN \RETURN $1$; \\
 926       \ELSE \RETURN $0$; \- \\[\smallskipamount]
 927     Oracle $\id{tag}(m)$: \+ \\
 928       $\tau \gets (i, H_K(m) \xor F(i))$; \\
 929       $\Xid{T}{list} \gets \Xid{T}{list} \cup \{(m, \tau)\}$; \\
 930       $i \gets i + 1$;
 931       \RETURN $\tau$;
 932   \next
 933     Collision-finder $C$: \+ \\
 934       $i \gets 0$; \\
 935       $(m, \tau) \gets A^{\id{tag}}$; \\
 936       $(s, \sigma) \gets \tau$; \\
 937       \IF $s \ge i \lor m = m_s$ \THEN \ABORT; \\
 938       \RETURN $(m, m_s, \sigma \xor \sigma_s)$; \- \\[\smallskipamount]
 939     Oracle $\id{tag}(m)$: \+ \\
 940       $m_i \gets m$; \\
 941       $\sigma_i \getsr \{0, 1\}^L$; \\
 942       $\tau \gets (i, \sigma_i)$; \\
 943       $i \gets i + 1$; \\
 944       \RETURN $\tau$;
 945   \end{program}
 946
 947   We need to find a lower bound on the advantage of $D$.  If $F$ is chosen
 948   from the PRF then $D$ returns $1$ precisely when $A$ finds a valid
 949   forgery.  We now examine the setting in which $F$ is a random function.
 950
 951   Let $S$ be the event that $A$ succeeds in returning a valid forgery when
 952   $F$ is random, and let $N$ be the event that the nonce $s$ returned by $A$
 953   is not equal to any nonce $s_i$ returned by the tagging oracle.  Suppose
 954   $N$ occurs: then the random function $F$ has never been queried before at
 955   $F$, and $\Pr[F(s) = \sigma \xor H_K(m)]$ is precisely $2^{-L}$.
 956
 957   So suppose instead that $N$ doesn't occur.  Then, since the $s_i$ are
 958   distinct, there is a unique $i$ such that $s = s_i$.  For $A$ to win, we
 959   must have $m \ne m_i$ (for if $m = m_i$ then the only valid tag is
 960   $H_K(m_i) \xor F(s_i) = \sigma_i$, which was already returned by the
 961   tagging oracle).  If $A$'s forgery is successful then
 962   \[ \sigma = H_K(m) \xor F(s)
 963      \qquad \text{and} \qquad
 964      \sigma_i = H_K(m_i) \xor F(s_i) \]%
 965   but $s = s_i$, whence
 966   \[ H_K(m_i) \xor H_K(m) = \sigma \xor \sigma_i. \]
 967   Since the $s_i$ are distinct and $F$ is a random function, the $\sigma_i$
 968   are independent uniformly-distributed random strings from $\{0, 1\}^L$.
 969   Hence the collision-finder $C$ succeeds with probability $\Pr[S \land
 970   \lnot N] \le \InSec{xuh}(H)$.
 971
 972   Wrapping up, we have
 973   \begin{eqnarray*}[rl]
 974     \Adv{prf}{F}(D)
 975     & \ge \Succ{suf-cma}{\Xid{\mathcal{M}}{XUH}^{H, F}}(A) -
 976       (\Pr[S \mid N] \Pr[N] + \Pr[S \mid \lnot N] \Pr[\lnot N]) \\
 977     & \ge \Succ{suf-cma}{\Xid{\mathcal{M}}{XUH}^{H, F}}(A) -
 978       (2^{-L} + \InSec{xuh}(H)).
 979   \end{eqnarray*}
 980   Maximizing and rearranging yields the required result.
 981 \end{proof}
 982
 983 \begin{remark*}
 984   Note that our bound has a $2^{-L}$ term in it that's missing from
 985   \cite{Goldwasser:1999:LNC}.  We believe that their proof is wrong in its
 986   handling of the XOR-collision probability.
 987 \end{remark*}
 988
 989 \endinput
 990
 991 %%% Local Variables:
 992 %%% mode: latex
 993 %%% TeX-master: "ips"
 994 %%% End: