From 2e24ecf52da0f6bd5d5873037c1b535edf32045f Mon Sep 17 00:00:00 2001
From: Mark Wooding <mdw@ncipher.com>
Date: Wed, 1 Nov 2006 14:30:42 +0000
Subject: [PATCH] auth-mac: Rewrite the stuff about universal hashing.

Give examples of universal hash functions.  Recharacterize PRF(UH) as a
PRF, and therefore a MAC.  Describe the old-UMAC PRF(UH||nonce) as an
exercise.
---
 auth-mac.tex | 205 ++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 106 insertions(+), 99 deletions(-)

diff --git a/auth-mac.tex b/auth-mac.tex
index 67489f0..31f2084 100644
--- a/auth-mac.tex
+++ b/auth-mac.tex
@@ -105,11 +105,13 @@
   \resetseq
   \head{PRFs are MACs, \seq}
   
-  If $F_K\colon \{0, 1\}^* \to \{0, 1\}^L$ is a $(t, q, \epsilon)$-secure
-  PRF, then it's also a $(t', q_T, q_V, \epsilon')$-secure MAC, with $q = q_T
-  + q_V + 1$, $t = t' + O(q)$, and $\epsilon' \le \epsilon + (q_V + 1)
-  2^{-L}$.  The constant hidden by the $O(\cdot)$ is small and depends on the
-  model of computation.
+  If $F_K\colon \{0, 1\}^* \to \{0, 1\}^L$ is a PRF then it's also a MAC.
+  Quantitatively:
+  \[ \InSec{suf-cma}(F; t, q_T, q_V) \le
+       \InSec{prf}(F; t + O(q), q_T + q_V) +
+       \frac(q_V + 1}{2^L}. \]
+  The constant hidden by the $O(\cdot)$ is small and depends on the model of
+  computation.
   
   Suppose $A$ can break $F$ used as a MAC in time $t$ and with $q_T$ and
   $q_V$ queries to its tagging and verification oracles respectively.
@@ -591,6 +593,9 @@
   If $\InSec{uh}(H) \le \epsilon$ then we say that $H$ is
   \emph{$\epsilon$-almost universal}.  Note that the concept of
   almost-universality is not quantified by running times.
+
+  Such families definitely exist: we don't need to make intractability
+  assumptions.  We'll see some examples later.
   
   If $H$ is $1/|{\ran H}|$-almost universal, then we say that $H$ is
   \emph{universal}.  Sometimes it's said that this is the best possible
@@ -673,107 +678,55 @@
 \end{slide}
 
 \begin{slide}
-  \topic{the collision game}
-  \head{Universal hashing, \seq: the collision game}
-  
-  Suppose that, instead of merely a pair $(x, y)$, our adversary was allowed
-  to return a \emph{set} $Y$ of $q$ elements, and measure the probability
-  that $H_K(x) = H_K(y)$ for some $x \ne y$ with $x, y \in Y$, and for $K
-  \inr \keys H$.
-  
-  Let $\InSec{uh-set}(H; q)$ be maximum probability achievable for sets $Y$
-  with $|Y| \le q$.  Then
-  \[ \InSec{uh-set}(H; q) \le \frac{q(q - 1)}{2} \cdot \InSec{uh}(H) .\]
+  \topic{domain and range extension of a PRF}
+  \head{Universal hashing, \seq: PRF domain extension}
+
+  Suppose that $F\colon \keys F \times \{0, 1\}^L \to \{0, 1\}^l$ is a PRF.
+  We'd like to have a PRF for a bigger domain $\{0, 1\}^n$.
+
+  If $H\colon \{0, 1\}^k \times \{0, 1\}^n \to \{0, 1\}^L$ is an
+  almost-universal hash function.  Then we can define $F'$ by
+  \[ F'_{K, h}(x) = F_K(H_h(x)). \]
+  Now we can prove that
+  \[ \InSec{prf}(F'; t, q) \le
+       \InSec{prf}(F; t, q) +
+       \frac{q(q - 1)}{2} \InSec{ah}(H). \]
+  Immediately this tells us that $F'$ is a MAC for $n$-bit messages.
 \end{slide}
 
 \begin{proof}
-  This is rather tedious.  We use the dynamic view.  Suppose $A$ returns $(x,
-  Y)$ with $|Y| = q$, and succeeds with probability $\epsilon$.  Consider
+  Suppose $A$ attacks $F'$.  Consider the distinguisher $B$ which attacks
+  $F$:
   \begin{program}
-    Adversary $A'$: \+ \\
-      $(x, Y) \gets A$; \\
-      $y \getsr Y$; \\
-      \RETURN $(x, Y \setminus \{y\})$;
+    Distinguisher $B^{F(\cdot)}$: \+ \\
+      $h \getsr \{0, 1\}^k$; \\
+      \RETURN $A^{F(H_h(\cdot))}()$;
   \end{program}
-  The worst that can happen is that $A'$ accidentally removes the one
-  colliding element from $Y$.  This occurs with probability $2/q$.  So
-  \[ \Succ{uh-set}{H}(A') \ge \frac{q - 2}{q} \Succ{uh-set}{H}(A). \]
-  Rearranging and maximizing gives
-  \[ \InSec{uh-set}(H; q) \le
-     \frac{q}{q - 2} \cdot \InSec{uh-set}(H; q - 1). \]
-  Note that $\InSec{uh-set}(H; 2) = \InSec{uh}(H)$ is our initial notion.  A
-  simple inductive argument completes the proof.
+  Suppose $F$ is an oracle for $F_K(\cdot)$.  Then $A$ plays its standard
+  attack game and all is well.  Conversely, suppose $F$ is a random
+  function.  Then the only way $A$ can distinguish its oracle $F(H_h(\cdot))$
+  from a random function is if it issues two queries $x_i \ne x_j$ such that
+  $H_h(x_i) = H_h(x_j)$.  But for each pair $(i, j)$ this happens with
+  probability at most $\InSec{ah}(H)$.  The stated bound follows.
 \end{proof}
 
-\begin{slide}
-  \topic{a MAC}
-  \head{Universal hashing, \seq: a MAC}
-  
-  Suppose that $H\colon \{0, 1\}^k \times \{0, 1\}^* \to \{0, 1\}^l$ is an
-  almost universal hash function, and $F\colon \{0, 1\}^{k'} \times \{0,
-  1\}^l \to \{0, 1\}^L$ is a PRF\@.  Define a MAC $\Xid{\mathcal{M}}{UH}^{H,
-    F} = (\Xid{T}{UH}^{H, F}, \Xid{V}{UH}^{H, F})$ where:
-  \begin{eqnarray*}[rl]
-    \Xid{T}{UH}^{H, F}_{K, K'}(m) &= F_{K'}(H_K(m)) \\
-    \Xid{V}{UH}^{H, F}_{K, K'}(m, \tau) &= \begin{cases}
-      1 & if $\tau = F_{K'}(H_K(m))$ \\
-      0 & otherwise
-    \end{cases}.
-  \end{eqnarray*}
-  We have
-  \begin{eqnarray*}[Ll]
-     \InSec{suf-cma}(\Xid{\mathcal{M}}{UH}^{H, F}; t, q_T, q_V) \\
-     & \le
-     (q_V + 1) \biggl(\InSec{prf}(F; t, q_T + 1) + \frac{1}{2^L} +
-                      \frac{q_T(q_T - 1)}{2} \cdot \InSec{uh}(H)\biggr).
-  \end{eqnarray*}
-\end{slide}
-
-\begin{proof}
-  We shall prove the result for $q_V = 0$ and $q_T = q$, and appeal to the
-  earlier result on verification oracles.
-  
-  Suppose $A$ attacks the scheme $\Xid{\mathcal{M}}{UH}^{H, F}$ in time $t$,
-  issuing $q$ tagging queries.  Consider a distinguisher $D$, constructed
-  from a forger $A$:
-  \begin{program}
-    Distinguisher $D^{F(\cdot)}$: \+ \\
-      $K \getsr \{0, 1\}^k$; \\
-      $\Xid{T}{list} \gets \emptyset$; \\
-      $(m, \tau) \gets A^{\id{tag}(K, \cdot)}$; \\
-      \IF $m \notin \Xid{T}{list} \land \tau = F(H_K(m))$
-      \THEN \RETURN $1$; \\
-      \ELSE \RETURN $0$; \- \\[\smallskipamount]
-    Oracle $\id{tag}(K, m)$: \+ \\
-      $\Xid{T}{list} \gets \Xid{T}{list} \cup \{m\}$; \\
-      \RETURN $F(H_K(m))$; \- \\[\smallskipamount]
-  \end{program}
-  Note that $A$ isn't provided with a verification oracle: that's because we
-  didn't allow it any verification queries.
-
-  We can see immediately that
-  \[ \Pr[K \getsr \{0, 1\}^{k'} : D^{F_K(\cdot)} = 1] =
-     \Succ{suf-cma}{\Xid{\mathcal{M}}{UH}^{H, F}}(A). \]%
-  
-  We must now find an upper bound for $\Pr[F \getsr \Func{l}{L} :
-  D^{F(\cdot)}]$.  Suppose that the adversary returns the pair $(m^*,
-  \tau^*)$, and that its tagging oracle queries and their answers are $(m_i,
-  \tau_i)$ for $0 \le i < q$.  Consider the event $C$ that $H_K(m) =
-  H_K(m')$ for some $m \ne m'$, with $m, m' \in \{m^*\} \cup \{\,m_i \mid 0
-  \le i < q\,\}$.
-  
-  If $C$ doesn't occur, then $F$ has not been queried before at $H_K(m)$, but
-  there's a $2^{-L}$ probability that the adversary guesses right anyway.  If
-  $C$ does occur, then we just assume that the adversary wins, even though it
-  might not have guessed the right tag.
-  
-  By our result on the collision game, $\Pr[C] \le q \cdot \InSec{uh}(H)$.
-  Then
-  \[ \Succ{prf}{F}(D) \ge
-     \Succ{suf-cma}{\Xid{\mathcal{M}}{UH}^{H, F}}(A) -
-       \frac{1}{2^L} - \frac{q(q - 1)}{2} \cdot \InSec{uh}(H). \]%
-  The result follows.
-\end{proof}
+\begin{exercise}
+  Show how to extend the \emph{range} of a PRF.  Specifically, suppose you're
+  given a PRF $F\colon \keys{F} \times \{0, 1\}^L \to \{0, 1\}^t$, but want
+  an $l$-bit output.  Show how to achieve this.
+\answer
+  There are two obvious techniques.
+  \begin{enumerate}
+  \item Suppose we have a PRG $G\colon \{0, 1\}^t \to \{0, 1\}^l$.  Then we
+    can easily use $G \compose F$; it's easy to see how this is secure.
+    Indeed, we can use $F$ itself as a PRG, by defining $G(x) = F_x(0) \cat
+    F_x(1) \cat \cdots$.
+  \item Let $m = \lceil \log_2 (l/t) \rceil$.  Use the construction from the
+    slide to build a PRF $F'$ with domain $\{0, 1\}^{L+m}$.  Then we define
+    $F''_K(x) = F'_K(x \cat 0) \cat F'_K(x \cat 1) \cat \cdots \cat F'_K(x
+    \cat m - 1)$.
+  \end{enumerate}
+\end{exercise}
 
 \begin{slide}
   \topic{almost XOR-universality}
@@ -833,6 +786,60 @@
 \end{slide}
 
 \begin{slide}
+  \head{Almost XOR-universality, \seq: examples of AXU hash functions}
+
+  Let $\F = \gf{2^l}$ be a finite field.  Given a message $m = (m_0, \ldots,
+  m_{n-1} \in \F^n$, we can hash it in several ways.
+  \begin{itemize}
+  \item Inner-product: choose $k = (k_0, \ldots, k_{n-1}) \in \F^n$.
+    \[ \Xid{H}{ip}_{k}(m) = m \cdot k = \sum_{0\le i<n} k_i m_i. \]
+    This hash is XOR-universal: $\InSec{axu}(\Xid{H}{ip}) = 1/2^l$.
+  \item Polynomial evaluation: choose $x \in \F$.
+    \[ \Xid{H}{pe}_{x}(m) = \sum_{0\le i<n} m_i x^{i+1}. \]
+    Here we have only $\InSec{axu}(\Xid{H}{pe}) = n/2^l$.
+  \end{itemize}
+\end{slide}
+
+\begin{proof}
+  \begin{itemize}
+  \item Suppose we have messages $m \ne m'$ and $\delta \in \F$.  Then $m
+    \cdot k + m' \cdot k = \delta$.  We must have some $m_j \ne m'_j$.  It
+    follows that
+    \[ k_j = \biggl( \delta + \sum_{\begin{script}
+               0\le i<n \\
+               i \ne j
+             \end{script}
+             k_i (m_i + m'_i)
+             \biggr) \biggm/ (m'_i - m_j) \]
+    But we choose $k$ uniformly at random, so the probability that this holds
+    is $2^{-l}$.
+  \item Again, we have distinct messages and some $\delta \in \F$.  We have a
+    polynomial equation
+    \[ \delta + \sum_{0\le i <n} (m_i - m'_i) x^{i+1} = 0 \]
+    Since there is some $m_j \ne m'_j$, this has degree at most $n$, so
+    there are at most $n$ distinct roots $x \in \F$.  But we choose $x$
+    uniformly at random, so $x$ is one of these roots with probability at
+    most $n/2^l$.
+  \end{itemize}
+\end{proof}
+
+\begin{exercise}
+  Suppose $H$ is an $\epsilon$-AXU hash function with $l$-bit output.  Let
+  $G_h(x)$ be the first $t$ bits of $H_h(x)$.  Show that $G$ is
+  $2^{l-t}\epsilon$-AXU.
+\answer
+  Let $m \ne m'$ be distinct messages and let $\delta \in \{0, 1\}^t$ be some
+  XOR difference.  Then
+  \begin{eqnarray*}
+    \Pr[h \gets \keys{G} : G_h(m) \xor G_h(m') = \delta]
+    &= \sum_{\delta' \in \{0, 1\}^{l-t}}
+         \Pr[h \gets \keys{H} : H_h(m) \xor H_h(m') = \delta \cat \delta']
+    &\le \sum_{\delta' \in \{0, 1\}^{l-t}} \InSec{axu}(H)
+    &= 2^{l-t} \InSec{axu}(H).
+  \end{eqnarray*}
+\end{exercise}
+
+\begin{slide}
   \topic{a better MAC}
   \head{Almost XOR-universality, \seq: a better MAC}
 
-- 
2.11.0