From 41761fdc7bb8f1ed87f5e1116d389158513ee280 Mon Sep 17 00:00:00 2001 From: mdw Date: Tue, 2 Oct 2001 19:06:59 +0000 Subject: [PATCH] initial version --- .cvsignore | 2 + Makefile | 38 ++ auth-mac.tex | 962 +++++++++++++++++++++++++++++++++++ auth-sig.tex | 609 ++++++++++++++++++++++ basics.tex | 1115 ++++++++++++++++++++++++++++++++++++++++ chan.tex | 13 + enc-ies.tex | 388 ++++++++++++++ enc-intro.tex | 103 ++++ enc-pub.tex | 1576 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ enc-symm.tex | 1188 +++++++++++++++++++++++++++++++++++++++++++ ips.sty | 265 ++++++++++ ips.tex | 63 +++ mkimages.pl | 34 ++ slides.tex | 10 + zk.tex | 13 + 15 files changed, 6379 insertions(+) create mode 100644 .cvsignore create mode 100644 Makefile create mode 100644 auth-mac.tex create mode 100644 auth-sig.tex create mode 100644 basics.tex create mode 100644 chan.tex create mode 100644 enc-ies.tex create mode 100644 enc-intro.tex create mode 100644 enc-pub.tex create mode 100644 enc-symm.tex create mode 100644 ips.sty create mode 100644 ips.tex create mode 100644 mkimages.pl create mode 100644 slides.tex create mode 100644 zk.tex diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 0000000..0382342 --- /dev/null +++ b/.cvsignore @@ -0,0 +1,2 @@ +slides +ips.ps slides.ps ips.pdf slides.pdf ips.dvi slides.dvi ips.log auth-mac.aux auth-sig.aux basics.aux enc-ies.aux enc-intro.aux enc-pub.aux enc-symm.aux ips.aux ips.ans ips.toc ips.bbl ips.blg diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8630a4d --- /dev/null +++ b/Makefile @@ -0,0 +1,38 @@ +# Makefile for IPS + +SOURCES = \ + ips.tex slides.tex ips.sty \ + basics.tex \ + auth-mac.tex auth-sig.tex \ + enc-intro.tex enc-pub.tex enc-symm.tex enc-ies.tex + +all: ips.dvi ips.ps ips.pdf slides.dvi slides.ps slides.pdf + +ips.dvi: $(SOURCES) + latex ips && bibtex ips && \ + latex ips && latex ips +ips.pdf: ips.dvi + pdflatex ips.tex && cp ips.pdf .. 
+ips.ps: ips.dvi + dvips -o ips.ps ips.dvi + +slides.dvi: $(SOURCES) + @if [ ! -d slides ]; then \ + mkdir slides; \ + for i in $(SOURCES); do ln -s ../$$i slides; done; \ + fi + cd slides && \ + latex slides && bibtex slides && \ + latex slides && latex slides && \ + cp slides.dvi .. +slides.pdf: slides.dvi + cd slides && pdflatex slides.tex && cp slides.pdf .. +slides.ps: slides.dvi + dvips -o slides.ps slides.dvi + +clean: + rm -f ips.dvi ips.ps ips.pdf slides.dvi slides.ps slides.pdf + rm -f *.log *.bbl *.blg *.toc *.ans + rm -rf slides ips + +.PHONY: clean diff --git a/auth-mac.tex b/auth-mac.tex new file mode 100644 index 0000000..40f466d --- /dev/null +++ b/auth-mac.tex @@ -0,0 +1,962 @@ +\xcalways\section{Message authentication codes}\x + +\xcalways\subsection{Definitions and security notions}\x + +\begin{slide} + \head{Definition of a MAC} + + A MAC is a pair of algorithms $\mathcal{M} = (T, V)$: + \begin{itemize} + \item The \emph{tagging} algorithm $T\colon \{0, 1\}^k \times \{0, 1\}^* + \to \{0, 1\}^L$ is a probabilistic algorithm which, given a key and a + string, returns a \emph{tag}. We write $\tau \in T_K(m)$. + \item The \emph{verification} algorithm $V\colon \{0, 1\}^k \times \{0, + 1\}^* \times \{0, 1\}^L \to \{0, 1\}$ is a deterministic algorithm which, + given a key, a message and a tag, returns $1$ if the tag is valid, or $0$ + otherwise; i.e., we require that $V_K(m, \tau) = 1 \iff \tau \in T_K(m)$. + \end{itemize} + The basic idea is that it's hard for an adversary to \emph{forge} a tag for + a message it's not seen before. +\end{slide} + +\begin{slide} + \topic{informal security notion} + \head{Strong MACs, 1: informal security notion} + + Our standard notion of security for MACs is \emph{strong unforgeability + against chosen message attack}, or SUF-CMA, for short + \cite{Abdalla:2001:DHIES, Bellare:2000:AER}. Let $A$ be an adversary which + is attempting to attack the MAC $\mathcal{M} = (T, V)$. + + We play a game with the adversary. 
We invent a key $K \inr \{0, 1\}^k$. + The adversary \emph{wins} if, after requesting tags for some messages of + its choice, and checking some guesses, it can return a pair $(m, \tau)$ + such that: + \begin{itemize} + \item the tag is correct, i.e., $V_K(m, \tau) = 1$; and + \item the tag is not one returned by the adversary's tagging oracle for + that message. + \end{itemize} +\end{slide} + +\begin{slide} + \topic{strong MACs} + \head{Strong MACs, 2: the experiment} + + We perform the following experiment with the adversary. + \begin{program} + Experiment $\Expt{suf-cma}{\mathcal{M}}(A)$: \+ \\ + $K \getsr \{0, 1\}^k$; \\ + $\Xid{T}{list} \gets \emptyset$; \\ + $(m, \tau) \gets A^{\id{tag}(\cdot), V_K(\cdot, \cdot)}$; \\ + \IF $V_K(m, \tau) \land (m, \tau) \notin \Xid{T}{list}$ + \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; \- \\[\smallskipamount] + Oracle $\id{tag}(m)$: \+ \\ + $\tau \gets T_K(m)$; \\ + $\Xid{T}{list} \gets \Xid{T}{list} \cup \{(m, \tau)\}$; \\ + \RETURN $\tau$; + \end{program} +\end{slide} + +\begin{slide} + \head{Strong MACs, 3: wrapping up the notation} + + The \emph{success probability} of an adversary $A$ against the MAC + $\mathcal{M}$ in the sense of SUF-CMA is + \[ \Succ{suf-cma}{\mathcal{M}}(A) = + \Pr[\Expt{suf-cma}{\mathcal{M}}(A) = 1]. \]% + The \emph{insecurity} of a MAC $\mathcal{M}$ in the SUF-CMA sense is then + \[ \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V) = + \max_A \Succ{suf-cma}{\mathcal{M}}(A) \]% + where the maximum is taken over all adversaries $A$ running in time $t$ and + making $q_T$ tagging and $q_V$ verification queries. + + If $\InSec{suf-cma}(\mathcal{M}; t, q_T, q_V) \le \epsilon$ then we say + that $\mathcal{M}$ is a \emph{$(t, q_T, q_V, \epsilon)$-secure MAC in the + SUF-CMA sense}. 
+\end{slide} + +\begin{slide} + \topic{other notions} + \head{Other security notions for MACs} + + There are a number of weaker security notions in use: + \begin{itemize} + \item The definition of a \emph{weak MAC} restricts the adversary from + returning any message with which it queried its tagging oracle. The + strong MAC definition considers this OK, as long as the tag is different + from any returned by the oracle for that message. + \item Some definitions of MACs don't equip the adversary with a + verification oracle. Our definition considers these to be $(t, q_T, 0, + \epsilon)$-secure. + \item You can have a MAC with a bounded domain $\{0, 1\}^L$ rather than + $\{0, 1\}^*$ as shown previously. + \item Further quantification is possible, e.g., counting the total number + of bytes queried, or the maximum size of a tagging query. + \end{itemize} +\end{slide} + +%% IFF systems +\begin{exercise} + %% A nice open-ended question. Need to sort out IFF details (e.g., + %% aircraft numbering or whatever. + An \emph{identify friend-or-foe} (IFF) system works as follows. Each + `friendly' aircraft knows a common secret key. When a new aircraft is + detected, a \emph{challenge} $x$ is sent. If a correct \emph{response} is + returned, the new aircraft is presumed friendly; otherwise it is attacked. + Discuss the security requirements of IFF systems and construct a formal + model. Decide which primitive is best suited to the job and relate the + security notions. What sorts of resource constraints can be imposed on + adversaries attacking an IFF system? + \answer% + No prizes for guessing that a MAC is what's wanted, this being the MAC + section. 
+\end{exercise} + +\xcalways\subsection{Basic results}\x + +\begin{slide} + \topic{PRFs are MACs} + \head{PRFs are MACs, 1} + + If $F_K\colon \{0, 1\}^* \to \{0, 1\}^L$ is a $(t, q, \epsilon)$-secure + PRF, then it's also a $(t', q_T, q_V, \epsilon')$-secure MAC, with $q = q_T + + q_V + 1$, $t = t' + O(q)$, and $\epsilon \le \epsilon' + (q_V + 1) + 2^{-L}$. The constant hidden by the $O(\cdot)$ is small and depends on the + model of computation. + + Suppose $A$ can break $F$ used as a MAC in time $t$ and with $q_T$ and + $q_V$ queries to its tagging and verification oracles respectively. + + If we can construct an adversary which distinguishes $F_K$ from a random + function using $A$ as an essential component, then we can prove the + result. +\end{slide} + +\begin{slide} + \head{PRFs are MACs, 2: the distinguisher} + + \begin{program} + Distinguisher $D^{F(\cdot)}$: \+ \\ + $\Xid{T}{list} \gets \emptyset$; \\ + $(m, \tau) \gets A^{T_F(\cdot), V_F(\cdot, \cdot)}$; \\ + \IF $m \notin \Xid{T}{list} \land \tau = F(m)$ + \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; \- \\[\smallskipamount] + Oracle $T_F(m)$: \+ \\ + $\Xid{T}{list} \gets \Xid{T}{list} \cup \{m\}$; \\ + \RETURN $F(m)$; \- \\[\smallskipamount] + Oracle $V_F(m, \tau)$: \+ \\ + \IF $\tau = F(m)$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \end{program} +\end{slide} + +\begin{slide} + \head{PRFs are MACs, 3: wrapping up} + + The distinguisher simulates the tagging and verification oracles for the + MAC forger, using its supplied oracle. If the forger succeeds, then the + distinguisher returns 1; otherwise it returns zero. + + The probability that the distinguisher returns 1 given an instance $F_K$ of + the PRF is precisely $\Succ{suf-cma}{F}(A)$. + + The probability that it returns 0 given a random function depends on what + $A$ does when it's given a random function. 
But we know that the + probability of it successfully guessing the MAC for a message for which it + didn't query $T$ can be at most $(q_V + 1) 2^{-L}$. So + \[ \Adv{prf}{F}(D) \le \Succ{suf-cma}{F}(A) - (q_V + 1) 2^{-L}. \] + Let $q = q_T + q_V + 1$; then counting, rearranging, maximizing yields + \[ \InSec{suf-cma}(F; t, q_T, q_V) \le + \InSec{prf}(F; t + O(q), q) + (q_V + 1)2^{-L}. \]% +\end{slide} + +\begin{exercise} + \begin{parenum} + \item Suppose that $F\colon \{0, 1\}^k \times \{0, 1\}^* \to \{0, 1\}^L$ is + a $(t, q, \epsilon)$-secure PRF. Let $T^{(\ell)}_K(x)$ be the leftmost + $\ell$~bits of $F_K(x)$. Demonstrate the security of $T^{(\ell)}(\cdot)$ + as a MAC. + \item Discuss the merits of truncating MAC tags in practical situations. + \end{parenum} + \answer% + \begin{parenum} + \item The follows exactly the same pattern as the `PRFs are MACs' proof in + the slides: $T^{(\ell)}$ is a $(t, q_T, q_V, \epsilon + (q_V + + 1)2^{-\ell})$-secure MAC, where $q_T + q_V = q$. + \item Obviously, truncating tags saves bandwidth. There is a trade-off + between tag size and security, as the $2^{-\ell}$ term shows. Note that + this term represents the probability of the adversary guessing the + correct tag when it's actually attacking a random function, and + therefore, when this occurs, the adversary has one `by accident'. + Including sequence numbers in packets ensures that replay of accidental + forgery (or honest messages) will be detected. Hence, for some + applications, setting $\ell = 32$ or even lower is of no particular harm. + Perhaps more significantly, if the PRF isn't actually as good as it ought + to be, and (say) leaks key material very slowly, then truncating its + output can actually improve security. + \end{parenum} +\end{exercise} + +\begin{exercise} + A traditional MAC construction is the \emph{CBC-MAC}: it works like this. + Suppose $F\colon \{0, 1\}^k \times \{0, 1\}^l \to \{0, 1\}^l$ is a PRF. 
+ Split a message~$x$ into $l$-bit blocks $x_0, x_1, \ldots, x_{n-1}$ + (applying some sort of padding if you need to). Then we define the CBC-MAC + as $F^{(n)}_K(x)$, where + \[ F^{(1)}_K(x) = F_K(x); + \qquad F^{(i+1)}_K(x) = F_K(x_i \xor F^{(i)}_K(x)). \]% + In \cite{Bellare:1994:SCB}, Mihir Bellare, Joe Kilian and Phil Rogaway + introduced the world to the concrete-security approach and, almost + incidentally, proved that the CBC-MAC is a PRF (and therefore a MAC) for + any \emph{fixed sized} input. + + Show that the CBC-MAC is easily broken if you're allowed to choose messages + of different lengths. + \answer% + Request tags $\tau$ for the message $x = x_0, x_1, \ldots, x_{n-1}$ and + $\tau'$ for $x' = x'_0 \xor \tau, x'_1, \ldots, x'_{n'-1}$. Let $y = x_0, + x_1, \ldots, x_{n-1}, x'_0 , x'_1, \ldots, x'_{n'-1}$. Note that + $F^{(n)}_K(y) = \tau$, and $F^{(n+1)}_K(y) = F_K(x'_0 \xor F^{(n)}_K(x)) = + F^{(1)}_K(x')$. Hence, $F^{(n+n')}_K(y) = \tau'$, and we have a correct + forgery. +\end{exercise} + +\begin{slide} + \topic{verification oracles} + \head{Verification oracles} + + We can leave verification oracles out of our analysis. This simplifies + analysis, but produces slightly less satisfactory quantitative results. + + Suppose that $\mathcal{M} = (T, V)$ is a $(t, q_T, 0, \epsilon)$-secure + MAC. Then, for any $q_V$, + \[ \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V) \le + (q_V + 1)\InSec{suf-cma}(\mathcal{M}; t, q_T, 0). \]% + This bound is \emph{tight}: it's not possible for a general result like + this to do better. +\end{slide} + +\begin{proof} + Consider an adversary $A$ which uses $q_V$ verification queries. We assume + the following properties of $A$'s behaviour: + \begin{itemize} + \item No verification query contains a message and a tag for that message + received from the tagging oracle. + \item If a verification query succeeds, the message is not given in a query + to the tagging oracle. 
+ \item Once a verification query succeeds, all subsequent verification + queries also succeed and the adversary returns a correct forgery (e.g., + by simply repeating the succeessful query). + \end{itemize} + It's clear that any adversary can be transformed into one which has these + properties and succeeds with probability at least as high. + + Let $V$ be the event that at least one verification query succeeds, and let + $S$ be the event that $A$ succeeds. Then + \begin{eqnarray*}[rl] + \Succ{suf-cma}{\mathcal{M}}(A) + &= \Pr[S \mid V] \Pr[V] + \Pr[S \mid \lnot V] \Pr[\lnot V] \\ + &= \Pr[V] + \Pr[S \mid \lnot V] \Pr[\lnot V]. + \end{eqnarray*} + Now consider these two adversaries: + \begin{program} + Adversary $A'^{T(\cdot), V(\cdot, \cdot)}$: \+ \\ + $i \gets 0$; \\ + $(m, \tau) \gets A^{T(\cdot), \Xid{V}{sim}(\cdot, \cdot)}$; \\ + \RETURN $(m^*, \tau^*)$; \- \\[\smallskipamount] + Oracle $\Xid{V}{sim}(m, \tau)$: \+ \\ + $i \gets i + 1$; \\ + \IF $i < q_V$ \THEN \RETURN $V(m, \tau)$; \\ + $(m^*, \tau^*) \gets (m, \tau)$; \\ + \RETURN $1$; + \next + Adversary $Z^{T(\cdot), V(\cdot, \cdot)}$: \+ \\ + \\ + $(m, \tau) \gets A^{T(\cdot), \Xid{V}{zero}(\cdot, \cdot)}$; \\ + \RETURN $(m, \tau)$; \- \\[\smallskipamount] + Oracle $\Xid{V}{zero}(m, \tau)$: \+ \\ + \RETURN $0$; + \end{program} + The adversary $A'$ uses $q_V - 1$ verification queries. It ignores the + output of $A$, returning instead $A$'s $q_V$-th verification query. Thus, + by our assumptions on the behaviour of $A$, we have that $A'$ succeeds + precisely whenever one of $A$'s verification queries succeeds. Thus: + \[ \Pr[V] = \Succ{suf-cma}{\mathcal{M}}(A') + \le \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V - 1). \]% + Similarly, the adversary $Z$ succeeds with precisely the same probability + as $A$, given that all of its verification queries failed; i.e., + \[ \Pr[S \mid \lnot V] \Pr[\lnot V] = \Succ{suf-cma}{\mathcal{M}}(Z) + \le \InSec{suf-cma}(\mathcal{M}; t, q_T, 0). 
\]% + Because $A$ was chosen arbitrarily, we can maximize: + \begin{eqnarray*}[rl] + \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V) + & \le \InSec{suf-cma}(\mathcal{M}; t, q_T, q_V - 1) + + \InSec{suf-cma}(\mathcal{M}; t, q_T, 0) \\ + & \le (q_V + 1)\InSec{suf-cma}(\mathcal{M}; t, q_T, 0) + \end{eqnarray*} + as required. + + To show that the bound is tight, consider a random function $F$ used as a + MAC, with $L$-bit output. Then we claim that $\InSec{suf-cma}(F; t, q_T, + q_V) = (q_V + 1)/2^L$. To save typing, let $e_q = \InSec{suf-cma}(F; t, + q_T, q)$. We prove the claim by induction on $q$. Firstly, note that if + $q = 0$ then necessarily $e_q = 1/2^L$. Now suppose that $e_{q-1} = + q/2^L$. We're now allowed an extra query, so rather than returning the + result, we feed it to the verification oracle. If it answers `yes' then we + return it; otherwise we guess randomly from the remaining $2^L - q$ + possibilities. Now + \begin{eqnarray*}[rl] + e_q &= e_{q-1} + (1 - e_{q-1}) \frac{1}{2^L - q} \\ + &= \frac{q}{2^L} + \frac{2^L - q}{2^L} \cdot \frac{1}{2^L - q} \\ + &= \frac{q + 1}{2^L} + \end{eqnarray*} + as claimed. +\end{proof} + +\xcalways\subsection{The HMAC construction}\x + +\begin{slide} + \head{The HMAC construction \cite{Bellare:1996:KHF}, 1: motivation} + + It ought to be possible to construct a decent MAC using a hash function. + Many attempts have failed, however. For example, these constructions are + weak if used with Merkle-Damg\aa{}rd iterated hashes: + \begin{itemize} + \item Secret prefix: $T_K(m) = H(K \cat m)$. + \item Secret suffix: $T_K(m) = H(m \cat K)$. + \end{itemize} + + It would be nice to have a construction whose security was provably related + to some plausible property of the underlying hash function. 
+\end{slide} + +\begin{slide} + \head{The HMAC construction, 2: definition of NMAC} + + Let $H\colon \{0, 1\}^* \to \{0, 1\}^k$ be an iterated hash, constructed + from the compression function $F\colon \{0, 1\}^k \times \{0, 1\}^L \to + \{0, 1\}^k$. We define a keyed version of $H$. Let $K \in \{0, 1\}^k$; + then we compute $H_K(x)$ as follows: + \begin{enumerate} + \item Pad and split $x$ into the $L$-bit blocks $x_0$, $x_1$, \ldots, + $x_{n-1}$ as before. + \item Set $I_0 = K$. Let $I_{i+1} = F(I_i \cat x_i)$ for $0 \le i < n$. + \item The result $H_K(x) = I_n$. + \end{enumerate} + The NMAC (nested-MAC) construction requires two independent $k$-bit keys + $K_0$ and $K_1$. The construction itself is simply: + \[ \Xid{T}{NMAC}^H_{K_0, K_1}(x) = H_{K_0}(H_{K_1}(x)). \] + NMAC is deterministic, so verification consists of computing the tag and + comparing. +\end{slide} + +\begin{slide} + \head{The HMAC construction, 3: security of NMAC} + + Consider a function $F\colon \{0, 1\}^k \times \{0, 1\}^* \to \{0, 1\}^k$. + We say that $F$ is \emph{$(t, q, \epsilon)$-weakly collision resistant} if, + for any adversary $A$ constrained to run in time $t$ and permitted $q$ + oracle queries, + \[ \Pr[K \getsr \{0, 1\}^k; + (x, y) \gets A^{F_K(\cdot)} : + x \ne y \land F_K(x) = F_K(y)] \le \epsilon \]% + + If $H_K$ is a $(t, q_T, q_V, \epsilon)$-secure MAC on $k$-bit messages, and + moreover $(t, q_T + q_V, \epsilon')$-weakly collision resistant, then + $\Xid{T}{NMAC}^H$ is a $(t, q_T, q_V, \epsilon + \epsilon')$-secure MAC. +\end{slide} + +\begin{slide} + \head{The HMAC construction, 4: NMAC security proof} + + Let $A$ be an adversary which forges a $\Xid{T}{NMAC}^H$ tag in time $t$, + using $q_T$ tagging queries and $q_V$ verification queries with probability + $\epsilon$. We construct an adversary $A'$ which forges an $H$ tag for a + $k$-bit message in essentially the same time. 
+ \begin{program} + Adversary $A'^{T(\cdot), V(\cdot, \cdot)}$ \+ \\ + $K \getsr \{0, 1\}^k$; \\ + $(m, \tau) \gets A^{T(H_K(\cdot)), V(H_K(\cdot), \cdot)}$; \\ + \RETURN $(H_K(m), \tau)$; + \end{program} + $A'$ might fail even though $A$ succeeded only if the message it returns, + $H_K(m)$, collides with one of its tagging queries. But since $H_K$ is + $(t, q_T + q_V, \epsilon')$-weakly collision resistant, this happens with + at most probability $\epsilon'$. Hence, $A'$ succeeds with probability at + least $\epsilon - \epsilon'$. Rearrangement yields the required result. +\end{slide} + +\begin{slide} + \head{The HMAC construction, 5: from NMAC to HMAC} + + Implementing NMAC involves using strange initialization vectors and + generally messing about with your hash function. HMAC is an attempt to + capture the provable security properties using a plain ol' hash function. + + Suppose $H$ is an iterated hash function with a $k$-bit output and $L$-bit + blocks (with $L \ge k$). We set $\id{ipad}$ to be the byte $\hex{36}$ + repeated $L/8$ times, and $\id{opad}$ to be the byte $\hex{5C}$ repeated + $L/8$ times. Select a key $K$ of $L$ bits: if your key is shorter, pad it + by appending zero bits; if it's longer, hash it with $H$ and then pad. + + The HMAC tagging function is then defined as + \[ \Xid{T}{HMAC}^H_K(m) = + H(K \xor \id{opad} \cat H(K \xor \id{ipad} \cat m)). \]% +\end{slide} + +\begin{slide} + \head{The HMAC construction, 6: comparison with NMAC} + + Comparing the two constructions, we see that + \[ \Xid{T}{HMAC}^H_K = + \Xid{T}{NMAC}^{H'}_{F(I \cat K \xor \id{opad}), + F(I \cat K \xor \id{ipad})}. \]% + Here, $I$ is $H$'s initialization vector, $F$ is the compression function; + $H'$ denotes a keyed hash function that is `like' $H$ but performs padding + as if there were an extra initial block of message data for each message. + + The proof of NMAC assumes that the two keys are random and independent. 
+ This isn't the case in HMAC, but a little handwaving about pseudorandomness + of the compression function makes the problem go away. +\end{slide} + +\begin{exercise} + Suppose that $F\colon \{0, 1\}^k \times \{0, 1\}^{t+\ell} \to \{0, + 1\}^t$ is a PRF. Let $x \in \{0, 1\}^*$ be a message. We define the + function $H_K(x)$ as follows: + \begin{itemize} + \item Pad $x$ to a multiple of $\ell$ bits using some injective + mapping. Break the image of $x$ under this mapping into $\ell$-bit + blocks $x_0, x_1, \ldots, x_{n-1}$. + \item For $0 \le i \le n$, define $H^{(i)}_K(x)$ by + \[ H^{(0)}_K(x) = I; \qquad + H^{(i+1)}_K(x) = F_K(H^{(i)}(x) \cat x_i) \]% + where $I$ is some fixed $t$-bit string (e.g., $I = 0^t$). + \item Then set $H_K(x) = H^{(n)}_K(x)$. + \end{itemize} + We define two (deterministic) MACs $\mathcal{M}^i = (T^i, V^i)$ (for + $i \in \{0, 1\}$) using the $H_K$ construction. Verification in each + case consists of computing the tag and comparing to the one offered. + \begin{eqlines*} + T^0_K(x) = H_K(x); \qquad T^1_K(x) = H_K(x \cat K) + V^i_K(x, \tau) = \begin{cases} + 1 & if $\tau = T^i_K(x)$ \\ + 0 & otherwise + \end{cases} + \end{eqlines*} + Decide whether each of these constructions is secure. A full proof is + rather hard: an informal justification would be good. + \answer% + $\mathcal{M}^0$ is secure; $\mathcal{M}^1$ isn't, under the sole + assumption that $F$ is a PRF. + + To see that $\mathcal{M}^0$ is secure, it suffices to show that $T^0$ + is a PRF. This is actually quite involved. Given an adversary $A$ + attacking $T^1$ as a PRF, we construct an adversary $B$ attacking $F$, + which simply computes $H$ as required, using the oracle supplied. To + complete the proof, we need to show a bound on the + information-theoretic security of $H$ when instantiated using a random + function $F$. For the sake of simplicity, we allow the adversary $A$ + to query on \emph{padded} messages, rather than the raw unpadded + messages. 
We count the number $q'$ of individual message blocks. + + As the game with $A$ progresses, we can construct a directed + \emph{graph} of the query results so far. We start with a node + labelled $I$. When processing an $H$-query, each time we compute $t' + = F(t \cat x_i)$, we add a node $t'$, and an edge $x_i$ from $t$ to + $t'$. The `bad' event occurs whenever we add an edge to a previously + existing node. We must show, firstly, that the + adversary cannot distinguish $H$ from a random function unless the bad + event occurs; and, secondly, that the bad event doesn't occur very + often. + + The latter is easier: our standard collision bound shows that the bad + event occurs during the game with probability at most $q'(q' - 1)/2$. + + The former is trickier. This needs a lot more work to make it really + rigorous, but we show the idea. Assume that the bad event has not + occured. Consider a query $x_0, x_1, \ldots, x_{n-1}$. If it's the + same as an earlier query, then $A$ learns nothing (because it could + have remembered the answer from last time). If it's \emph{not} a + prefix of some previous query, then we must add a new edge to our + graph; then either the bad event occurs or we create a new node for + the result, and because $F$ is a random function, the answer is + uniformly random. Finally, we consider the case where the query is a + prefix of some earlier query, or queries. But these were computed at + random at the time. + + At the end of all of this, we see that + \[ \InSec{prf}(T^0; t, q) \le + \InSec{prf}(F; t, q') + \frac{q'(q' - 1)}{2} \]% + and hence + \[ \InSec{suf-cma}(\mathcal{M}^0; t, q) \le + \InSec{prf}(F; t, q') + \frac{q'(q' - 1)}{2} + \frac{1}{2^t}. \]% + + Now we turn our attention to $T^1$. It's clear that we can't simulate + $T^1$ very easily using an oracle for $F$, since we don't know $K$ + (and indeed there might not be a key $K$). 
The intuitive reason why + $T^1$ is insecure is that $F$ might have leak useful information if + its input matches its key. This doesn't affect the strength of $F$ as + a PRF because you have to know the key before you can exploit this + leakage; but $T^1$ already knows the key, and this can be exploited to + break the MAC. + + To show that this is insecure formally, let $F'$ be defined as + follows: + \[ F'_K(x) = \begin{cases} + K & if $x = p \cat K \cat q$ where + $|p| = t$ and $|q| = \ell - k$ \\ + F_K(x) & otherwise + \end{cases}. \]% + We choose a simple injective padding scheme: if $x$ is a message then + we form $x' = x \cat 1 \cat 0^n$, where $0 \le n < \ell$ and $|x'|$ is + a multiple of $\ell$. If $T^1$ is instantiated with this PRF then it + is insecure as a MAC: submitting a tagging query for the empty string + $\emptystring$ reveals the key $K$, which can be used to construct a + forgery. + + To complete the proof, we must show that $F'$ is a PRF. Let $A$ be an + adversary attacking $F'$. We consider a series of games; for each + $\G{i}$, let $S_i$ be the event that $A$ returns $1$ in that game. + Game~$\G0$ is the standard attack game with $A$ given an oracle for a + random function; game~$\G1$ is the same, except that $A$ is given an + oracle for $F'_K$ for some $K \inr \{0, 1\}^k$. Then + $\Adv{prf}{F'}(A) = \Pr[S_1] - \Pr[S_0]$. Let game~$\G2$ be the same + as $\G1$, except that if $A$ makes any query of the form $p \cat K + \cat q$ with $|p| = t$ and $|q| = \ell - k$ then the game halts + immediately, and let $F_2$ be the event that this occurs. By + Lemma~\ref{lem:shoup} (page~\pageref{lem:shoup}), then, $|{\Pr[S_2]} - + \Pr[S_1]| \le \Pr[F_2]$. Let game~$\G3$ be the same as $\G2$ except + that we give $A$ an oracle for $F_K$ rather than $F'_K$. Since $F$ + and $F'$ differ only on queries of the form $p \cat K \cat q$, we have + $\Pr[S_3] = \Pr[S_2]$. But $\Pr[S_3] - \Pr[S_0] = \Adv{prf}{F}(A) \le + \InSec{prf}(F; t, q)$. 
Hence, $\Adv{prf}{F'}(A) \le \InSec{prf}(F; t, q) + + \Pr[F_2]$. + + Finally, we bound the probability of $F_2$. Fix an integer $n$. + Consider an adversary $B$ attacking $F$ which runs as follows. It + initially requests $F(0), F(1), \ldots, F(n - 1)$ from its oracle. It + then runs $A$, except that, for each oracle query $x$, it parses $x$ + as $p \cat K' \cat q$ with $|p| = t$, $|K'| = k$ and $|q| = \ell - k$; + then, if $F_{K'}(0) = F(0) \land F_{K'}(1) = F(1) \land \cdots \land + F_{K'}(n - 1) = F(n - 1)$, $B$ immediately returns $1$, claiming that + its oracle $F$ is the function $F_{K'}$; if this never occurs, $B$ + returns $0$. Clearly, if $B$ is given an instance $F_K$ of $F$ then + it succeeds with probability $\Pr[F_2]$; however, if $F$ is a random + function then $B$ returns $1$ with probability at most $q 2^{-nk}$. + Hence, $\Adv{prf}{F}(B) \ge \Pr[F_2] - q 2^{-nk}$. $B$ issues $q + n$ + queries, and takes time $t + O(n q)$. Wrapping everything up, we get + \[ \InSec{prf}(F'; t, q) \le + 2\cdot\InSec{prf}(F; t + O(q n), q + n) + \frac{q}{2^{nk}}. \]% + This completes the proof of generic insecurity for $\mathcal{M}^1$. +\end{exercise} + +\xcalways\subsection{Universal hashing}\x + +\begin{slide} + \topic{almost-universal hash functions} + \head{Universal hashing, 1: definition} + + Consider a family of hash functions $H\colon \keys H \times \dom H \to + \ran H$. We define + \[ \InSec{uh}(H) = + \max_{x \ne y} \Pr[K \getsr \keys H : H_K(x) = H_K(y)]. \]% + If $\InSec{uh}(H) \le \epsilon$ then we say that $H$ is + \emph{$\epsilon$-almost universal}. Note that the concept of + almost-universality is not quantified by running times. + + If $H$ is $1/|{\ran H}|$-almost universal, then we say that $H$ is + \emph{universal}. Sometimes it's said that this is the best possible + insecurity: this isn't true. 
+\end{slide} + +\begin{proof}[Counterexample] + Here's a function $H\colon \{0, 1, 2\} \times \{0, 1, 2, 3\} \to \{0, 1\}$ + which is $\frac{1}{3}$-almost universal, though $|{\ran H}| = 2$: + \begin{quote} \item + \begin{tabular}[C]{c|cccc} + & 0 & 1 & 2 & 3 \\ \hlx{vhv} + 0 & 0 & 1 & 0 & 1 \\ + 1 & 0 & 0 & 1 & 1 \\ + 2 & 0 & 1 & 1 & 0 + \end{tabular} + \end{quote} +\end{proof} + +\begin{slide} + \topic{dynamic view} + \head{Universal hashing, 2: a dynamic view} + + Suppose that $H$ is $\epsilon$-almost universal. Consider this experiment: + \begin{program} + Experiment $\Expt{uh}{H}(A)$: \+ \\ + $(x, y) \gets A$; \\ + $K \getsr \keys H$; \\ + \IF $x \ne y \land H_K(x) = H_K(y)$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \end{program} + The adversary may make random decisions before outputting its selection + $x$, $y$. We show that $\Pr[\Expt{uh}{H}(A) = 1] \le \InSec{uh}(H) = + \epsilon$. + + Let $\rho \in \{0, 1\}^*$ be $A$'s coin tosses: $A$ chooses $x$ and $y$ as + functions of $\rho$. For some \emph{fixed} $\rho$, + \[ \Pr[K \getsr \keys H : H_K(x(\rho)) = H_K(y(\rho))] \le + \InSec{uh}(H). \]% +\end{slide} + +\begin{slide} + \head{Universal hashing, 3: the dynamic view (cont.)} + + Now we treat $\rho$ as a random variable, selected from some distribution + $P$ on the set $\{0, 1\}^*$. We see that + \begin{eqnarray*}[Ll] + \Pr[\rho \getsr P; K \getsr \keys H : H_K(x(\rho)) = H_K(y(\rho))] \\ + &= \sum_{\rho \in \{0, 1\}^*} + P(\rho) \cdot \Pr[K \getsr \keys H : H_K(x(\rho)) = H_K(y(\rho))] \\ + &\le \sum_{\rho \in \{0, 1\}^*} P(\rho) \cdot \InSec{uh}(H) + = \InSec{uh}(H). + \end{eqnarray*} + Thus, no adversary can succeed in producing a collision in an + $\epsilon$-almost universal hash with probability better than $\epsilon$. + But obviously the adversary can ignore its coin tosses and simply return + the best colliding pair. Hence the two notions are completely equivalent. 
+\end{slide} + +\begin{slide} + \topic{composition} + \head{Universal hashing, 4: composition} + + Suppose that $G$ is $\epsilon$-almost universal, and $G'$ is + $\epsilon'$-almost universal, and $\dom G = \ran G'$. We define the + composition $G \compose G'$ to be the family $H\colon (\keys G \times + \keys G') \times \dom G' \to \ran G$ by $H_{k, k'}(m) = + G_k(G'_{k'}(m))$. + + Then $H$ is $(\epsilon + \epsilon')$-almost universal. To see this, fix $x + \ne y$, and choose $K = (k, k') \inr \keys G \times \keys G'$. Let $x' = + G'_{k'}(x)$ and $y' = G'_{k'}(y)$. Following our previous result, we see: + \begin{eqnarray*}[rl] + \Pr[H_K(x) = H_K(y)] + &= \Pr[G_k(G'_{k'}(x)) = G_k(G'_{k'}(y))] \\ + &= \Pr[G_k(x') = G_k(y')] \\ + &= \Pr[G_k(x') = G_k(y') \mid x' \ne y'] \Pr[G'_{k'}(x) \ne G'_{k'}(y)]\\ + &\le \epsilon + \epsilon'. + \end{eqnarray*} +\end{slide} + +\begin{slide} + \topic{the collision game} + \head{Universal hashing, 5: the collision game} + + Suppose that, instead of merely a pair $(x, y)$, our adversary was allowed + to return a \emph{set} $Y$ of $q$ elements, and measure the probability + that $H_K(x) = H_K(y)$ for some $x \ne y$ with $x, y \in Y$, and for $K + \inr \keys H$. + + Let $\InSec{uh-set}(H; q)$ be maximum probability acheivable for sets $Y$ + with $|Y| \le q$. Then + \[ \InSec{uh-set}(H; q) \le \frac{q(q - 1)}{2} \cdot \InSec{uh}(H) .\] +\end{slide} + +\begin{proof} + This is rather tedious. We use the dynamic view. Suppose $A$ returns $(x, + Y)$ with $|Y| = q$, and succeeds with probability $\epsilon$. Consider + \begin{program} + Adversary $A'$: \+ \\ + $(x, Y) \gets A$; \\ + $y \getsr Y$; \\ + \RETURN $(x, Y \setminus \{y\})$; + \end{program} + The worst that can happen is that $A'$ accidentally removes the one + colliding element from $Y$. This occurs with probability $2/q$. So + \[ \Succ{uh-set}{H}(A') \ge \frac{q - 2}{q} \Succ{uh-set}{H}(A). 
\]
+  Rearranging and maximizing gives
+  \[ \InSec{uh-set}(H; q) \le
+     \frac{q}{q - 2} \cdot \InSec{uh-set}(H; q - 1). \]
+  Note that $\InSec{uh-set}(H; 2) = \InSec{uh}(H)$ is our initial notion.  A
+  simple inductive argument completes the proof.
+\end{proof}
+
+\begin{slide}
+  \topic{a MAC}
+  \head{Universal hashing, 6: a MAC}
+
+  Suppose that $H\colon \{0, 1\}^k \times \{0, 1\}^* \to \{0, 1\}^l$ is an
+  almost universal hash function, and $F\colon \{0, 1\}^{k'} \times \{0,
+  1\}^l \to \{0, 1\}^L$ is a PRF\@.  Define a MAC $\Xid{\mathcal{M}}{UH}^{H,
+  F} = (\Xid{T}{UH}^{H, F}, \Xid{V}{UH}^{H, F})$ where:
+  \begin{eqnarray*}[rl]
+    \Xid{T}{UH}^{H, F}_{K, K'}(m) &= F_{K'}(H_K(m)) \\
+    \Xid{V}{UH}^{H, F}_{K, K'}(m, \tau) &= \begin{cases}
+      1 & if $\tau = F_{K'}(H_K(m))$ \\
+      0 & otherwise
+    \end{cases}.
+  \end{eqnarray*}
+  We have
+  \begin{eqnarray*}[Ll]
+    \InSec{suf-cma}(\Xid{\mathcal{M}}{UH}^{H, F}; t, q_T, q_V) \\
+    & \le
+      (q_V + 1) \biggl(\InSec{prf}(F; t, q_T + 1) + \frac{1}{2^L} +
+      \frac{q_T(q_T - 1)}{2} \cdot \InSec{uh}(H)\biggr).
+  \end{eqnarray*}
+\end{slide}
+
+\begin{proof}
+  We shall prove the result for $q_V = 0$ and $q_T = q$, and appeal to the
+  earlier result on verification oracles.
+
+  Suppose $A$ attacks the scheme $\Xid{\mathcal{M}}{UH}^{H, F}$ in time $t$,
+  issuing $q$ tagging queries.  Consider a distinguisher $D$, constructed
+  from a forger $A$:
+  \begin{program}
+    Distinguisher $D^{F(\cdot)}$: \+ \\
+      $K \getsr \{0, 1\}^k$; \\
+      $\Xid{T}{list} \gets \emptyset$; \\
+      $(m, \tau) \gets A^{\id{tag}(K, \cdot)}$; \\
+      \IF $m \notin \Xid{T}{list} \land \tau = F(H_K(m))$
+      \THEN \RETURN $1$; \\
+      \ELSE \RETURN $0$; \- \\[\smallskipamount]
+    Oracle $\id{tag}(K, m)$: \+ \\
+      $\Xid{T}{list} \gets \Xid{T}{list} \cup \{m\}$; \\
+      \RETURN $F(H_K(m))$; \- \\[\smallskipamount]
+  \end{program}
+  Note that $A$ isn't provided with a verification oracle: that's because we
+  didn't allow it any verification queries.
+
+  We can see immediately that
+  \[ \Pr[K \getsr \{0, 1\}^{k'} : D^{F_K(\cdot)} = 1] =
+     \Succ{suf-cma}{\Xid{\mathcal{M}}{UH}^{H, F}}(A). \]%
+
+  We must now find an upper bound for $\Pr[F \getsr \Func{l}{L} :
+  D^{F(\cdot)} = 1]$.  Suppose that the adversary returns the pair $(m^*,
+  \tau^*)$, and that its tagging oracle queries and their answers are $(m_i,
+  \tau_i)$ for $0 \le i < q$.  Consider the event $C$ that $H_K(m) =
+  H_K(m')$ for some $m \ne m'$, with $m, m' \in \{m^*\} \cup \{\,m_i \mid 0
+  \le i < q\,\}$.
+
+  If $C$ doesn't occur, then $F$ has not been queried before at $H_K(m)$, but
+  there's a $2^{-L}$ probability that the adversary guesses right anyway.  If
+  $C$ does occur, then we just assume that the adversary wins, even though it
+  might not have guessed the right tag.
+
+  By our result on the collision game, $\Pr[C] \le \frac{q(q - 1)}{2} \cdot
+  \InSec{uh}(H)$.  Then
+  \[ \Succ{prf}{F}(D) \ge
+     \Succ{suf-cma}{\Xid{\mathcal{M}}{UH}^{H, F}}(A) -
+     \frac{1}{2^L} - \frac{q(q - 1)}{2} \cdot \InSec{uh}(H). \]%
+  The result follows.
+\end{proof}
+
+\begin{slide}
+  \topic{almost XOR-universality}
+  \head{Almost XOR-universality, 1: definition}
+
+  Consider a family of hash functions $H\colon \keys H \times \dom H \to
+  \{0, 1\}^L$.  Define
+  \[ \InSec{xuh}(H) =
+     \max_{x \ne y, \delta}
+     \Pr[K \getsr \keys H : H_K(x) \xor H_K(y) = \delta]. \]%
+  If $\InSec{xuh}(H) < \epsilon$ then we say that $H$ is
+  \emph{$\epsilon$-almost XOR-universal}, or \emph{AXU}.  Setting $\delta =
+  0$ shows that
+  \begin{eqnarray*}[rl]
+    \InSec{xuh}(H)
+    & \ge \max_{x \ne y} \Pr[K \getsr \keys H : H_K(x) \xor H_K(y) = 0] \\
+    & = \InSec{uh}(H).
+  \end{eqnarray*}
+
+  We can take a dynamic view of almost XOR-universality using the same
+  technique as for almost universal functions.
+
+  If $H$ is $2^{-L}$-almost XOR universal then we say that $H$ is
+  \emph{XOR-universal}.  This is the best achievable.
+\end{slide}
+
+\begin{proof}
+  Fix some pair $x \ne y$.  Choose $\delta \inr \{0, 1\}^L$. 
Then for any
+  \emph{fixed} $K \in \keys H$, $H_K(x)$ and $H_K(y)$ are fixed, so $H_K(x)
+  \xor H_K(y)$ is fixed, and $\Pr[H_K(x) \xor H_K(y) = \delta] = 2^{-L}$.
+  Using the same trick as for proving the dynamic view:
+  \begin{eqnarray*}[Ll]
+    \Pr[\delta \getsr \{0, 1\}^L; K \getsr \keys H :
+        H_K(x) \xor H_K(y) = \delta] \\
+    & = \sum_{K \in \keys H} \frac{1}{|{\keys H}|}
+        \Pr[\delta \getsr \{0, 1\}^L : H_K(x) \xor H_K(y) = \delta] \\
+    & = \sum_{K \in \keys H} \frac{1}{|{\keys H}|} 2^{-L} = 2^{-L}.
+  \end{eqnarray*}
+  Since $H$ is arbitrary, this proves the lower bound on the almost
+  XOR-universality.
+\end{proof}
+
+\begin{slide}
+  \topic{composition}
+  \head{Almost XOR-universality, 2: composition}
+
+  We extend our result about composition of almost-universal functions.
+  Suppose that $G$ is $\epsilon$-almost XOR universal, and $G'$ is
+  $\epsilon'$-almost universal (it doesn't have to be almost XOR-universal),
+  and $\dom G = \ran G'$.
+
+  Then the composition $H = G \compose G'$ is $(\epsilon + \epsilon')$-almost
+  XOR-universal.  The proof is simple, and very similar to the
+  almost-universal case.
+\end{slide}
+
+\begin{slide}
+  \topic{a better MAC}
+  \head{Almost XOR-universality, 3: a better MAC}
+
+  The security result for the UH-based MAC contains a factor $q_T$, which
+  it'd be nice to remove.  Our new scheme uses an AXU hash $H\colon \keys H
+  \times \{0, 1\}^* \to \{0, 1\}^l$ and a PRF $F\colon \keys F \times \{0,
+  1\}^l \to \{0, 1\}^L$.
+
+  We first present a stateful version $\Xid{\mathcal{M}}{XUH}^{H, F}$.
+  Choose $(K, K') \inr \keys H \times \keys F$, and initialize a counter $i
+  \gets 0$. 
The tagging and verification algorithms are then:
+  \begin{program}
+    Algorithm $\Xid{T}{XUH}^{H, F}_{K, K'}(m)$: \+ \\
+      $\tau \gets (i, H_K(m) \xor F_{K'}(i))$; \\
+      $i \gets i + 1$; \\
+      \RETURN $\tau$;
+  \next
+    Algorithm $\Xid{V}{XUH}^{H, F}_{K, K'}(m, \tau)$: \+ \\
+      $(s, \sigma) \gets \tau$; \\
+      \IF $\sigma = H_K(m) \xor F_{K'}(s)$ \THEN \RETURN $1$; \\
+      \ELSE \RETURN $0$;
+  \end{program}
+  Note that verification is stateless.
+\end{slide}
+
+\begin{slide}
+  \head{Almost XOR-universality, 4: security of AXU-based MACs}
+
+  For the stateful scheme presented earlier, provided $q_T \le 2^l$, we have
+  \begin{eqnarray*}[Ll]
+    \InSec{suf-cma}(\Xid{\mathcal{M}}{XUH}^{H, F}; t, q_T, q_V) \\
+    & \le (q_V + 1)(\InSec{prf}(F; t, q_T + 1) + \InSec{xuh}(H) + 2^{-L}).
+  \end{eqnarray*}
+\end{slide}
+
+\begin{slide}
+  \head{Almost XOR-universality, 5: randomized AXU-based MAC}
+
+  We can avoid statefulness by using randomization.  This new scheme is
+  $\Xid{\mathcal{M}}{XUH$\$$}^{H, F} = (\Xid{T}{XUH$\$$}^{H, F},
+  \Xid{V}{XUH$\$$}^{H, F})$:
+  \begin{program}
+    Algorithm $\Xid{T}{XUH$\$$}^{H, F}_{K, K'}(m)$: \+ \\
+      $s \getsr \{0, 1\}^l$; \\
+      $\tau \gets (s, H_K(m) \xor F_{K'}(s))$; \\
+      \RETURN $\tau$;
+  \next
+    Algorithm $\Xid{V}{XUH$\$$}^{H, F}_{K, K'}(m, \tau)$: \+ \\
+      $(s, \sigma) \gets \tau$; \\
+      \IF $\sigma = H_K(m) \xor F_{K'}(s)$ \THEN \RETURN $1$; \\
+      \ELSE \RETURN $0$;
+  \end{program}
+  \begin{eqnarray*}[Ll]
+    \InSec{suf-cma}(\Xid{\mathcal{M}}{XUH$\$$}^{H, F}; t, q_T, q_V) \\
+    & \le (q_V + 1)
+      \Bigl(\InSec{prf}(F; t, q_T + 1) + \InSec{xuh}(H) + 2^{-L} +
+      \frac{q_T(q_T - 1)}{2^{l+1}}\Bigr).
+  \end{eqnarray*}
+\end{slide}
+
+\begin{proof}
+  We prove the result with $q_V = 0$ and $q_T = q$, and appeal to the result
+  on verification oracles.  Let $m_i$ be the message specified in the $i$-th
+  tagging query ($0 \le i < q$), and let $(s_i, \sigma_i) = (s_i, H_K(m_i)
+  \xor F_{K'}(s_i))$ be the tag returned.  We call the $s_i$ the
+  \emph{nonce}. 
+
+  We prove the result for the stateless scheme.  The bound $q \le 2^l$
+  ensures that the nonces are all distinct (we have $s_i = i$).  The
+  security bound for the randomized version merely has an extra term: an
+  upper bound for the probability of a nonce collision.
+
+  Let $A$ be an adversary attacking the MAC in time $t$, and using $q$
+  tagging queries.  Then we present the following pair of adversaries:
+  \begin{program}
+    Distinguisher $D^{F(\cdot)}$: \+ \\
+      $i \gets 0$; \\
+      $\Xid{T}{list} \gets \emptyset$; \\
+      $K \getsr \keys H$; \\
+      $(m, \tau) \gets A^{\id{tag}}$; \\
+      $(s, \sigma) \gets \tau$; \\
+      \IF $(m, \tau) \notin \Xid{T}{list} \land
+        \sigma = H_K(m) \xor F(s)$
+      \THEN \RETURN $1$; \\
+      \ELSE \RETURN $0$; \- \\[\smallskipamount]
+    Oracle $\id{tag}(m)$: \+ \\
+      $\tau \gets (i, H_K(m) \xor F(i))$; \\
+      $\Xid{T}{list} \gets \Xid{T}{list} \cup \{(m, \tau)\}$; \\
+      $i \gets i + 1$; \\
+      \RETURN $\tau$;
+  \next
+    Collision-finder $C$: \+ \\
+      $i \gets 0$; \\
+      $(m, \tau) \gets A^{\id{tag}}$; \\
+      $(s, \sigma) \gets \tau$; \\
+      \IF $s \ge i \lor m = m_s$ \THEN \ABORT; \\
+      \RETURN $(m, m_s, \sigma \xor \sigma_s)$; \- \\[\smallskipamount]
+    Oracle $\id{tag}(m)$: \+ \\
+      $m_i \gets m$; \\
+      $\sigma_i \getsr \{0, 1\}^L$; \\
+      $\tau \gets (i, \sigma_i)$; \\
+      $i \gets i + 1$; \\
+      \RETURN $\tau$;
+  \end{program}
+
+  We need to find a lower bound on the advantage of $D$.  If $F$ is chosen
+  from the PRF then $D$ returns $1$ precisely when $A$ finds a valid
+  forgery.  We now examine the setting in which $F$ is a random function.
+
+  Let $S$ be the event that $A$ succeeds in returning a valid forgery when
+  $F$ is random, and let $N$ be the event that the nonce $s$ returned by $A$
+  is not equal to any nonce $s_i$ returned by the tagging oracle.  Suppose
+  $N$ occurs: then the random function $F$ has never been queried before at
+  $s$, and $\Pr[F(s) = \sigma \xor H_K(m)]$ is precisely $2^{-L}$.
+
+  So suppose instead that $N$ doesn't occur. 
Then, since the $s_i$ are + distinct, there is a unique $i$ such that $s = s_i$. For $A$ to win, we + must have $m \ne m_i$ (for if $m = m_i$ then the only valid tag is + $H_K(m_i) \xor F(s_i) = \sigma_i$, which was already returned by the + tagging oracle). If $A$'s forgery is successful then + \[ \sigma = H_K(m) \xor F(s) + \qquad \text{and} \qquad + \sigma_i = H_K(m_i) \xor F(s_i) \]% + but $s = s_i$, whence + \[ H_K(m_i) \xor H_K(m) = \sigma \xor \sigma_i. \] + Since the $s_i$ are distinct and $F$ is a random function, the $\sigma_i$ + are independent uniformly-distributed random strings from $\{0, 1\}^L$. + Hence the collision-finder $C$ succeeds with probability $\Pr[S \land + \lnot N] \le \InSec{xuh}(H)$. + + Wrapping up, we have + \begin{eqnarray*}[rl] + \Adv{prf}{F}(D) + & \ge \Succ{suf-cma}{\Xid{\mathcal{M}}{XUH}^{H, F}}(A) - + (\Pr[S \mid N] \Pr[N] + \Pr[S \mid \lnot N] \Pr[\lnot N]) \\ + & \ge \Succ{suf-cma}{\Xid{\mathcal{M}}{XUH}^{H, F}}(A) - + (2^{-L} + \InSec{xuh}(H)). + \end{eqnarray*} + Maximizing and rearranging yields the required result. +\end{proof} + +\begin{remark*} + Note that our bound has a $2^{-L}$ term in it that's missing from + \cite{Goldwasser:1999:LNC}. We believe that their proof is wrong in its + handling of the XOR-collision probability. 
+\end{remark*}
+
+\endinput
+
+%%% Local Variables:
+%%% mode: latex
+%%% TeX-master: "ips"
+%%% End:
diff --git a/auth-sig.tex b/auth-sig.tex
new file mode 100644
index 0000000..b3322ac
--- /dev/null
+++ b/auth-sig.tex
@@ -0,0 +1,609 @@
+\xcalways\section{Digital signatures}\x
+
+\xcalways\subsection{Definitions and security notions}\x
+
+\begin{slide}
+  \topic{syntax}
+  \head{Syntax of digital signature schemes}
+
+  A \emph{digital signature scheme} is a triple $\mathcal{S} = (G, S, V)$ of
+  algorithms:
+  \begin{itemize}
+  \item The \emph{key-generation} algorithm $G$ is a probabilistic algorithm
+    which, given no arguments (or a security parameter $1^k$ represented in
+    unary, in the asymptotic setting) returns a pair $(P, K)$ of public and
+    private values.
+  \item The \emph{signing} algorithm $S$ is a probabilistic algorithm.
+    If $(P, K) \in G$, and $m \in \{0, 1\}^*$ is some message, then $S(K, m)$
+    (usually written $S_K(m)$) returns a \emph{signature} $\sigma$ on $m$.
+  \item The \emph{verification} algorithm $V$ is a deterministic algorithm
+    returning a single bit.  If $(P, K) \in G$ then $V(P, m, \sigma) = 1$ iff
+    $\sigma \in S_K(m)$.  We usually write $V_P(m, \sigma)$ instead of
+    $V(P, m, \sigma)$.
+  \end{itemize}
+  There are many other similar formulations of digital signature schemes.
+\end{slide}
+
+\begin{slide}
+  \topic{forgery goals}
+  \head{Forgery goals}
+
+  We recognize several different types of forgeries which can be made:
+  \begin{itemize}
+  \item An \emph{existential forgery} occurs when an adversary creates a
+    valid signature for some arbitrary message not of its choosing.
+  \item A \emph{selective forgery} occurs when an adversary creates a valid
+    signature for a message that it chooses.
+  \item A \emph{universal forgery} occurs when an adversary creates a valid
+    signature for some specific given message.
+  \item A \emph{total break} occurs when the adversary acquires the secret
+    key $K$, or equivalent information, and can continue to sign given
+    messages.
+  \end{itemize}
+  The formal definitions don't quite match up to these categories.
+\end{slide}
+
+\begin{slide}
+  \topic{attack types}
+  \head{Types of attacks}
+
+  We recognize a number of different types of attack:
+  \begin{itemize}
+  \item In a \emph{key-only} attack, the adversary is given only a public
+    key.
+  \item In a \emph{known-signature} attack, the adversary is given the
+    public key and a collection of messages with valid signatures.
+  \item In a \emph{chosen-message} attack, the adversary is provided with an
+    oracle which signs messages of the adversary's choosing.
+  \end{itemize}
+  In order to have `won' the game, we require that the adversary return a
+  signature on a message which wasn't one of the known-signature messages or
+  a query to the signing oracle.
+\end{slide}
+
+\begin{slide}
+  \topic{funny abbreviations}
+  \head{Funny abbreviations}
+
+  For each goal/attack type pair, we have a notion of security for digital
+  signature schemes.  To save typing, there are standard (ish) abbreviations
+  for these notions, e.g., EUF-CMA.
+
+  The first part is the forger's goal: EUF, SUF, UUF for existentially,
+  selectively and universally unforgeable, respectively.
+
+  The second part is the attack type: KOA, KSA, CMA for key-only,
+  known-signature and chosen-message attack, respectively.
+
+  The strongest notion is EUF-CMA, and that's what we concentrate on.
+\end{slide}
+
+\begin{slide}
+  \topic{formal definitions}
+  \head{Formal definition of EUF-CMA}
+
+  Consider this experiment involving a signature scheme $\mathcal{S} = (G, S,
+  V)$ and an adversary:
+  \begin{program}
+    Experiment $\Expt{euf-cma}{\mathcal{S}}(A)$: \+ \\
+      $(P, K) \gets G$; \\
+      $\Xid{m}{list} \gets \emptyset$; \\
+      $(m, \sigma) \gets A^{\id{sign}(\cdot)}(P)$; \\
+      \IF $m \notin \Xid{m}{list} \land V_P(m, \sigma) = 1$
+      \THEN \RETURN $1$; \\
+      \ELSE \RETURN $0$; \- \\[\smallskipamount]
+    Oracle $\id{sign}(m)$: \+ \\
+      $\Xid{m}{list} \gets \Xid{m}{list} \cup \{m\}$; \\
+      \RETURN $S_K(m)$;
+  \end{program}
+\end{slide}
+
+\begin{slide}
+  \head{Formal definition of EUF-CMA (cont.)}
+
+  We define:
+  \begin{eqnarray*}[rl]
+    \Succ{euf-cma}{\mathcal{S}}(A)
+    &= \Pr[\Expt{euf-cma}{\mathcal{S}}(A) = 1]; \\
+    \InSec{euf-cma}(\mathcal{S}; t, q)
+    &= \max_A \Succ{euf-cma}{\mathcal{S}}(A).
+  \end{eqnarray*}
+  where the maximum is taken over adversaries $A$ running in time $t$ and
+  issuing $q$ signing queries.
+
+  If $\InSec{euf-cma}(\mathcal{S}; t, q) \le \epsilon$ then we say that
+  $\mathcal{S}$ is a \emph{$(t, q, \epsilon)$-secure EUF-CMA digital
+  signature scheme}.
+\end{slide}
+
+\xcalways\subsection{Signature schemes from trapdoor one-way functions}\x
+
+\begin{slide}
+  \topic{RSA}
+  \head{RSA as a digital signature scheme}
+
+  RSA is, we assume, a one-way trapdoor permutation.  We'd like to be able to
+  turn this into a signature scheme.
+
+  The obvious thing to do is just define $S_{(n, d)}(m) = m^d \bmod n$ and
+  $V_{(n, e)}(m, \sigma) = 1 \iff \sigma^e \equiv m \pmod{n}$.  But this
+  doesn't work.
+  \begin{itemize}
+  \item It allows key-only existential forgery.  Suppose we're given a public
+    key $(n, e)$.  Choose a signature $\sigma \in \Z/n\Z$.  Compute $m =
+    \sigma^e \bmod n$.  Then $(m, \sigma)$ is a valid signed message.
+  \item It allows universal forgery under chosen message attack. 
Suppose
+    we're given a public key $(n, e)$, and asked to forge a signature for
+    message $m$.  Choose $k \inr (\Z/n\Z)^*$, and request a signature
+    $\sigma'$ of $m' = m k^e$.  Then $\sigma' = m^d k^{ed} = m^d k$, so
+    $\sigma = \sigma' k^{-1}$ is a valid signature on $m$.
+  \end{itemize}
+\end{slide}
+
+\begin{slide}
+  \topic{hash-then-sign}
+  \head{Hash-then-sign}
+
+  We could fix the problems with RSA if we preprocessed the message before
+  signing.  The first idea is to use a collision-resistant hash function $H$,
+  and to produce a signature on a message $m$, you compute $\sigma = H(m)^d
+  \bmod n$.
+
+  This doesn't work either.
+  \begin{itemize}
+  \item Collision-resistance isn't sufficient for security.  Just because $H$
+    is collision-resistant doesn't mean that $H(x) H(y) \ne H(x y)$ with
+    non-negligible probability.  If $H$ is partially multiplicative then
+    universal or selective forgery is still possible.
+  \item Recall the definition of a one-way function: if $x \inr \dom f$ then
+    finding $x'$ such that $f(x') = f(x)$ is hard, given only $f(x)$.  But
+    the range of a hash function like SHA-1 is only a tiny portion of the
+    domain of RSA.
+  \end{itemize}
+\end{slide}
+
+\begin{slide}
+  \topic{\PKCS1 padding}
+  \head{\PKCS1 signature padding \cite{RSA:1993:PRE}}
+
+  Let $n = p q$ be an RSA modulus, with $2^{8(k-1)} < n < 2^{8k}$ -- i.e.,
+  $n$ is a $k$-byte number.  Let $m$ be the message to be signed.
+
+  We compute $\hat{m} = 0^8 \cat T \cat 1^z \cat 0^8 \cat I_H \cat H(m)$ for
+  some hash function $H$, where:
+  \begin{itemize}
+  \item $|\hat{m}| = 8k$, i.e., $\hat{m}$ is a $k$-byte string;
+  \item $0^8$ is the string of 8 zero-bits;
+  \item $T = 00000001$ is an 8-bit \emph{type} code;
+  \item $1^z$ is a padding string of one-bits (\PKCS1 requires $z \ge 64$);
+    and
+  \item $I_H$ is an identifier for the chosen hash function $H$.
+  \end{itemize}
+  This bit string is then converted into an integer, using a big-endian
+  representation. 
Note that $\hat{m} < n$, since its top byte is zero.
+\end{slide}
+
+\begin{slide}
+  \head{Fixing RSA, 2: \PKCS1 padding (cont.)}
+
+  Diagrammatically, \PKCS1 signature looks like this:
+  \begin{tabular}[C]{r|c|c|c|c|c|} \hlx{c{2-6}v}
+    \hex{00} & \hex{01} &
+    \hex{FF} \hex{FF} \ldots \hex{FF} &
+    \hex{00} &
+    $I_H$ &
+    $H(m)$
+    \\ \hlx{vc{2-6}}
+  \end{tabular}
+
+  This partially obscures the multiplicative structure of RSA, but doesn't
+  expand the range of the transform at all.  \PKCS1 padding only uses a tiny
+  part of the domain of the RSA function.
+
+  In \cite{Brier:2001:CRS}, Brier, Clavier, Coron and Naccache analyse
+  general affine padding schemes, of which the \PKCS1 scheme is an example,
+  and show that these are insecure if the `message' -- the hash -- is more
+  than a third the size of the modulus.  In particular, existential forgery
+  is possible in polynomial time, and selective forgery in subexponential
+  time (though much faster than factoring the modulus).
+\end{slide}
+
+\xcalways\subsection{Full-domain hashing}\x
+
+\begin{slide}
+  \topic{description}
+  \head{Full-domain hashing}
+
+  Suppose we have an \emph{ideal hash}, whose range covers the whole of the
+  domain of our RSA transformation.  Would this be secure?  If we model the
+  ideal hash as a random oracle, the answer is yes.
+
+  Let $\mathcal{T} = (G, f, T)$ be a trapdoor one-way function generator.
+  Then we define the signature scheme $\Xid{\mathcal{S}}{FDH}^{\mathcal{T},
+  H} = (\Xid{G}{FDH}^{\mathcal{T}, H(\cdot)}, \Xid{S}{FDH}^{\mathcal{T},
+  H(\cdot)}, \Xid{V}{FDH}^{\mathcal{T}, H(\cdot)})$ as follows:
+  \begin{itemize}
+  \item $\Xid{G}{FDH}^{\mathcal{T}, H(\cdot)}(\cdot) = G(\cdot)$, i.e., we
+    use $\mathcal{T}$'s key generator directly.
+  \item $\Xid{S}{FDH}^{\mathcal{T}, H(\cdot)}_K(m) = T_K(H(m))$: we
+    implicitly (and deterministically) coerce $H$'s output so that $H(\cdot)$
+    is uniformly distributed over $\ran f_P$.
+  \item $\Xid{V}{FDH}^{\mathcal{T}, H(\cdot)}_P(m, \sigma) = 1 \iff
+    f_P(\sigma) = H(m)$.
+  \end{itemize}
+\end{slide}
+
+\begin{slide}
+  \topic{security results}
+  \head{Security results for FDH}
+
+  The full-domain hash signature scheme $\Xid{\mathcal{S}}{FDH}^{\mathcal{T},
+  H}$ is EUF-CMA if $\mathcal{T} = (G, f, T)$ is a trapdoor one-way
+  permutation.  The standard quantitative result is:
+  \[ \InSec{euf-cma}(\Xid{\mathcal{S}}{FDH}^{\mathcal{T}, H}; t, q_S, q_H)
+     \le
+     q_H \InSec{owf}(\mathcal{T}; t + O(q_H t_f)). \]%
+  Here, $t_f$ is the length of time required to compute $f$.  The additional
+  $q_H$ term is the number of random oracle (hashing) queries.  This doesn't
+  count queries made by the signing oracle.
+
+  This isn't a terribly promising bound.  The problem comes because there is
+  a conflict between getting the adversary to invert the required point, and
+  being able to answer signing queries.
+\end{slide}
+
+\begin{proof}
+  Let $A$ be an adversary against $\Xid{\mathcal{S}}{FDH}^{\mathcal{T}}$
+  making $q_S$ signing and $q_H$ random oracle queries: we present an
+  inverter $I$ for $\mathcal{T}$. 
+ \begin{program} + Inverter $I(P, y)$: \+ \\ + $n \getsr \{0, 1, \ldots, q_H - 1\}$; \\ + $i \gets 0$; \\ + $\Xid{I}{map} \gets \emptyset$; \\ + $\Xid{H}{map} \gets \emptyset$; \\ + $(m, \sigma) \gets A^{\id{sign}(\cdot), \id{hash}(\cdot)}(P)$; \\ + \RETURN $\sigma$; + \newline + Oracle $\id{sign}(m)$: \+ \\ + \IF $m \notin \dom \Xid{H}{map}$ \THEN \\ \quad \= \+ \kill + $h' \getsr \dom f_P$; \\ + $h \gets f_P(h')$; \\ + $\Xid{H}{map} \gets \Xid{H}{map} \cup \{m \mapsto h\}$; \\ + $\Xid{I}{map} \gets \Xid{I}{map} \cup \{h \mapsto h'\}$; \- \\ + $h \gets \Xid{h}{map}(m)$; \\ + \IF $h \notin \dom \Xid{I}{map}$ \THEN \ABORT; \\ + \RETURN $\Xid{I}{map}(h)$; + \next + Oracle $\id{hash}(x)$: \+ \\ + \IF $x \in \dom \Xid{H}{map}$ \THEN \RETURN $\Xid{H}{map}(x)$; \\ + \IF $i = n$ \THEN $h \gets y$; \\ + \ELSE \\ \quad \= \+ \kill + $h' \getsr \dom f_P$; \\ + $h \gets f_P(h')$; \\ + $\Xid{I}{map} \gets \Xid{I}{map} \cup \{h \mapsto h'\}$; \- \\ + $\Xid{H}{map} \gets \Xid{H}{map} \cup \{x \mapsto h\}$; \\ + \RETURN $h$; + \end{program} + Note that the inverter `cheats' a little. One of its random oracle replies + is chosen to be the inverter's input $y$. This is OK, because the rules of + the inverter's game say that that $y = f(x)$ for some $x$ chosen uniformly + at random from $\dom f_P$, and hence $y$ is also uniformly distributed and + independent, as required. + + The inverter also `cheats' because it ensures that it knows inverses for + all of the queries except for the $n$-th. + + Let the $q_H$ query strings to the oracle \id{hash} be $M = \{m_0, m_1, + \ldots, m_{q_H-1}\}$. Let $m$ be the message returned by $A$ and let + $\sigma$ be the returned signature. + + Consider the event $N$ that $m \notin M$; i.e., the random oracle was not + queried on the returned message. 
Then, since $A$ has knowledge of neither
+  $H(m)$ nor $y$, and both are uniformly distributed over $\dom f_P$, the
+  probability that it succeeds in producing a valid forgery is equal to its
+  probability of successfully returning a correct inversion of $y$.
+
+  Now consider the complement event $\lnot N$.  Then there is at least a
+  $1/q_H$ probability that $m = m_n$ -- the one which the inverter must
+  invert -- in which case $I$ succeeds if $A$ does.  (The probability might
+  be greater in the event that the adversary queries on the same string
+  several times, though doing so doesn't make a great deal of sense.)
+
+  Let $F$ be the event that $A$ returns a correct forgery, and let $V$ be the
+  event that $I$ returns a correct inversion.  Obviously,
+  \[ \Pr[F] = \Pr[F \land N] + \Pr[F \land \lnot N]
+     \quad \text{so} \quad
+     \Pr[F \land \lnot N] = \Pr[F] - \Pr[F \land N]. \]%
+  From the above discussion, we have
+  \[ \Pr[V \land N] = \Pr[F \land N]
+     \quad \text{and} \quad
+     \Pr[V \land \lnot N] \ge \frac{1}{q_H} \Pr[F \land \lnot N]. \]%
+  Finally,
+  \begin{eqnarray*}[rl]
+    \Pr[V] &= \Pr[V \land N] + \Pr[V \land \lnot N] \\
+    &\ge \Pr[F \land N] + \frac{1}{q_H} \Pr[F \land \lnot N] \\
+    &\ge \frac{1}{q_H} (\Pr[F \land N] + \Pr[F \land \lnot N]) \\
+    &= \frac{1}{q_H} \Pr[F].
+  \end{eqnarray*}
+  The result follows.
+\end{proof}
+
+\begin{remark*}
+  Most statements of this result seem to charge the adversary for hash
+  queries made by the experiment and by the constructed inverter, which seems
+  unreasonable.  The above result shows that the standard security bound
+  still holds, even under more generous accounting.
+\end{remark*}
+
+\xcalways\subsection{The Probabilistic Signature Scheme}\x
+
+\begin{slide}
+  \head{The Probabilistic Signature Scheme \cite{Bellare:1996:ESD}}
+
+  We can improve the security bounds on FDH by making the signing operation
+  randomized.
+
+  Let $\mathcal{T} = (G, f, T)$ be a trapdoor one-way function generator, as
+  before, but this time we require that the problem of inverting $f$ be
+  \emph{self-reducible}.
+
+  For some public parameter $P$, choose $n$ such that $2^{8n} \le
+  |{\dom f_P}| < 2^{8n+1}$, and fix a security parameter $k$.  Then we need
+  two random oracles $H\colon \{0, 1\}^* \to \{0, 1\}^k$ and $H'\colon \{0,
+  1\}^k \to \{0, 1\}^{8n-k}$.
+\end{slide}
+
+\begin{slide}
+  \topic{description}
+  \head{Definition of PSS}
+
+  We define the signature scheme $\Xid{\mathcal{S}}{PSS}^{\mathcal{T}} =
+  (\Xid{G}{PSS}^{\mathcal{T}, H(\cdot), H'(\cdot)},
+  \Xid{S}{PSS}^{\mathcal{T}, H(\cdot), H'(\cdot)}, \Xid{V}{PSS}^{\mathcal{T},
+  H(\cdot), H'(\cdot)})$:
+  \begin{program}
+    $\Xid{G}{PSS}^{\mathcal{T}, H(\cdot), H'(\cdot)}(x)$: \+ \\
+      $(P, K) \gets G(x)$; \\
+      \RETURN $(P, K)$;
+  \newline
+    $\Xid{S}{PSS}^{\mathcal{T}, H(\cdot), H'(\cdot)}(K, m)$: \+ \\
+      $r \getsr \{0, 1\}^k$; \\
+      $h \gets H(m \cat r)$; \\
+      $h' \gets (r \cat 0^{8n-2k}) \xor H'(h)$; \\
+      $y \gets 0^8 \cat h \cat h'$; \\
+      \RETURN $T_K(y)$;
+  \next
+    $\Xid{V}{PSS}^{\mathcal{T}, H(\cdot), H'(\cdot)}
+        (P, m, \sigma)$: \+ \\
+      $y \gets f_P(\sigma)$; \\
+      \PARSE $y$ \AS $z \cat k\colon h \cat 8n - k\colon h'$; \\
+      \PARSE $h' \xor H'(h)$ \AS $k\colon r \cat z'$; \\
+      \IF $z = 0 \land z' = 0 \land h = H(m \cat r)$
+      \THEN \RETURN $1$; \\
+      \ELSE \RETURN $0$;
+  \end{program}
+\end{slide}
+
+\begin{slide}
+  \head{Diagram of PSS}
+
+  %% m <- {0,1}^*           r <-R {0, 1}^k
+  %%      |                       |
+  %%      |                       |
+  %%      v                       |
+  %%     [H]<---------------------|
+  %%      |                       |
+  %%      |                       |
+  %%      |                       v
+  %%      |                     [||]<---- 0^{8n-2k}
+  %%      |                       |
+  %%      |                       |
+  %%      v                       v
+  %%      |-----------[H']------------>(+)
+  %%      |                             |
+  %%      |                             |
+  %%      |                             v
+  %% < 00 > < s = H(m || r) > < (r || 0^{8n-2k}) (+) H'(s) >
+
+  \vfil
+  \[ \begin{graph}
+       []!{0; <1cm, 0cm>:}
+       {m \in \{0, 1\}^*}="m" [rrrr] {r \inr \{0, 1\}^k}="r"
+       "m" :[d] *+[F]{H}="h"
+       :[ddd] *+=(2, 0)+[F:thicker]{\strut s = H(m \cat r)}
+       []!{-(2.5, 0)} 
*+=(1, 0)+[F:thicker]{\strut \hex{00}} + "r" :[dd] *{\ocat}="cat" [r] *+[r]{\mathstrut 0^{8n-2k}} :"cat" + :[d] *{\xor}="x" [uu] :"h" + "m" [ddd] :[rr] *+[F]{H'} :"x" + :[d] *+=(4, 0)+[F:thicker]{\strut t = (r \cat 0^{8n-2k}) \xor H'(s)} + \end{graph} \] + \vfil +\end{slide} + +\begin{slide} + \topic{security results} + \head{Security results for PSS} + + For the PSS scheme we've seen, we have + \begin{eqnarray*}[Ll] + \InSec{euf-cma}(\Xid{\mathcal{S}}{PSS}^{\mathcal{T}}; + t, q_S, q_H, q_{H'}) \le \\ + & \InSec{owf}(\mathcal{T}; t + O(t_n (q_S + q_H) + q_{H'})) + + \frac{3(q_H + q_{H'} + 2)}{2^k}. + \end{eqnarray*} + Here, $q_H$ and $q_H$ are the number of queries to the random oracles $H$ + and $H'$, and $t_n$ is a magical constant relating to, among other things, + the time required for the self-reduction on the original problem and the + difference between $|{\dom f_P}|$ and $2^{8n}$. For RSA or Rabin with a + multiple-of-8-bit modulus, $t_n$ will be some small multiple of $k$ times + the length of time for a public key operation. +\end{slide} + +\begin{proof} + Suppose $A$ can forge a signature under the + $\Xid{\mathcal{S}}{PSS}^{\mathcal{T}}$ scheme. We present an inverter $I$ + which inverts $\mathcal{T}$. + + We shall need to make use of the self-reducibility property of inverting + $\mathcal{T}$. To do this in an abstract way, we require two algorithms: + \begin{itemize} + \item $\id{sr-choose}(P, y)$ returns a pair $(s, y')$, where $s$ is some + state information, and $y'$ is a new problem instance uniformly selected + from the domain of $f_P$; and + \item $\id{sr-solve}(s, x)$ solves an instance of the problem given a + solution to the instance created by \id{sr-choose}. + \end{itemize} + If $P$ is a public parameter for $f$, $y$ is some point in $\ran f_P$, + $\id{sr-choose}(P, y)$ returns $(s, y')$ and $f_P(x') = y'$ then we require + that $\id{sr-solve}(s, x')$ returns an $x$ such that $f_P(x) = y$. 
+
+  By way of example, we present implementations of these algorithms for the
+  RSA function, where $P = (n, e)$:
+  \begin{program}
+    Algorithm $\id{sr-choose}(P, y)$: \+ \\
+      $(n, e) \gets P$; \\
+      \REPEAT $z \getsr \{1, 2, \ldots, n - 1\}$; \\
+      \UNTIL $\gcd(z, n) = 1$; \\
+      $s \gets (n, z^{-1} \bmod n)$; \\
+      \RETURN $(s, y z^e \bmod n)$;
+  \next
+    Algorithm $\id{sr-solve}(s, x')$: \+ \\
+      $(n, i) \gets s$; \\
+      \RETURN $x' i \bmod n$;
+  \end{program}
+  (The loop in \id{sr-choose} hardly ever needs more than one iteration.
+  Indeed, if the condition $\gcd(z, n) = 1$ fails, we've found a factor,
+  and could use that to solve the problem instance directly.)  We shall
+  assume that the \id{sr-choose} algorithm effectively completes in
+  deterministic time.
+
+  We shall also need to be able to convert between elements of $\ran f_P$ and
+  binary strings.  We shall assume that there is a `natural' mapping between
+  the integers $\{0, 1, \ldots, |{\ran f_P}| - 1\}$ and the elements of $\ran
+  f_P$, and omit explicit conversions.
+
+  The inverter algorithm is shown in figure~\ref{fig:pss-inverter}. 
+ + \begin{figure} + \begin{program} + Inverter $I(P, y)$: \+ \\ + $\Xid{H}{map} \gets \emptyset$; \\ + $\Xid{H'}{map} \gets \emptyset$; \\ + $\Xid{I}{map} \gets \emptyset$; \\ + $\Xid{m}{list} \gets \emptyset$; \\ + $(m, \sigma) \gets A^{\id{sign}(\cdot), H(\cdot), H'(\cdot)}(P)$; \\ + \IF $m \in \Xid{m}{list}$ \THEN \ABORT; \\ + $y' \gets f_P(\sigma)$; \\ + \PARSE $y'$ \AS $z \cat k\colon h \cat 8n - k\colon g$; \\ + \PARSE $g \xor H'(h)$ \AS $k\colon r \cat z'$; \\ + $x \gets m \cat r$; \\ + \IF $z \ne 0 \lor z' \ne 0$ \THEN \ABORT; \\ + \IF $h \ne H(x)$ \THEN \ABORT; \\ + \IF $x \notin \dom \Xid{I}{map}$ \THEN \ABORT; \\ + \RETURN $\id{sr-solve}(\Xid{I}{map}(x), \sigma)$; + \newline + Oracle $\id{sign}(m)$: \+ \\ + $r \getsr \{0, 1\}^k$; \\ + $x \gets m \cat r$; \\ + \IF $x \in \dom \Xid{H}{map}$ \THEN \ABORT; \\ + \REPEAT \\ \quad \= \+ \kill + $\sigma' \getsr \ran f_P$; \\ + $y' \gets f_P(\sigma')$; \- \\ + \UNTIL $y' < 2^{8n}$; \\ + \PARSE $y'$ \AS $z \cat k\colon h \cat 8n - k\colon g$; \\ + \PARSE $g$ \AS $k\colon r' \cat g'$; \\ + $h' \gets (r \xor r') \cat g'$; \\ + \IF $h \in \dom \Xid{H'}{map}$ \THEN \ABORT; \\ + $\Xid{H}{map} \gets \Xid{H}{map} \cup \{x \mapsto h\}$; \\ + $\Xid{H'}{map} \gets \Xid{H'}{map} \cup \{h \mapsto h'\}$; \\ + $\Xid{m}{list} \gets \Xid{m}{list} \cup \{m\}$; \\ + \RETURN $\sigma'$; + \next + Oracle $H(x)$: \+ \\ + \IF $x \in \dom \Xid{H}{map}$ \THEN \RETURN $\Xid{H}{map}(x)$; \\ + \REPEAT \\ \quad\=\+\kill + $(s, y') \gets \id{sr-choose}(P, y)$; \\ + \PARSE $x$ \AS $m \cat k\colon r$; \- \\ + \UNTIL $y' < 2^{8n}$; \\ + \PARSE $y'$ \AS $z \cat k\colon h \cat 8n - k\colon g$; \\ + \PARSE $g$ \AS $k\colon r' \cat g'$; \\ + $h' \gets (r \xor r') \cat g'$; \\ + \IF $h \in \dom \Xid{H'}{map}$ \THEN \ABORT; \\ + $\Xid{H}{map} \gets \Xid{H}{map} \cup \{x \mapsto h\}$; \\ + $\Xid{H'}{map} \gets \Xid{H'}{map} \cup \{h \mapsto h'\}$; \\ + $\Xid{I}{map} \gets \Xid{I}{map} \cup \{x \mapsto s\}$; \\ + \RETURN $h$; \- \\[\smallskipamount] + 
Oracle $H'(x)$: \+ \\
+      \IF $x \in \dom \Xid{H'}{map}$ \THEN \RETURN $\Xid{H'}{map}(x)$; \\
+      $h' \getsr \{0, 1\}^{8n - k}$; \\
+      $\Xid{H'}{map} \gets \Xid{H'}{map} \cup \{x \mapsto h'\}$; \\
+      \RETURN $h'$;
+    \end{program}
+    \caption{From the security proof of PSS -- the inverter for the trapdoor
+      one-way function}
+    \label{fig:pss-inverter}
+  \end{figure}
+
+  The oracle $H$ constructs related instances of the original inversion
+  problem and uses them to build the random oracle results.  Because the
+  subroutine \id{sr-choose} selects problem instances uniformly over $\{0,
+  1, \ldots, |{\ran f_P}| - 1\}$, and we iterate until $y' < 2^{8n}$, the
+  selected instance $y'$ is uniform over $\{0, 1\}^{8n}$.  We then construct
+  $H$ and $H'$ replies as appropriate for the adversary to construct $y'$ as
+  a message representative for its chosen message $m$, such that if it
+  responds with a signature for $m$ then we can use \id{sr-solve} to solve
+  our initial problem.
+
+  The oracle \id{sign} chooses oracle responses in order to fit in with a
+  previously chosen inverse (computed the easy way, by choosing a point in
+  the domain of $f_P$ and computing its image).
+
+  The oracle $H'$ just chooses random values.
+
+  Most of the \ABORT statements in the main inverter routine detect incorrect
+  signatures.  The final one, asserting $x \notin \dom \Xid{I}{map}$, can't
+  happen unless the signature is a duplicate of one we already gave.
+
+  The \ABORT{}s in $H$ and \id{sign} detect conditions in which the
+  adversary has successfully distinguished its simulated environment from
+  that of the standard forging experiment.
+
+  The inverter succeeds whenever a correct forgery is made.  To conclude the
+  proof, we must bound the probability that an \ABORT occurs in $H$ or
+  \id{sign}.
+
+  The \ABORT in $H$ occurs when the chosen $H(x)$ collides with a value for
+  which $H'$ is already defined. 
Since $H(x)$ values are chosen uniformly + from $\{0, 1\}^k$, this occurs with probability no more than $(q_H + + q_{H'} + 2)/2^k$. + + The first \ABORT in \id{sign} occurs when the chosen point $x = m \cat r$ + already has an image defined in $H$: this happens with probability at most + $q_H/2^k$. The second occurs when $h$ already has an image in $H'$, and + occurs with probability at most $(q_H + q_{H'})/2^k$. + + The loops in $H$ and \id{sign} don't complete in a deterministic amount + of time. But we can abort if they appear to be taking too long, and choose + the maximum number of iterations to achieve any given failure probability. + Let $d = |{\dom f_P}|$. Then the probability that any particular iteration + fails to select an appropriate problem instance is $1 - 2^{8n}/d$. For RSA + or Rabin with a modulus bit length which is a multiple of 8, this is less + than $\frac{1}{2}$. We choose the number of iterations in the entire + program to be such that we have a $2^{-k}$ probability of failure. Thus, + we set our maximum as $-k/\log_2 (1-2^{8n}/d)$ iterations in the entire + program. + + Fitting all of this together, we see that + \[ \Succ{owf}{\mathcal{T}}(I) \ge + \Succ{euf-cma}{\Xid{\mathcal{S}}{PSS}^{\mathcal{T}}} - + \frac{3(q_H + q_{H'} + 2)}{2^k}. \]% + completing the proof. +\end{proof} + +\endinput + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "ips" +%%% End: diff --git a/basics.tex b/basics.tex new file mode 100644 index 0000000..6d5be48 --- /dev/null +++ b/basics.tex @@ -0,0 +1,1115 @@ +\xcalways\section{Basics}\x + +\xcalways\subsection{Introduction and motivation}\x + +\begin{slide} + \topic{joke} + \head{Mandatory joke} + + An elderly Frenchman rises every morning at 5, goes out to the street in + front of his house, and sprinkles a white powder up and down the street. + + One day, a neighbour, who has watched his routine for many years, confronts + him. `What is this powder you sprinkle on the street every morning, + Pierre?' 
+ + `It is elephant powder, \emph{mon ami},' the gentleman says. `It keeps the + elephants away.' + + `But Pierre,' says the neighbour. `Everybody knows that there are no + elephants in France.' + + Says the older man matter-of-factly, `I guess it must be working, then.' +\end{slide} + +The joke has its mandatory corresponding serious message. Cryptography is +like elephant powder. If it seems to work, and keeps the attackers -- our +elephants -- away, then we trust it. This isn't really very satisfactory. +When the elephant powder fails, a 2-ton elephant turns up, with three of its +chums in a Mini, and you notice it hiding upside-down in your bowl of +custard. Let's face it: elephants aren't good at being surreptitious. + +But if your cryptography is no good, you may never know. + +\begin{slide} + \topic{serious message} + \head{Cryptography is elephant powder} + + So, what can we do about the situtation? + \begin{itemize} + \item Design simple cryptographic primitives, e.g., block ciphers, hash + functions. Develop techniques for analysing them, and attempt to stay + ahead of the bad guy. + \item Build useful constructions from trusted primitives in a modular way. + \emph{Prove that the constructions are secure.} + \end{itemize} + Here we look at this second part of the approach. +\end{slide} + +\xcalways\subsection{Approach}\x + +\begin{slide} + \head{The provable security approach} + + \begin{itemize} + \item Define notions of security. Now we know what to aim for, and what to + expect of our components. + \item Prove how the security of a construction relates to the security of + its primitives. If it breaks, we know who to blame. + \end{itemize} +\end{slide} + +\begin{slide} + \topic{adversaries} + \head{Adversaries} + + We model our adversary as a \emph{probabilistic algorithm}; i.e., the + algorithm is allowed to \emph{flip coins} to make decisions. The + adversary's output (for some input) follows a probability distribution. 
We + define what it means for an adversary to \emph{break} our construction, and + examine the probability with which this happens. + + We provide the adversary with \emph{oracles}: + \begin{itemize} + \item Oracles compute using secrets hidden from the adversary. + \item We count the number of \emph{queries} made to an oracle. + \item We can restrict the types of queries the adversary makes. + \end{itemize} + + Oracles are written as superscripts. For example, an adversary given a + chosen-plaintext oracle might be written as $A^{E_K(\cdot)}$. +\end{slide} + +\begin{slide} + \topic{the asymptotic approach} + \head{The asymptotic approach, 1} + + A function $\nu\colon \N \to \R$ is \emph{negligible} if, for any integer + $c$, there exists an $n \in \N$ such that $0 \le \nu(k) < k^{-c}$ for all + $k \ge n$. That is, $\nu(k)$ is `eventually' less than any polynomial + function of $k$. + + We examine families of constructions, with a \emph{security parameter} $k$. + We say that a function is (asymptotically) secure in some sense if, for any + polynomial $p(k)$ there is a negligible function $\nu(k)$ such that, for + any construction in the family, parameterized by $k$, no adversary which + runs for time $p(k)$ has success probability better than $\nu(k)$. +\end{slide} + +\begin{slide} + \head{The asymptotic approach, 2} + + Suppose we build an encryption scheme from a one-way function. We'd like + to prove that the encryption is good if the one-way function is secure. We + do this by contradiction: + \begin{enumerate} + \item Suppose an adversary $A$ breaks the encryption scheme with + better-than-negligible probability. + \item Show a polynomial-time \emph{reduction}: an algorithm which uses $A$ + to break the one-way function, in polynomial time, and with + better-than-negligible probability, + \item Claim that this violates the assumption of a secure one-way function. + \end{enumerate} + + This doesn't work with real constructions. 
We don't know where the + asymptotics set in, and they can conceal large constants. It's still + better than nothing. +\end{slide} + +\begin{slide} + \topic{the concrete (quantitative) approach} + \head{The concrete (quantitative) approach} + + We constrain the resources we allow the adversary: + \begin{itemize} + \item Running time (including program size). + \item Number of oracle queries. + \item Maximum size of oracle queries. + \end{itemize} + Write that something is \emph{$(t, q, \epsilon)$-secure} if no adversary + which runs in time $t$ and makes $q$ oracle queries can break it with + probability better than $\epsilon$. + + We make statements like `foo is $(t, q, 2 q \epsilon)$-secure if bar is $(t + + O(q), 2 q, \epsilon)$-secure'. + + This is a much more satisfactory approach. However, we still have to + \emph{assume} the security of our primitive operations. +\end{slide} + +\xcalways\subsection{Notation}\x + +\begin{slide} + \topic{boolean operators} + \head{Notation, 1: boolean operators} + + If $P$ and $Q$ are \emph{predicates} -- i.e., either true or false -- then: + \begin{itemize} + \item $P \land Q$ is true if both $P$ \emph{and} $Q$ is true; + \item $P \lor Q$ is true if either $P$ \emph{or} $Q$ (or both) is true; + \item $\lnot P$ is true if $P$ is false; and + \item $P \implies Q$ is true if $Q$ is true or $P$ is false. + \end{itemize} +\end{slide} + +\begin{slide} + \topic{sets} + \head{Notation, 2: sets} + + For our purposes, we can think of sets as being collections of objects. + + We use the usual notations for set membership ($x \in X$), intersection ($X + \cap Y$), union ($X \cup Y$) and subset containment ($X \subseteq Y$). The + \emph{empty set}, which contains no elements, is written $\emptyset$. + + The notation $\{f(x) \mid P(x)\}$ describes the set containing those items + $f(x)$ for which the predicate $P(x)$ is true. + + The \emph{cardinality} $|X|$ of a (finite) set $X$ is the number of + elements in the set. 
+ + The power set $\powerset(X)$ of a set $X$ is the set of all subsets of $X$. +\end{slide} + +\begin{slide} + \head{Notation, 3: sets (cont.)} + + The \emph{cartesian product} of two sets $X \times Y$ is the set of all + ordered pairs $\{ (x, y) \mid x \in X \land y \in Y \}$. We use exponents + to indicate the product of a set with itself: hence, $X^2 = X \times X$. + + A \emph{relation} $R$ is a subset of a cartesian product. We write $R(x, + y)$ if $(x, y) \in R$. Relations between two sets are often written as + infix symbols: e.g., $x \sim y$. +\end{slide} + +\begin{slide} + \topic{functions} + \head{Notation, 4: functions} + + A \emph{function} $f\colon X \to Y$ is a mapping which assigns every + element $x$ in the \emph{domain} $X$ a corresponding element $f(x)$ in the + \emph{range} (or sometimes \emph{codomain}) $Y$. The notation $\dom f$ + describes the domain of an arbitrary function; $\ran f$ describes its + range. + + We sometimes apply the function notation to sets, indicating that the + function should be applied pointwise; i.e., $f(Z) = \{ f(z) \mid z \in Z + \}$. The \emph{image} of a function $f$ is the set $f(\dom f)$. + + If $f\colon X \to Y$ preserves equality, i.e., $f(x) = f(x') \implies x = + x'$ for all $x, x' \in X$, then we say $f$ is \emph{injective} (or + \emph{1-to-1}). If $f(X) = Y$ then we say that it is \emph{surjective} (or + \emph{onto}). If $f$ is both injective and surjective then we say that it + is \emph{bijective}. In this case, there is a well-defined inverse + $f^{-1}\colon Y \to X$ defined by $f(f^{-1}(y)) = y$ for all $y \in Y$. +\end{slide} + +\begin{slide} + \head{Notation, 5: functions (cont.)} + + We can consider a function $f\colon X \to Y$ to be a particular sort of + relation $f \subseteq X \times Y$, subject to the constraint that if $(x, + y) \in f$ and $(x, y') \in f$ then $y = y'$. + + We shall use this view in some of the algorithms we present. 
In addition,
+  we shall use the \emph{maplet} notation $x \mapsto y$ rather than the
+  ordered pair notation $(x, y)$ to reinforce the notion of a mapping.
+
+  We might write, for example,
+  \begin{program}
+    $f \gets f \cup \{ x \mapsto y \}$;
+  \end{program}
+  to augment a function by the addition of a new mapping.  This is clearly
+  only valid if $x \notin \dom f$ (or $f(x) = y$) initially.
+\end{slide}
+
+\begin{slide}
+  \topic{strings}
+  \head{Notation, 6: strings}
+
+  An \emph{alphabet} is a finite set of \emph{symbols}.  The one we'll use
+  most of the time is the set $\Sigma = \{0, 1\}$ of \emph{bits}.
+
+  Suppose $A$ is an alphabet.  The set of sequences of exactly $n$ symbols
+  from $A$ is written $A^n$.  Hence, $\{0, 1\}^{64}$ is the set of all 64-bit
+  sequences.  The set of (finite) \emph{strings} over an alphabet $A$ is $A^*
+  = \bigcup_{i \in \N} A^i$.  The empty string is named $\emptystring$.
+
+  The \emph{length} of a string $a \in A^*$, written $|a|$, is the natural
+  number $n$ where $a \in A^n$.
+
+  If $x, y \in A^*$ are strings then their \emph{concatenation}, written $x
+  \cat y$, or sometimes just $x y$, is the result of writing the symbols in
+  $x$ followed by the symbols in $y$, in order.  We have $|x y| = |x| + |y|$.
+\end{slide}
+
+\begin{slide}
+  \head{Notation, 7: strings (cont.)}
+
+  There are natural (bijective) mappings between bit strings and natural
+  numbers.
+
+  If $x = x_0 x_1 \ldots x_{n-1} \in \{0, 1\}^*$ then we can associate with
+  it the natural number
+  \[ \overrightarrow{x} = \sum_{0 \le i < n} 2^i x_i. \]
+
+  The other natural mapping is
+  \[ \overleftarrow{x} = \sum_{0 \le i < n} 2^{n-i-1} x_i. \]
+  It doesn't matter which you choose, as long as you're consistent.
+
+  For simplicity's sake, we shall tend to switch between strings and the
+  numbers (and occasionally more exotic objects) they represent implicitly. 
+\end{slide} + +\begin{slide} + \topic{parsing} + \head{Notation, 8: parsing} + + We'll find it useful to be able to break up strings in the algorithms we + present. We use the statement + \begin{program} + \PARSE $x$ \AS $n_0\colon x_0, n_1\colon x_1, \ldots, n_k\colon x_k$; + \end{program} + to mean that the string $x$ is broken up into individual strings $x_0$, + $x_1$, \ldots, $x_k$, such that + \begin{itemize} + \item $x = x_0 \cat x_1 \cat \cdots \cat x_k$; and + \item $|x_i| = n_i$ for all $0 \le i \le k$. + \end{itemize} + We may omit one of the $n_i$ length indicators, since it can be deduced + from the length of the string $x$ and the other $n_j$. +\end{slide} + +\begin{slide} + \topic{vectors} + \head{Notation, 9: vectors} + + A \emph{vector} $\vect{x}$ is a finite ordered collection of elements from + some set $X$. If $\vect{x}$ contains $n$ elements then we write $\vect{x} + \in X^n$, and that $|\vect{x}| = n$. We write the individual elements as + $\vect{x}[0], \vect{x}[1], \ldots, \vect{x}[n - 1]$. + + We shall use set membership notation for vectors; i.e., we write $x \in + \vect{x}$ if there is an $i$ ($0 \le i < |\vect{x}|$) such that + $\vect{x}[i] = x$. + + When we apply functions to vectors, we mean that they are applied + pointwise, as for sets. Thus, if we write that $\vect{y} = + f(\vect{x})$ then $|\vect{y}| = |\vect{x}|$ and $\vect{y}[i] = + f(\vect{x}[i])$ for all $0 \le i < |\vect{x}|$. +\end{slide} + +\begin{slide} + \topic{distributions and randomness} + \head{Notation, 10: distributions and randomness} + + A \emph{probability distribution} over a (countable) set $X$ is a + function $\mathcal{D}\colon X \to [0, 1]$ such that + \[ \sum_{x \in X} \mathcal{D}(x) = 1. \] + + The \emph{support} of $\mathcal{D}$, written $\supp \mathcal{D}$, is the + set $\{ x \in X \mid \mathcal{D}(x) \ne 0 \}$; i.e., those elements of $X$ + which occur with nonzero probability. 
+
+  We write $x \getsr \mathcal{D}$ in algorithms to indicate that $x$ is to be
+  chosen independently at random, according to the distribution
+  $\mathcal{D}$.  The notation $x \inr \mathcal{D}$ indicates that $x$ has
+  been chosen independently at random according to $\mathcal{D}$.
+
+  The \emph{uniform distribution} over a (finite) set $X$ is
+  $\mathcal{U}_X\colon X \to [0, 1]$ defined by $\mathcal{U}_X(x) = 1/|X|$
+  for all $x \in X$.  We shall write $x \getsr X$ and $x \inr X$ as
+  convenient shorthands, meaning $x \getsr \mathcal{U}_X$ and $x \inr
+  \mathcal{U}_X$ respectively.
+\end{slide}
+
+\xcalways\subsection{Background}\x
+
+\begin{slide}
+  \topic{distinguishability}
+  \head{Distinguishability, 1}
+
+  Suppose that $\mathcal{X}$ and $\mathcal{Y}$ are two probability
+  distributions.
+
+  Let $X$ be a random variable distributed according to $\mathcal{X}$, and
+  let $Y$ be a random variable distributed according to $\mathcal{Y}$.  We
+  say that $\mathcal{X}$ and $\mathcal{Y}$ are \emph{identically
+    distributed} if, for all possible values $x$ of $X$, we have
+  \[ \Pr[X = x] = \Pr[Y = x]. \]
+
+  Equivalently, we require that, for all $x \in \supp \mathcal{X}$ we have
+  \[ x \in \supp \mathcal{Y} \land \mathcal{X}(x) = \mathcal{Y}(x). \]
+\end{slide}
+
+\begin{slide}
+  \head{Distinguishability, 2: statistical closeness}
+
+  Now we generalize the setting slightly.  Consider two \emph{families} of
+  distributions, $\{\mathcal{X}_k\}_{k \in \N}$ and $\{\mathcal{Y}_k\}_{k \in
+  \N}$, parameterized by a security parameter $k$.
+
+  Fix a value of $k$.  Again, let $X$ be distributed according to
+  $\mathcal{X}_k$, and let $Y$ be distributed according to $\mathcal{Y}_k$.
+  We say that $\{\mathcal{X}_k\}_{k \in \N}$ and $\{\mathcal{Y}_k\}_{k \in
+  \N}$ are \emph{statistically close} if there is a negligible function
+  $\nu(\cdot)$ such that for any $k \in \N$, and for all $x \in
+  \supp \mathcal{X}_k$,
+  \[ |{\Pr[X = x]} - \Pr[Y = x]| \le \nu(k). 
\] + + (Reminder: Saying that $\nu\colon \N \to \R$ is \emph{negligible} means + that, for any $c \in \Z$ there is an $n \in N$ such that $\nu(k) < + k^{-c}$.) +\end{slide} + +\begin{slide} + \head{Distinguishability, 3: computational indistinguishability} + + We say that two families of distributions are computationally + indistinguishable if no `efficient' algorithm can tell them apart with + better than `negligible' probability. + + So, we say that $\{\mathcal{X}_k\}_{k \in \N}$ and $\{\mathcal{Y}_k\}_{k + \in \N}$ are \emph{computationally indistinguishable} if, for any + probabilistic polynomial-time algorithm $A$, there is a negligible function + $\nu(\cdot)$ such that, for any $k$: + \[ \Pr[x \getsr \mathcal{X}_k; b \gets A(x) : b = 1] - + \Pr[x \getsr \mathcal{Y}_k; b \gets A(x) : b = 1] \le \nu(k). \]% +\end{slide} + +\begin{slide} + \topic{collisions} + \head{Collisions} + + Suppose we throw $q$ balls into $n$ bins at random (with $q \le n$). Let + $C_{q, n}$ be the event that, at the end of this, we have a bin containing + more than one ball -- a \emph{collision}. + + Let $B_{q, n}$ be the event that the $q$-th ball collides with a + previous one. Obviously, the worst case for this is when none of the other + balls have collided, so + \[ \Pr[B_{q, n}] \le \frac{q - 1}{n}. \] + Then + \begin{eqnarray*}[rl] + \Pr[C_{q, n}] + &\le \Pr[B_{2, n}] + \Pr[B_{3, n}] + \cdots + \Pr[B_{q, n}] \\ + &= \frac{1}{n} + \frac{2}{n} + \cdots + \frac{q - 1}{n} \\ + &= \frac{q(q - 1)}{2 n}. + \end{eqnarray*} + This is an extremely useful result, and we shall need it often. +\end{slide} + +\xcalways\subsection{Primitives}\x + +\begin{slide} + \topic{one-way functions} + \head{One-way functions, 1: introduction} + + Intuition: a one-way function is easy to compute but hard to invert. + + Choose a function $f\colon X \to Y$. Let $I$ be a prospective inverter. + Now we play a game: + \begin{enumerate} + \item Choose $x \in X$ at random. Compute $y = f(x)$. 
+ \item Let $x'$ be the output of $I$ when run on input $y$. + \item We say that $I$ `wins' if $f(x') = y$; otherwise it loses. + \end{enumerate} + Note that we don't care whether $x = x'$. + + Examples: SHA-1, or $x \mapsto g^x \bmod p$. +\end{slide} + +\begin{slide} + \head{One-way functions, 2: formalism} + + The \emph{success probability} of an inverter $I$ against the function $f$ + is + \[ \Succ{owf}{f}(I) = + \Pr[x \getsr X; + y \gets f(x); + x' \gets I(y) : + f(x') = y] \]% + where the probability is taken over all choices of $x$ and all coin flips + made by $I$. + + We measure the \emph{insecurity} of a one-way function (OWF) by maximizing + over possible inverters: + \[ \InSec{owf}(f; t) = \max_I \Succ{owf}{f}(I) \] + where the maximum is taken over all $I$ running in time $t$. + + If $\InSec{owf}(f; t) \le \epsilon$ then we say that $f$ is a \emph{$(t, + \epsilon)$-secure one-way function}. +\end{slide} + +\begin{slide} + \head{One-way functions, 3: trapdoors} + + Intuition: a \emph{trapdoor} is secret information which makes inverting a + one-way function easy. A trapdoor one-way function generator $\mathcal{T} + = (G, f, T)$ is a triple of algorithms: + + \begin{itemize} + \item The probabilistic algorithm $G$ is given some parameter $k$ and + returns a pair $(P, K)$, containing the public parameters $P$ for + computing an instance of the one-way function, and the secret trapdoor + information $K$ for inverting it. We write $(P, K) \in G(k)$. + + \item The algorithm $f$ implements the one-way function. That is, if $(P, + K) \in G(k)$ then $f(P, \cdot)$ (usually written $f_P(\cdot)$) is a + one-way function. + + \item The algorithm $T$ inverts $f$ using the trapdoor information. That + is, if $(P, K) \in G(k)$ and $y = f_P(x)$ for some $x$, then $x = + T(K, y)$. We usually write $T_K(\cdot)$ instead of $T(K, \cdot)$. 
+ \end{itemize} +\end{slide} + +\begin{slide} + \topic{pseudorandom generators (PRGs)} + \head{Pseudorandom generators (PRGs)} + + A pseudorandom generator (PRG) `stretches' an input seed into a longer + string which `looks' random. + + Let $G\colon \{0, 1\}^k \to \{0, 1\}^L$ be a function from $k$-bit strings + to $L$-bit strings. The \emph{advantage} of a distinguisher $D$ against + $G$ is: + \begin{eqnarray*}[rl] + \Adv{prg}{G}(D) = & + \Pr[x \getsr \{0, 1\}^k; y \gets G(x) : D(y) = 1] - {}\\ + & \Pr[y \getsr \{0, 1\}^L : D(y) = 1]. + \end{eqnarray*} + The \emph{insecurity} is simply the maximum advantage: + \[ \InSec{prg}(G; t) = \max_D \Adv{prg}{G}(D) \] + where the maximum is taken over all distinguishers $D$ running in time + $t$. If $\InSec{prg}(G; t) \le \epsilon$ then we also say that $G$ is a + $(t, \epsilon)$-secure PRG\@. +\end{slide} + +\begin{exercise} + We say that a PRG $g\colon \{0, 1\}^k \to \{0, 1\}^L$ is \emph{trivial} if + $k \ge L$. + \begin{enumerate} + \item Show that trivial PRGs exist. + \item Show that if $g$ is nontrivial, then $g$ is also a one-way function, + with + \[ \InSec{owf}(g; t) \le \InSec{prg}(g; t) + 2^{k-L}. \] + \end{enumerate} + \answer% + \begin{parenum} + \item The identity function $I(x) = x$ is a trivial PRG, with + $\InSec{prg}(I, t) = 0$, as is easily seen from the definition. + \item Suppose $A$ inverts $g$. Then consider adversary $B(y)$: \{ $x \gets + A(y)$; \IF $g(x) = y$ \THEN \RETURN $1$; \ELSE \RETURN $0$;~\}. If $y$ + is the output of $g$, then $A$ inverts $y$ with probability + $\Succ{owf}{g}(A)$; if $y$ is random in $\{0, 1\}^L$ then there is a + probability at least $1 - 2^{k-L}$ that $y$ has \emph{no} inverse, + proving the result. Note that \cite{Wagner:2000:PSU} provides a better + security bound than this simplistic analysis. + \end{parenum} +\end{exercise} + +\begin{exercise} + \label{ex:dbl-prg} + Suppose that we have a \emph{length-doubling} PRG $g\colon \{0, 1\}^k \to + \{0, 1\}^{2k}$. 
Let $g_0(x)$ be the first $k$ bits of $g(x)$ and
+  $g_1(x)$ be the second $k$ bits.  Define the sequence of generators
+  $g^{(i)}$ (for $i \ge 1$) by
+  \[ g^{(1)}(x) = g(x); \qquad
+     g^{(i+1)}(x) = g_0(x) \cat g^{(i)}(g_1(x)). \]%
+  Relate the security of $g^{(i)}$ to that of $g$.
+  \answer%
+  Let $A$ be an adversary running in time $t$ and attacking $g^{(i+1)}$.
+  Firstly, we attack $g$: consider adversary $B(y)$: \{ \PARSE $y$ \AS $y_0,
+  k\colon y_1$; $z \gets g^{(i)}(y_1)$; $b \gets A(y_0 \cat z)$; \RETURN
+  $b$;~\}.
+  Then $\Adv{prg}{g}(B) \ge \Adv{prg}{g^{(i+1)}}(A) + \delta$, so
+  $\InSec{prg}(g^{(i+1)}; t) \le \InSec{prg}(g; t) + \delta$, where
+  \begin{eqnarray*}[rl]
+    \delta = &\Pr[x_0 \gets \{0, 1\}^k; x_1 \gets \{0, 1\}^k;
+                  y \gets g^{(i)}(x_1) : A(x_0 \cat y) = 1] - \\
+             &\Pr[y \getsr \{0, 1\}^{(i+2)k} : A(y) = 1].
+  \end{eqnarray*}
+  We attack $g^{(i)}$ to bound $\delta$: consider adversary $C(y)$: \{ $x_0
+  \getsr \{0, 1\}^k$; $b \gets A(x_0 \cat y)$; \RETURN $b$;~\}.  Now $\delta
+  \le \Adv{prg}{g^{(i)}}(C) \le \InSec{prg}(g^{(i)}; t)$.  So by induction,
+  \[ \InSec{prg}(g^{(i)}; t) \le i \cdot \InSec{prg}(g; t). \]
+\end{exercise}
+
+\begin{slide}
+  \topic{advantage}
+  \head{Notes about advantage}
+
+  Advantage is a concept used in many definitions of security notions:
+  \begin{eqnarray*}[rl]
+    \Adv{}{}(A) = &
+      \Pr[\text{$A$ returns 1 in setting $a$}] - {} \\
+    & \Pr[\text{$A$ returns 1 in setting $b$}].
+  \end{eqnarray*}
+  \begin{enumerate}
+  \item We have $-1 \le \Adv{}{}(A) \le +1$.
+  \item Zero means that the adversary couldn't distinguish.
+  \item Negative advantage means the adversary got them the wrong way
+    around.  There is another adversary which uses the same resources but has
+    positive advantage.
+  \item \label{item:adv-guess} If $A$ is attempting to guess some hidden bit
+    $b^* \inr \{0, 1\}$, we have
+    \[ \Pr[b \gets A : b = b^*] = \frac{\Adv{}{}(A)}{2} + \frac{1}{2}. 
\] + \end{enumerate} +\end{slide} + +\begin{proof} + Let $b$ be the bit that $A$ returns, and let $b^*$ be the experiment's + hidden bit. Then + \[ \Adv{}{}(A) = \Pr[b = 1 \mid b^* = 1] - \Pr[b = 1 \mid b^* = 0]. \] + Addressing the above claims in order: + \begin{enumerate} + \item By definition of probability, $0 \le \Pr[b = 1 \mid b^* = 1] \le 1$ + and $0 \le \Pr[b = 1 \mid b^* = 0]$, so their absolute difference can be + at most 1. + \item This is a corollary of \ref{item:adv-guess}. + \item Consider the adversary $\bar{A}$ which runs $A$ and returns the + complement bit $\bar{b} = b \xor 1$. Then + \begin{eqnarray*}[rl] + \Adv{}{}(\bar{A}) + &= \Pr[\bar{b} = 1 \mid b^* = 1] - \Pr[\bar{b} = 1 \mid b^* = 0] \\ + &= \Pr[b = 0 \mid b^* = 1] - \Pr[b = 0 \mid b^* = 0] \\ + &= (1 - \Pr[b = 1 \mid b^* = 1]) - (1 - \Pr[b = 1 \mid b^* = 0]) \\ + &= \Pr[b = 1 \mid b^* = 0] - \Pr[b = 1 \mid b^* = 1] \\ + &= -\Adv{}{}(A). + \end{eqnarray*} + \item Note that $\Pr[b^* = 1] = \Pr[b^* = 0] = \frac{1}{2}$. Then + \begin{eqnarray*}[rl] + \Pr[b = b^*] + &= \Pr[b = 1 \land b^* = 1] + \Pr[b = 0 \land b^* = 0] \\ + &= \frac{1}{2}(\Pr[b = 1 \mid b^* = 1] + \Pr[b = 1 \mid b^* = 0]) \\ + &= \frac{1}{2}(\Pr[b = 1 \mid b^* = 1] + + (1 - \Pr[b = 0 \mid b^* = 0])) \\ + &= \frac{1}{2}(1 + \Pr[b = 1 \mid b^* = 1] - + \Pr[b = 0 \mid b^* = 0]) \\ + &= \frac{\Adv{}{}(A)}{2} + \frac{1}{2}. + \end{eqnarray*} + \end{enumerate} + All present and correct. +\end{proof} + +\begin{slide} + \topic{pseudorandom functions (PRFs)} + \head{Pseudorandom functions (PRFs)} + + A \emph{pseudorandom function family} (PRF) is a collection of functions + $F_K\colon \{0, 1\}^l \to \{0, 1\}^L$, where $K$ is some index, typically + from a set of fixed-size strings $\{0, 1\}^k$. + + We want to say that $F$ is a strong PRF if adversaries find it hard to + distinguish an instance $F_K$ from a function chosen completely at random + with the same `shape'. 
+ + We provide the adversary with an \emph{oracle}, either for a randomly + selected $F_K$, or for completely random function, and ask it to try and + say which it is given. + + We write $\Func{l}{L}$ as the set of \emph{all} functions from $\{0, 1\}^l$ + to $\{0, 1\}^L$. +\end{slide} + +\begin{slide} + \head{Pseudorandomness, 3: PRFs (cont.)} + + We define the advantage of a distinguisher $D$ against the PRF $F$ as + follows: + \begin{eqnarray*}[rl] + \Adv{prf}{F}(D) = & + \Pr[K \getsr \{0, 1\}^k : D^{F_K(\cdot)} = 1] - {}\\ + & \Pr[R \getsr \Func{l}{L} : D^{R(\cdot)} = 1]. + \end{eqnarray*} + The insecurity of the PRF is then measured as + \[ \InSec{prf}(F; t, q) = \max_D \Adv{prf}{F}(D) \] + where the maximum is taken over all distinguishers $D$ which run for time + $t$ and make $q$ oracle queries. As is usual, if $\InSec{prf}(F; t, q) + \le \epsilon$ then we say that $F$ is a $(t, q, \epsilon)$-secure PRF. +\end{slide} + +\begin{slide} + \topic{pseudorandom permutations (PRPs)} + \head{Pseudorandom permutations (PRPs)} + + We define a \emph{pseudorandom permutation family} (PRP) in a similar way + to the PRFs we've already seen. Suppose $F_K\colon \{0, 1\}^L \to \{0, + 1\}^L$ is a family of permutations, indexed by elements of some finite set, + e.g., $\{0, 1\}^k$. Let $\Perm{L}$ be the set of \emph{all} permutations + over the set of $L$-bit strings $\{0, 1\}^L$. + + The advantage of a distinguisher $D$ against the PRP $F$ is + \begin{eqnarray*}[rl] + \Adv{prp}{F}(D) = & + \Pr[K \getsr \{0, 1\}^k : D^{F_K(\cdot)} = 1] - {}\\ + & \Pr[R \getsr \Perm{L} : D^{R(\cdot)} = 1]. + \end{eqnarray*} + + We define $\InSec{prp}(F; t, q) = \max_D \Adv{prp}{F}(D)$ exactly as for + PRFs, and the notion of $(t, q, \epsilon)$-security is the same. +\end{slide} + +\begin{slide} + \head{Pseudorandomness, 5: super PRPs} + + PRPs are bijective. 
A \emph{super PRP} is a PRP which remains secure when + the distinguisher is allowed to make inverse queries: + \begin{eqnarray*}[rl] + \Adv{sprp}{F}(D) = & + \Pr[D^{F_K(\cdot), F_K^{-1}(\cdot)} = 1 \mid + K \getsr \{0, 1\}^k] - {} \\ + & \Pr[R \getsr \Perm{L} : D^{R(\cdot), R^{-1}(\cdot)} = 1]. + \end{eqnarray*} + Since there are two oracles, we count queries to both when evaluating the + insecurity: + \[ \InSec{sprp}(F; t, q, q') = \max_D \Adv{sprp}{F}(D) \] + where the maximum is taken over all distinguishers $D$ which run for time + $t$, make $q$ queries to the standard oracle, and $q'$ queries to the + inverse oracle. If $\InSec{sprp}(F; t, q, q') \le \epsilon$ then we say + $F$ is a $(t, q, q', \epsilon)$-secure super PRP\@. +\end{slide} + +\begin{exercise} + Note that the key length hasn't appeared anywhere in the definition of + insecurity for a PRP. Derive lower bounds for the insecurity of a PRP with + a $k$-bit key. + \answer% + Let $E\colon \{0, 1\}^k \times \{0, 1\}^L \to \{0, 1\}^L$ be a PRP. Fix + $n$ and $c$. Then consider adversary $S^{E(\cdot)}$: \{ \FOR $i = 0$ \TO + $c - 1$ \DO $y[i] \gets E(i)$; \FOR $K = 0$ \TO $n - 1$ \DO \{ $i \gets 0$; + $\id{good} \gets 1$; \WHILE $i < c \land \id{good} = 1$ \DO \{ \IF $E_K(i) + \ne y[i]$ \THEN $\id{good} \gets 0$;~\} \IF $\id{good} = 1$ \THEN \RETURN + $1$;~\}~\}. Then $\Adv{prp}{E}(S) \ge n(2^{-k} - 2^{-Lc})$. +\end{exercise} + +\begin{slide} + \head{Block ciphers: PRPs are PRFs} + + We model block ciphers as families of PRPs (not super PRPs). Most of the + analysis works best on PRFs, though. We show that a PRP makes a `pretty + good' PRF, as long as it's not over-used. + + Let $F$ be any PRP family. Then + \[ \InSec{prf}(F; t, q) \le + \InSec{prp}(F; t, q) + \frac{q(q - 1)}{2^{L+1}}. \]% + This is a useful result. As long as $q^2$ is small compared to $2^L$ -- + the block size -- then a PRP makes a good PRF. 
+ + The value $2^{L/2}$ is often called the \emph{Birthday bound} (after the + birthday `paradox'). We shall meet it often when we examine modes of + operation. +\end{slide} + +\begin{proof} + Let $F$ be a PRP family, and $K$ a randomly chosen key from the keyspace of + $F$; let $R \inr \Func{L}{L}$ be a random function, and let $P \inr + \Perm{L}$ be a random permutation. + + Let $A$ be any distinguisher running in time $t$, and making $q$ oracle + queries. Then: + \begin{eqnarray*}[rl] + \Adv{prf}{F}(A) + & = \Pr[A^{F_K(\cdot)} = 1] - \Pr[A^{R(\cdot)} = 1] \\ + & = \Pr[A^{F_K(\cdot)} = 1] - \Pr[A^{P(\cdot)} = 1] + + \Pr[A^{P(\cdot)} = 1] - \Pr[A^{R(\cdot)} = 1] \\ + & = \Adv{prp}{F}(A) + \Pr[A^{P(\cdot)} = 1] - \Pr[A^{R(\cdot)} = 1] \\ + & = \Adv{prp}{F}(A) + \Pr[A^{R(\cdot)} = 0] - \Pr[A^{P(\cdot)} = 0]. + \end{eqnarray*} + Now we need to bound the quantity $\Pr[A^{R(\cdot)} = 0] - \Pr[A^{P(\cdot)} + = 0]$. Let $D$ be the event that all of the \emph{distinct} oracle queries + to $R$ yield distinct results. Then + \begin{eqnarray*}[rl] + \Pr[A^{R(\cdot)} = 0] + & = \Pr[A^{R(\cdot)} = 0 \mid D] \Pr[D] + + \Pr[A^{R(\cdot)} = 0 \mid \lnot D] \Pr[\lnot D] \\ + & = \Pr[A^{P(\cdot)} = 0] \Pr[D] + + \Pr[A^{R(\cdot)} = 0 \mid \lnot D] \Pr[\lnot D] \\ + & \le \Pr[A^{P(\cdot)} = 0] + \Pr[\lnot D] \\ + & \le \Pr[A^{P(\cdot)} = 0] + \frac{q(q - 1)}{2^{L+1}}. + \end{eqnarray*} + Substituting this into the above equation yields + \[ \Adv{prf}{F}(A) \le \Adv{prp}{F}(A) + \frac{q(q - 1)}{2^{L+1}}. \] + Select $A$ such that $\Adv{prf}{F}(A) = \InSec{prf}(F; t, q)$. We know + $\Adv{prp}{F}(A) \le \InSec{prp}(F; t, q)$ by definition. The result + follows. +\end{proof} + +\begin{slide} + \topic{hash functions} + \head{Hash functions, 1: properties} + + Hash functions like MD5 and SHA-1 are extremely useful primitives. What + properties do we expect of them? This out to be an extremely difficult + question to answer. + \begin{itemize} + \item One-wayness. 
We've seen a definition for this already.  But it's not
+    enough.
+  \item Collision-resistance.  This is the property usually claimed as the
+    requirement for use in digital signature systems.  We'll look at this
+    later.
+  \item Randomness.  What does this mean, when the function is completely
+    public?  A distinguishability criterion is clearly hopeless.
+  \end{itemize}
+\end{slide}
+
+\begin{slide}
+  \head{Hash functions, 2: Merkle-Damg\aa{}rd iterated hashing
+    \cite{Damgaard:1990:DPH, Merkle:1991:FSE}}
+
+  Let $F\colon \{0, 1\}^{k+L} \to \{0, 1\}^k$ be a \emph{compression}
+  function.  Now consider the function $H\colon \{0, 1\}^* \to \{0, 1\}^k$
+  which transforms an input string $x$ as follows:
+  \begin{enumerate}
+  \item Pad $x$ to a multiple of $L$ bits in some injective way.  Divide the
+    padded message into $L$-bit blocks $x_0$, $x_1$, \ldots, $x_{n-1}$.
+  \item Fix some $k$-bit constant $I$.  Let $I_0 = I$.  Define $I_{i+1} =
+    F(I_i \cat x_i)$ for $0 \le i < n$.
+  \item The result $H(x) = I_n$.
+  \end{enumerate}
+
+  Suppose we have two strings $x \ne y$, such that $H(x) = H(y)$; i.e., a
+  \emph{collision}.  Then \emph{either} we can find a collision for $F$
+  \emph{or} a string $z$ for which $F(z) = I$.  (This is why initialization
+  vectors for hash functions have such obviously regular forms.)
+\end{slide}
+
+\begin{proof}
+  Let $x_0, x_1, \ldots, x_{n-1}$ and $x'_0, x'_1, \ldots, x'_{n'-1}$ be
+  the $L$-bit blocks of two distinct (padded) messages, and without loss
+  of generality suppose that $n \ge n'$.  Let $I_0 = I'_0 = I$, let
+  $I_{i+1} = F(I_i \cat x_i)$, and $I'_{i+1} = F(I'_i \cat x'_i)$.  We
+  have $I_n = I'_{n'}$.
+
+  We prove the result by induction on $n$.  The case $n = 0$ is trivially
+  true, since there is only one zero-block message.  Suppose, then, that the
+  result is true for $n$-block messages.  There are three cases to consider.
+  Firstly, if $n' = 0$ then $F(I_n \cat x_n) = I$. 
Secondly, if $I_n \ne + I'_{n'}$ or $x_n \ne x'_{n'}$, then we have a collision, for $F(I_n \cat + x_n) = I_n = I'_{n'} = F(I'_{n'} \cat x'_{n'})$. Finally, if $I_n = + I'_{n'}$ and $x_n = x'_{n'}$ then we remove the final block from both + messages, and because the remaining messages must differ in at least one + block, we can apply the inductive hypothesis on these shorter messages to + complete the proof. +\end{proof} + +\begin{slide} + \head{Hash functions, 2: any-collision resistance} + + The statement usually made about a `good' hash function $h$ is that it + should be `difficult' to find a collision: i.e., two preimages $x \ne y$ + where $H(x) = H(y)$. How do we formalize this? Here's one attempt: + \begin{eqlines*} + \Succ{acr}{H}(A) = \Pr[(x, y) \gets A : x \ne y \land H(x) = H(y)]; \\ + \InSec{acr}(H; t) = \max_A \Succ{acr}{H}(A). + \end{eqlines*} + But this doesn't work. There clearly \emph{exists} an adversary which + already `knows' the a collision for $H$ and just outputs the right answer. + It succeeds very quickly, and with probability 1. So this definition is + impossible to satisfy. +\end{slide} + +\begin{slide} + \head{Hash functions, 3: targetted collision resistance} + + The concept of targetted collision resistance is relatively new, but quite + promising. It replaces a single hash function with a \emph{family} of hash + functions. They're not really keyed, because the indices aren't kept + secret. + + When making a signature, an index $i$ is chosen at random, and the + signature for message $m$ is formed over the pair $(i, H_i(M))$. + + TCR-hash functions are the subject of ongoing research. No practical + designs exist at the moment. 
+\end{slide} + +\begin{slide} + \head{Hash functions, 4: targetted collision resistance (cont.)} + + Consider the following experiment: + \begin{program} + $\Expt{tcr}{H}(A)$: \+ \\ + $(x, s) \gets A(\cookie{find})$; \\ + $i \getsr \keys H$; \\ + $y \gets A(\cookie{collide}, i, s)$; \\ + \IF $x \ne y \land H_i(x) = H_i(y)$ + \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \end{program} + The security of a TCR-hash function is measured as: + \[ \InSec{tcr}(H; t) = \max_A \Pr[\Expt{tcr}{H}(A) = 1] \] + where the maximum is taken over all adversaries running in time $t$. We + define $(t, \epsilon)$-security as usual. +\end{slide} + +\begin{slide} + \head{Hash functions, 5: random oracles \cite{Bellare:1993:ROP}} + + In practice, we expect much more than just collision resistance from hash + functions: we expect a certain amount of `random' behaviour. But this is + hard to quantify. + + One approach is to model a hash function as a `random oracle', i.e., an + oracle containing a function chosen at random, used by the construction + under consideration, and made available to the adversary. The idea is that + reductions can `capture' the knowledge of an adversary by examining the + queries it makes to its random oracle. + + Hash functions \emph{aren't} random oracles. But a random oracle proof is + better than nothing. +\end{slide} + +\xcalways\subsection{Standard assumptions}\x + +\begin{slide} + \head{Standard assumptions} + + There are a number of `standard' assumptions that are made about the + difficulty of various problems: + \begin{itemize} + \item IFP, the Integer Factorization Problem; + \item QRP, the Quadratic Residuosity Problem; + \item DLP, the Discrete Logarithm Problem; + \item RSAP, the RSA Problem; and + \item CDH, the Computational Diffie-Hellman problem and its variants + \end{itemize} + \cite{Menezes:1997:HAC} has excellent material on the above. 
+\end{slide} + +\begin{slide} + \topic{integer factorization} + \head{The Integer Factorization Problem, 1} + + We often assume that large integers of the form $n = p q$, where $p$ and + $q$ are primes of roughly the same size, are `hard' to factor. + Specifically, there is no algorithm which will factor such an $n$ in time + bounded by a polynomial function of $\log n$. + + The difficulty of various other problems, e.g., Quadratic Residuosity, or + RSA, depend on the difficulty of factoring; however, it is not yet known + whether the ability to solve QRP or RSAP can be used to factor. +\end{slide} + +\begin{slide} + \head{The Integer Factorization Problem, 2: square roots} + + The problem of extracting square roots modulo $n = p q$ is provably as hard + as factoring. This is the basis of Rabin's public key encryption and + digital signature schemes. We shall analyse these later. + + Suppose $Q(n, y)$ is an algorithm which returns an $x$ such that $x^2 + \equiv y \pmod{n}$, provided such an $x$ exists. Then we can find a + nontrivial factor of $n$ as follows: + \begin{program} + Algorithm $\id{find-factor}(n)$: \+ \\ + \REPEAT \\ \quad\=\+\kill + $x \getsr \{1, 2, \ldots, n - 1\}$; \\ + $y \gets x^2 \bmod n$; \\ + $x' \gets Q(n, y)$; \\ + $p \gets \gcd(x + x', n)$; \\ + \IF $p \notin \{1, n\}$ \THEN \RETURN $p$; \- \\ + \FOREVER; + \end{program} +\end{slide} + +\begin{proof} + The program attempts to find two square roots of $y$ mod $n$. It's easy to + see that this might lead to factors of $n$. If $x^2 \equiv x'^2 \pmod{n}$ + then $x^2 - x'^2 = k n$ for some constant $k$. Then $(x + x')(x - x')$ is + a factorization of $k n$. It remains to prove the probability bound on $x + + x'$ being a nontrivial factor of $n$. + + Let $n$ be an odd composite. Then, if $x \not\equiv \pm y \pmod{n}$ but + $x^2 \equiv y^2 \pmod{n}$, then $\gcd(x + y, n)$ is a nontrivial factor of + $n$. 
+ + Firstly, we claim that, if $p$ is an odd prime then the congruence $x^2 + \equiv y \pmod{p}$ has precisely two solutions $x$, $x'$ such that $x + \equiv -x' \pmod{p}$. Let $g$ be primitive mod $p$, with $x = g^\alpha$, + $x' = g^\beta$. Then $g^{2 \alpha} \equiv g^{2 \beta} \pmod{p}$, so $2 + \alpha \equiv 2 \beta \pmod{p - 1}$. But $p - 1$ is even, by hypothesis, + so $\alpha \equiv \beta \pmod{(p - 1)/2}$. But $g^{(p-1)/2} \equiv -1 + \pmod{p}$; hence $x \equiv \pm x' \pmod{p}$, proving the claim. + + There must exist odd primes $p$, $q$, such that $p|n$ and $q|n$, and $x + \equiv -y \pmod{p}$ and $x \equiv y \pmod{q}$, for if not, then $x \equiv + \pm y \pmod{n}$ contrary to hypothesis. But then $x + y \equiv 0 + \pmod{p}$, so $p|(x + y)$; but $x + y \equiv 2 x \not\equiv 0 \pmod{q}$, + since $q$ is odd. Hence, $p$ divides $x + y$, but $q$ does not, so $\gcd(x + + y, n)$ is a nontrivial factor of $n$, completing the proof. +\end{proof} + +\begin{slide} + \topic{quadratic residuosity} + \head{The Quadratic Residuosity Problem} + + If there is an $x$ such that $x^2 \equiv y \pmod{n}$ then $y$ is a + \emph{quadratic residue modulo $n$}, and we write $y \in Q_n$; if there is + no such $x$ then $y$ is a \emph{quadratic nonresidue modulo $n$}. + + If $p$ is prime, then we can use the \emph{Legendre symbol} to decide + whether $x$ is a quadratic residue mod $p$: + \[ \jacobi{x}{p} = x^{(p-1)/2} \bmod p = + \begin{cases} + 0 & if $p$ divides $x$ \\ + -1 & if $x$ is a quadratic nonresidue mod $p$ \\ + +1 & if $x$ is a quadratic residue mod $p$ + \end{cases}. \]% + The \emph{Jacobi symbol} (written the same way) is defined for odd~$n$: if + $n = p_1^{a_1} p_2^{a_2} \cdots p_k^{a_k}$ where the $p_i$ are prime, then + \[ \jacobi{x}{n} = + \jacobi{x}{p_1}^{a_1} \jacobi{x}{p_2}^{a_2} \cdots + \jacobi{x}{p_k}^{a_k}. \]% + This can be efficiently computed without knowledge of the factors of $n$ + \cite[Section~2.4.5]{Menezes:1997:HAC}. 
+\end{slide} + +\begin{slide} + \head{The Quadratic Residuosity Problem (cont.)} + + If $\jacobi{x}{n} = -1$ then $x$ is certainly \emph{not} a quadratic + residue mod $n$; however, if $\jacobi{x}{n} = 1$ then $x$ might be a + quadratic residue or it might not; if not, then we say that $x$ is a + \emph{pseudosquare}. + + If $n = p q$ is a product of two primes and $x \inr (\Z/n\Z)^*$ is chosen + at random, then + \[ \Pr\Bigl[x \in Q_n \Bigm| \jacobi{x}{n} = 1\Bigr] = \frac{1}{2}. \] + + The problem of distinguishing pseudosquares from quadratic residues is + called the Quadratic Residuosity Problem (QRP). It is not known how to + solve this problem without factoring $n$. +\end{slide} + +\begin{slide} + \topic{discrete logarithms} + \head{The Discrete Logarithm Problem} + + The (Integer) Discrete Logarithm Problem asks for the solution $x$ given a + congruence of the form $g^x \equiv y \pmod{n}$. This seems to be about as + difficult as factoring. (The ability to solve discrete logs modulo $n$ is + sufficient to factor $n$. The best known algorithms for IFP and DLP have + the same running time.) + + The problem generalizes to other cyclic groups, e.g., elliptic curves over + finite fields. +\end{slide} + +\begin{slide} + \topic{self-reducibility} + \head{Self-reducibility, 1} + + The problems of square-root extraction, deciding quadratic residuosity, the + RSA problem, and finding discrete logarithms share the property of being + \emph{randomly self-reducible}; i.e., an instance of the problem can be + transformed into many different derived instances \emph{without skewing the + probability distribution of problem instances}, such that a solution to + one of the derived instances yields a solution to the original one. + + This is a good property to have. It implies that `most' problem instances + are as hard as the hardest instances. 
+\end{slide} + +\begin{slide} + \head{Self-reducibility, 2: the RSA problem \cite{Rivest:1978:MOD}} + + The RSA problem is to compute $e$-th roots modulo $n = p q$, where $e$ is + relatively prime to $n$. Suppose that the algorithm $S(n, e, y)$ returns a + value $x$ such that $x^e \equiv y \pmod{n}$ for `many' choices of $y$, or + the special symbol $\bot$ otherwise. The following probabilistic algorithm + then solves the RSA problem for arbitary $y$: + \begin{program} + Algorithm $\id{solve-rsa}(n, e, y)$: \+ \\ + \REPEAT \\ \quad\=\+\kill + $x' \getsr \{1, 2, \ldots, n - 1\}$; \\ + \IF $\gcd(x', n) = 1$ \THEN \\ \quad\=\+\kill + $y' \gets y x'^e \bmod n$; \\ + $x \gets S(n, e, y')$; \\ + \IF $x \ne \bot$ \THEN \RETURN $x x'^{-1} \bmod n$; \-\- \\ + \FOREVER; + \end{program} +\end{slide} + +\begin{slide} + \topic{the Diffie-Hellman problem} + \head{The Diffie-Hellman problem \cite{Diffie:1976:NDC}} + + Let $G = \langle g \rangle$ be a cyclic group or order $q$. Let $\alpha$ + and $\beta$ be indices, $\alpha, \beta \in \Z/q\Z$. + + The (computational) \emph{Diffie-Hellman} problem is, given $g^\alpha$ and + $g^\beta$, to find $g^{\alpha\beta}$. + + The (computational) Diffie-Hellman \emph{assumption} is that there is no + probabilistic algorithm which solves the computational Diffie-Hellman + problem in time polynomial in $q$. + + Obviously, being able to compute discrete logs in $G$ efficiently would + yield a solution to the Diffie-Hellman problem. But it may be the case + that the Diffie-Hellman problem is easier than the discrete log problem. + + The Diffie-Hellman problem is self-reducible. +\end{slide} + +\begin{slide} + \head{The Decisional Diffie-Hellman assumption \cite{Boneh:1998:DDP}} + + The computational Diffie-Hellman assumption makes a statement only about + algorithms which compute the \emph{entire} answer $g^{\alpha\beta}$. 
Since + Diffie-Hellman is frequently used for key-exchange, what can we say about + the ability of an adversary to guess any of the bits? + + The Decisional Diffie-Hellman (DDH) assumption asserts that, if you don't + know $\alpha$ or $\beta$, then it's hard to tell $g^{\alpha\beta}$ from a + random element of $G$; that is, that the distributions of the following + experiments are computationally indistinguishable: + \begin{program} + $\alpha \getsr \Z/q\Z;$ \\ + $\beta \getsr \Z/q\Z;$ \\ + \RETURN $(g^\alpha, g^\beta, g^{\alpha\beta})$; + \next + $\alpha \getsr \Z/q\Z;$ \\ + $\beta \getsr \Z/q\Z;$ \\ + $\gamma \getsr \Z/q\Z;$ \\ + \RETURN $(g^\alpha, g^\beta, g^\gamma)$; + \end{program} +\end{slide} + +\begin{slide} + \head{The Decisional Diffie-Hellman assumption (cont.)} + + If $A$ is an algorithm attempting to solve DDH in the group $G$, then we + write its advantage as + \begin{eqnarray*}[rl] + \Adv{ddh}{G}(A) = + & \Pr[\alpha \getsr \Z/q\Z; \beta \getsr \Z/q\Z : + A(g^\alpha, g^\beta, g^{\alpha\beta}) = 1] - {} \\ + & \Pr[\alpha \getsr \Z/q\Z; \beta \getsr \Z/q\Z; + \gamma \getsr \Z/q\Z : + A(g^\alpha, g^\beta, g^\gamma) = 1] + \end{eqnarray*} + and the insecurity of DDH in $G$ as + \[ \InSec{ddh}(G; t) = \max_A \Adv{ddh}{G}(A) \] + with the maximum over all algorithms $A$ running in time $t$. +\end{slide} + +\begin{slide} + \head{The Decisional Diffie-Hellman assumption (cont.)} + + If you can solve the computational Diffie-Hellman problem, you can solve + the decisional version. If you can compute discrete logs, you can solve + both. + + There \emph{are} groups in which the computation problem is (believed to + be) hard, but the decisional problem is easy. In particular, if the group + order $q$ has small factors then the decisional problem isn't hard. 
+\end{slide} + +\endinput + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "ips" +%%% End: diff --git a/chan.tex b/chan.tex new file mode 100644 index 0000000..6e43f21 --- /dev/null +++ b/chan.tex @@ -0,0 +1,13 @@ +\xcalways\section{Secure channels}\x + +%%% * Secure channels: unauthenticated and authenticated links +%%% models; authenticators and simulation; the message transfer +%%% protocol and MT-authenticators; key-exchange, SK-security, +%%% perfect forward secrecy and Diffie-Hellman. + +\endinput + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "ips" +%%% End: diff --git a/enc-ies.tex b/enc-ies.tex new file mode 100644 index 0000000..cd0f5c6 --- /dev/null +++ b/enc-ies.tex @@ -0,0 +1,388 @@ +\xcalways\section{Integrated public-key encryption schemes}\x + +The formulation here is original work by the author. I've tried to +generalize the work by (among others), Shoup, and Abdalla, Bellare and +Rogaway. The final proof is from a Usenet article prompted by David +Hopwood, but based on the DHAES proof by ABR. + +\xcalways\subsection{Introduction and definitions}\x + +\begin{slide} + \topic{motivation} + \head{Motivation for integrated encryption} + + Public-key encryption schemes are very useful, but there are disadvantages: + \begin{itemize} + \item they tend not to be very fast; and + \item they don't tend to be able to encrypt arbitrarily large messages. + \end{itemize} + Symmetric schemes don't have these problems, but key distribution is + harder. It seems sensible to construct `hybrid' schemes which use + public-key techniques for exchanging keys for symmetric encryption schemes. + + Throughout the following, we'll consider our objective to be resistance to + \emph{adaptive chosen-ciphertext} attacks. 
+\end{slide} + +\begin{slide} + \head{An obvious approach} + + A simple approach would be to generate a random key for some secure (i.e., + IND-CCA) symmetric scheme, encrypt the message under that key, and, encrypt + the key under the recipient's public key (using some IND-CCA2 public-key + scheme). + + This is obviously secure. But the security results for most public-key + schemes are less than encouraging: the reductions, even for OAEP+, are + rather `flabby'. + + Can we do \emph{better} than this na\"{\i}ve approach? +\end{slide} + +\begin{slide} + \topic{key encapsulation} + \head{Key-encapsulation mechanisms \cite{Shoup:2001:PIS}} + + A \emph{key-encapsulation mechanism} (KEM) $\mathcal{K} = (G, X, R)$ is a + triple of algorithms: + \begin{itemize} + \item a probabilistic \emph{key-generation} algorithm~$G$, which takes no + input (or a security parameter for asymptotic types) and returns a pair + $(P, K)$; + \item a probabilistic \emph{exchange} algorithm~$E$, which is given a + as input a public key~$P$ and returns a \emph{public value}~$y$ and a + \emph{hash} $h \in \{0, 1\}^k$; and + \item a deterministic \emph{recovery} algorithm~$R$, which is given as + input a private key~$K$ and a public value~$y$ and returns a hash $h$. + \end{itemize} + We require for correctness that, if $(P, K) \in G$ and $(y, h) \in X_P$, + then $R_K(y) = h$. + + The idea is that the key-encapsulation mechanism invents a key and public + value simultaneously. We'll look at such mechanisms constructed from both + trapdoor one-way functions like RSA, and schemes like Diffie-Hellman. +\end{slide} + +\begin{slide} + \topic{oracle hash decision problems} + \head{Oracle hash decision problems} + + We say that a key-encapsulation mechanisms is secure if the corresponding + \emph{oracle hash decision problem} is hard. 
Consider this experiment: + \begin{program} + Experiment $\Expt{ohd-$b$}{\mathcal{K}}(A)$: \+ \\ + $(P, K) \gets G$; \\ + $h_0 \getsr \{0, 1\}^k$; $(y, h_1) \gets X_P$; \\ + $b' \gets A^{R_K(\cdot)}(P, y, h_b)$; \\ + \RETURN $b'$; + \end{program} + We forbid the adversary from querying its $R_K$ oracle at $y$. We define + advantage and insecurity in the usual way: + \begin{eqnarray*}[rl] + \Adv{ohd}{\mathcal{K}}(A) &= + \Pr[\Expt{ohd-$1$}{\mathcal{K}}(A) = 1] - + \Pr[\Expt{ohd-$0$}{\mathcal{K}}(A) = 1] + \\ + \InSec{ohd}(\mathcal{K}; t, q) &= + \max_A \Adv{ohd}{\mathcal{K}}(A) + \end{eqnarray*} + where the maximum is taken over adversaries $A$ running in time $t$ and + issuing $q$ recovery queries. +\end{slide} + +\xcalways\subsection{Constructions for key-encapsulation mechanisms}\x + +\begin{slide} + \topic{trapdoor one-way permutations} + \head{Constructing a KEM from a trapdoor OWP} + + Why might key-encapsulation mechanisms exist, and how do we construct them? + + In the random oracle model, we can construct a KEM from any trapdoor + one-way permutation. Let $\mathcal{T} = (G, f, T)$ be such a one-way + permutation, and let $H\colon \{0, 1\}^* \to \{0, 1\}^k$ be a public random + oracle. We can construct $\Xid{\mathcal{K}}{OWF}^{\mathcal{T}, H} = (G, + \Xid{X}{OWF}^{\mathcal{T}, H}, \Xid{R}{OWF}^{\mathcal{T}, H})$ as follows: + \begin{program} + Algorithm $\Xid{X}{OWF}^{\mathcal{T}, H(\cdot)}_P$: \+ \\ + $x \getsr \dom f_P$; \\ + $h \gets H(x)$; \\ + \RETURN $(f_P(x), h)$; + \next + Algorithm $\Xid{R}{OWF}^{\mathcal{T}, H(\cdot)}_K(y)$: \+ \\ + $x \gets T_K(y)$; \\ + $h \gets H(x)$; \\ + \RETURN $f_P(x)$; + \end{program} + The security of this scheme is then given by: + \[ \InSec{ohd}(\Xid{\mathcal{K}}{OWF}^{\mathcal{T}, H}; t, q_R, q_H) \le + 2 \cdot \InSec{owf}(\mathcal{T}; t + O(q_H) + O(q_R)). \]% +\end{slide} + +\begin{proof} + Let $A$ be an adversary solving the oracle hash decision problem. 
Let $y^* + = f_P(x^*)$ be the challenge public value, and let $h^*$ be the challenge + hash. Suppose that $A$ never queries its random oracle $H$ at $x^*$: then + obviously $h = H(x^*)$ is independent of $A$'s view, and $A$ has no + advantage, and its probability of guessing the hidden bit is precisely + $\frac{1}{2}$. + + Suppose that we choose which OHD experiment uniformly at random. Let $S$ + be the event that the adversary wins, i.e., it correctly guesses the hidden + bit $b$. Then + \[ \Pr[S] = + \frac{\Adv{ohd}{\Xid{\mathcal{K}}{OWF}^{\mathcal{T}, H}}(A)}{2} + + \frac{1}{2}. \]% + Let $F$ be the event that $A$ queries $H$ at $x^*$. Then by Shoup's Lemma + (lemma~\ref{lem:shoup}, page~\pageref{lem:shoup}), + \[ \left|\Pr[S] - \frac{1}{2}\right| \le \Pr[F]. \] + + Now consider this adversary $I$, attempting to invert the one-way function. + \begin{program} + Inverter $I(P, y^*)$: \+ \\ + $\Xid{H}{map} \gets \emptyset$; \\ + $h^* \gets \{0, 1\}^k$; $x^* \gets \bot$; \\ + $b \gets A^{\Xid{R}{sim}(\cdot), \Xid{H}{sim}(\cdot)}(P, y^*, h^*)$; \\ + \RETURN $x^*$; \- \\[\smallskipamount] + Oracle $\Xid{R}{sim}(y)$: \+ \\ + \IF $y \in \dom\Xid{H}{map}$ \THEN \RETURN $\Xid{H}{map}(y)$; \\ + $h \gets \{0, 1\}^k$; \\ + $\Xid{H}{map} \gets \Xid{H}{map} \cup \{ y \mapsto h \}$; \\ + \RETURN $h$; \- \\[\smallskipamount] + Oracle $\Xid{H}{sim}(x)$: \+ \\ + $y \gets f_P(x)$; \\ + \IF $y = y^*$ \THEN \\ \quad \= \+ \kill + $x^* \gets x$; \\ + \IF $y \notin \dom\Xid{H}{map}$ \THEN \\ \quad \= \+ \kill + $b^* \getsr \{0, 1\}$; \\ + \IF $b^* = 1$ \THEN + $\Xid{H}{map} \gets \Xid{H}{map} \cup \{ y \mapsto h^* \}$ \-\-\\ + \RETURN $\Xid{R}{sim}(y)$; + \end{program} + The simulation of the random and `recovery' oracles is a little subtle, but + from the adversary's point of view perfect. Clearly, then, $I$ inverts + $f_P$ whenever $F$ occurs, and + \[ \Succ{owf}{\mathcal{T}}(I) + \ge \Pr[F] + \ge \frac{1}{2} \Adv{ohd}{\Xid{\mathcal{K}}{OWF}^{\mathcal{T}, H}}(A). 
\]% + proving the result. +\end{proof} + +\begin{slide} + \topic{Diffie-Hellman} + \head{A KEM based on Diffie-Hellman \cite{Abdalla:2001:DHIES}} + + Let $G = \langle g \rangle$ be a cyclic group, with $q = |G|$. Then we can + construct a KEM based on the difficulty of the Diffie-Hellman problem in + $G$. Let $H\colon G^2 \to \{0, 1\}^k$ be a public random oracle. + + We define $\Xid{\mathcal{K}}{DH}^{G, H} = (\Xid{G}{DH}^{G, H}, + \Xid{X}{DH}^{G, H}, \Xid{R}{DH}^{G, H})$ as follows: + \begin{program} + Algorithm $\Xid{G}{DH}^{G, H(\cdot, \cdot)}$: \+ \\ + $\alpha \getsr \Z/q\Z$; \\ + \RETURN $(g^\alpha, \alpha)$; + \next + Algorithm $\Xid{X}{DH}^{G, H(\cdot, \cdot)}_a$: \+ \\ + $\beta \getsr \Z/q\Z$; \\ + $h \gets H(g^\beta, a^\beta)$; \\ + \RETURN $(g^\beta, h)$; + \next + Algorithm $\Xid{R}{DH}^{G, H(\cdot, \cdot)}_\alpha(b)$: \+ \\ + $h \gets H(b, b^\alpha)$; \\ + \RETURN $h$; + \end{program} + + But the question of this scheme's security is tricky. It doesn't seem to + fit any of our standard problems. We therefore have to invent a new one! +\end{slide} + +\begin{slide} + \head{The Gap Diffie-Hellman problem} + + The `Gap Diffie-Hellman' problem is essentially the Computational problem, + but given an oracle which can answer the Decisional problem. + + Consider this experiment. + \begin{program} + Experiment $\Expt{gdh}{G}(A)$: \+ \\ + $\alpha \getsr \Z/q\Z$; $\beta \getsr \Z/q\Z$; \\ + $c \gets A^{C(\cdot, \cdot)}(g^\alpha, g^\beta)$; \\ + \IF $c = g^{\alpha\beta}$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \next + Oracle $C(x, y)$: \+ \\ + \IF $y = x^\alpha$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \end{program} + We define the success probability of an adversary $A$ as + \[ \Succ{gdh}{G}(A) = \Pr[\Expt{gdh}{G}(A) = 1] \] + and the insecurity, against algorithms running in time $t$ and making $q_C$ + oracle queries, is + \[ \InSec{gdh}(G; t, q_C) = \max_A \Succ{gdh}{G}(A). 
\] +\end{slide} + +\begin{slide} + \head{Security of the Diffie-Hellman KEM} + + Now that we have our new problem, the security result is relatively easy. + \[ \InSec{ohd}(\Xid{\mathcal{K}}{DH}^{G, H}; t, q_R, q_H) + \le 2\cdot \InSec{gdh}(G; t + O(q_R + q_H), q_H). \]% + + Note that it's very important that the random oracle is given \emph{both} + the public value $b$ and the shared secret $b^\alpha$! Otherwise, the + scheme is \emph{malleable} in a group of composite order. For example, + suppose that $q = 2r$ for some $r$; then if $\alpha$ is even, we have + $(b\cdot g^r)^\alpha = b^\alpha$. Passing $b$ to the oracle ensures that + these queries given independent answers, even if the shared secret is the + same. +\end{slide} + +\begin{proof} + Suppose that $A$ solves the oracle hash decision problem for + $\Xid{\mathcal{K}}{DH}^{G, H}$. Let the input to~$A$ be the triple $(a, b, + h^*)$, where $a = g^\alpha$ and $b = g^\beta$; and let $h^* = + H(g^{\alpha\beta})$. $A$ must decide whether $h = h^*$. Clearly, if $A$ + never queries $H$ at $(g^\beta, g^{\alpha\beta})$ then its advantage is + zero, since it has no information about $h^*$. + + As in the one-way function case, we have + \[ \Pr[F] \ge \frac{1}{2} \Adv{ohd}{\Xid{\mathcal{K}}{DH}^{G, H}}(A) \] + where $F$ is the event that $A$ queries $H$ at $(g^\beta, + g^{\alpha\beta})$. + + We present an algorithm~$B$ which can solve an instance of the Gap + Diffie-Hellman problem in~$G$ whenever event $F$ occurs. 
+ \begin{program} + Algorithm $B^{C(\cdot, \cdot)}(a^*, b^*)$: \+ \\ + $\Xid{R}{map} \gets \emptyset$; $\Xid{H}{map} \gets \emptyset$; \\ + $h^* \gets \{0, 1\}^k$; \\ + $c^* \gets \bot$; \\ + $b \gets A^{\Xid{R}{sim}(\cdot), \Xid{H}{sim}(\cdot)} + (a^*, b^*, h^*)$; \\ + \RETURN $c^*$; \- \\[\smallskipamount] + Oracle $\Xid{R}{sim}(b)$: \+ \\ + \IF $b \in \dom\Xid{R}{map}$ \\ + \THEN \RETURN $\Xid{R}{map}(b)$; \\ + $h \gets \{0, 1\}^k$; \\ + $\Xid{R}{map} \gets \Xid{R}{map} \cup \{ b \mapsto h \}$; \\ + \RETURN $h$; + \next + Oracle $\Xid{H}{sim}(b, c)$: \+ \\ + \IF $C(b, c) = 0$ \THEN \\ \quad \= \+ \kill + \IF $(b, c) \in \dom\Xid{H}{map}$ \THEN + \RETURN $\Xid{H}{map}(b, c)$; \\ + $h \gets \{0, 1\}^k$; \\ + $\Xid{H}{map} \gets \Xid{H}{map} \cup \{ (b, c) \mapsto h \}$; \\ + \RETURN $h$; \- \\ + \IF $b = b^*$ \THEN \\ \quad \= \+ \kill + $c^* \gets c$; \\ + \IF $c \notin \dom\Xid{R}{map}$ \THEN \\ \quad \= \+ \kill + $b^* \getsr \{0, 1\}$; \\ + \IF $b^* = 1$ \THEN + $\Xid{R}{map} \gets \Xid{R}{map} \cup \{ y \mapsto h^* \}$ \-\-\\ + \RETURN $\Xid{R}{sim}(c)$; \- \\ + \end{program} + Hence, we have + \[ \Succ{gdh}{G}(A) + \ge \frac{1}{2} \Adv{ohd}{\Xid{\mathcal{K}}{DH}^{G, H}}(A) \]% + as required. +\end{proof} + +\xcalways\subsection{An integrated encryption scheme}\x + +\begin{slide} + \head{An integrated encryption scheme} + + Let $\mathcal{K} = (G, X, R)$ be a key-encapsulation mechanism, and let + $\mathcal{E} = (E, D)$ be a symmetric encryption scheme. 
We define the + \emph{integrated encryption scheme} $\Xid{\mathcal{E}}{IES}^{\mathcal{K}, + \mathcal{E}} = (\Xid{G}{IES}^{\mathcal{K}, \mathcal{E}}, + \Xid{E}{IES}^{\mathcal{K}, \mathcal{E}}, \Xid{D}{IES}^{\mathcal{K}, + \mathcal{E}})$ as follows: + \begin{program} + Algorithm $\Xid{G}{IES}^{\mathcal{K}, \mathcal{E}}$: \+ \\ + $(P, K) \gets G$; \\ + \RETURN (P, K); + \next + Algorithm $\Xid{E}{IES}^{\mathcal{K}, \mathcal{E}}_P(x)$: \+ \\ + $(y_0, h) \gets X_P$; \\ + $y_1 \gets E_h(x)$; \\ + \RETURN $(y_0, y_1)$; + \next + Algorithm $\Xid{D}{IES}^{\mathcal{K}, \mathcal{E}}_K(y_0, y_1)$: \+ \\ + $h \gets R_K(y_0)$; \\ + $x \gets D_h(y_1)$; \\ + \RETURN $x$; + \end{program} + The security of this scheme relates tightly to that of the individual + components: + \begin{eqnarray*}[Ll] + \InSec{ind-cca2}(\Xid{G}{IES}^{\mathcal{K}, \mathcal{E}}; t, q_D) \\ + & \le + 2 \cdot \InSec{ohd}(\mathcal{K}; t + O(q_D), q_D) + + \InSec{ftg-cca}(\mathcal{E}; t + O(q_D), 0, q_D). + \end{eqnarray*} + Note how weak the security requirements on the encryption scheme are: no + chosen-plaintext queries are permitted! +\end{slide} + +\begin{proof} + Suppose $A$ attacks $\Xid{G}{IES}^{\mathcal{K}, \mathcal{E}}$ in the + IND-CCA2 sense. We construct an adversary $B$ which attacks the KEM. + \begin{program} + Adversary $B^{R(\cdot)}(P, y, h)$: \+ \\ + $(x_0, x_1, s) \gets A^{\Xid{D}{sim}(\cdot)}(\cookie{find}, P)$; \\ + $b \gets \{0, 1\}$; \\ + $z \gets E_h(x_b)$; \\ + $b' \gets A^{\Xid{D}{sim}(\cdot)}(\cookie{guess}, (y, z), s)$; \\ + \IF $b = b'$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \next + Oracle $\Xid{D}{sim}(y_0, y_1)$: \+ \\ + \IF $y_0 = y$ \THEN \RETURN $D_h(y_1)$; \\ + $h' \gets R(y_0)$; \\ + \RETURN $D_{h'}(y_1)$; + \end{program} + If the $h$-value matches the public value $y$ then $B$ provides a correct + simulation of $A$'s attack game, and hence wins with probability + \[ \frac{\Adv{ind-cca2}{\Xid{G}{IES}^{\mathcal{K}, \mathcal{E}}}}{2} + + \frac{1}{2}. 
\]% + We construct a new adversary $C$, attacking $\mathcal{E}$ in the FTG-CCA + sense, to help us bound $B$'s probability of success when $h$ is chosen + randomly. + \begin{program} + Adversary $C^{E(\cdot), D(\cdot)}(\cookie{find})$: \+ \\ + $(P, K) \gets G$; \\ + $y' \gets \bot$; \\ + $(x_0, x_1, s_A) \gets A^{\Xid{D}{sim}(\cdot)}(\cookie{find}, P)$; \\ + \RETURN $(x_0, x_1, (P, K, s_A))$; + \next + Adversary $C^{E(\cdot), D(\cdot)}(\cookie{guess}, y, s)$: \+ \\ + $(P, K, s_A) \gets s$; \\ + $(h', y') \gets X_P$; \\ + $b <- A^{\Xid{D}{sim}(\cdot)}(\cookie{guess}, (y', y), s_A)$; \\ + \RETURN $b$; + \newline + Oracle $\Xid{D}{sim}(y_0, y_1)$: \+ \\ + \IF $y_0 = y'$ \THEN \RETURN $D(y_1)$; \\ + $h \gets R_K(y_0)$; \\ + \RETURN $D_h(y_1)$; + \end{program} + Clearly, $C$ provides the same environment as $B$ does when $h$ is random; + hence, in this case, $B$ wins with probability at most + \[ \frac{\Adv{ftg-cca2}{\mathcal{E}}(C)}{2} + \frac{1}{2}. \] + But + \[ \Adv{ohd}{\mathcal{K}}(B) = + \frac{1}{2}\left( + \Adv{ind-cca2}{\Xid{G}{IES}^{\mathcal{K}, \mathcal{E}}} + + \Adv{ftg-cca2}{\mathcal{E}}(C) \right). \]% + Rearranging yields the required result. +\end{proof} + +\endinput + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "ips" +%%% End: diff --git a/enc-intro.tex b/enc-intro.tex new file mode 100644 index 0000000..6199d4f --- /dev/null +++ b/enc-intro.tex @@ -0,0 +1,103 @@ +\xcalways\section{Introduction to Encryption}\x + +\xcalways\subsection{Security notions and attacks}\x + +%%% * Security notions and attacks: semantic security and find-then- +%%% guess indistinguishability; left-or-right and real-or-random +%%% indistinguishability; chosen plaintext and chosen ciphertext +%%% (lunchtime and adaptive) attacks; non-malleability; plaintext +%%% awareness; funny abbreviations (e.g., IND-CPA, NM-CCA2). + +\begin{slide} + \head{Security notions for encryption} + + What does it mean to say that an encryption scheme is secure? 
+\end{slide} + +\begin{slide} + \topic{adversarial goals} + \head{Encryption: adversarial goals 1} + + \begin{description} + \item [Indistinguishability (find-then-guess)] The adversary chooses two + plaintexts. One is selected at random, and the ciphertext is returned. + The adversary cannot guess which plaintext was chosen with probability + significantly better than $\frac{1}{2}$. + \item [Semantic security] An adversary given a ciphertext cannot compute + anything about the plaintext that it couldn't compute given only its + length. + \end{description} +\end{slide} + +\begin{slide} + \head{Encryption: adversarial goals 2} + + \begin{description} + \item [Indistinguishability (left-or-right)] The adversary is given an + oracle which accepts two plaintexts. Before the game begins, a decision + is taken as to whether the oracle returns the result of encrypting the + `left' plaintext, or the `right' one. The adversary cannot guess which + with probability significantly better than $\frac{1}{2}$. + \item [Indistinguishability (real-or-random)] The adversary is given an + oracle. Before the game begins, a decision is taken as to whether the + oracle correctly encrypts the plaintexts it is given (`real') or whether + it returns a ciphertext for a randomly chosen plaintext of the same + length (`random'). The adversary cannot guess which with probability + significantly better than $\frac{1}{2}$. + \end{description} +\end{slide} + +\begin{slide} + \head{Encryption: adversarial goals 3} + + \begin{description} + \item [Non-malleability] An adversary cannot transform a ciphertext such + that the plaintexts of the two ciphertexts are related, with better than + negligible probability. + \item [Plaintext awareness] An adversary cannot create a ciphertext without + `knowing' (or easily being able to find out) the corresponding plaintext + (or knowing that the ciphertext is invalid), except with negligible + probability. 
+ \end{description} +\end{slide} + +\begin{slide} + \topic{types of attacks} + \head{Encryption: types of attacks} + + \begin{description} + \item [Chosen plaintext] The adversary may encrypt plaintexts of its + choice. In the asymmetric setting, it is given a public key; in the + symmetric setting, it is provided with an encryption oracle. + \item [Chosen ciphertext (lunchtime)] (Find-then-guess, semantic security + and non-malleability) As with chosen plaintext, but the adversary is + given an oracle which can decrypt ciphertexts during its first stage. + \item [Adaptive chosen ciphertexts] As with standard chosen ciphertexts, + except that the adversary is given the decryption oracle for its entire + run. The adversary is forbidden from using the oracle to decrypt + ciphertexts which it is required to distinguish. + \end{description} +\end{slide} + +\begin{slide} + \topic{funny abbreviations} + \head{Funny abbreviations} + + The attack goals are given abbreviations: IND, NM, PA for + indistinguishability, non-malleability and plaintext awareness. + + The attack types are given abbreviations too: CPA, CCA1, CCA2 for chosen + plaintext, chosen ciphertext and adaptive chosen ciphertext. + + Hence, IND-CPA means `indistinguishable under chosen plaintext attack', + NM-CCA2 means `non-malleable under chosen ciphertext attack'. + + PA stands on its own (but there are two different meanings). 
+\end{slide} + +\endinput + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "ips" +%%% End: diff --git a/enc-pub.tex b/enc-pub.tex new file mode 100644 index 0000000..985690c --- /dev/null +++ b/enc-pub.tex @@ -0,0 +1,1576 @@ +\xcalways\section{Public-key encryption}\x + +\xcalways\subsection{Syntax}\x + +\begin{slide} + \head{Syntax of public-key encryption schemes} + + A public-key encryption scheme $\mathcal{E} = (G, E, D)$ is a triple of + algorithms: + \begin{itemize} + \item A probabilistic \emph{key-generation} algorithm $G(k)$ which returns + a matching public/private key pair $(P, K)$. + \item A probabilistic \emph{encryption} algorithm $E(P, m)$ which returns a + ciphertext $c$. We write $E_P(m)$ for $E(P, m)$. + \item A deterministic \emph{decryption} algorithm $D(K, c)$. If $(P, K) + \in G(k)$ and $c \in E(P, m)$ then $m = D(K, c)$. We write $D_K(c)$ for + $D(K, c)$. + \end{itemize} + On those occasions it matters, we write $\mathcal{P} = \dom E_P$ as the + \emph{plaintext space}, and $\mathcal{C} = \dom D_K$ as the + \emph{ciphertext space}. Hence, $E_P\colon \mathcal{P} \to \mathcal{C}$ + and $D_K\colon \mathcal{C} \to \mathcal{P} \cup \{ \bot \}$ (allowing an + `invalid ciphertext' return). +\end{slide} + +\xcalways\subsection{Semantic security and indistinguishability}\x + +\begin{slide} + \head{Notation for oracles} + + We'll use the following decryption oracles throughout, to abbreviate the + properties of the various attacks: + \begin{tabular}[C]{l Mc Mc } + \hlx*{hv} + Attack & D_0(c) & D_1(c) \\ \hlx{vhv} + CPA & \bot & \bot \\ + CCA1 & D_K(c) & \bot \\ + CCA2 & D_K(c) & D_K(c) \\ \hlx*{vh} + \end{tabular} + In all cases, we forbid the adversary from querying a decryption oracle on + a challenge ciphertext, i.e., one returned to it by the experiment. 
+\end{slide} + +\begin{slide} + \topic{semantic security} + \head{Semantic security} + + Semantic security for $\mathcal{E} = (G, E, D)$, against an adversary + $A$ and attack $\id{atk} \in \{\text{cpa}, \text{cca1}, \text{cca2}\}$ + is measured using the following game \cite{Bellare:2000:CST}: + \begin{program} + Experiment $\Expt{sem-\id{atk}-$b$}{\mathcal{E}}(A)$: \+ \\ + $(P, K) \gets G$; \\ + $(\mathcal{M}, s) \gets A^{D_0(\cdot)}(\cookie{select}, P)$; \\ + $x_0 \getsr \mathcal{M}$; $x_1 \getsr \mathcal{M}$; \\ + $y \gets E_P(x_1)$; \\ + $(f, \alpha) \gets A^{D_1(\cdot)}(\cookie{predict}, y, s)$; \\ + \IF $f(x_b) = \alpha$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \end{program} + Here, $\mathcal{M}\colon \mathcal{P} \to [0, 1]$ is a \emph{distribution} + over the plaintext space, and $f\colon \mathcal{P} \to \ran f$ is a + \emph{function} on plaintexts, with $\alpha \in \ran f$. +\end{slide} + +\begin{slide} + \head{Semantic security (cont.)} + + We include the time required to sample the distribution $\mathcal{M}$ and + to compute the function $f$ in the adversary's running time. + + We require that $\mathcal{M}$ is \emph{valid}: i.e., that all messages in + $\supp \mathcal{M}$ have the same length. +\end{slide} + +\begin{slide} + \topic{indistinguishability} + \head{Indistinguishability} + + Indistinguishability for $\mathcal{E} = (G, E, D)$, against an adversary + $A$ and attack $\id{atk} \in \{\text{cpa}, \text{cca1}, \text{cca2}\}$ + is measured using the following game: + \begin{program} + Experiment $\Expt{ind-\id{atk}-$b$}{\mathcal{E}}(A)$: \+ \\ + $(P, K) \gets G$; \\ + $(x_0, x_1, s) \gets A^{D_0(\cdot)}(\cookie{find}, P)$; \\ + \IF $|x_0| \ne |x_1|$ \THEN \RETURN $0$; \\ + $y \gets E_P(x_b)$; \\ + $b' \gets A^{D_1(\cdot)}(\cookie{guess}, y, s)$; \\ + \RETURN $b'$; + \end{program} + In the first stage, the adversary has to choose two plaintexts. 
One is
+  then chosen by the experimenter, encrypted, and the corresponding
+  ciphertext given to the adversary.  The adversary must decide which
+  plaintext was encrypted.
+\end{slide}
+
+\begin{slide}
+  \topic{advantage and insecurity}
+  \head{Advantage and insecurity}
+
+  For a public-key encryption scheme $\mathcal{E}$, under attack $\id{atk}
+  \in \{\text{cpa}, \text{cca1}, \text{cca2}\}$ by an adversary $A$, we
+  define $A$'s advantage by:
+  \begin{eqnarray*}[rl]
+    \Adv{ind-\id{atk}}{\mathcal{E}}(A) &=
+      \Pr[\Expt{ind-\id{atk}-$1$}{\mathcal{E}}(A) = 1] -
+      \Pr[\Expt{ind-\id{atk}-$0$}{\mathcal{E}}(A) = 1];
+    \\
+    \Adv{sem-\id{atk}}{\mathcal{E}}(A) &=
+      \Pr[\Expt{sem-\id{atk}-$1$}{\mathcal{E}}(A) = 1] -
+      \Pr[\Expt{sem-\id{atk}-$0$}{\mathcal{E}}(A) = 1].
+  \end{eqnarray*}
+  We define insecurities for $\id{goal} \in \{\text{ind}, \text{sem}\}$ under
+  chosen plaintext attacks, and chosen ciphertext attacks $\id{cca} \in
+  \{\text{cca1}, \text{cca2}\}$ by:
+  \begin{eqnarray*}
+    \InSec{\id{goal}-cpa}(\mathcal{E}; t) &=
+      \max_A \Adv{\id{goal}-cpa}{\mathcal{E}}(A);
+    \\
+    \InSec{\id{goal}-\id{cca}}(\mathcal{E}; t, q_D) &=
+      \max_A \Adv{\id{goal}-\id{cca}}{\mathcal{E}}(A).
+  \end{eqnarray*}
+  where the maxima are taken over adversaries $A$ which run in time $t$ and,
+  in the case of chosen-ciphertext attacks, issue at most $q_D$ decryption
+  queries.
+\end{slide}
+
+\begin{slide}
+  \topic{good news}
+  \head{Some good news}
+
+  Now there's some good news: \emph{semantic security and (find-then-guess)
+    indistinguishability are almost equivalent}.
+
+  More precisely, if we fix a collection of resource constraints $R$, we have
+  \[ \InSec{sem-\id{atk}}(\mathcal{E}; R) \le
+     \InSec{ind-\id{atk}}(\mathcal{E}; R) \le
+     2 \cdot \InSec{sem-\id{atk}}(\mathcal{E}; R). \]%
+  It's useful to realise that a relatively strong notion like semantic
+  security is actually equivalent to a simpler notion like
+  indistinguishability.  The latter is certainly easier to work with in
+  proofs of security.
+\end{slide} + +\begin{proof} + \label{pf:pub-ind-eq-sem} + Proving that $\text{IND-\id{atk}} \implies \text{SEM-\id{atk}}$ is + pretty easy, so we do that first. Suppose that $A'$ attacks $\mathcal{E}$ + in the semantic security sense. Consider the find-then-guess adversary + $A$: + \begin{program} + Adversary $A^{D(\cdot)}(\cookie{find}, P)$: \+ \\ + $(\mathcal{M}, s') \gets A'^{D(\cdot)}(\cookie{select}, P)$; \\ + $x_0 \getsr \mathcal{M}$; $x_1 \getsr \mathcal{M}$; \\ + \RETURN $(x_0, x_1, (x_1, s'))$; + \next + Adversary $A^{D(\cdot)}(\cookie{guess}, y, s)$: \+ \\ + $(x_1, s') \gets s$; \\ + $(f, \alpha) \gets A'^{D(\cdot)}(\cookie{predict}, y, s')$; \\ + \IF $f(x_1) = \alpha$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \end{program} + Here, $A$ is simulating the semantic security experiment itself, drawing + plaintexts from $A'$'s distribution $\mathcal{M}$ and evaluating its chosen + function. + + We study the result of the experiment + $\Expt{ind-\id{atk}-$b$}{\mathcal{E}}(A)$. Firstly, suppose $b = 1$. + Then $y \in E_P(x_1)$, and $A$ succeeds with the same probability that $A'$ + wins in $\Expt{sem-\id{atk}-$1$}{\mathcal{E}}(A')$. Secondly, suppose $b + = 0$. Then $y \in E_P(x_0)$, but we still compare $A'$'s answer against + $x_1$, as in $\Expt{sem-\id{atk}-$0$}{\mathcal{E}}(A')$, up to + relabelling of $x_0$ and $x_1$. Hence, + \[ \Adv{ind-\id{atk}}{\mathcal{E}}(A) = + \Adv{sem-\id{atk}}{\mathcal{E}}(A'). \]% + + Now we show that $\text{SEM-\id{atk}} \implies \text{IND-\id{atk}}$. + Suppose that $A$ attacks $\mathcal{E}$ in the indistinguishability sense. 
+  Then consider the adversary $A'$ attacking its semantic security:
+  \begin{program}
+    Adversary $A'^{D(\cdot)}(\cookie{select}, P)$: \+ \\
+      $(x'_0, x'_1, s) \gets A^{D(\cdot)}(\cookie{find}, P)$; \\
+      $\mathcal{M} \gets
+        \{ x'_0 \mapsto \frac{1}{2}, x'_1 \mapsto \frac{1}{2} \}$; \\
+      \RETURN $(\mathcal{M}, (x'_0, x'_1, s))$;
+  \next
+    Adversary $A'^{D(\cdot)}(\cookie{predict}, y, s')$: \+ \\
+      $(x'_0, x'_1, s) \gets s'$; \\
+      $b \gets A^{D(\cdot)}(\cookie{guess}, y, s)$; \\
+      \RETURN $(\lambda x.x, x'_b)$;
+  \end{program}
+  Here $A'$ is simply running the indistinguishability experiment.  In the
+  $\cookie{select}$ stage, it runs $A$'s $\cookie{find}$ stage, and returns
+  the uniform distribution over $A$'s two chosen plaintexts.  In the
+  $\cookie{predict}$ stage, $A'$ collects $A$'s $\cookie{guess}$ and returns
+  the identity function $\lambda x.x$ and the guessed plaintext.
+
+  In the case of experiment $\Expt{sem-\id{atk}-$1$}{\mathcal{E}}(A')$, the
+  rules are fair, and we win with probability
+  \[ p = \frac{1}{2} + \frac{\Adv{ind-\id{atk}}{\mathcal{E}}(A)}{2}. \]
+  In the case of $\Expt{sem-\id{atk}-$0$}{\mathcal{E}}(A')$, however, we
+  \emph{lose} in the event that $x_0 = x'_b$.  This happens if \emph{either}
+  $x_0 = x_1$ and we guess correctly (probability $p/2$), \emph{or} if $x_0
+  \ne x_1$ and we guess incorrectly (probability $(1 - p)/2$).  Hence, we
+  see that
+  \[ \Adv{sem-\id{atk}}{\mathcal{E}}(A') =
+       \frac{1}{2} \Adv{ind-\id{atk}}{\mathcal{E}}(A) \]%
+  completing the proof.
+\end{proof}
+
+\xcalways\subsection{Non-malleability}\x
+
+\begin{slide}
+  \topic{definition}
+  \head{Non-malleability}
+
+  The intuition behind non-malleability is that an adversary can't easily
+  take some ciphertext and turn it into some other ciphertext such that the
+  plaintexts have a simple relationship to each other.
+ + Here's a relatively standard definition of non-malleability, from + \cite{Bellare:1998:RAN, Bellare:1999:NEE}: + \begin{program} + Experiment $\Expt{cnm-\id{atk}-$b$}{\mathcal{E}}(A)$: \+ \\ + $(P, K) \gets G$; \\ + $(\mathcal{M}, s) \gets A^{D_0(\cdot)}(\cookie{find}, P)$; + $x_0 \getsr \mathcal{M}$; $x_1 \getsr \mathcal{M}$; + $y \gets E_P(x_1)$; \\ + $(R, \vect{y}) \gets A^{D_1(\cdot)}(\cookie{guess}, y, s)$; + $\vect{x} \gets D_K(\vect{y})$; \\ + \IF $y \notin \vect{y} \land + R(x_b, \vect{x})$ \THEN \RETURN $1$; + \ELSE \RETURN $0$; + \end{program} + In the above, $\mathcal{M}$ is a valid distribution on plaintexts, and $R$ + is an $n$-ary relation on plaintexts. The adversary's advantage is + \[ \Adv{cnm-\id{atk}}{\mathcal{E}}(A) = + \Pr[\Expt{cnm-\id{atk}-$1$}{\mathcal{E}}(A) = 1] - + \Pr[\Expt{cnm-\id{atk}-$0$}{\mathcal{E}}(A) = 1]. \]% +\end{slide} + +\begin{slide} + \topic{more good news} + \head{Non-malleability (more good news)} + + The previous definition involved all sorts of nasties like distributions + and relations. Fortunately, there's an approximately equivalent notion, + based on indistinguishability with a single \emph{parallel} + chosen-ciphertext query: + \begin{program} + Experiment $\Expt{nm-\id{atk}-$b$}{\mathcal{E}}(A)$: \+ \\ + $(P, K) \gets G$; \\ + $(x_0, x_1, s) \gets A^{D_0(\cdot)}(\cookie{find}, P)$; + $y \gets E_P(x_b)$; \\ + $(\vect{y}, t) \gets A^{D_1(\cdot)}(\cookie{choose}, y, s)$; + \IF $y \in \vect{y}$ \THEN \RETURN $0$; \\ + $\vect{x} \gets D_K(\vect{y})$; + $b \gets A^{D_1(\cdot)}(\cookie{guess}, \vect{x}, t)$; + \RETURN $b$; + \end{program} + We define advantage by + \[ \Adv{nm-\id{atk}}{\mathcal{E}}(A) = + \Pr[\Expt{nm-\id{atk}-$1$}{\mathcal{E}}(A) = 1] - + \Pr[\Expt{nm-\id{atk}-$0$}{\mathcal{E}}(A) = 1]. \]% +\end{slide} + +\begin{proof} + Firstly, $\text{NM} \implies \text{CNM}$. Suppose $A'$ attacks + $\mathcal{E}$ in the CNM sense. 
Then we construct $A$ in the obvious way: + \begin{program} + Adversary $A^{D(\cdot)}(\cookie{find}, P)$: \+ \\ + $(\mathcal{M}, s') \gets A'^{D(\cdot)}(\cookie{find}, P)$; \\ + $x_0 \getsr \mathcal{M}$; $x_1 \getsr \mathcal{M}$; \\ + \RETURN $(x_0, x_1, (x_1, s'))$; + \next + Adversary $A^{D(\cdot)}(\cookie{choose}, y, s)$: \+ \\ + $(x_1, s') \gets s$; \\ + $(R, \vect{y}) \gets A'^{D(\cdot)}(\cookie{guess}, y, s')$; \\ + \RETURN $(\vect{y}, (x_1, R))$; + \next + Adversary $A^{D(\cdot)}(\cookie{guess}, \vect{x}, s)$: \+ \\ + $(x_1, R) \gets s$; \\ + \IF $R(x_1, \vect{x})$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \end{program} + Studying the behaviour of $A$, we see that it succeeds precisely when $A'$ + succeeds. Hence + \[ \Adv{nm-\id{atk}}{\mathcal{E}}(A) = + \Adv{cnm-\id{atk}}{\mathcal{E}}(A'). \]% + + Secondly, suppose that $A$ attacks $\mathcal{E}$ in the NM sense. Then we + construct $A'$ in a somewhat tricky manner: $A'$ aims to run the final + stage of $A$ from its relation $R$. + + We can suppose, without loss of generality, that $A$ doesn't require its + chosen ciphertext oracle $D$ during its final $\cookie{guess}$ phase. If + it is allowed an adaptive chosen ciphertext oracle, it can make its + parallel query through $D$ (admittedly at the cost of $|\vect{y}|$ + additional decryption queries). Hence, we don't need to provide + $A(\cookie{guess})$ with a decryption oracle. + + The relation isn't allowed to be probabilistic. Hence, we choose the coins + $\rho \in \{0, 1\}^n$ that $A(\cookie{guess})$ requires in advance. Since + $A$'s running time is bounded, $n$ must also be bounded. We encode the + values $(x'_0, x'_1, t, \rho)$ into the description of $R$ output by + $A'(\cookie{guess})$. 
+
+  \begin{program}
+    Adversary $A'^{D(\cdot)}(\cookie{find}, P)$: \+ \\
+      $(x'_0, x'_1, s) \gets A^{D(\cdot)}(\cookie{find}, P)$; \\
+      $\mathcal{M} \gets
+        \{ x'_0 \mapsto \frac{1}{2}, x'_1 \mapsto \frac{1}{2} \}$; \\
+      \RETURN $(\mathcal{M}, (x'_0, x'_1, P, s))$;
+  \next
+    Adversary $A'^{D(\cdot)}(\cookie{guess}, y, s')$: \+ \\
+      $(x'_0, x'_1, P, s) \gets s'$; \\
+      $(\vect{y}, t) \gets A^{D(\cdot)}(\cookie{choose}, y, s)$; \\
+      $\rho \getsr \{0, 1\}^n$; \\
+      \RETURN $(R, \vect{y})$; \- \\[\smallskipamount]
+    Relation $R(x, \vect{x})$: \+ \\
+      $b' \gets A(\cookie{guess}, \vect{x}, t)$ (with coins $\rho$); \\
+      \IF $x = x'_{b'}$ \THEN \RETURN $1$; \\
+      \ELSE \RETURN $0$;
+  \end{program}
+
+  The analysis of $A'$'s success probability is very similar to the proof
+  that $\text{SEM} \implies \text{IND}$ above.  When the CNM experiment's
+  hidden bit $b = 1$, $A'$ succeeds whenever $A$ correctly guesses which
+  plaintext was encrypted, which occurs with probability
+  \[ p = \frac{1}{2} + \frac{\Adv{nm-\id{atk}}{\mathcal{E}}(A)}{2}. \]
+  When $b = 0$, $A'$ fails when $A$ guesses correctly and $x_0 = x_1$
+  (probability $p/2$), or when $A$ guesses wrongly and $x_0 \ne x_1$
+  (probability $(1 - p)/2$).  Hence, finally,
+  \[ \Adv{cnm-\id{atk}}{\mathcal{E}}(A') =
+       \frac{\Adv{nm-\id{atk}}{\mathcal{E}}(A)}{2}. \]%
+
+  For any arbitrary resource bounds $R$, we have
+  \[ \InSec{cnm-\id{atk}}(\mathcal{E}; R) \le
+     \InSec{nm-\id{atk}}(\mathcal{E}; R) \le
+     2 \cdot \InSec{cnm-\id{atk}}(\mathcal{E}; R'), \]%
+  where $R'$ is related to $R$, but may need to allow additional decryption
+  queries, to cope with NM-CCA2 adversaries which use decryption queries in
+  their final $\cookie{guess}$ stages.
+
+  We conclude that the two notions are almost equivalent, as claimed.
+\end{proof} + +\xcalways\subsection{Relations between security notions}\x + +\begin{slide} + \head{Relations between security notions \cite{Bellare:1998:RAN}} + + %% 3 3 + %% NM-CPA <--------- NM-CCA1 <--------- NM-CCA2 + %% | \__ | \__ ^ ^ + %% | \__ 6 | \__ 7 | | + %% 1| \/_ |1 \/_ 4| |1 + %% | 5/ \__ | / \_ | | + %% v __\ v __\ v | + %% IND-CPA <-------- IND-CCA1 <------- IND-CCA2 + %% 2 2 + + \[ \xymatrix @=2cm { + \txt{NM-CPA} \ar[d]_1 + \POS !<1ex, 0pt> \ar[dr]!<1ex, 0pt>|-@{/}^6 & + \txt{NM-CCA1} \ar[d]^1 \ar[l]_3 \ar[dr]|-@{/}^7 & + \txt{NM-CCA2} \ar[l]_3 \ar@<0.5ex>[d]^1 \\ + \txt{IND-CPA} & + \txt{IND-CCA1} \ar[l]^2 + \POS !<-1ex, 0pt> \ar[ul]!<-1ex, 0pt>|-@{/}^5 & + \txt{IND-CCA2} \ar[l]^2 \ar@<0.5ex>[u]^4 \\ + } \] + + \begin{list}{}{ + \settowidth{\labelwidth}{\textbf{Key}} + \leftmargin\labelwidth\advance\leftmargin\labelsep + \itemindent0pt\let\makelabel\textbf} + \item[Key] \begin{itemize} + \item An arrow $\xy\ar*+{A};<1.5cm, 0cm>*+{B}\endxy$ indicates an + \emph{implication}: if a scheme is secure in notion $A$ then it is also + secure in notion $B$. + \item A crossed arrow $\xy\ar|-@{/}*+{A};<1.5cm, 0cm>*+{B}\endxy$ + indicates a \emph{separation}: there exists a scheme secure in notion + $A$ but not in $B$. + \item The numbers refer to sections of the proof provided in the notes. + \end{itemize} + \end{list} +\end{slide} + +\begin{proof} + Most of these are fairly simple. We use the indistinguishability-based + characterization of non-malleability that we showed earlier, because it + makes life much easier. We assume that schemes meeting each of the + definitions exist, else the theorems are all vacuously true. + + \begin{enumerate} + + \item We show $\text{NM-\id{atk}} \implies \text{IND-\id{atk}}$ for + $\id{atk} \in \{\text{CPA}, \text{CCA1}, \text{CCA2}\}$. Let $[]$ denote + the empty vector. Suppose that $A$ attacks $\mathcal{E}$ in the + $\text{IND-\id{atk}}$ sense. 
+    \begin{program}
+      Adversary $A'^{D(\cdot)}(\cookie{find}, P)$: \+ \\
+        $(x_0, x_1, s) \gets A^{D(\cdot)}(\cookie{find}, P)$; \\
+        \RETURN $(x_0, x_1, s)$;
+    \newline
+      Adversary $A'^{D(\cdot)}(\cookie{choose}, y, s)$: \+ \\
+        \RETURN $([], (y, s))$;
+    \next
+      Adversary $A'^{D(\cdot)}(\cookie{guess}, \vect{x}, s')$: \+ \\
+        $(y, s) \gets s'$; \\
+        $b' \gets A^{D(\cdot)}(\cookie{guess}, y, s)$; \\
+        \RETURN $b'$;
+    \end{program}
+    Evidently $\Adv{nm-\id{atk}}{\mathcal{E}}(A') =
+    \Adv{ind-\id{atk}}{\mathcal{E}}(A)$, and hence
+    \[ \InSec{ind-\id{atk}}(\mathcal{E}; t, q_D) \le
+       \InSec{nm-\id{atk}}(\mathcal{E}; t, q_D), \]%
+    proving the implication.
+
+  \item We show that $\text{IND-$\id{atk}'$} \implies \text{IND-\id{atk}}$
+    for $(\id{atk}', \id{atk}) \in \{(\text{CCA1}, \text{CPA}), (\text{CCA2},
+    \text{CCA1})\}$.  Suppose that $A$ attacks $\mathcal{E}$ in the
+    $\text{IND-\id{atk}}$ sense.
+    \begin{program}
+      Adversary $A'^{D(\cdot)}(\cookie{find}, P)$: \+ \\
+        $(x_0, x_1, s) \gets A^{D_0(\cdot)}(\cookie{find}, P)$; \\
+        \RETURN $(x_0, x_1, s)$;
+    \next
+      Adversary $A'^{D(\cdot)}(\cookie{guess}, y, s)$: \+ \\
+        $b' \gets A^{D_1(\cdot)}(\cookie{guess}, y, s)$; \\
+        \RETURN $b'$;
+    \end{program}
+    The oracles $D_0$ and $D_1$ are defined according to \id{atk}, as
+    shown in table~\ref{tab:se-rel-oracle}.
+    \begin{table}
+      \begin{tabular}[C]{l Mc Mc} \hlx*{hv}
+        \id{atk} & D_0(x) & D_1(x) \\ \hlx{vhv}
+        CPA      & \bot   & \bot   \\
+        CCA1     & D(x)   & \bot   \\ \hlx*{vh}
+      \end{tabular}
+      \caption{Relations between notions of security for public key
+        encryption: decryption oracles}
+      \label{tab:se-rel-oracle}
+    \end{table}
+    Evidently $\Adv{ind-$\id{atk}'$}{\mathcal{E}}(A') =
+    \Adv{ind-\id{atk}}{\mathcal{E}}(A)$, and hence
+    \[ \InSec{ind-\id{atk}}(\mathcal{E}; t, q_D) \le
+       \InSec{ind-$\id{atk}'$}(\mathcal{E}; t, q_D), \]%
+    proving the implication.
+ + \item We show that $\text{NM-$\id{atk}'$} \implies \text{NM-\id{atk}}$ + for $(\id{atk}', \id{atk}) \in \{(\text{CCA1}, \text{CPA}), (\text{CCA2}, + \text{CCA1})\}$. Suppose that $A$ attacks $\mathcal{E}$ in the + $\text{NM-\id{atk}}$ sense. + \begin{program} + Adversary $A'^{D(\cdot)}(\cookie{find}, P)$: \+ \\ + $(x_0, x_1, s) \gets A^{D_0(\cdot)}(\cookie{find}, P)$; \\ + \RETURN $(x_0, x_1, s)$; + \newline + Adversary $A'^{D(\cdot)}(\cookie{choose}, y, s)$: \+ \\ + $(\vect{y}, t) \gets A^{D_1(\cdot)}(\cookie{choose}, y, s)$; \\ + \RETURN $(\vect{y}, t)$; + \next + Adversary $A'^{D(\cdot)}(\cookie{guess}, \vect{x}, t)$: \+ \\ + $b' \gets A^{D_1(\cdot)}(\cookie{guess}, \vect{x}, t)$; \\ + \RETURN $b'$; + \end{program} + The oracles $D_0$ and $D_1$ are defined according to $\id{atk}'$, as + shown in table~\ref{tab:se-rel-oracle}. Evidently + $\Adv{nm-$\id{atk}'$}{\mathcal{E}}(A') = + \Adv{nm-\id{atk}}{\mathcal{E}}(A)$, and hence + \[ \InSec{nm-\id{atk}}(\mathcal{E}; t, q_D) \le + \InSec{nm-$\id{atk}'$}(\mathcal{E}; t, q_D), \]% + proving the implication. + + \item We show that $\text{IND-CCA2} \implies \text{NM-CCA2}$. Suppose that + $A$ is an adversary attacking $\mathcal{E}$ in the NM-CCA2 sense. + \begin{program} + Adversary $A'^{D(\cdot)}(\cookie{find}, P)$: \+ \\ + $(x_0, x_1, s) \gets A^{D_0(\cdot)}(\cookie{find}, P)$; \\ + \RETURN $(x_0, x_1, s)$; + \next + Adversary $A'^{D(\cdot)}(\cookie{guess}, y, s)$: \+ \\ + $(\vect{y}, t) \gets A^{D(\cdot)}(\cookie{choose}, y, s)$; \\ + $\vect{x} = D(\vect{y})$; \\ + $b' \gets A^{D(\cdot)}(\cookie{guess}, \vect{x}, t)$; \\ + \RETURN $b'$; + \end{program} + Evidently $\Adv{ind-cca2}{\mathcal{E}}(A') = + \Adv{nm-cca2}{\mathcal{E}}(A)$, and hence + \[ \InSec{nm-cca2}(\mathcal{E}; t, q_D) \le + \InSec{ind-cca2}(\mathcal{E}; t, q_D), \]% + proving the implication. + + \item We show that $\text{IND-CCA1} \not\implies \text{NM-CPA}$. Suppose + that $\mathcal{E} = (G, E, D)$ is secure in the IND-CCA1 sense. 
Consider + the encryption scheme $\mathcal{E}' = (G', E', D')$: + \begin{program} + Algorithm $G'(k)$: \+ \\ + $(P, K) \gets G$; \\ + \RETURN $(P, K)$; + \next + Algorithm $E'_P(x)$: \+ \\ + $y \gets E_P(x)$; \\ + \RETURN $(0, y)$; + \next + Algorithm $D'_K(y')$: \+ \\ + $(b, y) \gets y'$; \\ + $x \gets D_K(y)$; \\ + \RETURN $x$; + \end{program} + This is a classic example of a malleable encryption scheme. + + Firstly, we show that $\mathcal{E}'$ is still IND-CCA1. Suppose that + $A'$ is an adversary attacking $\mathcal{E}'$ in the sense of IND-CCA1. + Consider $A$, attacking $\mathcal{E}$: + \begin{program} + Adversary $A^{D(\cdot)}(\cookie{find}, P)$: \+ \\ + $(x_0, x_1, s) \gets A'^{\Xid{D'}{sim}(\cdot)}(\cookie{find}, P)$; \\ + \RETURN $(x_0, x_1, s)$; \- \\[\smallskipamount] + Oracle $\Xid{D'}{sim}(y')$: \+ \\ + $(b, y) \gets y'$; \\ + $x \gets D(y)$; \\ + \RETURN $x$; + \next + Adversary $A^\bot(\cookie{guess}, y, s)$: \+ \\ + $b' \gets A'^\bot(\cookie{guess}, y, s)$; \\ + \RETURN $b'$; + \end{program} + Clearly, $A$ simulates the expected environment for $A'$ perfectly; hence + \[ \Adv{ind-cca1}{\mathcal{E}'}(A') = \Adv{ind-cca1}{\mathcal{E}}(A). \] + + Now, we show that $\mathcal{E}'$ is not NM-CPA. Consider the adversary + $C'$: + \begin{program} + Adversary $C'^{D(\cdot)}(\cookie{find}, P')$: \+ \\ + \RETURN $(0, 1, \bot)$; + \newline + Adversary $C'^{D(\cdot)}(\cookie{choose}, y', s)$: \+ \\ + $(b, y) \gets y'$; \\ + $\vect{y} \gets [(1, y)]$; \\ + \RETURN $(\vect{y}, \bot)$; + \next + Adversary $C'^{D(\cdot)}(\cookie{guess}, \vect{x}, t)$: \+ \\ + \RETURN $\vect{x}[0]$; + \end{program} + Since $(0, y) \ne (1, y)$, the experiment provides the plaintext + corresponding to the challenge ciphertext $y'$. + $\Adv{nm-cpa}{\mathcal{E}'}(C') = 1$. Hence, $\mathcal{E}'$ is insecure + in the NM-CPA sense. + + \item We show that $\text{NM-CPA} \not\implies \text{IND-CCA1}$. Suppose + that $\mathcal{E} = (G, E, D)$ is secure in the NM-CPA sense. 
Fix a + security parameter $k$. Consider the encryption scheme $\mathcal{E}' = + (G', E', D')$: + \begin{program} + Algorithm $G'(k)$: \+ \\ + $(P, K) \gets G(k)$; \\ + $u \getsr \{0, 1\}^k$; $v \getsr \{0, 1\}^k$; \\ + \RETURN $((P, u), (K, u, v))$; + \next + Algorithm $E'_{P'}(x)$: \+ \\ + $(P, u) \gets P'$; \\ + $y \gets E_P(x)$; \\ + \RETURN $(0, y)$; + \next + Algorithm $D'_{K'}(y')$: \+ \\ + $(b, y) \gets y'$; \\ + $(K, u, v) \gets K'$; \\ + \IF $b = 0$ \THEN $x \gets D_K(y)$; \\ + \ELSE \IF $y = u$ \THEN $x \gets v$; \\ + \ELSE \IF $y = v$ \THEN $x \gets K$; \\ + \ELSE $x \gets \bot$; \\ + \RETURN $x$; + \end{program} + The idea is that the decryption oracle can be used to leak the key by + following the little $u \to v \to K$ chain, but the parallel + non-malleability oracle can't do this. + + We first show that $\mathcal{E}'$ is still NM-CPA. Suppose that $A'$ is + an adversary attacking $\mathcal{E}'$ in the NM-CPA sense. Consider + this adversary $A$: + \begin{program} + Adversary $A^\bot(\cookie{find}, P)$: \+ \\ + $u \getsr \{0, 1\}^k$; $v \getsr \{0, 1\}^k$; \\ + $(x_0, x_1, s') \gets A'^\bot(\cookie{find}, (P, u))$; \\ + \RETURN $(x_0, x_1, (s', u, v))$; + \newline + Adversary $A^\bot(\cookie{choose}, y, s)$: \+ \\ + $(s', u, v) \gets s$; \\ + $(\vect{y}', t') \gets A'^\bot(\cookie{choose}, y, s')$; \\ + \FOR $j = 0$ \TO $|\vect{y}'| - 1$ \DO + $\vect{x}'[j] \gets \bot$; \\ + \FOR $j = 0$ \TO $|\vect{y}'| - 1$ \DO \\ \quad \= \+ \kill + $(b', y') \gets \vect{y}'[j]$; \\ + \IF $b' = 0$ \THEN $\vect{y}[j] \gets y'$; \\ + \ELSE \IF $y' = u$ \THEN \= $\vect{x}'[j] \gets v$; \\ + \> $\vect{y}[j] \gets \bot$; \\ + \ELSE \IF $y' = v$ \THEN \ABORT; \\ + \ELSE \= $\vect{x}'[j] \gets \bot$; \\ + \> $\vect{y}[j] \gets \bot$; \\ + \RETURN $(\vect{y}, (\vect{x}', t'))$; + \next + Adversary $A^\bot(\cookie{guess}, \vect{x}, t)$: \+ \\ + $(\vect{x}', t') \gets t$; \\ + \FOR $j = 0$ \TO $|\vect{x}| - 1$ \DO \\ + \quad \IF $\vect{x}'[j] = \bot$ \THEN + $\vect{x}'[j] 
\gets \vect{x}[j]$; \\
+      $b' \gets A'^\bot(\cookie{guess}, \vect{x}', t')$; \\
+      \RETURN $b'$;
+    \end{program}
+    Clearly, $A$ behaves indistinguishably from the NM-CPA experiment
+    expected by $A'$, unless $A$ aborts because $A'$ guesses $v$ during its
+    $\cookie{choose}$ phase.  But $v$ is independent of $A'$'s view at that
+    moment, so the probability this occurs is $2^{-k}$.  Hence
+    \[ \Adv{nm-cpa}{\mathcal{E}'}(A') \le
+       \Adv{nm-cpa}{\mathcal{E}}(A) + \frac{1}{2^k}. \]%
+
+    Now to show that $\mathcal{E}'$ is insecure in the IND-CCA1 sense.
+    Consider this adversary:
+    \begin{program}
+      Adversary $C'^{D(\cdot)}(\cookie{find}, P')$: \+ \\
+        $(P, u) \gets P'$; \\
+        $v \gets D((1, u))$; $K \gets D((1, v))$; \\
+        \RETURN $(0, 1, K)$;
+    \next
+      Adversary $C'^\bot(\cookie{guess}, y, K)$: \+ \\
+        $b' \gets D_K(y)$; \\
+        \RETURN $b'$;
+    \end{program}
+    The adversary $C'$ makes 2 decryption queries, and
+    $\Adv{ind-cca1}{\mathcal{E}'}(C') = 1$.  Hence, $\mathcal{E}'$ is
+    insecure in the IND-CCA1 sense.
+
+  \item We show that $\text{NM-CCA1} \not\implies \text{IND-CCA2}$.  Suppose
+    that $\mathcal{E} = (G, E, D)$ is secure in the NM-CCA1 sense.  Let
+    $\mathcal{M} = (T, V)$ be a secure MAC.
Consider the encryption scheme + $\mathcal{E}' = (G', E', D')$: + \begin{program} + Algorithm $G'(k)$: \+ \\ + $(P, K) \gets G(k)$; \\ + $i \getsr \keys \mathcal{M}$; \\ + \RETURN $(P, (K, i))$; + \next + Algorithm $E'_P(x)$: \+ \\ + $y \gets E_P(x)$; \\ + \RETURN $(0, y, \bot)$; + \next + Algorithm $D'_{K'}(y')$: \+ \\ + $(b, y, \tau) \gets y'$; \\ + $(K, i) \gets K'$; \\ + \IF $b = 0$ \THEN $x \gets D_K(y)$; \\ + \ELSE \IF $\tau = \bot$ \THEN $x \gets T_i(y)$; \\ + \ELSE \IF $V_i(y, \tau) = 1$ \THEN $x \gets D_K(y)$; \\ + \ELSE $x \gets \bot$; \\ + \RETURN $x$; + \end{program} + Answering decryption queries only when presented with a correct tag + ensures that the NM-CCA1 adversary can't obtain anything useful with its + queries (hence the scheme remains NM-CCA1-secure), but the IND-CCA2 + adversary can use its adaptive queries to discover the required tag. + + Firstly, we show that $\mathcal{E}'$ is still NM-CCA1-secure. Suppose + $A'$ attacks $\mathcal{E}'$ in the sense of NM-CCA1. Consider the two + adversaries shown in \ref{fig:nm-cca1-sep-ind-cca2}: $A$ attacks the + original $\mathcal{E}$; $A''$ attacks the MAC $\mathcal{M}$. 
+    \begin{figure}
+      \begin{program}
+        Adversary $A^{D(\cdot)}(\cookie{find}, P)$: \+ \\
+          $i \getsr \keys \mathcal{M}$; \\
+          $(x_0, x_1, s') \gets A'^{\Xid{D}{sim}(\cdot)}(\cookie{find}, P)$; \\
+          \RETURN $(x_0, x_1, (s', i))$; \- \\[\smallskipamount]
+        Adversary $A^\bot(\cookie{choose}, y, s)$: \+ \\
+          $(s', i) \gets s$; \\
+          $(\vect{y}', t') \gets A'^\bot(\cookie{choose}, y, s')$; \\
+          \FOR $j = 0$ \TO $|\vect{y}'| - 1$ \DO
+            $\vect{x}'[j] \gets \bot$; \\
+          \FOR $j = 0$ \TO $|\vect{y}'| - 1$ \DO \\ \quad \= \+ \kill
+            $(b', y', \tau') \gets \vect{y}'[j]$; \\
+            \IF $b' = 0$ \THEN $\vect{y}[j] \gets y'$; \\
+            \ELSE \IF $\tau' = \bot$ \THEN \= $\vect{x}'[j] \gets T_i(y')$; \\
+            \> $\vect{y}[j] \gets \bot$; \\
+            \ELSE \IF $V_i(y', \tau') \ne 1$
+            \THEN $\vect{y}[j] \gets \bot$; \\
+            \ELSE \IF $y' = y$ \THEN \ABORT; \\
+            \ELSE $\vect{y}[j] \gets y'$; \- \\
+          \RETURN $(\vect{y}, (\vect{x}', t'))$; \- \\[\smallskipamount]
+        Adversary $A^\bot(\cookie{guess}, \vect{x}, t)$: \+ \\
+          $(\vect{x}', t') \gets t$; \\
+          \FOR $j = 0$ \TO $|\vect{x}| - 1$ \DO \\
+          \quad \IF $\vect{x}'[j] = \bot$ \THEN
+            $\vect{x}'[j] \gets \vect{x}[j]$; \\
+          $b' \gets A'^\bot(\cookie{guess}, \vect{x}', t')$; \\
+          \RETURN $b'$;
+      \next
+        Adversary $A''^{T(\cdot), V(\cdot)}$: \+ \\
+          $(P, K) \gets G$;
+          $b \getsr \{0, 1\}$; \\
+          $(x_0, x_1, s) \gets A'^{\Xid{D}{sim}(\cdot)}(\cookie{find}, P)$; \\
+          $y \gets (0, E_P(x_b), \bot)$; \\
+          $(\vect{y}, t) \gets A'^\bot(\cookie{choose}, y, s)$; \\
+          \FOR $j = 0$ \TO $|\vect{y}| - 1$ \DO \\ \quad \= \+ \kill
+            $(b', y', \tau') \gets \vect{y}[j]$; \\
+            \IF $b' = 1 \land V(y', \tau') = 1$
+            \THEN \RETURN $(y', \tau')$; \- \\
+          \ABORT; \- \\[\smallskipamount]
+        Oracle $\Xid{D}{sim}(y')$: \+ \\
+          $(b, y, \tau) \gets y'$; \\
+          \IF $b = 0$ \THEN $x \gets D_K(y)$; \\
+          \ELSE \IF $\tau = \bot$ \THEN $x \gets T(y)$; \\
+          \ELSE \IF $V(y, \tau) = 1$ \THEN $x \gets D_K(y)$; \\
+          \ELSE $x \gets \bot$; \\
+          \RETURN $x$;
+      \end{program}
+      \caption{From the proof that $\text{NM-CCA1}
\not\implies
+        \text{IND-CCA2}$ -- adversaries $A$ and $A''$, attacking $\mathcal{E}$
+        in the NM-CCA1 sense and the MAC $\mathcal{M}$ respectively}
+      \label{fig:nm-cca1-sep-ind-cca2}
+    \end{figure}
+
+    Suppose, without loss of generality, that if the challenge ciphertext $y$
+    returned to $A'$ matches one of $A'$'s earlier decryption queries then
+    $A'$ wins without making its parallel chosen ciphertext query.
+
+    Let $B$ be the event that $A$ aborts; let $S$ be the event that $A$ wins,
+    and let $S'$ be the event that $A'$ wins.  Since unless it aborts, $A$
+    implements the NM-CCA1 game for $\mathcal{E}'$ perfectly, we must have
+    \[ \Pr[S] \ge \Pr[S'] - \Pr[B]. \]
+    But $B$ is precisely the event in which $A''$ wins its game.  Hence
+    \[ \Adv{nm-cca1}{\mathcal{E}'}(A') \le
+       \Adv{nm-cca1}{\mathcal{E}}(A) + \Adv{suf-cma}{\mathcal{M}}(A'') \]%
+    proving the claim that $\mathcal{E}'$ remains NM-CCA1 secure.
+
+    Now to show that $\mathcal{E}'$ is not IND-CCA2 secure.
+    \begin{program}
+      Adversary $C'^{D(\cdot)}(\cookie{find}, P')$: \+ \\
+        \RETURN $(0, 1, \bot)$;
+    \next
+      Adversary $C'^{D(\cdot)}(\cookie{guess}, y', s)$: \+ \\
+        $(b, y, \tau) \gets y'$; \\
+        $\tau' \gets D((1, y, \bot))$; \\
+        $b' \gets D((1, y, \tau'))$; \\
+        \RETURN $b'$;
+    \end{program}
+    The adversary $C'$ makes 2 decryption queries, and
+    $\Adv{ind-cca2}{\mathcal{E}'}(C') = 1$.  Hence, $\mathcal{E}'$ is
+    insecure in the IND-CCA2 sense.
+
+  \end{enumerate}
+  All present and correct.
+\end{proof}
+
+\begin{exercise}
+  In \cite{Goldwasser:1984:PE}, Shafi Goldwasser and Silvio Micali first
+  introduced the concept of semantic security as a definition of
+  \emph{computationally} secure encryption, and presented the first
+  provably-secure probabilistic encryption scheme.  For this scheme, the
+  private key is a pair of $k$-bit primes $(p, q)$; the public key is their
+  product $n = pq$ and a \emph{pseudosquare} $z$.
Given this starting point,
+  define a public-key encryption scheme, and relate its security in the
+  IND-CPA sense to the difficulty of the Quadratic Residuosity Problem.  Show
+  that the encryption scheme is malleable.
+
+  Hints:
+  \begin{parenum}
+  \item Encrypt the message one bit at a time.
+  \item Choosing a random element of $(\Z/n\Z)^*$ and squaring gives you a
+    random element of $Q_n$.
+  \item You will need to define a formal game for the Quadratic Residuosity
+    Problem.
+  \end{parenum}
+  \answer%
+  Encryption works as $\Xid{E}{GM}_{(n, z)}(x)$: \{ \FOREACH $1\colon x_i$
+  \FROM $x$ \DO \{ $a_i \getsr (\Z/n\Z)^*$; $\vect{y}[i] \gets a_i^2
+  z^{x_i}$~\} \RETURN $\vect{y}$;~\}.  Decryption works as $\Xid{D}{GM}_{(p,
+  q)}(\vect{y})$: \{ $x \gets \emptystring$; \FOR $i = 0$ \TO $|\vect{y}| -
+  1$ \DO \{ \IF $\jacobi{\vect{y}[i]}{p} = 1 \land \jacobi{\vect{y}[i]}{q} =
+  1$ \THEN $x \gets x \cat 0$; \ELSE $x \gets x \cat 1$;~\} \RETURN $x$;~\}.
+
+  To prove that this meets IND-CPA, let $A$ be an adversary attacking the
+  GM scheme.  Now, we present an algorithm which, given an odd composite
+  integer $n$ and an element $z$ with $\jacobi{z}{n} = 1$, decides whether $z
+  \in Q_n$: Algorithm $B(n, z)$: \{ $(x_0, x_1, s) \gets A(\cookie{find}, (n,
+  z))$; $b \getsr \{0, 1\}$; $y \gets \Xid{E}{GM}_{(n, z)}(x_b)$; $b' \gets
+  A(\cookie{guess}, y, s)$; \IF $b = b'$ \THEN \RETURN $0$; \ELSE \RETURN
+  $1$;~\}.  If $z$ is a nonresidue, then $B$ returns $0$ whenever $A$
+  successfully guesses the right bit; if $z \in Q_n$ then the ciphertext $y$
+  returned by $B$ is a string of random quadratic residues independent of the
+  challenge plaintext, and $A$ can have no advantage.  Hence
+  $\InSec{ind-cpa}(\Xid{\mathcal{E}}{GM-$k$}; t) \le 2 \cdot \InSec{qrp}(k;
+  t)$.
+
+  To prove malleability, simply note that multiplying an element
+  $\vect{y}[i]$ by $z$ toggles the corresponding plaintext bit.
+\end{exercise} + +\xcalways\subsection{The ElGamal scheme}\x + +\begin{slide} + \topic{description} + \head{The ElGamal public-key encryption scheme} + + ElGamal's encryption scheme is based on Diffie-Hellman. Let $G = \langle g + \rangle$ be a cyclic group of order $q$. Plaintexts and ciphertexts in the + scheme are elements of $G$. + + The scheme $\Xid{\mathcal{E}}{ElGamal}^G = (\Xid{G}{ElGamal}^G, + \Xid{E}{ElGamal}^G, \Xid{D}{ElGamal}^G)$ is defined by: + \begin{program} + $\Xid{G}{ElGamal}^G$: \+ \\ + $\alpha \getsr \{1, 2, \ldots, q - 1\}$; \\ + \RETURN $(g^\alpha, \alpha)$; + \next + $\Xid{E}{ElGamal}^G_a(x)$: \+ \\ + $\beta \getsr \{1, 2, \ldots, q - 1\}$; \\ + \RETURN $(g^\beta, x a^\beta)$; + \next + $\Xid{D}{ElGamal}^G_\alpha(y)$: \+ \\ + $(b, c) \gets y$; \\ + $x \gets b^{-\alpha} c$; \\ + \RETURN $x$; + \end{program} + This scheme is secure in the IND-CPA sense if the Decisional Diffie-Hellman + problem is hard in $G$. +\end{slide} + +\begin{slide} + \topic{security proof} + \head{Security proof for ElGamal} + + Suppose $A$ is an adversary attacking the ElGamal scheme in the IND-CPA + sense. We construct from it an algorithm which solves the DDH problem + (i.e., given a triple $g^\alpha, g^\beta, c$, decides whether $c = + g^{\alpha\beta}$): + \begin{program} + Algorithm $D(a, b, c)$: \+ \\ + $(x_0, x_1, s) \gets A(\cookie{find}, a)$; \\ + $z \getsr \{0, 1\}$; \\ + $y \gets (b, x_z c)$; \\ + $z' \gets A(\cookie{guess}, y, s)$; \\ + \IF $z = z'$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; + \end{program} +\end{slide} + +\begin{slide} + \head{Security proof for ElGamal (cont.)} + + Let $\alpha$ and $\beta$ be the discrete logs of $a$ and $b$. + + If $c = g^{\alpha\beta}$, then $D$'s success probability is equal to $A$'s + probability of guessing the hidden bit correctly, which is + \[ \frac{\Adv{ind-cpa}{\Xid{\mathcal{E}}{ElGamal}^G}(A)}{2} + \frac{1}{2}. 
\]% + If $c$ is random, then $x_z c$ is uniformly distributed in $G$, and + independent of $b$, so $A$ answers correctly with probability exactly + $\frac{1}{2}$. + + Hence, $\Adv{ddh}{G}(D) = + \Adv{ind-cpa}{\Xid{\mathcal{E}}{ElGamal}^G}(A)/2$, and + \[ \InSec{ind-cpa}(\Xid{\mathcal{E}}{ElGamal}^G; t) \le + 2 \cdot \InSec{ddh}(G; t). \]% +\end{slide} + +\begin{slide} + \topic{other notes} + \head{Notes about ElGamal} + + \begin{itemize} + + \item We needed the Decisional Diffie-Hellman assumption to prove the + security. As noted before, this is a very strong assumption. Still, a + proof based on DDH is a lot better than nothing. + + We really do need the Decisional Diffie-Hellman assumption. An adversary + with a DDH algorithm can submit $x_0 \inr G$ and $x_1 = 1$; it receives a + ciphertext $(b, y)$, and returns $1$ if $(a, b, y)$ looks like a + Diffie-Hellman triple, or $0$ if it looks random. + + \item The plaintexts must be elements of the cyclic group $G$. For + example, if $G$ is a subgroup of $\F_p^*$, it's \emph{not} safe to allow + elements outside the subgroup as plaintexts: an adversary can compare + orders of ciphertext elements to break the semantic security of the + scheme. + + \item ElGamal is malleable. We can decrypt a challenge ciphertext $y = + (g^\beta, a^\beta x)$ by choosing a random $\gamma$ and requesting a + decryption of $y' = (g^{\beta\gamma}, a^{\beta\gamma} x^\gamma)$. + + \end{itemize} +\end{slide} + +\xcalways\subsection{Encrypting using trapdoor one-way functions}\x + +\begin{slide} + \head{Trapdoor one-way functions} + + Trapdoor one-way functions RSA ($x \mapsto x^e \bmod n$) and Rabin ($x + \mapsto x^2 \bmod n$) functions are well-studied. Can we make secure + schemes from them? + + We can't use them directly, however. For a start, the functions are + deterministic, so `encrypting' messages just by doing the modular + exponentiation on the plaintext will leak equality of plaintexts. + + How can we fix these schemes? 
+
+  We'll focus on RSA, because decryption is simpler; Rabin works in a very
+  similar way.
+\end{slide}
+
+\begin{slide}
+  \topic{simple embedding schemes}
+  \head{Simple embedding schemes}
+
+  If we restrict our attention to plaintext messages which are `about' the
+  input size of our one-way function, we'd like to be able to use ciphertexts
+  which are no bigger than the output size of the function.
+
+  Perhaps if we encode the message in some way before passing it through the
+  function, we can improve security.  Ideally, we'd like security against
+  chosen-ciphertext attacks.
+
+  An encryption scheme which processes messages like this is called a
+  \emph{simple embedding scheme}.
+\end{slide}
+
+\begin{slide}
+  \topic{\PKCS1 padding}
+  \head{\PKCS1 encryption padding for RSA \cite{RSA:1993:PRE}}
+
+  Let $n = p q$ be an RSA modulus, with $2^{8(k-1)} < n < 2^{8k}$ -- i.e.,
+  $n$ is a $k$-byte number.  Let $m$ be the message to be encrypted.
+
+  We compute the padded message $\hat{m} = 0^8 \cat T \cat r \cat 0^8 \cat
+  m$, where:
+  \begin{itemize}
+  \item $|\hat{m}| = 8k$, i.e., $\hat{m}$ is a $k$-byte string;
+  \item $0^8$ is the string of 8 zero-bits;
+  \item $T = 00000010$ is an 8-bit \emph{type} code; and
+  \item $r$ is a string of random \emph{non-zero} bytes (\PKCS1 requires at
+    least 8 bytes)
+  \end{itemize}
+  This bit string is then converted into an integer, using a big-endian
+  representation.  Note that $\hat{m} < n$, since its top byte is zero.
+\end{slide}
+
+\begin{slide}
+  \head{\PKCS1 encryption padding for RSA (cont.)}
+
+  Diagrammatically, \PKCS1 encryption padding looks like this:
+  \begin{tabular}[C]{r|c|c|c|c|} \hlx{c{2-5}v}
+    \hex{00} & \hex{02} &
+    \ldots\ random nonzero bytes \ldots &
+    \hex{00} &
+    $m$
+    \\ \hlx{vc{2-5}}
+  \end{tabular}
+
+  Unfortunately, this scheme isn't capable of resisting chosen-ciphertext
+  attacks: Bleichenbacher \cite{Bleichenbacher:1998:CCA} shows how to decrypt
+  a ciphertext $y$ given an oracle (say, an SSL server) which returns whether
+  a given ciphertext is valid (i.e., its padding is correct).
+\end{slide}
+
+\begin{slide}
+  \topic{Optimal Asymmetric Encryption Padding (OAEP)}
+  \head{Optimal Asymmetric Encryption Padding (OAEP) \cite{Bellare:1995:OAE}}
+
+  OAEP is a simple embedding scheme for use with an arbitrary trapdoor
+  one-way function.  It requires two hash functions, $G$ and $H$, which we
+  model as random oracles.
+
+  We assume that our one-way function $f_P$ operates on $n$-bit strings.  Fix
+  a security parameter $k$.  We require two random oracles $G\colon \{0,
+  1\}^k \to \{0, 1\}^{n-k}$ and $H\colon \{0, 1\}^{n-k} \to \{0, 1\}^k$.
+ Given a message $x \in \{0, 1\}^{n-2k}$, we encrypt it as follows: + \begin{program} + Algorithm $\Xid{E}{OAEP}^{\mathcal{T}, G(\cdot), H(\cdot)}_P(x)$: \+ \\ + $r \getsr \{0, 1\}^k$; \\ + $s \gets (x \cat 0^k) \xor G(r)$; $t \gets r \xor H(s)$; \\ + $w \gets s \cat t$; \\ + \RETURN $f_P(w)$; + \next + Algorithm $\Xid{D}{OAEP}^{\mathcal{T}, G(\cdot), H(\cdot)}_K(y)$: \+ \\ + $w \gets T_K(y)$; \\ + \PARSE $w$ \AS $s, k\colon t$; \\ + $r \gets t \xor H(s)$; $x' \gets s \xor G(r)$; \\ + \PARSE $x'$ \AS $x, k\colon c$; \\ + \IF $c = 0^k$ \THEN \RETURN $x$; \\ + \ELSE \RETURN $\bot$; + \end{program} +\end{slide} + +\begin{slide} + \head{Optimal Asymmetric Encryption Padding (OAEP) (cont.)} + + %% x <- {0, 1}^{n-2k} r <-R {0, 1}^k + %% | | + %% | | + %% v | + %% [||]<--- 0^k | + %% | | + %% | | + %% v | + %% (+)<-----------[G]--------------| + %% | | + %% | | + %% | v + %% |-------------[H]------------>(+) + %% | | + %% | | + %% v v + %% < s = (x || 0^k) (+) G(r) > < t = r (+) H(s) > + + \vfil + \[ \begin{graph} + []!{0; <1cm, 0cm>:} + {x \in \{0, 1\}^n}="x" [rrrr] {r \inr \{0, 1\}^k}="r" + "r" :[ddd] *{\xor}="h-xor" + :[d] *++=(2, 0)[F:thicker]{\strut t = r \xor H(s)} + "x" :[d] *{\ocat}="cat" + [r] *+[r]{\mathstrut 0^k} :"cat" + :[d] *{\xor}="g-xor" + :[dd] *++=(4, 0)[F:thicker]{\strut s = (x \cat 0^k) \xor G(r)} + [u] :[rr] *+[F]{H'} :"h-xor" + "r" [dd] :[ll] *+[F]{H} :"g-xor" + \end{graph} \] + \vfil +\end{slide} + +\begin{slide} + \head{Security of OAEP, 1: chosen plaintext attacks} + + We write the encryption scheme formed by using the trapdoor one-way + function $\mathcal{T}$ with OAEP embedding as + $\Xid{\mathcal{E}}{OAEP}^{\mathcal{T}}$. + + The security of OAEP in the IND-CPA sense is given by: + \[ \InSec{ind-cpa}(\Xid{\mathcal{E}}{OAEP}^{\mathcal{T}}; t, q_G, q_H) + \le 2 \left (\frac{q_G}{2^k} + + \InSec{owf}(\mathcal{T}; t + O(q_G q_H)) \right). 
\]% +\end{slide} + +We omit the security proof of OAEP, because it's very similar to the proof +for OAEP+ which is presented in full later. + +\begin{slide} + \topic{possible malleability of OAEP} + \head{OAEP is not (generically) non-malleable \cite{Shoup:2001:OAEPR}} + + If a one-way function might be \emph{XOR-malleable} -- i.e., given $f_P(x)$ + and $\delta$, it's possible to construct $f_P(x \xor \delta)$ without + knowledge of the trapdoor -- then the OAEP encryption scheme is malleable. + + \begin{graph} + []!{0; <1cm, 0cm>:} + *!UR{\parbox{0.5\linewidth}{ + \raggedright + We need to suppose, in addition, that the function leaks the leftmost + $n-k$ bits of its input; e.g., $f(s \cat t) = s \cat f'(t)$. Then + exploiting the differential (pictured right) only requires a pair of + queries to the $H$ oracle, not $G$. Using its parallel + chosen-ciphertext query, the adversary can decrypt its target + plaintext. + }} + !{+(1.5, 0)} + {x}="x" [rr] {r}="r" + "r" :[ddd] *{\xor}="h-xor" ^{0} + :[d] {t}^{H(s) \xor H(s \xor (\delta \cat 0^k))} + "x" :[d] *{\ocat}="cat" _{\delta} + [r] *+[r]{\mathstrut 0^k} :"cat" + :[d] *{\xor}="g-xor" _{\delta \cat 0^k} + :[dd] {s}_{\delta \cat 0^k} + [u] :[r] *+[F]{H'} :"h-xor" + "r" [dd] :[l] *+[F]{H} :"g-xor" + \end{graph} + + Nevertheless, OAEP \emph{does} provide security against chosen-ciphertext + attacks when used with the RSA and Rabin functions. +\end{slide} + +\xcalways\subsection{Plaintext awareness and OAEP+}\x + +\begin{slide} + \head{Plaintext awareness} + + The intuitive idea behind plaintext awareness is that it's hard to + construct a new ciphertext for which you can't easily guess the plaintext + (or guess that the ciphertext is invalid). Obviously, such an idea would + imply security against chosen ciphertext attack -- since the adversary + effectively knows the plaintext anyway, the decryption oracle is useless. 
+ + The formalization introduces a \emph{plaintext extractor} -- an algorithm + which, given a ciphertext and the random oracle queries of the program + which created it, returns the corresponding plaintext. + + Note, then, that plaintext awareness (as currently defined) only works in + the random oracle model. +\end{slide} + +\begin{slide} + \topic{definition of plaintext awareness} + \head{Definition of plaintext awareness \cite{Bellare:1998:RAN}} + + Let $\mathcal{E} = (G, E, D)$ be a public-key encryption scheme. Consider + an adversary $A$ which takes a public key as input and returns a ciphertext + $y$. We run the adversary, providing it with an \emph{encryption} oracle, + and record + \begin{itemize} + \item the set $Q_H$, which contains a pair $(q, h)$ for each random oracle + query $q$ made by the adversary, together with its reply $h$; and + \item the set $C$, which contains the responses (only: not the queries) + from $A$'s encryption oracle. + \end{itemize} + We write $(y, Q_H, C) \gets \id{transcript}(A; z)$ to denote running $A$ on + the arguments $z$ and collecting this information. + + Note that $Q_H$ doesn't include the oracle queries required by the + encryption oracle. +\end{slide} + +\begin{slide} + \head{Definition of plaintext awareness (cont.)} + + An \emph{$\epsilon$-plaintext extractor} for $\mathcal{E}$ is an algorithm + $X$ for which + \begin{eqnarray*}[rl] + \min_A \Pr[ + & + (P, K) \gets G; + (y, Q_H, C) \gets \id{transcript}(A^{E(\cdot), H(\cdot)}; P); \\ + & x \gets X(y, P, Q_H, C) : x = D_K(y)] \ge 1 - \epsilon . + \end{eqnarray*} + + The scheme $\mathcal{E}$ is \emph{$\epsilon$-plaintext aware} (PA) if there + exists an $\epsilon$-plaintext extractor for $\mathcal{E}$ \emph{and} + $\mathcal{E}$ is IND-CPA. + + (The requirement that the scheme meet IND-CPA prevents trivial schemes such + as the identity function from being considered plaintext aware.) 
+\end{slide}
+
+\begin{slide}
+  \topic{chosen-ciphertext security}
+  \head{Plaintext awareness and chosen-ciphertext security
+      \cite{Bellare:1998:RAN}}
+
+  If a public key encryption scheme $\mathcal{E}$ is PA, then it is also
+  IND-CCA2 (and hence NM-CCA2).  This is proven using the plaintext extractor
+  to simulate the decryption oracle.  The encryption oracle in the PA
+  definition is used to feed the challenge ciphertext to the plaintext
+  extractor.
+
+  Quantitatively, if $\mathcal{E}$ is $\epsilon$-plaintext aware, and the
+  plaintext extractor runs in time $t_X(q_H)$, then
+  \[ \InSec{ind-cca2}(\mathcal{E}; t, q_D, q_H) \le
+     \InSec{ind-cpa}(\mathcal{E}; t + q_D t_X(q_H), q_H) + q_D \epsilon. \]%
+
+  The converse is not true: IND-CCA2 does not imply plaintext awareness.
+\end{slide}
+
+\begin{proof}
+  Firstly, we prove that $\text{PA} \implies \text{IND-CCA2}$.  Let
+  $\mathcal{E} = (G, E, D)$ be an $\epsilon$-plaintext aware public-key
+  encryption scheme.  Suppose $A'$ is an adversary attacking $\mathcal{E}$ in
+  the IND-CCA2 sense.  Let $X$ be the $\epsilon$-plaintext extractor for
+  $\mathcal{E}$.  We construct an adversary attacking $\mathcal{E}$ in the
+  IND-CPA sense.
+ \begin{program} + Adversary $A^{H(\cdot)}(\cookie{find}, P)$: \+ \\ + $\Xid{H}{map} \gets \emptyset$; $y^* \gets \bot$; \\ + $(x_0, x_1, s') \gets A'^{\Xid{D}{sim}(\cdot), \Xid{H}{sim}(\cdot)} + (\cookie{find}, P)$; \\ + \RETURN $(x_0, x_1, (\Xid{H}{map}, P, s')$; \- \\[\smallskipamount] + Oracle $\Xid{H}{sim}(x)$: \+ \\ + $h \gets H(x)$; \\ + $\Xid{H}{map} \gets \Xid{H}{map} \cup \{ x \mapsto h \}$; \\ + \RETURN $h$; + \next + Adversary $A^{H(\cdot)}(\cookie{guess}, y^*, s)$: \+ \\ + $(\Xid{H}{map}, P, s') \gets s$; \\ + $b \gets A'^{\Xid{D}{sim}(\cdot), \Xid{H}{sim}(\cdot)} + (\cookie{guess}, y^*, s')$; \\ + \RETURN $b$; \- \\[\smallskipamount] + Oracle $\Xid{D}{sim}(y)$: \+ \\ + $x \gets X(y, P, \Xid{H}{map}, \{ y^* \})$; \\ + \RETURN $x$; + \end{program} + Let $q_D$ be the number of decryption queries made by the adversary. We + can see that, if the plaintext extractor does its job correctly, $A$ + simulates the decryption oracle for $A'$ correctly. Hence + \[ \Adv{ind-cpa}{\mathcal{E}}(A) \ge + \Adv{ind-cca2}{\mathcal{E}}(A') - q_D \epsilon. \]% + Notice how the encryption oracle from the plaintext awareness definition is + used in the proof. + + We can show that $\text{IND-CCA2} \not\implies \text{PA}$ easily enough by + amending an existing IND-CCA2 scheme $\mathcal{E}$ so that it has a + `magical' ciphertext, attached to the public key. Here is our modified + scheme $\mathcal{E}' = (G', E', D')$: + \begin{program} + Algorithm $G'^{H(\cdot)}(k)$: \+ \\ + $(P, K) \gets G^{H(\cdot)}$; \\ + $p \getsr \dom E^{H(\cdot)}_P$; \\ + $c \getsr \ran E^{H(\cdot)}_P$; \\ + \RETURN $((P, c), (K, c, p))$; + \next + Algorithm $E'^{H(\cdot)}_{P, c}(x)$: \+ \\ + $y \gets E^{H(\cdot)}_P(x)$; \\ + \RETURN $y$; + \next + Algorithm $D'^{H(\cdot)}_{K, c, p}(y)$: \+ \\ + \IF $y = c$ \THEN $x \gets p$; \\ + \ELSE $x \gets D^{H(\cdot)}_K(y)$; \\ + \RETURN $x$; + \end{program} + Firstly, we show that it still meets IND-CCA2. 
Let $A'$ be an adversary
+  attacking $\mathcal{E}'$ in the IND-CCA2 sense.  Then $A$, below, achieves
+  the same advantage against $\mathcal{E}$.
+  \begin{program}
+    Adversary $A^{D(\cdot), H(\cdot)}(\cookie{find}, P)$: \+ \\
+      $p \getsr \dom E^{H(\cdot)}_P$; $c \getsr \ran E^{H(\cdot)}_P$; \\
+      $(x_0, x_1, s') \gets A'^{\Xid{D}{sim}(\cdot), H(\cdot)}
+        (\cookie{find}, (P, c))$; \\
+      \RETURN $(x_0, x_1, (p, c, s'))$;
+  \next
+    Adversary $A^{D(\cdot), H(\cdot)}(\cookie{guess}, y, s)$: \+ \\
+      $(p, c, s') \gets s$; \\
+      $b \gets A'^{\Xid{D}{sim}(\cdot), H(\cdot)}(\cookie{guess}, y, s')$; \\
+      \RETURN $b$;
+  \newline
+    Oracle $\Xid{D}{sim}(y)$: \+ \\
+      \IF $y = c$ \THEN $x \gets p$; \\
+      \ELSE $x \gets D(y)$; \\
+      \RETURN $x$;
+  \end{program}
+
+  Now to show that $\mathcal{E}'$ is not plaintext-aware.  Suppose that it
+  is, and there is a plaintext extractor $X$.  We show that $\mathcal{E}$ is
+  not IND-CPA (contradicting the earlier assumption that $\mathcal{E}$ was
+  IND-CCA2).
+  \begin{program}
+    Adversary $A''^{H(\cdot)}(\cookie{find}, P)$: \+ \\
+      $x_0 \getsr \dom E_P$; $x_1 \getsr \dom E_P$; \\
+      \RETURN $(x_0, x_1, (P, x_1))$;
+  \next
+    Adversary $A''^{H(\cdot)}(\cookie{guess}, y, s)$: \+ \\
+      $(P, x_1) \gets s$; \\
+      $x \gets X(y, (P, y), \emptyset, \emptyset)$; \\
+      \IF $x = x_1$ \THEN \RETURN $1$;
+      \ELSE \RETURN $0$;
+  \end{program}
+  Since $x_0$ and $x_1$ are uniform in $\dom E_P$, the transcript $(y, (P,
+  y), \emptyset, \emptyset)$ passed to the extractor is distributed exactly
+  as for the $\mathcal{E}'$ adversary which, given the public key $(P, c)$
+  returns $c$; hence it succeeds with its usual probability $1 - \epsilon$.
+  Then, $A''$'s advantage is
+  \[ \Adv{ind-cpa}{\mathcal{E}}(A'') =
+     1 - 2\epsilon - \frac{1}{|{\dom E_P}|}. \]%
+  This completes the proof.
+\end{proof} + +\begin{slide} + \topic{old definition} + \head{The old definition of plaintext awareness} + + An earlier definition of plaintext awareness omitted the encryption oracle + provided to the adversary. The designers of OAEP proved that it met this + definition of plaintext awareness and asserted, without proof, that this + implied security against adaptive chosen-ciphertext attacks. + + The original definition of plaintext-awareness is sufficient to guarantee + IND-CCA1 security, but it doesn't provide any sort of non-malleability. +\end{slide} + +\begin{slide} + \topic{OAEP+} + \head{Plaintext-aware encryption: OAEP+ \cite{Shoup:2001:OAEPR}, 1} + + OAEP+ is a simple embedding scheme, very similar to OAEP, which acheives + plaintext-awareness. + + We assume that our one-way function $f_P$ operates on $n$-bit strings. Fix + a security parameter $k$. We require three random oracles $G\colon \{0, + 1\}^k \to \{0, 1\}^{n-2k}$, $H\colon \{0, 1\}^{n-k} \to \{0, 1\}^k$, and + $H'\colon \{0, 1\}^{n-k} \to \{0, 1\}^k$. 
+ \begin{program} + Algorithm $\Xid{E}{OAEP+} + ^{\mathcal{T}, G(\cdot), H(\cdot), H'(\cdot)}_P(x)$: \+ \\ + $r \getsr \{0, 1\}^k$; \\ + $c \gets H'(x \cat r)$; \\ + $s \gets (x \xor G(r)) \cat c$; \\ + $t \gets r \xor H(s)$; \\ + $w \gets s \cat t$; \\ + \RETURN $f_P(w)$; + \next + Algorithm $\Xid{D}{OAEP+} + ^{\mathcal{T}, G(\cdot), H(\cdot), H'(\cdot)}_K(y)$: \+ \\ + $w \gets T_K(y)$; \\ + \PARSE $w$ \AS $s, k\colon t$; \\ + $r \gets t \xor H(s)$; \\ + \PARSE $s$ \AS $s', k\colon c$; \\ + $x \gets s' \xor G(r)$; \\ + \IF $c = H'(x \cat r)$ \THEN \RETURN $x$; \\ + \ELSE \RETURN $\bot$; + \end{program} +\end{slide} + +\begin{slide} + \head{Plaintext-aware encryption: OAEP+, 2} + + %% x <- {0, 1}^{n-2k} r <-R {0, 1}^k + %% | | + %% | | + %% | | + %% |-------->[H']<----------------| + %% | | | + %% | | | + %% v | | + %% (+)<-------------[G]<-----------| + %% | | | + %% | | | + %% v | | + %% [||]<-------' | + %% | | + %% | | + %% | v + %% |-------------->[H]---------->(+) + %% | | + %% | | + %% v v + %% < s = (x (+) G(r)) || H'(x || r) > < t = r (+) H(s) > + + \vfil + \[ \begin{graph} + []!{0; <1cm, 0cm>:} + {x \in \{0, 1\}^n}="x" [rrrr] {r \inr \{0, 1\}^k}="r" + "x" [d] :[r] *+[F]{H'}="h'" "r" [d] :"h'" + "r" :[dddd] *{\xor}="h-xor" + :[d] *++=(2, 0)[F:thicker]{\strut t = r \xor H(s)} + "x" :[dd] *{\xor}="g-xor" + :[d] *{\ocat}="cat" + :[dd] *++=(4, 0)[F:thicker] + {\strut s = (x \xor G(r)) \cat H'(x \cat r)} + [u] :[rr] *+[F]{H} :"h-xor" + "r" [dd] :[ll] *+[F]{G} :"g-xor" + "h'" :'[d]*=<8pt>\cir<4pt>{r_l} `d"cat" "cat" + \end{graph} \] + \vfil +\end{slide} + +\begin{slide} + \head{Plaintext-aware encryption: OAEP+, 3} + + Let $\mathcal{T}$ be a trapdoor one-way function. 
Then the insecurity of + the public-key encryption scheme $\Xid{\mathcal{E}}{OAEP+}^{\mathcal{T}}$ + in the IND-CPA sense is given by + \[ \InSec{ind-cpa}(\Xid{\mathcal{E}}{OAEP+}^{\mathcal{T}}; + t, q_G, q_H, q_{H'}) + \le + 2 \left (\frac{q_G}{2^k} + + \InSec{owf}(\mathcal{T}; t + O(q_G q_H)) \right). \]% + Also, $\Xid{\mathcal{E}}{OAEP+}^{\mathcal{T}}$ is $(q_G + 1) + 2^{-k}$-plaintext aware, the plaintext extractor running time being + $O(q_{H'} t_f)$, where $t_f$ is the time required to execute the one-way + function $f$. + + Tying up the various results, then, we can derive that + \begin{eqnarray*}[Ll] + \InSec{ind-cca2}(\Xid{\mathcal{E}}{OAEP+}^{\mathcal{T}}; + t, q_D, q_G, q_H, q_{H'}) \\ + & \le + \frac{(2 + q_D) (q_G + 1) - 2}{2^k} + + 2 \cdot \InSec{owf}(\mathcal{T}; t + O(q_G q_H + q_D q_{H'} t_f)). + \end{eqnarray*} +\end{slide} + +Before we move on to the OAEP+ security results, we state and prove the +following lemma, due (I believe) to Victor Shoup. + +\begin{lemma}[Shoup] + \label{lem:shoup} + If $X$, $Y$ and $F$ are events, and + \[ \Pr[X \land \lnot F] = \Pr[Y \land \lnot F] \] + then + \[ |{\Pr[X]} - \Pr[Y]| \le \Pr[F]. \] +\end{lemma} +\begin{proof} + We have: + \begin{eqnarray*}[rll] + \Pr[X] &= \Pr[X \land F] &+ \Pr[X \land \lnot F] \\ + \Pr[Y] &= \Pr[Y \land F] &+ \Pr[Y \land \lnot F] + \end{eqnarray*} + Subtracting gives + \[ |{\Pr[X]} - \Pr[Y]| = |{\Pr[X \land F]} - \Pr[Y \land F]| \le \Pr[F] \] + as required. +\end{proof} + +\begin{proof}[Proof of the main OAEP+ result] + + We assume throughout that, before querying its $H'$ oracle at $x \cat r$, + the adversary has queried $G$ at $r$. + + First, we show that OAEP+ meets the IND-CPA notion. Suppose that $A$ + attacks the OAEP+ scheme in the IND-CPA sense. We shall play a series of + games with the adversary, and vary the rules as we go along. The adversary + remains the same throughout. 
+
+  At any point in a game, let $Q_G$ be the list of queries the adversary has
+  made to the oracle $G$, let $Q_H$ be the queries made to $H$, and let
+  $Q_{H'}$ be the queries made to $H'$.
+
+  \paragraph{Game $\G0$}
+  This is effectively the original attack game: the adversary chooses two
+  plaintexts; one is chosen uniformly at random, and the adversary is given
+  the ciphertext and asked to identify which plaintext was chosen.  Label the
+  selected plaintext $x^*$, and the target ciphertext $y^*$.  Let $c^*$,
+  $s^*$, $t^*$ and $w^*$ be the intermediate results of the OAEP+ padding, as
+  described above.  Let $S_0$ be the event that the adversary wins the game.
+  Our objective is to bound the probability $\Pr[S_0] =
+  (\Adv{ind-cpa}{\Xid{\mathcal{E}}{OAEP+}^{\mathcal{T}}}(A) + 1)/2$.
+
+  \paragraph{Game $\G1$}
+  At the beginning of the game, we select at random:
+  \begin{itemize}
+  \item $s^* \inr \{0, 1\}^{n-k}$;
+  \item $t^* \inr \{0, 1\}^k$; and
+  \item $r^* \inr \{0, 1\}^k$.
+  \end{itemize}
+  We compute $w^* = s^* \cat t^*$ and $y^* = f_P(w^*)$.  Note, then, that
+  \emph{$y^*$ is fixed at the start of the game}.
+
+  We also `rig' the random oracle $H$ so that $H(s^*) = r^* \xor t^*$.
+  This isn't much of a fix, because this value is evidently uniformly
+  distributed and independent of everything except $r^* \xor t^*$.
+
+  We could rig $G$ and $H'$ too, once we knew $x^*$, so that $s^* = (x^* \xor
+  G(r)) \cat H'(x^* \cat r)$.  But we don't.  Instead, consider the event
+  $F_1$ that the adversary queries $G$ at $r^*$.  Since we have opted for the
+  bare-faced lie rather than oracle subterfuge, the target ciphertext $y^*$
+  and the oracles are entirely independent of the target plaintext $x^*$,
+  whatever that might be.  Hence, $A$'s probability of guessing which
+  plaintext was chosen, $\Pr[S_1] = \frac{1}{2}$.
+
+  When attempting to bound $\Pr[F_1]$, there are two cases to consider:
+  \begin{enumerate}
+
+  \item The adversary has not previously queried $H$ at $s^*$.
In this
+    case, the value of $r^*$ is independent of the adversary's view -- it has
+    no information at all about $r^*$.  This can occur, therefore, with
+    probability at most $q_G 2^{-k}$.
+
+  \item The adversary has previously queried $H$ at $s^*$.  Then we can
+    construct an inverter.  Note that $f_P$ is a permutation (by assumption);
+    hence, selecting a $y^*$ at random implies the values of $w^*$, and hence
+    $s^*$ and $t^*$, even though they can't be computed easily.
+    \begin{program}
+      Inverter $I(P, y)$: \+ \\
+        $\Xid{G}{map} \gets \emptyset$;
+        $\Xid{H}{map} \gets \emptyset$;
+        $\Xid{H'}{map} \gets \emptyset$; \\
+        $z \gets \bot$; \\
+        $(x_0, x_1, s) \gets A^{G(\cdot), H(\cdot), H'(\cdot)}
+          (\cookie{find}, P)$; \\
+        $b \gets A^{G(\cdot), H(\cdot), H'(\cdot)}(\cookie{guess}, y, s)$; \\
+        \RETURN $z$; \- \\[\smallskipamount]
+      Oracle $H(s)$: \+ \\
+        \IF $s \in \dom \Xid{H}{map}$ \\
+        \THEN \RETURN $\Xid{H}{map}(s)$; \\
+        $h \getsr \{0, 1\}^k$; \\
+        $\Xid{H}{map} \gets \Xid{H}{map} \cup \{s \mapsto h\}$; \\
+        \RETURN $h$;
+    \next
+      Oracle $H'(s)$: \+ \\
+        \IF $s \in \dom \Xid{H'}{map}$ \\
+        \THEN \RETURN $\Xid{H'}{map}(s)$; \\
+        $h' \getsr \{0, 1\}^k$; \\
+        $\Xid{H'}{map} \gets \Xid{H'}{map} \cup \{s \mapsto h'\}$; \\
+        \RETURN $h'$; \- \\[\smallskipamount]
+      Oracle $G(r)$: \+ \\
+        \IF $r \in \dom \Xid{G}{map}$ \THEN \\
+        \RETURN $\Xid{G}{map}(r)$; \\
+        \FOR $s \in \dom \Xid{H}{map}$ \DO \\ \quad \= \+ \kill
+          $t \gets r \xor \Xid{H}{map}(s);$ \\
+          $w \gets s \cat t$; \\
+          \IF $f_P(w) = y$ \THEN $z \gets w$; \- \\
+        $g \getsr \{0, 1\}^{n-2k}$; \\
+        $\Xid{G}{map} \gets \Xid{G}{map} \cup \{r \mapsto g\}$; \\
+        \RETURN $g$;
+    \end{program}
+    Clearly, $I$ succeeds whenever $A$ queries both $H$ at $s^*$ and $G$ at
+    $r^*$.  $I$'s running time is about $O(q_G q_H)$ because of all of the
+    searching in the $G$ oracle.
+
+  \end{enumerate}
+  Thus,
+  \[ \Pr[F_1] \le
+     \frac{q_G}{2^k} + \InSec{owf}(\mathcal{T}; t + O(q_G q_H)).
\]% + Now observe that, if $F_1$ doesn't occur, Games $\G0$ and~$\G1$ are + indistinguishable. So + \[ \Pr[S_0 \land \lnot F_1] = \Pr[S_1 \land \lnot F_1], \] + so by Shoup's lemma, + \[ |{\Pr[S_0]} - \Pr[S_1]| \le \Pr[F_1]. \] + Rearranging yields: + \[ \Adv{ind-cpa}{\Xid{\mathcal{E}}{OAEP+}^{\mathcal{T}}}(A) + \le 2 \left (\frac{q_G}{2^k} + + \InSec{owf}(\mathcal{T}; t + O(q_G q_H)) \right). \]% + But $A$ was arbitrary. The result follows. + + To complete the plaintext-awareness proof, we must present a plaintext + extractor. + \begin{program} + Plaintext extractor $X(y, P, Q_G, Q_H, Q_{H'}, C)$: \+ \\ + \FOR $z \in \dom Q_{H'}$ \DO \\ \quad \= \+ \kill + \PARSE $z$ \AS $x, k\colon r$; \\ + $c \gets Q_{H'}(z)$; \\ + $s \gets (x \xor Q_G(r)) \cat c$; \\ + \IF $s \in \dom Q_H$ \THEN \\ \quad \= \+ \kill + $t \gets r \xor Q_H(s)$; \\ + $w \gets s \cat t$; \\ + $y' \gets f_P(w)$; \\ + \IF $y = y'$ \THEN \RETURN $x$; \-\- \\ + \RETURN $\bot$; + \end{program} + To obtain a lower bound on the success probability of $X$, consider an + adversary $A$ which queries its various oracles and returns a ciphertext + $y$. We aim to show that, if $A$ did not query all of its oracles as + required for $X$ to work then its probability of producing a valid + ciphertext (i.e., one for which the decryption algorithm does not return + $\bot$) is small. + + We consider the decryption algorithm applied to $y$, and analyse the + probability that $y$ is a valid ciphertext even though $A$ did not make all + of the appropriate oracle queries. 
+ + We shall take some of our notation from the decryption algorithm, which + proceeds as follows: + \begin{program} + Algorithm $\Xid{D}{OAEP+}^ + {\mathcal{T}, G(\cdot), H(\cdot), H'(\cdot)}_K(y)$: \+ \\ + $w \gets T_K(y)$; \\ + \PARSE $w$ \AS $s, k\colon t$; \\ + $r \gets t \xor H(s)$; \\ + \PARSE $s$ \AS $s' \cat k\colon c$; \\ + $x \gets s' \xor G(r)$; \\ + \IF $c = H'(x \cat r)$ \THEN \RETURN $x$; \\ + \ELSE \RETURN $\bot$; + \end{program} + + Firstly, suppose that $A$ did not query $H'$ at $x \cat r$. Then obviously + there is only a $2^{-k}$ probability that $c$ is correct and the ciphertext + will be accepted. By our assumption about adversary behaviour, if $A$ + didn't query $G$ at $r$ then it also didn't query $H'$ at $x \cat r$ for + any $x$. Hence, a decryption algorithm which rejected immediately if it + discovered that $G$ hadn't been queried at $r$, or that $H'$ hadn't been + queried at $x \cat r$, would be incorrect with probability at most + $2^{-k}$. + + Secondly, suppose that $A$ did not query $H$ at $s$. Then $c$ is still + fixed, but $r$ is now random and independent of $A$'s view. The + probability that $G$ has been queried at $r$ is then at most $q_G 2^{-k}$. + So a decryption algorithm which, in addition to the changes above, also + rejected if it discovered that $H$ had not been queried at $s$ would be + incorrect with probability at most $q_G 2^{-k}$. + + But our plaintext extractor $X$ is just such a decryption algorithm. + Hence, $X$'s failure probability is + \[ \epsilon = \frac{q_G + 1}{2^k}. 
\] +\end{proof} + +\endinput + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: "ips" +%%% End: diff --git a/enc-symm.tex b/enc-symm.tex new file mode 100644 index 0000000..e0f3f01 --- /dev/null +++ b/enc-symm.tex @@ -0,0 +1,1188 @@ +\xcalways\section{Symmetric encryption}\x + +\xcalways\subsection{Syntax}\x + +\begin{slide} + \head{Symmetric encryption syntax} + + A \emph{symmetric encryption scheme} $\mathcal{E} = (E, D)$ is a set of + keys $\keys\mathcal{E}$ (usually $\{0, 1\}^k$ for some integer $k$) and + pair of algorithms: + \begin{itemize} + \item an \emph{encryption} algorithm $E\colon \keys\mathcal{E} \times \{0, + 1\}^* \to \{0, 1\}^*$; and + \item a \emph{decryption} algorithm $D\colon \keys\mathcal{E} \times \{0, + 1\}^* \to \{0, 1\}^*$. + \end{itemize} + We also have a \emph{correctness} requirement: for any $K \in + \keys\mathcal{E}$, and any plaintext $x$, if $E(K, x)$ returns $y$ then + $D(K, y)$ returns $x$. + + We write $E_K(\cdot)$ rather than $E(K, \cdot)$, and $D_K(\cdot)$ rather + than $D(K, \cdot)$. +\end{slide} + +\xcalways\subsection{Security notions}\x + +\begin{slide} + \head{Symmetric encryption security notions} + + In symmetric scheme, an adversary who doesn't know the key can't encrypt + data for itself. To modify our security notions by providing the adversary + with an encryption oracle. + + We consider semantic security, indistinguishability and non-malleability, + against chosen-plaintext and chosen-ciphertext attacks. To make life more + complicated, we have three different indistinguishability notions. 
+
+  We use the same notation to describe the decryption oracles provided in
+  various types of attacks:
+  \begin{tabular}[C]{l Mc Mc }
+    \hlx*{hv}
+    Attack & D_0(c) & D_1(c) \\ \hlx{vhv}
+    CPA  & \bot   & \bot   \\
+    CCA1 & D_K(c) & \bot   \\
+    CCA2 & D_K(c) & D_K(c) \\ \hlx*{vh}
+  \end{tabular}
+\end{slide}
+
+\begin{slide}
+  \topic{semantic security}
+  \head{Semantic security}
+
+  The semantic security game is as for the asymmetric case, except for the
+  presence of encryption oracles.
+
+  \begin{program}
+    Experiment $\Expt{sem-\id{atk}-$b$}{\mathcal{E}}(A)$: \+ \\
+      $K \getsr \keys\mathcal{E}$; \\
+      $(\mathcal{M}, s) \gets A^{E_K(\cdot), D_0(\cdot)}
+        (\cookie{select})$; \\
+      $x_0 \getsr \mathcal{M}$; $x_1 \getsr \mathcal{M}$; \\
+      $y \gets E_K(x_1)$; \\
+      $(f, \alpha) \gets A^{E_K(\cdot), D_1(\cdot)}
+        (\cookie{predict}, y, s)$; \\
+      \IF $f(x_b) = \alpha$ \THEN \RETURN $1$; \\
+      \ELSE \RETURN $0$;
+  \end{program}
+  The distribution $\mathcal{M}$ must be \emph{valid}: all strings in
+  $\supp\mathcal{M}$ must have the same length.
+\end{slide}
+
+\begin{slide}
+  \topic{find-then-guess}
+  \head{Find-then-guess indistinguishability}
+
+  The `find-then-guess' (FTG) game corresponds to the standard public-key
+  indistinguishability notion.
+  \begin{program}
+    Experiment $\Expt{ftg-\id{atk}-$b$}{\mathcal{E}}(A)$: \+ \\
+      $K \getsr \keys\mathcal{E}$; \\
+      $(x_0, x_1, s) \gets A^{E_K(\cdot), D_0(\cdot)}(\cookie{find})$; \\
+      \IF $|x_0| \ne |x_1|$ \THEN \RETURN $0$; \\
+      $y \gets E_K(x_b)$; \\
+      $b' \gets A^{E_K(\cdot), D_1(\cdot)}(\cookie{guess}, y, s)$; \\
+      \RETURN $b'$;
+  \end{program}
+\end{slide}
+
+\begin{slide}
+  \topic{left-or-right}
+  \head{Left-or-right indistinguishability}
+
+  The `left-or-right' (LOR) notion is a natural extension of find-then-guess.
+  Rather than having to guess the hidden bit using a single challenge
+  ciphertext, the adversary is allowed to request as many as it likes.
It is + given an encryption oracle which accepts two arguments: it selects either + the left or the right plaintext, according to the hidden bit, and returns + the ciphertext. + \begin{program} + Experiment $\Expt{lor-\id{atk}-$b$}{\mathcal{E}}(A)$; \+ \\ + $K \getsr \keys\mathcal{E}$; \\ + $b' \gets A^{E_K(\id{lr}_b(\cdot, \cdot)), D_1(\cdot)}$; \\ + \RETURN $b'$; \- \\[\smallskipamount] + Function $\id{lr}_b(x_0, x_1)$: \+ \\ + \RETURN $x_b$; + \end{program} + Note that, because the adversary only runs in one stage, we can only + consider chosen-plaintext and adaptive chosen-ciphertext attacks. +\end{slide} + +\begin{slide} + \topic{real-or-random} + \head{Real-or-random indistinguishability} + + The `real-or-random' (ROR) notion is somewhat less natural, but turns out + to be useful for analysing block cipher encryption modes. + + The adversary is given an oracle which, when given a plaintext $x$, either + returns an encryption of $x$ or an encryption of a random string with the + same length as $x$. + \begin{program} + Experiment $\Expt{ror-\id{atk}-$0$}{\mathcal{E}}(A)$; \+ \\ + $K \getsr \keys\mathcal{E}$; \\ + $b' \gets A^{E_K(\id{rand}(\cdot)), D_1(\cdot)}$; \\ + \RETURN $b'$; \- \\[\smallskipamount] + Function $\id{rand}(x)$: \+ \\ + $x' \getsr \{0, 1\}^{|x|}$; \\ + \RETURN $x'$; + \next + Experiment $\Expt{ror-\id{atk}-$1$}{\mathcal{E}}(A)$; \+ \\ + $K \getsr \keys\mathcal{E}$; \\ + $b' \gets A^{E_K(\cdot), D_1(\cdot)}$; \\ + \RETURN $b'$; + \end{program} + Again, only chosen-plaintext and adaptive chosen-ciphertext attacks are + applicable. 
+\end{slide} + +\begin{slide} + \topic{relations between notions} + \head{Relations between notions \cite{Bellare:2000:CST}} + + \[ \xymatrix @=2cm { + \txt{LOR} \ar@<0.5ex>[r]^1 \ar@<-0.5ex>[d]_3 & + \txt{ROR} \ar@<0.5ex>[l]^2 \\ + \txt{FTG} \ar@{-->}@<-0.5ex>[u]_4 \ar@<0.5ex>[r] & + \txt{SEM} \ar@<0.5ex>[l] + } \] + \begin{list}{}{ + \settowidth{\labelwidth}{\textbf{Key}} + \leftmargin\labelwidth\advance\leftmargin\labelsep + \itemindent0pt\let\makelabel\textbf} + \item[Key] \begin{itemize} + \item A solid arrow $\xy\ar*+{A};<1.5cm, 0cm>*+{B}\endxy$ indicates a + \emph{security-preserving} reduction: if a scheme is secure in notion + $A$ then it is as secure in notion $B$, to within a small constant + factor. + \item A broken arrow $\xy\ar@{-->}*+{A};<1.5cm, 0cm>*+{B}\endxy$ + indicates a non-security-preserving reduction. + \item The numbers refer to sections of the proof provided in the notes. + \end{itemize} + \end{list} +\end{slide} + +\begin{proof} + We deal with the propositions one at a time. Most of them are pretty easy, + with the exception of the security-losing reduction from FTG to LOR. + \begin{enumerate} + + \item We show that $\text{LOR-\id{atk}} \implies \text{ROR-\id{atk}}$. + Suppose $A'$ attacks $\mathcal{E}$ in the real-or-random sense. + \begin{program} + Adversary $A^{E(\cdot, \cdot), D(\cdot)}$: \+ \\ + \RETURN $A'^{\id{ror-hack}(\cdot), D(\cdot)}$; \-\\[\smallskipamount] + Oracle $\id{ror-hack}(x)$: \+ \\ + $x' \getsr \{0, 1\}^{|x|}$; \\ + \RETURN $E(x', x)$; + \end{program} + Since this provides a perfect simulation of the ROR game, + \[ \Adv{lor-\id{atk}}{\mathcal{E}}(A) = + \Adv{ror-\id{atk}}{\mathcal{E}}(A'), \]% + and hence + \[ \InSec{ror-\id{atk}}(\mathcal{E}; t, q_E, q_D) \le + \InSec{lor-\id{atk}}(\mathcal{E}; t, q_E, q_D). \]% + + \item We show that $\text{ROR-\id{atk}} \implies \text{LOR-\id{atk}}$. + Suppose $A'$ attacks $\mathcal{E}$ in the left-or-right sense.
+ \begin{program} + Adversary $A^{E(\cdot), D(\cdot)}$: \+ \\ + $\hat{b} \getsr \{0, 1\}$; \\ + $b' \gets A'^{\id{lor-hack}(\cdot, \cdot), D(\cdot)}$; \\ + \IF $b' = \hat{b}$ \THEN \RETURN $1$; \\ + \ELSE \RETURN $0$; \- \\[\smallskipamount] + Oracle $\id{lor-hack}(x_0, x_1)$: \+ \\ + \RETURN $E(x_{\hat{b}})$; + \end{program} + If the ROR oracle is returning correct encryptions, then $A'$ will return + the correct bit $\hat{b}$ with probability + \[ \frac{\Adv{lor-\id{atk}}{\mathcal{E}}(A')}{2} + \frac{1}{2}; \] + if the ROR oracle is returning ciphertexts for random plaintexts, then + $A'$ is being given no information about $\hat{b}$, and hence guesses + correctly with probability exactly $\frac{1}{2}$. We conclude that + \[ \Adv{ror-\id{atk}}{\mathcal{E}}(A) = + \frac{1}{2}\Adv{lor-\id{atk}}{\mathcal{E}}(A'), \]% + and hence + \[ \InSec{lor-\id{atk}}(\mathcal{E}; t, q_E, q_D) \le + 2 \cdot \InSec{ror-\id{atk}}(\mathcal{E}; t, q_E, q_D). \]% + + \item We show that $\text{LOR-\id{atk}} \implies \text{FTG-\id{atk}}$. + Suppose $A'$ attacks $\mathcal{E}$ in the find-then-guess sense. We + assume, without loss of generality, that $A'$ never queries its + decryption oracle on ciphertexts it obtained from its encryption oracle. + \begin{program} + Adversary $A^{E(\cdot, \cdot), D(\cdot)}$: \+ \\ + $(x_0, x_1, s) \gets A'^{\id{encrypt}(\cdot), D(\cdot)} + (\cookie{find})$; \\ + $y \gets E(x_0, x_1)$; \\ + $b' \gets A'^{\id{encrypt}(\cdot), D(\cdot)} + (\cookie{guess}, y, s);$ \\ + \RETURN $b'$; \- \\[\smallskipamount] + Oracle $\id{encrypt}(x)$: \+ \\ + \RETURN $E(x, x)$; + \end{program} + Since this provides a perfect simulation of the FTG game, + \[ \Adv{lor-\id{atk}}{\mathcal{E}}(A) = + \Adv{ftg-\id{atk}}{\mathcal{E}}(A'), \]% + and hence + \[ \InSec{ftg-\id{atk}}(\mathcal{E}; t, q_E, q_D) \le + \InSec{lor-\id{atk}}(\mathcal{E}; t, q_E + 1, q_D).
\]% + + \item We show that $\text{FTG-\id{atk}} \implies \text{LOR-\id{atk}}$, + though with a factor of $q_E$ loss of security. + + This proof is slightly more tricky than the others. Consider the + following `hybrid' games, defined for $0 \le i \le q_E$: + \begin{program} + Experiment $\Expt{hyb-$i$-\id{atk}-$b$}{\mathcal{E}}(A)$: \+ \\ + $K \getsr \keys\mathcal{E}$; \\ + $j \gets 0$; \\ + $b' \gets A^{E_K(\id{lr-hack}_b(\cdot, \cdot)), D_1(\cdot)}$; \\ + \RETURN $b'$; \- \\[\smallskipamount] + \next + Function $\id{lr-hack}_b(x_0, x_1)$: \+ \\ + \IF $j < i$ \THEN $x \gets x_0$; \\ + \ELSE \IF $j > i$ \THEN $x \gets x_1$; \\ + \ELSE $x \gets x_b$; \\ + $j \gets j + 1$; \\ + \RETURN $x$; + \end{program} + As usual, we define + \[ \Adv{hyb-$i$-\id{atk}}{\mathcal{E}}(A) = + \Pr[\Expt{hyb-$i$-\id{atk}-$1$}{\mathcal{E}}(A) = 1] - + \Pr[\Expt{hyb-$i$-\id{atk}-$0$}{\mathcal{E}}(A) = 1]. \]% + + Observe that we have the identities + \begin{eqnarray*}[rl] + \Expt{lor-\id{atk}-$0$}{\mathcal{E}} &\equiv + \Expt{hyb-$(q_E{-}1)$-\id{atk}-$0$}{\mathcal{E}} + \\ + \Expt{lor-\id{atk}-$1$}{\mathcal{E}} &\equiv + \Expt{hyb-$0$-\id{atk}-$1$}{\mathcal{E}} + \\ + \tabpause{and, for $0 \le i < q_E$,} + \Expt{hyb-$i$-\id{atk}-$0$}{\mathcal{E}} &\equiv + \Expt{hyb-$(i{+}1)$-\id{atk}-$1$}{\mathcal{E}}.
+ \end{eqnarray*} + Thus, + \begin{eqnarray*}[rclclc] + \Adv{lor-\id{atk}}{\mathcal{E}}(A) + &=& \Pr[\Expt{lor-\id{atk}-$1$}{\mathcal{E}}(A) = 1] &-& + \Pr[\Expt{lor-\id{atk}-$0$}{\mathcal{E}}(A) = 1] + \\* + &=& \Pr[\Expt{hyb-$0$-\id{atk}-$1$}{\mathcal{E}}(A) = 1] &-& + \Pr[\Expt{hyb-$(q_E{-}1)$-\id{atk}-$0$}{\mathcal{E}}(A) = 1] + \\ + &=& \Pr[\Expt{hyb-$0$-\id{atk}-$1$}{\mathcal{E}}(A) = 1] &-& + \Pr[\Expt{hyb-$0$-\id{atk}-$0$}{\mathcal{E}}(A) = 1] &+ \\* + & & \Pr[\Expt{hyb-$1$-\id{atk}-$1$}{\mathcal{E}}(A) = 1] &-& + \Pr[\Expt{hyb-$1$-\id{atk}-$0$}{\mathcal{E}}(A) = 1] &+ \\* + & & \multicolumn{1}{c}{\smash\vdots} &-& + \multicolumn{1}{c}{\smash\vdots} &+ \\* + & & \Pr[\Expt{hyb-$(q_E{-}1)$-\id{atk}-$1$}{\mathcal{E}}(A) = 1] &-& + \Pr[\Expt{hyb-$(q_E{-}1)$-\id{atk}-$0$}{\mathcal{E}}(A) = 1] + \\* + &=& \sum_{0\le i