| 1 | %%% -*-latex-*- |
| 2 | %%% |
| 3 | %%% $Id$ |
| 4 | %%% |
| 5 | %%% Definition of the cipher |
| 6 | %%% |
| 7 | %%% (c) 2000 Mark Wooding |
| 8 | %%% |
| 9 | |
| 10 | %%%----- Revision history --------------------------------------------------- |
| 11 | %%% |
| 12 | %%% $Log: storin.tex,v $ |
| 13 | %%% Revision 1.7 2001/03/11 23:46:56 mdw |
| 14 | %%% Fixing to BibTeX stuff. |
| 15 | %%% |
| 16 | %%% Revision 1.6 2001/03/11 23:22:53 mdw |
| 17 | %%% Use BibTeX for the paper bibliography. |
| 18 | %%% |
| 19 | %%% Revision 1.5 2000/07/02 15:22:34 mdw |
| 20 | %%% Overhaul of differential cryptanalysis, including a new attack. |
| 21 | %%% |
| 22 | %%% Revision 1.4 2000/05/28 00:39:32 mdw |
| 23 | %%% Fix some errors. |
| 24 | %%% |
| 25 | %%% Revision 1.3 2000/05/25 19:46:22 mdw |
| 26 | %%% Improve analysis section. |
| 27 | %%% |
| 28 | %%% Revision 1.2 2000/05/21 21:43:26 mdw |
| 29 | %%% Fix a couple of typos. |
| 30 | %%% |
| 31 | %%% Revision 1.1 2000/05/21 11:28:30 mdw |
| 32 | %%% Initial check-in. |
| 33 | %%% |
| 34 | |
| 35 | %%%----- Preamble ----------------------------------------------------------- |
| 36 | |
| 37 | \documentclass[a4paper]{article} |
| 38 | \usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts} |
| 39 | \usepackage{mdwtab} |
| 40 | \usepackage{mathenv} |
| 41 | \usepackage{amsfonts} |
| 42 | \usepackage{mdwmath} |
| 43 | \usepackage{url} |
| 44 | \usepackage[all, dvips]{xy} |
| 45 | |
| 46 | \def\ror{\mathbin{>\!\!>\!\!>}} |
| 47 | \def\rol{\mathbin{<\!\!<\!\!<}} |
| 48 | \def\lsr{\mathbin{>\!\!>}} |
| 49 | \def\lsl{\mathbin{<\!\!<}} |
| 50 | \def\xor{\oplus} |
| 51 | \def\seq#1{{\langle #1 \rangle}} |
| 52 | |
| 53 | \def\hex#1{\texttt{#1}_{16}} |
| 54 | \let\msgid=\url |
| 55 | |
| 56 | \sloppy |
| 57 | |
| 58 | \title{Storin: A block cipher for digital signal processors} |
| 59 | \author{Mark Wooding (\texttt{mdw@nsict.org})} |
| 60 | |
| 61 | %% --- The cipher diagrams --- |
| 62 | |
| 63 | \def\figkeymix#1#2#3#4{% |
| 64 | \ar "a"; p-(0, 0.5)*{\xor} ="a" \ar "a"+(1, 0) *+[r]{k_{#1}}; "a"% |
| 65 | \ar "b"; p-(0, 0.5)*{\xor} ="b" \ar "b"+(1, 0) *+[r]{k_{#2}}; "b"% |
| 66 | \ar "c"; p-(0, 0.5)*{\xor} ="c" \ar "c"+(1, 0) *+[r]{k_{#3}}; "c"% |
| 67 | \ar "d"; p-(0, 0.5)*{\xor} ="d" \ar "d"+(1, 0) *+[r]{k_{#4}}; "d"% |
| 68 | } |
| 69 | |
| 70 | \def\figmatrix{% |
| 71 | \POS "a"+(3, -1) *++=(7, 0)[F]u\txt{Matrix multiply} ="m"% |
| 72 | \ar "a"; "m"+U-(3, 0) \ar "b"; "m"+U-(1, 0)% |
| 73 | \ar "c"; "m"+U+(1, 0) \ar "d"; "m"+U+(3, 0)% |
| 74 | } |
| 75 | |
| 76 | \def\figlintrans{% |
| 77 | \ar "m"+D-(3, 0); "a"-(0, 2.25)*{\xor} ="a"% |
| 78 | \POS "a"+(1, 0) *+[F]{{} \lsr 12} ="x"% |
| 79 | \ar `r "a"+(0, 0.5); p+(1, 0) "x" \ar "x"; "a"% |
| 80 | \ar "m"+D-(1, 0); "b"-(0, 2.25)*{\xor} ="b"% |
| 81 | \POS "b"+(1, 0) *+[F]{{} \lsr 12} ="x"% |
| 82 | \ar `r "b"+(0, 0.5); p+(1, 0) "x" \ar "x"; "b"% |
| 83 | \ar "m"+D+(1, 0); "c"-(0, 2.25)*{\xor} ="c"% |
| 84 | \POS "c"+(1, 0) *+[F]{{} \lsr 12} ="x"% |
| 85 | \ar `r "c"+(0, 0.5); p+(1, 0) "x" \ar "x"; "c"% |
| 86 | \ar "m"+D+(3, 0); "d"-(0, 2.25)*{\xor} ="d"% |
| 87 | \POS "d"+(1, 0) *+[F]{{} \lsr 12} ="x"% |
| 88 | \ar `r "d"+(0, 0.5); p+(1, 0) "x" \ar "x"; "d"% |
| 89 | } |
| 90 | |
| 91 | \def\figilintrans{% |
| 92 | \ar "a"; "a"-(0, 1)*{\xor} ="a"% |
| 93 | \POS "a"+(1, 0) *+[F]{{} \lsr 12} ="x"% |
| 94 | \ar `r "a"+(0, 0.5); p+(1, 0) "x" \ar "x"; "a"% |
| 95 | \ar "b"; "b"-(0, 1)*{\xor} ="b"% |
| 96 | \POS "b"+(1, 0) *+[F]{{} \lsr 12} ="x"% |
| 97 | \ar `r "b"+(0, 0.5); p+(1, 0) "x" \ar "x"; "b"% |
| 98 | \ar "c"; "c"-(0, 1)*{\xor} ="c"% |
| 99 | \POS "c"+(1, 0) *+[F]{{} \lsr 12} ="x"% |
| 100 | \ar `r "c"+(0, 0.5); p+(1, 0) "x" \ar "x"; "c"% |
| 101 | \ar "d"; "d"-(0, 1)*{\xor} ="d"% |
| 102 | \POS "d"+(1, 0) *+[F]{{} \lsr 12} ="x"% |
| 103 | \ar `r "d"+(0, 0.5); p+(1, 0) "x" \ar "x"; "d"% |
| 104 | } |
| 105 | |
| 106 | \def\figstart#1{% |
| 107 | \POS 0;<1cm,0cm>:% |
| 108 | \turnradius{4pt}% |
| 109 | \ar @{-} (0, 0) *+{a#1}; p-(0, 0.5) ="a" |
| 110 | \ar @{-} (2, 0) *+{b#1}; p-(0, 0.5) ="b" |
| 111 | \ar @{-} (4, 0) *+{c#1}; p-(0, 0.5) ="c" |
| 112 | \ar @{-} (6, 0) *+{d#1}; p-(0, 0.5) ="d" |
| 113 | } |
| 114 | |
| 115 | \def\figround#1#2#3#4#5{% |
| 116 | \ar @{.} "a"-(0.5, 0); p+(8, 0)% |
| 117 | \POS "a"+(8, -1.75) *[r]\txt{#5}% |
| 118 | \figkeymix{#1}{#2}{#3}{#4}% |
| 119 | \figmatrix% |
| 120 | \figlintrans% |
| 121 | \ar @{-} "a"; p-(0, .5) ="a" |
| 122 | \ar @{-} "b"; p-(0, .5) ="b" |
| 123 | \ar @{-} "c"; p-(0, .5) ="c" |
| 124 | \ar @{-} "d"; p-(0, .5) ="d" |
| 125 | } |
| 126 | |
| 127 | \def\figiround#1#2#3#4#5{% |
| 128 | \ar @{.} "a"-(0.5, 0); p+(8, 0)% |
| 129 | \POS "a"+(8, -1.75) *[r]\txt{#5}% |
| 130 | \figkeymix{#1}{#2}{#3}{#4}% |
| 131 | \figilintrans% |
| 132 | \figmatrix% |
| 133 | \ar @{-} "m"+D-(3, 0); p-(0, .5) ="a" |
| 134 | \ar @{-} "m"+D-(1, 0); p-(0, .5) ="b" |
| 135 | \ar @{-} "m"+D+(1, 0); p-(0, .5) ="c" |
| 136 | \ar @{-} "m"+D+(3, 0); p-(0, .5) ="d" |
| 137 | } |
| 138 | |
| 139 | \def\figgap{% |
| 140 | \ar @{.} "a"-(0.5, 0); p+(8, 0) |
| 141 | \POS "a"+(8, -1)*[r]\txt{Six more rounds} |
| 142 | \ar @{--} "a"; "a"-(0, 2) ="a" |
| 143 | \ar @{--} "b"; "b"-(0, 2) ="b" |
| 144 | \ar @{--} "c"; "c"-(0, 2) ="c" |
| 145 | \ar @{--} "d"; "d"-(0, 2) ="d" |
| 146 | } |
| 147 | |
| 148 | \def\figwhite#1#2#3#4#5{% |
| 149 | \ar @{.} "a"-(0.5, 0); p+(8, 0) |
| 150 | \POS "a"+(8, -1)*[r]\txt{Postwhitening} |
| 151 | \figkeymix{#1}{#2}{#3}{#4} |
| 152 | \ar "a"; p-(0, 1) *+{a#5} |
| 153 | \ar "b"; p-(0, 1) *+{b#5} |
| 154 | \ar "c"; p-(0, 1) *+{c#5} |
| 155 | \ar "d"; p-(0, 1) *+{d#5} |
| 156 | } |
| 157 | |
| 158 | \begin{document} |
| 159 | \maketitle |
| 160 | |
| 161 | %%%----- The main text ------------------------------------------------------ |
| 162 | |
| 163 | \begin{abstract} |
| 164 | We present Storin: a new 96-bit block cipher designed to play to the |
| 165 | strengths of current digital signal processors (DSPs). In particular, DSPs |
| 166 | tend to provide single-cycle multiply-and-accumulate operations, making |
| 167 | matrix multiplications very cheap. Working in an environment where |
| 168 | multiplication is as fast as exclusive-or changes the usual perceptions |
| 169 | about which operations provide good cryptographic strength cheaply. The |
| 170 | scarcity of available memory, for code and for tables, and a penalty for |
| 171 | nonsequential access to data also make traditional block ciphers based |
| 172 | around substitution tables unsuitable. |
| 173 | \end{abstract} |
| 174 | |
| 175 | \tableofcontents |
| 176 | |
| 177 | \section{Definition of the cipher} |
| 178 | |
| 179 | \subsection{Overview} |
| 180 | |
| 181 | Storin is an eight-round SP network operating on 96-bit blocks. The block |
| 182 | cipher uses 36 24-bit subkey words, derived from a user key by the key |
| 183 | schedule. |
| 184 | |
| 185 | The 96-bit input is split into four 24-bit words. Each round then processes |
| 186 | these four words, using the following three steps: |
| 187 | \begin{enumerate} |
| 188 | \item Mixing in of some key material. Four 24-bit subkey words are XORed |
| 189 | with the four data words. |
| 190 | \item A matrix multiplication mod $2^{24}$. The four words are treated as a |
| 191 | column vector and premultiplied by a $4 \times 4$ vector using addition and |
| 192 | multiplication mod $2^{24}$. This is the main nonlinear step in the |
| 193 | cipher, and it also provides most of the cipher's diffusion. |
| 194 | \item A simple linear transformation, which replaces each word $x$ by $x \xor |
| 195 | (x \lsr 12)$. |
| 196 | \end{enumerate} |
| 197 | The four data words output by the final round are XORed with the last four |
| 198 | subkey words in a final postwhitening stage and combined to form the 96-bit |
| 199 | ciphertext. |
| 200 | |
| 201 | The cipher structure is shown diagrammatically in figure~\ref{fig:cipher}. |
| 202 | |
| 203 | \begin{figure} |
| 204 | \centering |
| 205 | \leavevmode |
| 206 | \begin{xy} |
| 207 | \xycompile{ |
| 208 | \figstart{} |
| 209 | \figround{0}{1}{2}{3}{Round 1} |
| 210 | \figround{4}{5}{6}{7}{Round 2} |
| 211 | \figgap |
| 212 | \figwhite{32}{33}{34}{35}{'}} |
| 213 | \end{xy} |
| 214 | \caption{The Storin encryption function} |
| 215 | \label{fig:cipher} |
| 216 | \end{figure} |
| 217 | |
| 218 | Since the matrix used in step 2 is chosen to be invertible, the cipher can be |
| 219 | inverted readily, simply by performing the inverse steps in the reverse |
| 220 | order. Since the postwhitening stage is the same as a key mixing stage, |
| 221 | decryption can be viewed as eight rounds consisting of key mixing, linear |
| 222 | transformation and matrix multiplication, followed by a postwhitening stage. |
| 223 | Thus, the structure of the inverse cipher is very similar to the forwards |
| 224 | cipher, and uses the same components. The decryption function is shown |
| 225 | diagrammatically in figure~\ref{fig:decipher}. |
| 226 | |
| 227 | \begin{figure} |
| 228 | \centering |
| 229 | \leavevmode |
| 230 | \begin{xy} |
| 231 | \xycompile{ |
| 232 | \figstart{'} |
| 233 | \figiround{32}{33}{34}{35}{Round 1} |
| 234 | \figiround{28}{29}{30}{31}{Round 2} |
| 235 | \figgap |
| 236 | \figwhite{0}{1}{2}{3}{}} |
| 237 | \end{xy} |
| 238 | \caption{The Storin decryption function} |
| 239 | \label{fig:decipher} |
| 240 | \end{figure} |
| 241 | |
| 242 | The key schedule is designed to be simple and to reuse the cipher components |
| 243 | already available. Given a user key, which is a sequence of one or more |
| 244 | 24-bit words, it produces the 36 subkey words required by the cipher. The |
| 245 | key schedule is very similar to Blowfish \cite{Schneier:1994:DNV}. The |
| 246 | subkey array is assigned an initial constant value derived from the matrix |
| 247 | used in the cipher. Words from the user key are XORed into the array, |
| 248 | starting from the beginning, and restarting from the beginning of the user |
| 249 | key when all the user key words are exhausted. A 96-bit block is initialized |
| 250 | to zero, and enciphered with Storin, using the subkeys currently in the |
| 251 | array. The first four subkey words are then replaced with the resulting |
| 252 | ciphertext, which is then encrypted again using the new subkeys. The next |
| 253 | four subkey words are replaced with the ciphertext, and the process |
| 254 | continues, nine times in all, until all of the subkey words have been |
| 255 | replaced. |
| 256 | |
| 257 | The Storin key schedule can in theory accept user keys up to 36 words (864 |
| 258 | bits) long. However, there are known problems with keys longer than 28 words |
| 259 | (672 bits), and these large keys are forbidden. We expect that with long |
| 260 | keys, attacks will be found which are more efficient than an exhaustive |
| 261 | search of the keyspace; we therefore (conservatively) recommend 5 word |
| 262 | (120-bit) keys as a practical maximum. |
| 263 | |
| 264 | |
| 265 | \subsection{Encryption} |
| 266 | |
| 267 | We define $\mathcal{W} = \mathbb{Z}_{2^{24}}$ to be set of 24-bit words, and |
| 268 | $\mathcal{P} = \mathcal{W}^4$ to be the set of four-entry column vectors over |
| 269 | $\mathcal{W}$. Storin plaintext blocks are members of $\mathcal{P}$. |
| 270 | |
| 271 | The Storin encryption function uses 36 24-bit words of key material $k_0$, |
| 272 | $k_1$, \ldots, $k_{35}$, which are produced from the user key by the key |
| 273 | schedule, described below. The key-mixing operation $K_i \colon \mathcal{P} |
| 274 | \to \mathcal{P}$ is defined for $0 \le i < 9$ by: |
| 275 | \[ |
| 276 | K_i \begin{pmatrix} a \\ b \\ c \\d \end{pmatrix} |
| 277 | = |
| 278 | \begin{pmatrix} |
| 279 | a \xor k_{4i} \\ b \xor k_{4i+1} \\ c \xor k_{4i+2} \\ d \xor k_{4i+3} |
| 280 | \end{pmatrix} |
| 281 | \] |
| 282 | |
| 283 | The matrix multiplication operation $M \colon \mathcal{P} \to \mathcal{P}$ |
| 284 | is described by $M(\mathbf{x}) = \mathbf{M} \mathbf{x}$, where $\mathbf{M}$ |
| 285 | is a fixed invertible $4 \times 4$ matrix over $\mathcal{W}$. The value of |
| 286 | $\mathbf{M}$ is defined below. |
| 287 | |
| 288 | The linear transformation $L \colon \mathcal{P} \to \mathcal{P}$ is defined by: |
| 289 | \[ |
| 290 | L \begin{pmatrix} a \\ b \\ c \\ d \end{pmatrix} |
| 291 | = |
| 292 | \begin{pmatrix} |
| 293 | a \xor (a \lsr 12) \\ |
| 294 | b \xor (b \lsr 12) \\ |
| 295 | c \xor (c \lsr 12) \\ |
| 296 | d \xor (d \lsr 12) |
| 297 | \end{pmatrix} |
| 298 | \] |
| 299 | |
| 300 | The round function $R_i \colon \mathcal{P} \to \mathcal{P}$ is defined for $0 |
| 301 | \le i < 8$ by |
| 302 | \[ \bigl(\mathbf{M} K_i(\mathbf{x}) \bigr) \] |
| 303 | |
| 304 | The cipher $C \colon \mathcal{P} \to \mathcal{P}$ is defined in terms of $R_i$ and |
| 305 | $K_i$. Let $\mathbf{x}_0 \in \mathcal{P}$ be a plaintext vector. Let |
| 306 | $\mathbf{x}_{i+1} = R_i(\mathbf{x}_i)$ for $0 \le i < 8$. Then we define |
| 307 | $C(\mathbf{x})$ by setting $C(\mathbf{x}_0) = K_8(\mathbf{x}_8)$. |
| 308 | |
| 309 | |
| 310 | \subsection{Key schedule} |
| 311 | |
| 312 | The key schedule converts a user key, which is a sequence of 24-bit words, |
| 313 | into the 36 subkeys required by the cipher. |
| 314 | |
| 315 | For $i \ge 0$, we define that |
| 316 | \[ |
| 317 | \begin{pmatrix} |
| 318 | m_{16i + 0} & m_{16i + 1} & m_{16i + 2} & m_{16i + 3} \\ |
| 319 | m_{16i + 4} & m_{16i + 5} & m_{16i + 6} & m_{16i + 7} \\ |
| 320 | m_{16i + 8} & m_{16i + 9} & m_{16i + 10} & m_{16i + 11} \\ |
| 321 | m_{16i + 12} & m_{16i + 13} & m_{16i + 14} & m_{16i + 15} |
| 322 | \end{pmatrix} |
| 323 | = \mathbf{M}^{i + 2} |
| 324 | \] |
| 325 | |
| 326 | Let the user-supplied key be $u_0$, $u_1$, \ldots, $u_{n-1}$, for some $n > |
| 327 | 0$. We define the sequence $z_0$, $z_1$, \ldots\ by |
| 328 | \[ z_i = m_i \xor u_{i \bmod n} \] |
| 329 | for $i \ge 0$. |
| 330 | |
| 331 | Denote the result of encrypting vector $\mathbf{x}$ using subkeys from the |
| 332 | sequence $\seq{w} = w_0, w_1, \ldots, w_{35}$ as $C_{\seq{w}}(\mathbf{x})$. |
| 333 | We define the key schedule to be $k_0$, $k_1$, \ldots, $k_{35}$, where: |
| 334 | \begin{eqlines*} |
| 335 | \seq{p^{(i)}} = k_0, k_1, \ldots, k_{4i-1}, z_{4i}, z_{4i+1}, \ldots \\ |
| 336 | \mathbf{x}_0 = \begin{pmatrix} 0 \\ 0 \\ 0 \\ 0 \end{pmatrix}; \qquad |
| 337 | \begin{pmatrix} k_{4i} \\ k_{4i+1} \\ k_{4i+2} \\ k_{4i+3} \end{pmatrix} |
| 338 | = \mathbf{x}_{i+1} = C_{\seq{p^{(i)}}}(\mathbf{x}_i) |
| 339 | \end{eqlines*} |
| 340 | |
| 341 | |
| 342 | \subsection{Decryption} |
| 343 | |
| 344 | The individual operations used during encryption are all invertible. Key |
| 345 | mixing is inverted by taking keys from the other end of the array: |
| 346 | \[ K^{-1}_i(\mathbf{x}) = K_{8-i}(\mathbf{x}) \] |
| 347 | The matrix multiplication may be inverted simply by using the inverse matrix |
| 348 | $\mathbf{M}^{-1}$: |
| 349 | \[ M^{-1}(\mathbf{x}) = \mathbf{M}^{-1} \mathbf{x} \] |
| 350 | Finally, the linear transformation is its own inverse: |
| 351 | \[ L^{-1}(\mathbf{x}) = L(\mathbf{x}) \] |
| 352 | The inverse round function can now be defined as: |
| 353 | \[ R^{-1}_i(\mathbf{x}) = |
| 354 | \mathbf{M}^{-1} L\bigl(K^{-1}_i(\mathbf{x})\bigr) \] |
| 355 | |
| 356 | The decryption function $C^{-1}: \mathcal{P} \to \mathcal{P}$ is defined |
| 357 | in terms of $R^{-1}$ and $K^{-1}$ in a very similar way to encryption. Let |
| 358 | $\mathbf{x}_0$ be a ciphertext vector. Let $\mathbf{x}_{i+1} = |
| 359 | R^{-1}_i(\mathbf{x}_i)$ for $0 \le i < 8$. Then we define |
| 360 | $C^{-1}(\mathbf{x}_0) = K^{-1}_8(\mathbf{x}_8)$. |
| 361 | |
| 362 | |
| 363 | \subsection{Constants} |
| 364 | |
| 365 | The matrix $\mathbf{M}$ and its inverse $\mathbf{M}^{-1}$ are: |
| 366 | \begin{eqnarray*}[rl] |
| 367 | \mathbf{M} = & |
| 368 | \begin{pmatrix} |
| 369 | \hex{f7a413} & \hex{54bd81} & \hex{447550} & \hex{ff4449} \\ |
| 370 | \hex{f31e87} & \hex{d85388} & \hex{de32cb} & \hex{40e3d7} \\ |
| 371 | \hex{d9db1d} & \hex{551b45} & \hex{e9d19f} & \hex{e443de} \\ |
| 372 | \hex{4b949a} & \hex{4d435d} & \hex{ef0a17} & \hex{b784e1} |
| 373 | \end{pmatrix} \\ |
| 374 | \mathbf{M}^{-1} = & |
| 375 | \begin{pmatrix} |
| 376 | \hex{17391b} & \hex{fafb4b} & \hex{a66823} & \hex{f2efb6} \\ |
| 377 | \hex{13e0e5} & \hex{2ed5e4} & \hex{b2cfff} & \hex{d9cdb5} \\ |
| 378 | \hex{2af462} & \hex{33826d} & \hex{de66a1} & \hex{eb6c85} \\ |
| 379 | \hex{c2f423} & \hex{e904a3} & \hex{e772d8} & \hex{d791f1} |
| 380 | \end{pmatrix} |
| 381 | \end{eqnarray*} |
| 382 | |
| 383 | |
| 384 | |
| 385 | \section{Rationale and analysis} |
| 386 | |
| 387 | \subsection{Design decisions} |
| 388 | |
| 389 | The initial objective was to produce a cipher which played to the particular |
| 390 | strengths of digital signal processors. DSPs tend to have good multipliers, |
| 391 | and are particularly good at matrix multiplication. The decision to use a |
| 392 | matrix multiplication over $\mathbb{Z}_{2^{24}}$ seemed natural, given that |
| 393 | 24 bits is a commonly offered word size. |
| 394 | |
| 395 | The choice of a 96-bit block is also fairly natural. A 2 word (48-bit) block |
| 396 | is clearly too small, and a 3 word (72-bit) block is a little on the small |
| 397 | side too. |
| 398 | |
| 399 | |
| 400 | \subsection{Matrix multiplication over $\mathbb{Z}_{2^{24}}$} |
| 401 | |
| 402 | Integer multiplication on a DSP is a cheap source of nonlinearity. Note that |
| 403 | bit $i$ of the result depends on all of the bits in the operands of lesser or |
| 404 | equal significance.position $i$ downwards. |
| 405 | |
| 406 | The decision to make the $4 \times 4$ matrix fixed was taken fairly early on. |
| 407 | Generating invertible matrices from key material seemed like too much work to |
| 408 | expect from the DSP. |
| 409 | |
| 410 | The matrix is generated pseudorandomly from a seed string, using SHA-1. The |
| 411 | criteria we used to choose the matrix are: |
| 412 | \begin{enumerate} |
| 413 | \item The matrix must be invertible. |
| 414 | \item Exactly one entry in each row and column of the matrix must be even. |
| 415 | \end{enumerate} |
| 416 | Criterion 1 is obvious. Criterion 2 encourages diffusion between the entries |
| 417 | in the block vector. Note that if a matrix satisfies the second criterion, |
| 418 | its inverse also does. |
| 419 | |
| 420 | Consider a vector $\mathbf{x}$ and its product with the matrix $\mathbf{M} |
| 421 | \mathbf{x}$. Whether the top bit of entry $i$ in $\mathbf{x}$ affects |
| 422 | entry $j$ in the product depends on whether the entry in row $j$, column $i$ |
| 423 | of $\mathbf{M}$ is even. Criterion 2 ensures the following: |
| 424 | \begin{itemize} |
| 425 | \item A top-bit change in a single word affects three words in the output. |
| 426 | \item A top-bit change in two words affects two words in the output. |
| 427 | \end{itemize} |
| 428 | |
| 429 | The seed string used is \texttt{matrix-seed-string}. The program which |
| 430 | generates the matrix is included with the Storin example source code. |
| 431 | |
| 432 | \subsection{The linear transformation} |
| 433 | |
| 434 | A bit change in one of the inputs to the matrix can only affect bits at that |
| 435 | position and higher in the output. The linear transformation at the end of |
| 436 | the round aims to provide diffusion from the high-order bits back to the |
| 437 | low-order bits. |
| 438 | |
| 439 | A single high-order bit change in the input to a round will affect the |
| 440 | high-order bits of three words in the output of the matrix multiply. The |
| 441 | linear transformation causes it to affect bits in the low halves of each of |
| 442 | these words. The second round's multiplication causes these bits to affect |
| 443 | the whole top halves of all of the output words. The linear transformation |
| 444 | propagates this change to the bottom halves. Complete avalanche is therefore |
| 445 | achieved after three rounds of Storin. |
| 446 | |
| 447 | |
| 448 | \subsection{Key schedule notes} |
| 449 | |
| 450 | The key schedule is intended to be adequate for bulk encryption; it doesn't |
| 451 | provide good key agility, and isn't intended to. The key schedule accepts up |
| 452 | to 28 words of user key, although expecting 672 bits of security from the |
| 453 | cipher is not realistic. The suggested maximum of 5 words (120 bits) seems |
| 454 | more sensible. This maximum can be raised easily when our understanding of |
| 455 | the cipher increases our confidence in it. |
| 456 | |
| 457 | The key schedule is strongly reminiscent of Blowfish |
| 458 | \cite{Schneier:1994:DNV}. Use of existing components of the cipher, such as |
| 459 | the matrix multiplication and the cipher itself, help reduce the amount of |
| 460 | code required in the implementation. |
| 461 | |
| 462 | The restriction of the key schedule to 28 words is due to an interesting |
| 463 | property, also shared by Blowfish (see figure~\ref{fig:bfkeysched}): the |
| 464 | output of the first round of the second encryption doesn't depend on the |
| 465 | previous round. To see why this is so, it is enough to note that the first |
| 466 | round key has just been set equal to what is now the plaintext; the result of |
| 467 | the key mixing stage is zero, which is unaffected by the matrix and linear |
| 468 | transformation. |
| 469 | |
| 470 | A limit of 28 words is chosen to ensure that the round-1 key affects the |
| 471 | round-2 key in a part of the cipher earlier than the postwhitening stage. |
| 472 | |
| 473 | \begin{figure} |
| 474 | \centering |
| 475 | \leavevmode |
| 476 | \begin{xy} |
| 477 | \xycompile{ |
| 478 | \POS 0; <0.7cm, 0cm>: |
| 479 | \POS (0, 0) ="o" +(3, 0) ="w" |
| 480 | \ar "o" *+{P[0]}; p-(0, 1) *{\xor} ="x" |
| 481 | \ar "x" -(1, 0) *+[l]{P[0]}; "x" |
| 482 | \ar@{-} "x"; p-(0, 2) ="as" |
| 483 | \ar "w" *+{P[1]}; p-(0, 2) *{\xor} ="x" |
| 484 | \ar "o"-(0, 2); "x" |-*+[F]{F} |
| 485 | \ar@{-} "x"; p-(0, 1) ="bs" |
| 486 | \ar@{-} "as"; "bs"-(0, 1) ="w" |
| 487 | \ar@{-} "bs"; "as"-(0, 1) ="o" |
| 488 | \ar "o"; p-(0, 1) *+{P[1] \xor F(0)} ="x" |
| 489 | \ar "x"; p-(0, 1) *{\xor} ="x" |
| 490 | \ar "x" -(1, 0) *+[l]{P[1]}; "x" |
| 491 | \ar "x"; p-(0, 2) *+{F(0)} |
| 492 | \ar "w"; p-(0, 1) *+{0} ="x" |
| 493 | \ar "x"; p-(0, 2) *{\xor} ="x" |
| 494 | \ar "o"-(0, 3); "x" |-*+[F]{F} |
| 495 | \ar "x"; p-(0, 1) *+{F^2(0)}} |
| 496 | \end{xy} |
| 497 | \caption{Blowfish key schedule: $P[2]$ and $P[3]$ don't depend on $P[0]$ and |
| 498 | $P[1]$.} |
| 499 | \label{fig:bfkeysched} |
| 500 | \end{figure} |
| 501 | |
| 502 | \subsection{Attacking Storin} |
| 503 | |
| 504 | \subsubsection{Differential cryptanalysis} |
| 505 | |
| 506 | There is a two-round truncated differential \cite{Wooding:2000:Storin-diff}, |
| 507 | which can be used to break Storin reduced to only 2 rounds. The differential |
| 508 | \[ \begin{pmatrix} |
| 509 | 1 \lsl 23 \\ 1 \lsl 23 \\ 1 \lsl 23 \\ 0 |
| 510 | \end{pmatrix} \to |
| 511 | \begin{pmatrix} |
| 512 | 0 \\ 0 \\ 1 \lsl 23 \\ 0 |
| 513 | \end{pmatrix} |
| 514 | \] |
| 515 | holds with probability 1 through the matrix multiplication. |
| 516 | Differentials in the linear transform are easy to find; for example: |
| 517 | \[ \begin{pmatrix} |
| 518 | 0 \\ 0 \\ 1 \lsl 23 \\ 0 |
| 519 | \end{pmatrix} \to |
| 520 | \begin{pmatrix} |
| 521 | 0 \\ 0 \\ (1 \lsl 23) \xor (1 \lsl 11) \\ 0 |
| 522 | \end{pmatrix} |
| 523 | \] |
| 524 | We can continue through the second round's matrix multiplication with a |
| 525 | truncated differential, again with probability 1: |
| 526 | \[ \begin{pmatrix} |
| 527 | 0 \\ 0 \\ (1 \lsl 23) \xor (1 \lsl 11) \\ 0 |
| 528 | \end{pmatrix} \to |
| 529 | \begin{pmatrix} |
| 530 | \delta_0 \lsl 12 \\ |
| 531 | (\delta_1 \lsl 12) \xor (1 \lsl 11) \\ |
| 532 | (\delta_2 \lsl 12) \xor (1 \lsl 11) \\ |
| 533 | (\delta_3 \lsl 12) \xor (1 \lsl 11) \\ |
| 534 | \end{pmatrix} |
| 535 | \] |
| 536 | where the $\delta_i$ are unknown 12-bit values. Applying the linear |
| 537 | transformation to this output difference gives us |
| 538 | \[ \begin{pmatrix} |
| 539 | \delta_0 \lsl 12 \\ |
| 540 | (\delta_1 \lsl 12) \xor (1 \lsl 11) \\ |
| 541 | (\delta_2 \lsl 12) \xor (1 \lsl 11) \\ |
| 542 | (\delta_3 \lsl 12) \xor (1 \lsl 11) \\ |
| 543 | \end{pmatrix} \to |
| 544 | \begin{pmatrix} |
| 545 | (\delta_0 \lsl 12) \xor \delta_0 \\ |
| 546 | (\delta_1 \lsl 12) \xor \delta_1 \xor (1 \lsl 11) \\ |
| 547 | (\delta_2 \lsl 12) \xor \delta_2 \xor (1 \lsl 11) \\ |
| 548 | (\delta_3 \lsl 12) \xor \delta_3 \xor (1 \lsl 11) \\ |
| 549 | \end{pmatrix} |
| 550 | \] |
| 551 | A subsequent key-mixing or postwhitening stage won't affect the difference. |
| 552 | We can therefore combine the differentials above to construct a probability-1 |
| 553 | truncated differential for a 2-round variant of Storin: |
| 554 | \[ \begin{pmatrix} |
| 555 | 1 \lsl 23 \\ 1 \lsl 23 \\ 1 \lsl 23 \\ 0 |
| 556 | \end{pmatrix} \to |
| 557 | \begin{pmatrix} |
| 558 | (\delta_0 \lsl 12) \xor \delta_0 \\ |
| 559 | (\delta_1 \lsl 12) \xor \delta_1 \xor (1 \lsl 11) \\ |
| 560 | (\delta_2 \lsl 12) \xor \delta_2 \xor (1 \lsl 11) \\ |
| 561 | (\delta_3 \lsl 12) \xor \delta_3 \xor (1 \lsl 11) \\ |
| 562 | \end{pmatrix} |
| 563 | \] |
| 564 | This characteristic is non-iterative, and can't be extended to more rounds. |
| 565 | |
| 566 | The differential can be converted into a key-recovery attack against $n$ |
| 567 | rounds fairly easily, by obtaining the ciphertext for an appropriate |
| 568 | plaintext pair and guessing the $n - 2$ round keys, testing the guesses by |
| 569 | working backwards and finding out whether the expected output difference is |
| 570 | visible. The attack requires a pair of chosen plaintexts, and |
| 571 | $O(2^{96(n - 2)})$ work. It is only more efficient than exhaustive search |
| 572 | when the key is longer than $96(n - 2)$ bits. |
| 573 | |
| 574 | This attack can be improved. Consider a 3-round variant of Storin, where the |
| 575 | objective is to discover the postwhitening keys. The postwhitening stage can |
| 576 | be commuted with the linear transform simply by applying the transform to the |
| 577 | postwhitening keys. We do this, and guess the least significant 12 bits of |
| 578 | each of the (transformed) postwhitening key words. Working through the |
| 579 | matrix multiplication mod $2^{12}$ rather than mod $2^{24}$ then gives us the |
| 580 | 12 least significant bits of the state words on input to the matrix. Further |
| 581 | key bits can then be guessed and tested, four at a time, to recover the |
| 582 | remaining postwhitening key bits, by ensuring that the differences in the |
| 583 | more significant bits of the third round matrix input obey the characteristic |
| 584 | described above. This requires only about $2^{48}$ work, and may be extended |
| 585 | to further rounds by exhaustively searching for the extra round keys. |
| 586 | |
| 587 | This attack can break Storin with $n$ rounds ($n \ge 3$) with minimal chosen |
| 588 | plaintext and $O(2^{48 + 96(n - 3)})$ work. This is the best attack known |
| 589 | against Storin. |
| 590 | |
| 591 | \subsubsection{Other attacks} |
| 592 | |
| 593 | In \cite{Fisher:2000:Storin-collide}, Matthew Fisher speculates on breaking 2 |
| 594 | rounds of Storin by forcing collisions in the matrix multiplication outputs. |
| 595 | This attack doesn't extend to more than two rounds either. |
| 596 | |
| 597 | One possible avenue of attack worth exploring is to attempt to cause zero |
| 598 | words to be input into the first-round matrix by choosing plaintext words |
| 599 | identical to subkey words for the first round. Causing $n$ matrix input |
| 600 | words to be zero clearly takes $O(2^{24n})$ time. If a method can be found |
| 601 | to detect when zero words have been input to the matrix, this can be used to |
| 602 | discover the subkey words rather more rapidly than exhaustive search. We |
| 603 | can't see a way to exploit this at the moment, but it could be a fruitful |
| 604 | place to look for cryptanalysis. |
| 605 | |
| 606 | |
| 607 | \section{Conclusion} |
| 608 | |
| 609 | We have presented a new block cipher, Storin. Any cryptanalysis will be |
| 610 | received with interest. |
| 611 | |
| 612 | \bibliographystyle{alpha} |
| 613 | \bibliography{cryptography,mdw} |
| 614 | |
| 615 | %%%----- That's all, folks -------------------------------------------------- |
| 616 | |
| 617 | \end{document} |
| 618 | |
| 619 | %%% Local Variables: |
| 620 | %%% mode: latex |
| 621 | %%% TeX-master: t |
| 622 | %%% End: |