[storin] / storin.tex

%%% -*-latex-*-
%%%
%%% $Id: storin.tex,v 1.2 2000/05/21 21:43:26 mdw Exp $
%%%
%%% Definition of the cipher
%%%
%%% (c) 2000 Mark Wooding
%%%

%%%----- Revision history ---------------------------------------------------
%%%
%%% $Log: storin.tex,v $
%%% Revision 1.2  2000/05/21 21:43:26  mdw
%%% Fix a couple of typos.
%%%
%%% Revision 1.1  2000/05/21 11:28:30  mdw
%%% Initial check-in.
%%%

%%%----- Preamble -----------------------------------------------------------

\documentclass[a4paper]{article}
\usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
\usepackage{mdwtab}
\usepackage{mathenv}
\usepackage{amsfonts}
\usepackage{mdwmath}
\usepackage[all, dvips]{xy}

\def\ror{\mathbin{>\!\!>\!\!>}}
\def\rol{\mathbin{<\!\!<\!\!<}}
\def\lsr{\mathbin{>\!\!>}}
\def\lsl{\mathbin{<\!\!<}}
\def\xor{\oplus}
\def\seq#1{{\langle #1 \rangle}}

\sloppy

\title{Storin: A block cipher for digital signal processors}
\author{Mark Wooding (\texttt{mdw@nsict.org})}

%% --- The cipher diagrams ---

\def\figkeymix#1#2#3#4{%
  \ar "a"; p-(0, 0.5)*{\xor} ="a" \ar "a"+(1, 0) *+[r]{k_{#1}}; "a"%
  \ar "b"; p-(0, 0.5)*{\xor} ="b" \ar "b"+(1, 0) *+[r]{k_{#2}}; "b"%
  \ar "c"; p-(0, 0.5)*{\xor} ="c" \ar "c"+(1, 0) *+[r]{k_{#3}}; "c"%
  \ar "d"; p-(0, 0.5)*{\xor} ="d" \ar "d"+(1, 0) *+[r]{k_{#4}}; "d"%
}

\def\figmatrix{%
  \POS "a"+(3, -1) *++=(7, 0)[F]u\txt{Matrix multiply} ="m"%
  \ar "a"; "m"+U-(3, 0) \ar "b"; "m"+U-(1, 0)%
  \ar "c"; "m"+U+(1, 0) \ar "d"; "m"+U+(3, 0)%
}

\def\figlintrans{%
  \ar "m"+D-(3, 0); "a"-(0, 2.25)*{\xor} ="a"%
  \POS "a"+(1, 0) *+[F]{{} \lsr 12} ="x"%
  \ar `r "a"+(0, 0.5); p+(1, 0) "x" \ar "x"; "a"%
  \ar "m"+D-(1, 0); "b"-(0, 2.25)*{\xor} ="b"%
  \POS "b"+(1, 0) *+[F]{{} \lsr 12} ="x"%
  \ar `r "b"+(0, 0.5); p+(1, 0) "x" \ar "x"; "b"%
  \ar "m"+D+(1, 0); "c"-(0, 2.25)*{\xor} ="c"%
  \POS "c"+(1, 0) *+[F]{{} \lsr 12} ="x"%
  \ar `r "c"+(0, 0.5); p+(1, 0) "x" \ar "x"; "c"%
  \ar "m"+D+(3, 0); "d"-(0, 2.25)*{\xor} ="d"%
  \POS "d"+(1, 0) *+[F]{{} \lsr 12} ="x"%
  \ar `r "d"+(0, 0.5); p+(1, 0) "x" \ar "x"; "d"%
}

\def\figilintrans{%
  \ar "a"; "a"-(0, 1)*{\xor} ="a"%
  \POS "a"+(1, 0) *+[F]{{} \lsr 12} ="x"%
  \ar `r "a"+(0, 0.5); p+(1, 0) "x" \ar "x"; "a"%
  \ar "b"; "b"-(0, 1)*{\xor} ="b"%
  \POS "b"+(1, 0) *+[F]{{} \lsr 12} ="x"%
  \ar `r "b"+(0, 0.5); p+(1, 0) "x" \ar "x"; "b"%
  \ar "c"; "c"-(0, 1)*{\xor} ="c"%
  \POS "c"+(1, 0) *+[F]{{} \lsr 12} ="x"%
  \ar `r "c"+(0, 0.5); p+(1, 0) "x" \ar "x"; "c"%
  \ar "d"; "d"-(0, 1)*{\xor} ="d"%
  \POS "d"+(1, 0) *+[F]{{} \lsr 12} ="x"%
  \ar `r "d"+(0, 0.5); p+(1, 0) "x" \ar "x"; "d"%
}

\def\figstart#1{%
  \POS 0;<1cm,0cm>:%
  \turnradius{4pt}%
  \ar @{-} (0, 0) *+{a#1}; p-(0, 0.5) ="a"
  \ar @{-} (2, 0) *+{b#1}; p-(0, 0.5) ="b"
  \ar @{-} (4, 0) *+{c#1}; p-(0, 0.5) ="c"
  \ar @{-} (6, 0) *+{d#1}; p-(0, 0.5) ="d"
}

\def\figround#1#2#3#4#5{%
  \ar @{.} "a"-(0.5, 0); p+(8, 0)%
  \POS "a"+(8, -1.75) *[r]\txt{#5}%
  \figkeymix{#1}{#2}{#3}{#4}%
  \figmatrix%
  \figlintrans%
  \ar @{-} "a"; p-(0, .5) ="a"
  \ar @{-} "b"; p-(0, .5) ="b"
  \ar @{-} "c"; p-(0, .5) ="c"
  \ar @{-} "d"; p-(0, .5) ="d"
}

\def\figiround#1#2#3#4#5{%
  \ar @{.} "a"-(0.5, 0); p+(8, 0)%
  \POS "a"+(8, -1.75) *[r]\txt{#5}%
  \figkeymix{#1}{#2}{#3}{#4}%
  \figilintrans%
  \figmatrix%
  \ar @{-} "m"+D-(3, 0); p-(0, .5) ="a"
  \ar @{-} "m"+D-(1, 0); p-(0, .5) ="b"
  \ar @{-} "m"+D+(1, 0); p-(0, .5) ="c"
  \ar @{-} "m"+D+(3, 0); p-(0, .5) ="d"
}

\def\figgap{%
  \ar @{.} "a"-(0.5, 0); p+(8, 0)
  \POS "a"+(8, -1)*[r]\txt{Six more rounds}
  \ar @{--} "a"; "a"-(0, 2) ="a"
  \ar @{--} "b"; "b"-(0, 2) ="b"
  \ar @{--} "c"; "c"-(0, 2) ="c"
  \ar @{--} "d"; "d"-(0, 2) ="d"
}

\def\figwhite#1#2#3#4#5{%
  \ar @{.} "a"-(0.5, 0); p+(8, 0)
  \POS "a"+(8, -1)*[r]\txt{Postwhitening}
  \figkeymix{#1}{#2}{#3}{#4}
  \ar "a"; p-(0, 1) *+{a#5}
  \ar "b"; p-(0, 1) *+{b#5}
  \ar "c"; p-(0, 1) *+{c#5}
  \ar "d"; p-(0, 1) *+{d#5}
}

\begin{document}
\maketitle

%%%----- The main text ------------------------------------------------------

\begin{abstract}
  We present Storin: a new 96-bit block cipher designed to play to the
  strengths of current digital signal processors (DSPs).  In particular, DSPs
  tend to provide single-cycle multiply-and-accumulate operations, making
  matrix multiplications very cheap.  Working in an environment where
  multiplication is as fast as exclusive-or changes the usual perceptions
  about which operations provide good cryptographic strength cheaply.  The
  scarcity of available memory, for code and for tables, and a penalty for
  nonsequential access to data also make traditional block ciphers based
  around substitution tables unsuitable.
\end{abstract}

\tableofcontents

\section{Definition of the cipher}

\subsection{Overview}

Storin is an eight-round SP network operating on 96-bit blocks.  The block
cipher uses 36 24-bit subkey words, derived from a user key by the key
schedule.

The 96-bit input is split into four 24-bit words.  Each round then processes
these four words, using the following three steps:
\begin{enumerate}
\item Mixing in of some key material.  Four 24-bit subkey words are XORed
  with the four data words.
\item A matrix multiplication mod $2^{24}$.  The four words are treated as a
  column vector and premultiplied by a $4 \times 4$ vector using addition and
  multiplication mod $2^{24}$.  This is the main nonlinear step in the
  cipher, and it also provides most of the cipher's diffusion.
\item A simple linear transformation, which replaces each word $x$ by $x \xor
  (x \lsr 12)$.
\end{enumerate}
The four data words output by the final round are XORed with the last four
subkey words in a final postwhitening stage and combined to form the 96-bit
ciphertext.

The cipher structure is shown diagrammatically in figure~\ref{fig:cipher}.

\begin{figure}
\centering
\leavevmode
\begin{xy}
  \xycompile{
    \figstart{}
    \figround{0}{1}{2}{3}{Round 1}
    \figround{4}{5}{6}{7}{Round 2}
    \figgap
    \figwhite{32}{33}{34}{35}{'}}
\end{xy}
\caption{The Storin encryption function}
\label{fig:cipher}
\end{figure}

Since the matrix used in step 2 is chosen to be invertible, the cipher can be
inverted readily, simply by performing the inverse steps in the reverse
order.  Since the postwhitening stage is the same as a key mixing stage,
decryption can be viewed as eight rounds consisting of key mixing, linear
transformation and matrix multiplication, followed by a postwhitening stage.
Thus, the structure of the inverse cipher is very similar to the forwards
cipher, and uses the same components.  The decryption function is shown
diagrammatically in figure~\ref{fig:decipher}.

\begin{figure}
\centering
\leavevmode
\begin{xy}
  \xycompile{
    \figstart{'}
    \figiround{32}{33}{34}{35}{Round 1}
    \figiround{28}{29}{30}{31}{Round 2}
    \figgap
    \figwhite{0}{1}{2}{3}{}}
\end{xy}
\caption{The Storin decryption function}
\label{fig:decipher}
\end{figure}

The key schedule is designed to be simple and to reuse the cipher components
already available.  Given a user key, which is a sequence of one or more
24-bit words, it produces the 36 subkey words required by the cipher.  The
key schedule is very similar to Blowfish \cite{blowfish}.  The subkey array
is assigned an initial constant value derived from the matrix used in the
cipher.  Words from the user key are XORed into the array, starting from the
beginning, and restarting from the beginning of the user key when all the
user key words are exhausted.  A 96-bit block is initialized to zero, and
enciphered with Storin, using the subkeys currently in the array.  The first
four subkey words are then replaced with the resulting ciphertext, which is
then encrypted again using the new subkeys.  The next four subkey words are
replaced with the ciphertext, and the process continues, nine times in all,
until all of the subkey words have been replaced.

The Storin key schedule can accept user keys up to 36 words (864 bits) long.
It is unrealistic, however, to expect this much strength from the cipher.  We
recommend against using keys longer than 5 words (120 bits).


\subsection{Encryption}

We define $\mathcal{W} = \mathbb{Z}_{2^{24}}$ to be set of 24-bit words, and
$\mathcal{P} = \mathcal{W}^4$ to be the set of four-entry column vectors over
$\mathcal{W}$.  Storin plaintext blocks are members of $\mathcal{P}$.

The Storin encryption function uses 36 24-bit words of key material $k_0$,
$k_1$, \ldots, $k_{35}$, which are produced from the user key by the key
schedule, described below.  The key-mixing operation $K_i: \mathcal{P}
\rightarrow \mathcal{P}$ is defined for $0 \le i < 9$ by:
\[
  K_i \begin{pmatrix} a \\ b \\ c \\d \end{pmatrix}
  =
  \begin{pmatrix}
    a \xor k_{4i} \\ b \xor k_{4i+1} \\ c \xor k_{4i+2} \\ d \xor k_{4i+3}
  \end{pmatrix}
\]

The matrix multiplication operation $M: \mathcal{P} \to \mathcal{P}$
is described by $M(\mathbf{x}) = \mathbf{M} \mathbf{x}$, where $\mathbf{M}$
is a fixed invertible $4 \times 4$ matrix over $\mathcal{W}$.  The value of
$\mathbf{M}$ is defined below.

The linear transformation $L: \mathcal{P} \to \mathcal{P}$ is defined by:
\[
  L \begin{pmatrix} a \\ b \\ c \\ d \end{pmatrix}
  =
  \begin{pmatrix}
    a \xor (a \lsr 12) \\
    b \xor (b \lsr 12) \\
    c \xor (c \lsr 12) \\
    d \xor (d \lsr 12)
  \end{pmatrix}
\]

The round function $R_i: \mathcal{P} \to \mathcal{P}$ is defined for $0 \le i
< 8$ by
\[ R_i(\mathbf{x}) = L\bigl(\mathbf{M} K_i(\mathbf{x}) \bigr) \]

The cipher $C: \mathcal{P} \to \mathcal{P}$ is defined in terms of $R_i$ and
$K_i$.  Let $\mathbf{x}_0 \in \mathcal{P}$ be a plaintext vector.  Let
$\mathbf{x}_{i+1} = R_i(\mathbf{x}_i)$ for $0 \le i < 8$.  Then we define
$C(\mathbf{x})$ by setting $C(\mathbf{x}_0) = K_8(\mathbf{x}_8)$.


\subsection{Key schedule}

The key schedule converts a user key, which is a sequence of 24-bit words,
into the 36 subkeys required by the cipher.

For $i \ge 0$, we define that
\[
\begin{pmatrix}
  m_{16i +  0} & m_{16i +  1} & m_{16i +  2} & m_{16i +  3} \\
  m_{16i +  4} & m_{16i +  5} & m_{16i +  6} & m_{16i +  7} \\
  m_{16i +  8} & m_{16i +  9} & m_{16i + 10} & m_{16i + 11} \\
  m_{16i + 12} & m_{16i + 13} & m_{16i + 14} & m_{16i + 15}
\end{pmatrix}
= \mathbf{M}^{i + 2}
\]

Let the user-supplied key be $u_0$, $u_1$, \ldots, $u_{n-1}$, for some $n >
0$.  We define the sequence $z_0$, $z_1$, \ldots\ by
\[ z_i = m_i \xor u_{i \bmod n} \]
for $i \ge 0$.

Denote the result of encrypting vector $\mathbf{x}$ using subkeys from the
sequence $\seq{w} = w_0, w_1, \ldots, w_{35}$ as $C_{\seq{w}}(\mathbf{x})$.
We define the key schedule to be $k_0$, $k_1$, \ldots, $k_{35}$, where:
\begin{eqlines*}
  \seq{p^{(i)}} = k_0, k_1, \ldots, k_{4i-1}, z_{4i}, z_{4i+1}, \ldots \\
  \mathbf{x}_0 = \begin{pmatrix} 0 \\ 0 \\ 0 \\ 0 \end{pmatrix}; \qquad
  \begin{pmatrix} k_{4i} \\ k_{4i+1} \\ k_{4i+2} \\ k_{4i+3} \end{pmatrix}
    = \mathbf{x}_{i+1} = C_{\seq{p^{(i)}}}(\mathbf{x}_i)
\end{eqlines*}


\subsection{Decryption}

The individual operations used during encryption are all invertible.  Key
mixing is inverted by taking keys from the other end of the array:
\[ K^{-1}_i(\mathbf{x}) = K_{8-i}(\mathbf{x}) \]
The matrix multiplication may be inverted simply by using the inverse matrix
$\mathbf{M}^{-1}$:
\[ M^{-1}(\mathbf{x}) = \mathbf{M}^{-1} \mathbf{x} \]
Finally, the linear transformation is its own inverse:
\[ L^{-1}(\mathbf{x}) = L(\mathbf{x}) \]
The inverse round function can now be defined as:
\[ R^{-1}_i(\mathbf{x}) =
    \mathbf{M}^{-1} L\bigl(K^{-1}_i(\mathbf{x})\bigr) \]
    
The decryption function $C^{-1}: \mathcal{P} \to \mathcal{P}$ is defined
in terms of $R^{-1}$ and $K^{-1}$ in a very similar way to encryption.  Let
$\mathbf{x}_0$ be a ciphertext vector.  Let $\mathbf{x}_{i+1} =
R^{-1}_i(\mathbf{x}_i)$ for $0 \le i < 8$.  Then we define
$C^{-1}(\mathbf{x}_0) = K^{-1}_8(\mathbf{x}_8)$.


\subsection{Constants}

The matrix $\mathbf{M}$ and its inverse $\mathbf{M}^{-1}$ are:
\begin{eqnarray*}[rl]
  \mathbf{M} = &
  \begin{pmatrix}[[>{\hbox\bgroup\ttfamily}c<{\egroup}]
    f7a413 & 54bd81 & 447550 & ff4449 \\
    f31e87 & d85388 & de32cb & 40e3d7 \\
    d9db1d & 551b45 & e9d19f & e443de \\
    4b949a & 4d435d & ef0a17 & b784e1
  \end{pmatrix} \\
  \mathbf{M}^{-1} = &
  \begin{pmatrix}[[>{\hbox\bgroup\ttfamily}c<{\egroup}]
    17391b & fafb4b & a66823 & f2efb6 \\
    13e0e5 & 2ed5e4 & b2cfff & d9cdb5 \\
    2af462 & 33826d & de66a1 & eb6c85 \\
    c2f423 & e904a3 & e772d8 & d791f1
  \end{pmatrix}
\end{eqnarray*}


\section{Rationale and analysis}

\subsection{Design decisions}

The initial objective was to produce a cipher which played to the particular
strengths of digital signal processors.  DSPs tend to have good multipliers,
and are particularly good at matrix multiplication.  The decision to use a
matrix multiplication over $\mathbb{Z}_{2^{24}}$ seemed natural, given that
24 bits is a commonly offered word size.

The choice of a 96-bit block is also fairly natural.  A 2 word (48-bit) block
is clearly too small, and a 3 word (72-bit) block is a little on the small
side too.


\subsection{Matrix multiplication over $\mathbb{Z}_{2^{24}}$}

Integer multiplication on a DSP is a cheap source of nonlinearity.  Note that
bit $i$ of the result depends on all of the bits in the operands of lesser or
equal significance.position $i$ downwards.

The decision to make the $4 \times 4$ matrix fixed was taken fairly early on.
Generating invertible matrices from key material seemed like too much work to
expect from the DSP.

The matrix is generated pseudorandomly from a seed string, using SHA-1.  The
criteria we used to choose the matrix are:
\begin{enumerate}
\item The matrix must be invertible.
\item Exactly one entry in each row and column of the matrix must be even.
\end{enumerate}
Criterion 1 is obvious.  Criterion 2 encourages diffusion between the entries
in the block vector.  Note that if a matrix satisfies the second criterion,
its inverse also does.

Consider a vector $\mathbf{x}$ and its product with the matrix $\mathbf{M}
\mathbf{x}$.  Whether the top bit of entry $i$ in $\mathbf{x}$ affects
entry $j$ in the product depends on whether the entry in row $j$, column $i$
of $\mathbf{M}$ is even.  Criterion 2 ensures the following:
\begin{itemize}
\item A top-bit change in a single word or three words affects three words in
  the output.
\item A top-bit change in two words affects two words in the output.
\end{itemize}

The seed string used is \texttt{matrix-seed-string}.  The program which
generates the matrix is included with the Storin example source code.

\subsection{The linear transformation}

A bit change in one of the inputs to the matrix can only affect bits at that
position and higher in the output.  The linear transformation at the end of
the round aims to provide diffusion from the high-order bits back to the
low-order bits.

A single high-order bit change in the input to a round will affect the
high-order bits of three words in the output of the matrix multiply.  The
linear transformation causes it to affect bits in the low halves of each of
these words.  The second round's multiplication causes these bits to affect
the whole top halves of all of the output words.  The linear transformation
propagates this change to the bottom halves.  Complete avalanche is therefore
achieved after three rounds of Storin.


\subsection{Key schedule notes}

The key schedule is intended to be adequate for bulk encryption; it doesn't
provide good key agility, and isn't intended to.  The key schedule accepts an
arbitrary number of 24-bit words, although expecting 864 bits of security
from the cipher is not realistic.  The suggested maximum of 5 words (120
bits) seems more sensible.  This maximum can be raised easily when our
understanding of the cipher increases our confidence in it.

The key schedule is strongly reminiscent of Blowfish \cite{blowfish}.  Use of
existing components of the cipher, such as the matrix multiplication and the
cipher itself, help reduce the amount of code required in the implementation.

There is an interesting feature of the key schedule: the output of the first
round of the second encryption is zero.  To see why this is so, it is enough
to note that the first round key has just been set equal to what is now the
plaintext; the result of the key mixing stage is zero, which is unaffected by
the matrix and linear transformation.


\subsection{Attacking Storin}

A brief\footnote{About three days' worth on a 300MHz Pentium II.}
computerized analysis of the matrix multiplication failed to turn up any
high-probability differential characteristics.  While an exhaustive search
was clearly not possible, the program tested all differentials of Hamming
weight 5 or less, and then random differentials, applying each to a suite of
$2^{13}$ different 96-bit inputs chosen at random.  No output difference was
noted more than once.

One possible avenue of attack worth exploring is to attempt to cause zero
words to be input into the first-round matrix by choosing plaintext words
identical to subkey words for the first round.  Causing $n$ matrix input
words to be zero clearly takes $O(2^{24n})$ time.  If a method can be found
to detect when zero words have been input to the matrix, this can be used to
discover the subkey words rather more rapidly than exhaustive search.  We
can't see a way to exploit this at the moment, but it could be a fruitful
place to look for cryptanalysis.


\section{Conclusion}

We have presented a new block cipher, Storin.  Any cryptanalysis will be
received with interest.


\begin{thebibliography}{99}
\bibitem{idea}{X. Lai, `On the Design and Security of Block Ciphers', ETH
    Series in Informatics Processing, J. L. Massey (editor), vol. 1,
    Hartung-Gorre Verlag Konstanz, Technische Hochschule (Zurich), 1992}
\bibitem{blowfish}{B. Schneier, `The Blowfish Encryption Algorithm',
    \textit{Dr Dobb's Journal}, vol. 19 no. 4, April 1994, pp. 38--40}
\end{thebibliography}

%%%----- That's all, folks --------------------------------------------------

\end{document}

%%% Local Variables: 
%%% mode: latex
%%% TeX-master: t
%%% End:
Commit	Line	Data
e6e0e332 MW	1	%%% --latex--
e6e0e332 MW	2	%%%
31b692a0	3	%%% $Id: storin.tex,v 1.2 2000/05/21 21:43:26 mdw Exp $
e6e0e332 MW	4	%%%
	5	%%% Definition of the cipher
	6	%%%
	7	%%% (c) 2000 Mark Wooding
	8	%%%
	9
	10	%%%----- Revision history ---------------------------------------------------
	11	%%%
	12	%%% $Log: storin.tex,v $
31b692a0 MW	13	%%% Revision 1.2 2000/05/21 21:43:26 mdw
	14	%%% Fix a couple of typos.
	15	%%%
e6e0e332 MW	16	%%% Revision 1.1 2000/05/21 11:28:30 mdw
	17	%%% Initial check-in.
	18	%%%
	19
	20	%%%----- Preamble -----------------------------------------------------------
	21
	22	\documentclass[a4paper]{article}
	23	\usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
	24	\usepackage{mdwtab}
	25	\usepackage{mathenv}
	26	\usepackage{amsfonts}
	27	\usepackage{mdwmath}
	28	\usepackage[all, dvips]{xy}
	29
	30	\def\ror{\mathbin{>\!\!>\!\!>}}
	31	\def\rol{\mathbin{<\!\!<\!\!<}}
	32	\def\lsr{\mathbin{>\!\!>}}
	33	\def\lsl{\mathbin{<\!\!<}}
	34	\def\xor{\oplus}
	35	\def\seq#1{{\langle #1 \rangle}}
	36
	37	\sloppy
	38
	39	\title{Storin: A block cipher for digital signal processors}
	40	\author{Mark Wooding (\texttt{mdw@nsict.org})}
	41
	42	%% --- The cipher diagrams ---
	43
	44	\def\figkeymix#1#2#3#4{%
	45	\ar "a"; p-(0, 0.5){\xor} ="a" \ar "a"+(1, 0) +[r]{k_{#1}}; "a"%
	46	\ar "b"; p-(0, 0.5){\xor} ="b" \ar "b"+(1, 0) +[r]{k_{#2}}; "b"%
	47	\ar "c"; p-(0, 0.5){\xor} ="c" \ar "c"+(1, 0) +[r]{k_{#3}}; "c"%
	48	\ar "d"; p-(0, 0.5){\xor} ="d" \ar "d"+(1, 0) +[r]{k_{#4}}; "d"%
	49	}
	50
	51	\def\figmatrix{%
	52	\POS "a"+(3, -1) *++=(7, 0)[F]u\txt{Matrix multiply} ="m"%
	53	\ar "a"; "m"+U-(3, 0) \ar "b"; "m"+U-(1, 0)%
	54	\ar "c"; "m"+U+(1, 0) \ar "d"; "m"+U+(3, 0)%
	55	}
	56
	57	\def\figlintrans{%
	58	\ar "m"+D-(3, 0); "a"-(0, 2.25)*{\xor} ="a"%
	59	\POS "a"+(1, 0) *+[F]{{} \lsr 12} ="x"%
	60	\ar `r "a"+(0, 0.5); p+(1, 0) "x" \ar "x"; "a"%
	61	\ar "m"+D-(1, 0); "b"-(0, 2.25)*{\xor} ="b"%
	62	\POS "b"+(1, 0) *+[F]{{} \lsr 12} ="x"%
	63	\ar `r "b"+(0, 0.5); p+(1, 0) "x" \ar "x"; "b"%
	64	\ar "m"+D+(1, 0); "c"-(0, 2.25)*{\xor} ="c"%
	65	\POS "c"+(1, 0) *+[F]{{} \lsr 12} ="x"%
	66	\ar `r "c"+(0, 0.5); p+(1, 0) "x" \ar "x"; "c"%
	67	\ar "m"+D+(3, 0); "d"-(0, 2.25)*{\xor} ="d"%
	68	\POS "d"+(1, 0) *+[F]{{} \lsr 12} ="x"%
	69	\ar `r "d"+(0, 0.5); p+(1, 0) "x" \ar "x"; "d"%
	70	}
	71
	72	\def\figilintrans{%
	73	\ar "a"; "a"-(0, 1)*{\xor} ="a"%
	74	\POS "a"+(1, 0) *+[F]{{} \lsr 12} ="x"%
	75	\ar `r "a"+(0, 0.5); p+(1, 0) "x" \ar "x"; "a"%
	76	\ar "b"; "b"-(0, 1)*{\xor} ="b"%
	77	\POS "b"+(1, 0) *+[F]{{} \lsr 12} ="x"%
	78	\ar `r "b"+(0, 0.5); p+(1, 0) "x" \ar "x"; "b"%
	79	\ar "c"; "c"-(0, 1)*{\xor} ="c"%
80	\POS "c"+(1, 0) *+[F]{{} \lsr 12} ="x"%
81	\ar `r "c"+(0, 0.5); p+(1, 0) "x" \ar "x"; "c"%
82	\ar "d"; "d"-(0, 1)*{\xor} ="d"%
83	\POS "d"+(1, 0) *+[F]{{} \lsr 12} ="x"%
84	\ar `r "d"+(0, 0.5); p+(1, 0) "x" \ar "x"; "d"%
85	}
86
31b692a0	87	\def\figstart#1{%
e6e0e332 MW	88	\POS 0;<1cm,0cm>:%
e6e0e332 MW	89	\turnradius{4pt}%
31b692a0 MW	90	\ar @{-} (0, 0) *+{a#1}; p-(0, 0.5) ="a"
	91	\ar @{-} (2, 0) *+{b#1}; p-(0, 0.5) ="b"
	92	\ar @{-} (4, 0) *+{c#1}; p-(0, 0.5) ="c"
	93	\ar @{-} (6, 0) *+{d#1}; p-(0, 0.5) ="d"
e6e0e332 MW	94	}
	95
	96	\def\figround#1#2#3#4#5{%
	97	\ar @{.} "a"-(0.5, 0); p+(8, 0)%
	98	\POS "a"+(8, -1.75) *[r]\txt{#5}%
	99	\figkeymix{#1}{#2}{#3}{#4}%
	100	\figmatrix%
	101	\figlintrans%
	102	\ar @{-} "a"; p-(0, .5) ="a"
	103	\ar @{-} "b"; p-(0, .5) ="b"
	104	\ar @{-} "c"; p-(0, .5) ="c"
	105	\ar @{-} "d"; p-(0, .5) ="d"
	106	}
	107
	108	\def\figiround#1#2#3#4#5{%
	109	\ar @{.} "a"-(0.5, 0); p+(8, 0)%
	110	\POS "a"+(8, -1.75) *[r]\txt{#5}%
	111	\figkeymix{#1}{#2}{#3}{#4}%
	112	\figilintrans%
	113	\figmatrix%
	114	\ar @{-} "m"+D-(3, 0); p-(0, .5) ="a"
	115	\ar @{-} "m"+D-(1, 0); p-(0, .5) ="b"
	116	\ar @{-} "m"+D+(1, 0); p-(0, .5) ="c"
	117	\ar @{-} "m"+D+(3, 0); p-(0, .5) ="d"
	118	}
	119
	120	\def\figgap{%
	121	\ar @{.} "a"-(0.5, 0); p+(8, 0)
	122	\POS "a"+(8, -1)*[r]\txt{Six more rounds}
	123	\ar @{--} "a"; "a"-(0, 2) ="a"
	124	\ar @{--} "b"; "b"-(0, 2) ="b"
	125	\ar @{--} "c"; "c"-(0, 2) ="c"
	126	\ar @{--} "d"; "d"-(0, 2) ="d"
	127	}
	128
31b692a0	129	\def\figwhite#1#2#3#4#5{%
e6e0e332 MW	130	\ar @{.} "a"-(0.5, 0); p+(8, 0)
	131	\POS "a"+(8, -1)*[r]\txt{Postwhitening}
	132	\figkeymix{#1}{#2}{#3}{#4}
31b692a0 MW	133	\ar "a"; p-(0, 1) *+{a#5}
	134	\ar "b"; p-(0, 1) *+{b#5}
	135	\ar "c"; p-(0, 1) *+{c#5}
	136	\ar "d"; p-(0, 1) *+{d#5}
e6e0e332 MW	137	}
	138
	139	\begin{document}
	140	\maketitle
	141
	142	%%%----- The main text ------------------------------------------------------
	143
	144	\begin{abstract}
	145	We present Storin: a new 96-bit block cipher designed to play to the
	146	strengths of current digital signal processors (DSPs). In particular, DSPs
	147	tend to provide single-cycle multiply-and-accumulate operations, making
	148	matrix multiplications very cheap. Working in an environment where
	149	multiplication is as fast as exclusive-or changes the usual perceptions
	150	about which operations provide good cryptographic strength cheaply. The
	151	scarcity of available memory, for code and for tables, and a penalty for
	152	nonsequential access to data also make traditional block ciphers based
	153	around substitution tables unsuitable.
	154	\end{abstract}
	155
	156	\tableofcontents
	157
	158	\section{Definition of the cipher}
	159
	160	\subsection{Overview}
	161
	162	Storin is an eight-round SP network operating on 96-bit blocks. The block
	163	cipher uses 36 24-bit subkey words, derived from a user key by the key
	164	schedule.
	165
	166	The 96-bit input is split into four 24-bit words. Each round then processes
	167	these four words, using the following three steps:
	168	\begin{enumerate}
	169	\item Mixing in of some key material. Four 24-bit subkey words are XORed
	170	with the four data words.
	171	\item A matrix multiplication mod $2^{24}$. The four words are treated as a
	172	column vector and premultiplied by a $4 \times 4$ vector using addition and
	173	multiplication mod $2^{24}$. This is the main nonlinear step in the
	174	cipher, and it also provides most of the cipher's diffusion.
	175	\item A simple linear transformation, which replaces each word $x$ by $x \xor
	176	(x \lsr 12)$.
	177	\end{enumerate}
	178	The four data words output by the final round are XORed with the last four
	179	subkey words in a final postwhitening stage and combined to form the 96-bit
	180	ciphertext.
	181
	182	The cipher structure is shown diagrammatically in figure~\ref{fig:cipher}.
	183
	184	\begin{figure}
	185	\centering
	186	\leavevmode
	187	\begin{xy}
	188	\xycompile{
31b692a0	189	\figstart{}
e6e0e332 MW	190	\figround{0}{1}{2}{3}{Round 1}
	191	\figround{4}{5}{6}{7}{Round 2}
	192	\figgap
31b692a0	193	\figwhite{32}{33}{34}{35}{'}}
e6e0e332 MW	194	\end{xy}
	195	\caption{The Storin encryption function}
	196	\label{fig:cipher}
	197	\end{figure}
	198
	199	Since the matrix used in step 2 is chosen to be invertible, the cipher can be
	200	inverted readily, simply by performing the inverse steps in the reverse
	201	order. Since the postwhitening stage is the same as a key mixing stage,
	202	decryption can be viewed as eight rounds consisting of key mixing, linear
	203	transformation and matrix multiplication, followed by a postwhitening stage.
	204	Thus, the structure of the inverse cipher is very similar to the forwards
	205	cipher, and uses the same components. The decryption function is shown
	206	diagrammatically in figure~\ref{fig:decipher}.
	207
	208	\begin{figure}
	209	\centering
	210	\leavevmode
	211	\begin{xy}
	212	\xycompile{
31b692a0	213	\figstart{'}
e6e0e332 MW	214	\figiround{32}{33}{34}{35}{Round 1}
	215	\figiround{28}{29}{30}{31}{Round 2}
	216	\figgap
31b692a0	217	\figwhite{0}{1}{2}{3}{}}
e6e0e332 MW	218	\end{xy}
	219	\caption{The Storin decryption function}
	220	\label{fig:decipher}
	221	\end{figure}
	222
	223	The key schedule is designed to be simple and to reuse the cipher components
	224	already available. Given a user key, which is a sequence of one or more
	225	24-bit words, it produces the 36 subkey words required by the cipher. The
	226	key schedule is very similar to Blowfish \cite{blowfish}. The subkey array
	227	is assigned an initial constant value derived from the matrix used in the
	228	cipher. Words from the user key are XORed into the array, starting from the
	229	beginning, and restarting from the beginning of the user key when all the
	230	user key words are exhausted. A 96-bit block is initialized to zero, and
	231	enciphered with Storin, using the subkeys currently in the array. The first
	232	four subkey words are then replaced with the resulting ciphertext, which is
	233	then encrypted again using the new subkeys. The next four subkey words are
	234	replaced with the ciphertext, and the process continues, nine times in all,
	235	until all of the subkey words have been replaced.
	236
	237	The Storin key schedule can accept user keys up to 36 words (864 bits) long.
	238	It is unrealistic, however, to expect this much strength from the cipher. We
	239	recommend against using keys longer than 5 words (120 bits).
	240
	241
	242	\subsection{Encryption}
	243
	244	We define $\mathcal{W} = \mathbb{Z}_{2^{24}}$ to be set of 24-bit words, and
	245	$\mathcal{P} = \mathcal{W}^4$ to be the set of four-entry column vectors over
	246	$\mathcal{W}$. Storin plaintext blocks are members of $\mathcal{P}$.
	247
	248	The Storin encryption function uses 36 24-bit words of key material $k_0$,
	249	$k_1$, \ldots, $k_{35}$, which are produced from the user key by the key
	250	schedule, described below. The key-mixing operation $K_i: \mathcal{P}
	251	\rightarrow \mathcal{P}$ is defined for $0 \le i < 9$ by:
	252	\[
	253	K_i \begin{pmatrix} a \\ b \\ c \\d \end{pmatrix}
	254	=
	255	\begin{pmatrix}
	256	a \xor k_{4i} \\ b \xor k_{4i+1} \\ c \xor k_{4i+2} \\ d \xor k_{4i+3}
	257	\end{pmatrix}
	258	\]
	259
	260	The matrix multiplication operation $M: \mathcal{P} \to \mathcal{P}$
	261	is described by $M(\mathbf{x}) = \mathbf{M} \mathbf{x}$, where $\mathbf{M}$
	262	is a fixed invertible $4 \times 4$ matrix over $\mathcal{W}$. The value of
	263	$\mathbf{M}$ is defined below.
	264
	265	The linear transformation $L: \mathcal{P} \to \mathcal{P}$ is defined by:
	266	\[
	267	L \begin{pmatrix} a \\ b \\ c \\ d \end{pmatrix}
	268	=
	269	\begin{pmatrix}
	270	a \xor (a \lsr 12) \\
	271	b \xor (b \lsr 12) \\
	272	c \xor (c \lsr 12) \\
	273	d \xor (d \lsr 12)
	274	\end{pmatrix}
	275	\]
	276
	277	The round function $R_i: \mathcal{P} \to \mathcal{P}$ is defined for $0 \le i
	278	< 8$ by
	279	\[ R_i(\mathbf{x}) = L\bigl(\mathbf{M} K_i(\mathbf{x}) \bigr) \]
	280
	281	The cipher $C: \mathcal{P} \to \mathcal{P}$ is defined in terms of $R_i$ and
282	$K_i$. Let $\mathbf{x}_0 \in \mathcal{P}$ be a plaintext vector. Let
283	$\mathbf{x}_{i+1} = R_i(\mathbf{x}_i)$ for $0 \le i < 8$. Then we define
284	$C(\mathbf{x})$ by setting $C(\mathbf{x}_0) = K_8(\mathbf{x}_8)$.
285
286
287	\subsection{Key schedule}
288
289	The key schedule converts a user key, which is a sequence of 24-bit words,
290	into the 36 subkeys required by the cipher.
291
292	For $i \ge 0$, we define that
293	\[
294	\begin{pmatrix}
295	m_{16i + 0} & m_{16i + 1} & m_{16i + 2} & m_{16i + 3} \\
296	m_{16i + 4} & m_{16i + 5} & m_{16i + 6} & m_{16i + 7} \\
297	m_{16i + 8} & m_{16i + 9} & m_{16i + 10} & m_{16i + 11} \\
298	m_{16i + 12} & m_{16i + 13} & m_{16i + 14} & m_{16i + 15}
299	\end{pmatrix}
300	= \mathbf{M}^{i + 2}
301	\]
302
303	Let the user-supplied key be $u_0$, $u_1$, \ldots, $u_{n-1}$, for some $n >
304	0$. We define the sequence $z_0$, $z_1$, \ldots\ by
305	\[ z_i = m_i \xor u_{i \bmod n} \]
306	for $i \ge 0$.
307
308	Denote the result of encrypting vector $\mathbf{x}$ using subkeys from the
309	sequence $\seq{w} = w_0, w_1, \ldots, w_{35}$ as $C_{\seq{w}}(\mathbf{x})$.
310	We define the key schedule to be $k_0$, $k_1$, \ldots, $k_{35}$, where:
311	\begin{eqlines*}
312	\seq{p^{(i)}} = k_0, k_1, \ldots, k_{4i-1}, z_{4i}, z_{4i+1}, \ldots \\
313	\mathbf{x}_0 = \begin{pmatrix} 0 \\ 0 \\ 0 \\ 0 \end{pmatrix}; \qquad
314	\begin{pmatrix} k_{4i} \\ k_{4i+1} \\ k_{4i+2} \\ k_{4i+3} \end{pmatrix}
315	= \mathbf{x}_{i+1} = C_{\seq{p^{(i)}}}(\mathbf{x}_i)
316	\end{eqlines*}
317
318
319	\subsection{Decryption}
320
321	The individual operations used during encryption are all invertible. Key
322	mixing is inverted by taking keys from the other end of the array:
323	\[ K^{-1}_i(\mathbf{x}) = K_{8-i}(\mathbf{x}) \]
324	The matrix multiplication may be inverted simply by using the inverse matrix
325	$\mathbf{M}^{-1}$:
326	\[ M^{-1}(\mathbf{x}) = \mathbf{M}^{-1} \mathbf{x} \]
327	Finally, the linear transformation is its own inverse:
328	\[ L^{-1}(\mathbf{x}) = L(\mathbf{x}) \]
329	The inverse round function can now be defined as:
330	\[ R^{-1}_i(\mathbf{x}) =
331	\mathbf{M}^{-1} L\bigl(K^{-1}_i(\mathbf{x})\bigr) \]
332
333	The decryption function $C^{-1}: \mathcal{P} \to \mathcal{P}$ is defined
334	in terms of $R^{-1}$ and $K^{-1}$ in a very similar way to encryption. Let
335	$\mathbf{x}_0$ be a ciphertext vector. Let $\mathbf{x}_{i+1} =
336	R^{-1}_i(\mathbf{x}_i)$ for $0 \le i < 8$. Then we define
337	$C^{-1}(\mathbf{x}_0) = K^{-1}_8(\mathbf{x}_8)$.
338
339
340	\subsection{Constants}
341
342	The matrix $\mathbf{M}$ and its inverse $\mathbf{M}^{-1}$ are:
343	\begin{eqnarray*}[rl]
344	\mathbf{M} = &
345	\begin{pmatrix}[[>{\hbox\bgroup\ttfamily}c<{\egroup}]
346	f7a413 & 54bd81 & 447550 & ff4449 \\
347	f31e87 & d85388 & de32cb & 40e3d7 \\
348	d9db1d & 551b45 & e9d19f & e443de \\
349	4b949a & 4d435d & ef0a17 & b784e1
350	\end{pmatrix} \\
351	\mathbf{M}^{-1} = &
352	\begin{pmatrix}[[>{\hbox\bgroup\ttfamily}c<{\egroup}]
353	17391b & fafb4b & a66823 & f2efb6 \\
354	13e0e5 & 2ed5e4 & b2cfff & d9cdb5 \\
355	2af462 & 33826d & de66a1 & eb6c85 \\
356	c2f423 & e904a3 & e772d8 & d791f1
357	\end{pmatrix}
358	\end{eqnarray*}
359
360
361
362	\section{Rationale and analysis}
363
364	\subsection{Design decisions}
365
366	The initial objective was to produce a cipher which played to the particular
367	strengths of digital signal processors. DSPs tend to have good multipliers,
31b692a0	368	and are particularly good at matrix multiplication. The decision to use a
e6e0e332 MW	369	matrix multiplication over $\mathbb{Z}_{2^{24}}$ seemed natural, given that
	370	24 bits is a commonly offered word size.
	371
	372	The choice of a 96-bit block is also fairly natural. A 2 word (48-bit) block
	373	is clearly too small, and a 3 word (72-bit) block is a little on the small
	374	side too.
	375
	376
	377	\subsection{Matrix multiplication over $\mathbb{Z}_{2^{24}}$}
	378
	379	Integer multiplication on a DSP is a cheap source of nonlinearity. Note that
	380	bit $i$ of the result depends on all of the bits in the operands of lesser or
	381	equal significance.position $i$ downwards.
	382
	383	The decision to make the $4 \times 4$ matrix fixed was taken fairly early on.
	384	Generating invertible matrices from key material seemed like too much work to
	385	expect from the DSP.
	386
	387	The matrix is generated pseudorandomly from a seed string, using SHA-1. The
	388	criteria we used to choose the matrix are:
	389	\begin{enumerate}
	390	\item The matrix must be invertible.
	391	\item Exactly one entry in each row and column of the matrix must be even.
	392	\end{enumerate}
	393	Criterion 1 is obvious. Criterion 2 encourages diffusion between the entries
	394	in the block vector. Note that if a matrix satisfies the second criterion,
	395	its inverse also does.
	396
	397	Consider a vector $\mathbf{x}$ and its product with the matrix $\mathbf{M}
	398	\mathbf{x}$. Whether the top bit of entry $i$ in $\mathbf{x}$ affects
	399	entry $j$ in the product depends on whether the entry in row $j$, column $i$
	400	of $\mathbf{M}$ is even. Criterion 2 ensures the following:
	401	\begin{itemize}
	402	\item A top-bit change in a single word or three words affects three words in
	403	the output.
	404	\item A top-bit change in two words affects two words in the output.
	405	\end{itemize}
	406
	407	The seed string used is \texttt{matrix-seed-string}. The program which
	408	generates the matrix is included with the Storin example source code.
	409
	410	\subsection{The linear transformation}
	411
	412	A bit change in one of the inputs to the matrix can only affect bits at that
	413	position and higher in the output. The linear transformation at the end of
	414	the round aims to provide diffusion from the high-order bits back to the
	415	low-order bits.
	416
	417	A single high-order bit change in the input to a round will affect the
	418	high-order bits of three words in the output of the matrix multiply. The
	419	linear transformation causes it to affect bits in the low halves of each of
	420	these words. The second round's multiplication causes these bits to affect
	421	the whole top halves of all of the output words. The linear transformation
	422	propagates this change to the bottom halves. Complete avalanche is therefore
	423	achieved after three rounds of Storin.
	424
	425
	426	\subsection{Key schedule notes}
	427
	428	The key schedule is intended to be adequate for bulk encryption; it doesn't
	429	provide good key agility, and isn't intended to. The key schedule accepts an
	430	arbitrary number of 24-bit words, although expecting 864 bits of security
	431	from the cipher is not realistic. The suggested maximum of 5 words (120
	432	bits) seems more sensible. This maximum can be raised easily when our
433	understanding of the cipher increases our confidence in it.
434
435	The key schedule is strongly reminiscent of Blowfish \cite{blowfish}. Use of
436	existing components of the cipher, such as the matrix multiplication and the
437	cipher itself, help reduce the amount of code required in the implementation.
438
439	There is an interesting feature of the key schedule: the output of the first
440	round of the second encryption is zero. To see why this is so, it is enough
441	to note that the first round key has just been set equal to what is now the
442	plaintext; the result of the key mixing stage is zero, which is unaffected by
443	the matrix and linear transformation.
444
445
446	\subsection{Attacking Storin}
447
448	A brief\footnote{About three days' worth on a 300MHz Pentium II.}
449	computerized analysis of the matrix multiplication failed to turn up any
450	high-probability differential characteristics. While an exhaustive search
451	was clearly not possible, the program tested all differentials of Hamming
452	weight 5 or less, and then random differentials, applying each to a suite of
453	$2^{13}$ different 96-bit inputs chosen at random. No output difference was
454	noted more than once.
455
456	One possible avenue of attack worth exploring is to attempt to cause zero
457	words to be input into the first-round matrix by choosing plaintext words
458	identical to subkey words for the first round. Causing $n$ matrix input
459	words to be zero clearly takes $O(2^{24n})$ time. If a method can be found
460	to detect when zero words have been input to the matrix, this can be used to
461	discover the subkey words rather more rapidly than exhaustive search. We
462	can't see a way to exploit this at the moment, but it could be a fruitful
463	place to look for cryptanalysis.
464
465
466	\section{Conclusion}
467
468	We have presented a new block cipher, Storin. Any cryptanalysis will be
469	received with interest.
470
471
472	\begin{thebibliography}{99}
473	\bibitem{idea}{X. Lai, `On the Design and Security of Block Ciphers', ETH
474	Series in Informatics Processing, J. L. Massey (editor), vol. 1,
475	Hartung-Gorre Verlag Konstanz, Technische Hochschule (Zurich), 1992}
476	\bibitem{blowfish}{B. Schneier, `The Blowfish Encryption Algorithm',
477	\textit{Dr Dobb's Journal}, vol. 19 no. 4, April 1994, pp. 38--40}
478	\end{thebibliography}
479
480	%%%----- That's all, folks --------------------------------------------------
481
482	\end{document}
483
484	%%% Local Variables:
485	%%% mode: latex
486	%%% TeX-master: t
487	%%% End: