From: mdw <mdw>
Date: Fri, 10 Dec 1999 23:27:11 +0000 (+0000)
Subject: Embryonic library reference manual.
X-Git-Url: https://git.distorted.org.uk/u/mdw/catacomb/commitdiff_plain/3471ebd194145da52d419c6315459237b076e18d

Embryonic library reference manual.
---

diff --git a/manual/.cvsignore b/manual/.cvsignore
new file mode 100644
index 0000000..b2ba3d7
--- /dev/null
+++ b/manual/.cvsignore
@@ -0,0 +1,2 @@
+*.aux *.toc *.dvi *.ps *.ind *.idx *.log
+
diff --git a/manual/catacomb.tex b/manual/catacomb.tex
new file mode 100644
index 0000000..2a2d350
--- /dev/null
+++ b/manual/catacomb.tex
@@ -0,0 +1,112 @@
+%%% -*-latex-*-
+%%%
+%%% $Id: catacomb.tex,v 1.1 1999/12/10 23:27:11 mdw Exp $
+%%%
+%%% Catacomb manual
+%%%
+%%% (c) 1999 Straylight/Edgeware
+%%%
+
+%%%----- Licensing notice ---------------------------------------------------
+%%%
+%%% This file is part of Catacomb.
+%%%
+%%% Catacomb is free software; you can redistribute it and/or modify
+%%% it under the terms of the GNU Library General Public License as
+%%% published by the Free Software Foundation; either version 2 of the
+%%% License, or (at your option) any later version.
+%%% 
+%%% Catacomb is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+%%% GNU Library General Public License for more details.
+%%% 
+%%% You should have received a copy of the GNU Library General Public
+%%% License along with Catacomb; if not, write to the Free
+%%% Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+%%% MA 02111-1307, USA.
+
+%%%----- Revision history ---------------------------------------------------
+%%%
+%%% $Log: catacomb.tex,v $
+%%% Revision 1.1  1999/12/10 23:27:11  mdw
+%%% Embryonic library reference manual.
+%%%
+
+\documentclass[numbering]{strayman}
+\usepackage[T1]{fontenc}
+\usepackage[palatino, helvetica, courier, maths=cmr]{mdwfonts}
+\usepackage{mdwlist}
+\usepackage{mdwtab}
+\usepackage{mdwmath}
+\usepackage{sverb}
+\usepackage{syntax}
+\usepackage{url}
+\usepackage{cmtt}
+\usepackage{amstext}
+
+\errorcontextlines=99
+
+\shortverb\|
+
+\makeatletter
+
+\def\defaultdesc{%
+  \desclabelwidth{80pt}%
+  \desclabelstyle\nextlinelabel%
+  \def\makelabel##1{\bfseries##1\hfil}%
+}
+
+\def\description{\basedescript{\let\makelabel\bfseries}}
+\let\enddescription\endbasedescript
+\let\listingsize\relax
+
+\let\unit\textsf
+\let\hdr\url
+\def\code#1{\mtt{\syn@ttspace#1}}
+
+\def\fsec#1{%
+  \par%
+  \textbf{#1}%
+  \par\nobreak\@nobreaktrue%
+}
+
+\newcolumntype\!{!{\vline \qquad \qquad \vline}}
+
+\def\mp{\mathop{\operator@font MP}}
+
+\def\mont#1{\tilde{#1}}
+
+\def\mm{\mathit{mm}}
+\def\vl{\mathit{vl}}
+\def\av{\mathit{av}}
+\def\avl{\mathit{avl}}
+\def\bv{\mathit{bv}}
+\def\bvl{\mathit{bvl}}
+\def\dv{\mathit{dv}}
+\def\dvl{\mathit{dvl}}
+\def\qv{\mathit{qv}}
+\def\qvl{\mathit{qvl}}
+\def\rv{\mathit{rv}}
+\def\rvl{\mathit{rvl}}
+\def\sz{\mathit{sz}}
+
+\makeatother
+
+\title[Catacomb]{Catacomb \\ A cryptographic library}
+\author{Mark Wooding}
+
+\begin{document}
+\maketitle
+\frontmatter
+\tableofcontents
+\mainmatter
+\include{mp}
+\end{document}
+
+%%%----- That's all, folks --------------------------------------------------
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: 
diff --git a/manual/mp-mod.tex b/manual/mp-mod.tex
new file mode 100644
index 0000000..6c04ec3
--- /dev/null
+++ b/manual/mp-mod.tex
@@ -0,0 +1,379 @@
+%%% -*-latex-*-
+
+\section{Modular arithmetic}
+
+Many public key cryptographic algorithms require modular arithmetic of
+various kinds.  Modular exponentiation (calculation of $g^x \bmod n$) is
+particularly important.  Catacomb provides two efficient methods for
+performing modular arithmetic.
+
+
+\subsection{Barrett reduction}
+\label{sec:mp-barrett}
+
+P.~Barrett invented an elegant method for computing modular reductions.  It
+requires a small amount of precomputation, depending only on the modulus $m$.
+Thereafter, it can compute residues mod $m$ using only multiplication and
+arithmetic shifts.
+
+Montgomery's method (section~\ref{sec:mp-mont}, page~\pageref{sec:mp-mont})
+is similar, and more efficient for heavy use, but only works with odd moduli.
+Barrett reduction works with an arbitrary modulus.
+
+The declarations required to use Barrett reduction are in
+\hdr{<catacomb/mpbarrett.h>}.  The precomuted values required are stored in a
+\emph{Barrett reduction context} of type \code{mpbarrett}.  A context can be
+initialized by calling \code{mpbarrett_create}
+(page~\pageref{fn:mpbarrett-create}) and disposed of by
+\code{mpbarrett_destroy} (page~\pageref{fn:mpbarrett-destroy}).  A number
+less then $m^2$ may then be reduced modulo $m$ by passing it to
+\code{mpbarrett_reduce} (page~\pageref{fn:mpbarrett-reduce}) along with the
+appropriate reduction context.
+
+Since modular exponentiation is such a common operation in cryptography, a
+function \code{mpbarrett_exp} (page~\pageref{fn:mpbarrett-exp}) is provided
+which uses a simple square-and-multiply method combined with Barrett
+reduction.
+
+\subsubsection{The \code{mpbarrett_create} function}
+\label{fn:mpbarrett-create}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpbarrett.h>| \\
+|void mpbarrett_create(mpbarrett *|$b$|, mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Initializes a Barrett reduction context $b$ suitable for performing
+reductions modulo $m$.  The memory used by the context block must be provided
+by the caller, and must not be discarded until the context is destroyed.
+
+The computation performed is as follows.  Let $k = 2 \cdot |MP_LEN(|m|)|$.
+Calculate $\mu = \lfloor 2^{\code{MPW_BITS} \cdot k} / m \rfloor$.  The
+values $k$, $\mu$ and $m$ are stored in the context block for later use.
+
+\subsubsection{The \code{mpbarrett_destroy} function}
+\label{fn:mpbarrett-destroy}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpbarrett.h>| \\
+|void mpbarrett_destroy(mpbarrett *|$b$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Disposes of a Barrett reduction context.  Resources associated with the
+context block are freed.  The memory used to hold the context may safely be
+discarded.
+
+\subsubsection{The \code{mpbarrett_reduce} function}
+\label{fn:mpbarrett-reduce}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpbarrett.h>| \\
+|mp *mpbarrett_reduce(mpbarrett *|$b$|, mp *|$d$|, mp *|$x$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Calculates $x \bmod m$, where $m$ is the modulus configured into the Barrett
+reduction context $b$.  The argument $d$ is a suggested destination.
+
+Note that not all values of $x$ are suitable for reduction: in particular,
+negative values are improperly handled.  Let $k = 2 \cdot |MP_LEN(|m|)|$;
+then $x$ must be in the interval $[\,0, 2^{\code{MPW_BITS} \cdot k})$.  It's
+probably safer (and eqsier) to ensure that $x < m^2$.
+
+For reference, the calculation works as follows:
+\begin{itemize}
+\item To simplify the notation, let $b = 2^{\code{MPW_BITS}}$.
+\item Let $q_0 = \lfloor x / b^{k - 1} \rfloor$.  Let
+  $q = \lfloor q_0 \mu / b^{k + 1} \rfloor$.
+\item Let $r = (x - q m) \bmod b^{k + 1}$.
+\item At this point, $r \equiv x \pmod m$, but $r$ may be too large.  It can
+  be reduced by repeated subtraction until it's in range.
+\end{itemize}
+
+\fsec{Returns}
+
+A value $d$ such that $0 \le d < m$ and $d \equiv x \pmod m$.
+
+\subsubsection{The \code{mpbarrett_exp} function}
+\label{fn:mpbarrett-exp}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpbarrett.h>| \\
+|mp *mpbarrett_exp(mpbarrett *|$b$|, mp *|$d$|, mp *|$x$|, mp *|$e$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Computes $x^e \bmod m$, where $m$ is the modulus configured into the Barrett
+reduction context $b$.  The argument $d$ is a suggested destination.
+
+\fsec{Returns}
+
+A value $d$ such that $0 \le d < m$ and $d \equiv x^e \pmod m$.
+
+
+\subsection{Montgomery reduction}
+\label{sec:mp-mont}
+
+Peter Montgomery has invented an ingenious method for computing modular
+reductions.  The method requires a number of values to be precomputed.
+\begin{itemize}
+\item Let $m$ be the modulus.  Let $b$ be the radix used to represent
+  multiprecision integers.  (In Catacomb, then, $b = 2^{\code{MPW_BITS}}$.)
+  For Montgomery reduction to work, $m$ and $b$ must have no common factors.
+\item Let $R = b^n > m$ be the smallest power of $b$ which exceeds $m$.
+\item Precompute $m'$ such that $m m' \equiv -1 \pmod b$.
+\item Precompute $R \bmod m$ and $R^2 \bmod m$, since these are generally
+  useful when using Montgomery's method.
+\end{itemize}
+Given $x < R^2$ and the above values, Montgomery's algorithm computes $\mont
+x = x R^{-1} \bmod m$ efficiently, using only multiplications, additions and
+word-sized shifts.  The quantity $\mont x$ is called the \emph{Montgomery
+reduction of $x$}.
+
+At first sight, this isn't particularly useful.  However, an interesting
+thing happens if you run an extra factor of $R$ all the way through your
+calculation.  Note for example that the Montgomery reduction of $x R \cdot y
+R$ is $xy R^2 R^{-1} \bmod m = x y R \bmod m$ -- there's still only a single
+factor of $R$ in the result.  This can be removed finally by an additional
+reduction step.
+
+Catacomb provides a function for performing a simple Montgomery reduction,
+and for calculating Montgomery multiplication: the Montgomery reduction of
+the product of two integers.
+
+Multiplying in the extra factor of $R$ can be done efficiently by multiplying
+by $R^2 \bmod m$ and reducing.\footnote{%
+  Some of the Catacomb comments refer to values with a factor of $R$ as
+  having been `Montgomerized'.  While this is an ugly word, it does describe
+  a useful concept.} %
+This is the reason that $R^2 \bmod m$ is precomputed.  The value $R \bmod m$
+is the Montgomery multiplicative identity and is often useful as an initial
+value when forming products.
+
+\subsubsection{Overview of functions provided}
+
+All of the types and declarations needed for Montgomery reduction are in the
+header file \hdr{<catacomb/mpmont.h>}.
+
+Catacomb maintains precomputed values in a \emph{Montgomery reduction
+context}, which has type \code{mpmont}.  A reduction context is initialized
+by calling \code{mpmont_create} (page~\pageref{fn:mpmont-create}) and
+disposed of by \code{mpmont_destroy} (page~\pageref{fn:mpmont-destroy}).
+
+The Montgomery reduction context is a structure containing the following
+members:
+\begin{description}
+  \def\makelabel#1{\code{#1}\hfil}
+\item[mp *m] The modulus $m$ with which the reduction context is working.
+\item[mp *mi] The quantity $m'$ where $m m' \equiv -1
+  \pmod{ 2^{\code{MPW_BITS}}}$.
+\item[size_t shift] The smallest integer $n$ such that
+  $R = 2^{\code{MPW_BITS} \cdot n} > m$.
+\item[mp *r] The quantity $R \bmod m$.  The Montgomery multiplicative
+  identity.
+\item[mp *r2] The quantity $R^2 \bmod m$.
+\end{description}
+All of these are set up by \code{mpmont_create} and should not be changed.
+
+Montgomery reduction can be performed by passing a Montgomery reduction
+context and a number to be reduced to \code{mpmont_reduce}
+(page~\pageref{fn:mpmont-reduce}).  Simultaneous multiplication and reduction
+are performed by \code{mpmont_mul} (page~\pageref{fn:mpmont-mul}).
+
+Catacomb can perform modular exponentiation using Montgomery reduction.  The
+function \code{mpmont_exp} (page~\pageref{fn:mpmont-exp}) performs a standard
+modular exponentiation; \code{mpmont_expr} (page~\pageref{fn:mpmont-expr})
+does the same but doesn't remove the factor of $R$ from the result, which can
+be useful if you want to perform further computations.
+
+Catacomb can also perform multiple simultaneous modular exponentations,
+multiplying the results together, without much of a performance penalty over
+a single exponentation.  The function \code{mpmont_mexp}
+(page~\pageref{fn:mpmont-mexp}) calculates the product $x_0^{e_0} x_1^{e_1}
+\ldots x_{k - 1}^{e_{k - 1}} \bmod m$ given an array of $x_i$ and $e_i$;
+\code{mpmont_mexpr} (page~\pageref{fn:mpmont-mexpr}) does the same but leaves
+a factor of $R$ in the result.  These functions are particularly useful for
+verifying discrete-logarith-based digital signatures.
+
+\subsubsection{Calculation using Montgomery reduction}
+
+Before any reduction work can be done, a Montgomery context must be
+initialized.  Suppose $m$ is the modulus we need to work with:
+\begin{listing}
+mpmont mm;
+mpmont_create(&mm, m);
+\end{listing}
+The next step is usually to multiply all of the inputs to the calculation by
+$R$.  This is usually best done like this:
+\begin{listing}
+xr = mpmont_mul(&mm, MP_NEW, x, mm.r2);
+\end{listing}
+Because multiplication is distributive over addition, addition and
+subtraction work normally, although it can be worth doing a little repeated
+subtraction in case the result ends up larger than the modulus.
+\begin{listing}
+a = mp_add(a, a, b);
+a = mp_add(a, a, c);
+a = mp_add(a, a, d);
+while (MP_CMP(a, >=, mm.m))
+  a = mp_sub(a, a, mm.m);
+\end{listing}
+Multiplication of different numbers is best done with the supplied
+multiply-and-reduce operation.
+\begin{listing}
+ab = mpmont_mul(&mm, MP_NEW, a, b);
+\end{listing}
+However, squaring is best done using separate square and reduce steps.
+\begin{listing}
+a2 = mp_sqr(MP_NEW, a);
+a2 = mpmont_reduce(&mm, a2, a2);
+\end{listing}
+When the computation has finished, the results can be read out using
+straightforward Montgomery reduction.  Don't forget to dispose of the
+reduction context.
+\begin{listing}
+x = mpmont_reduce(&mm, x, xr);
+mpmont_destroy(&mm);
+\end{listing}
+
+For particularly simple calculations, it can save a multiplication if you
+reduce first and correct afterwards.
+\begin{listing}
+ab = mpmont_mul(&mm, MP_NEW, a, b);
+ab = mpmont_mul(&mm, ab, ab, mm.r2);
+\end{listing}
+The first stage computes $ab R^{-1} \bmod m$.  The second computes $abR^{-1}
+\cdot R^2 \cdot R^{-1} \bmod m = ab \bmod m$.  Doing this the usual way is
+nowhere near as efficient:
+\begin{listing}
+ar = mpmont_mul(&mm, MP_NEW, a, mm.r2);
+br = mpmont_mul(&mm, MP_NEW, b, mm.r2);
+ab = mpmont_mul(&mm, ar, ar, br);
+ab = mpmont_reduce(&mm, ab, ab);
+mp_drop(br);
+\end{listing}
+
+Remember that for all of this magic to work, $m$ and $b$ must have no common
+factors.  Since in Catacomb $b$ is a power of 2, this means simply that
+\emph{$m$ must be odd}.  If you want to work with an even modulus, you'll
+have to use Barrett reduction instead (section~\ref{sec:mp-barrett},
+page~\pageref{sec:mp-barrett}).
+
+\begin{note}[Important]
+  Montgomery reduction only works when the modulus is an odd number.
+\end{note}
+
+\subsubsection{How it works}
+
+Let $x_0$ be the integer we wish to reduce.  Compute $u_1 = x_0 m' \bmod b$,
+and $v_1 = x_0 + u_1 m$.
+
+Obviously $u_1 = x_0 m' + bk$ for some integer $k$.  So $v_1 = x_0 + u_1 m =
+x_0 + x_0 m m' + b k m$.  Note that $m m' \equiv -1 \pmod b$; thus $m m' =
+b k' - 1$ for some $k'$, so $v_1$ then becomes $x_0(1 + bk' - 1) + b k m =
+b (k' x_0 + k m)$.  Let $x_1 = v_1 / b = k' x_0 + k m$.
+
+Now note that $b k' = m m' + 1$, so $b k' \equiv 1 \pmod m$; hence, $k'
+\equiv b^{-1} \pmod m$.  Thus $x_1 \equiv x_0 b^{-1} \pmod m$.
+
+
+
+\subsubsection{The \code{mpmont_create} function}
+\label{fn:mpmont-create}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpmont.h>| \\
+|void mpmont_create(mpmont *|$\mm$|, mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Initializes a Montgomery reduction context $\mm$, using the given modulus
+$m$.  The caller must provide memory for the context, and must ensure that
+the memory remains available until the context is destroyed.
+
+If Catacomb is compiled with debugging support enabled, it will abort if $m$
+is negative or even.
+
+\subsubsection{The \code{mpmont_destroy} function}
+\label{fn:mpmont-destroy}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpmont.h>| \\
+|void mpmont_destroy(mpmont *|$\mm$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Destroys the Montgomery reduction context $\mm$, releasing any resources it
+had acquired.
+
+\subsubsection{The \code{mpmont_reduce} function}
+\label{fn:mpmont-reduce}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpmont.h>| \\
+|mp *mpmont_reduce(mpmont *|$\mm$|, mp *|$d$|, mp *|$x$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Performs a Montgomery reduction operation on the integer $x$, modulo $m$.
+The integer $d$ is a suggested destination.
+
+For Montgomery reduction to work, $x$ must be less than $R^2$.  In practice,
+it's probably safest to ensure that $x < m^2$.
+
+This function is particularly efficient if $d = x$, i.e., if you overwrite
+$x$ with its Montgomery reduction.
+
+\fsec{Returns}
+
+An integer $d$ such that $0 \le d < m$ and $d R \equiv x \pmod m$.
+
+\subsubsection{The \code{mpmont_mul} function}
+\label{fn:mpmont-mul}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpmont.h>| \\
+|mp *mpmont_mul(mpmont *|$\mm$|, mp *|$d$|, mp *|$x$|, mp *|$y$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Multiplies $x$ and $y$, and returns the Montgomery reduction of the product
+modulo $m$.  The integer $d$ is a suggested destination.
+
+Both $x$ and $y$ must be less than $R$, and it's probably safer in practice
+if you ensure that $x, y < m$.
+
+\fsec{Returns}
+
+An integer $d$ such that $0 \le d < m$ and $d R \equiv x y \bmod m$.
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "catacomb"
+%%% End: 
diff --git a/manual/mp-mp.tex b/manual/mp-mp.tex
new file mode 100644
index 0000000..4f3aa67
--- /dev/null
+++ b/manual/mp-mp.tex
@@ -0,0 +1,450 @@
+%%% -*-latex-*-
+
+\section{High-level multiprecision integer represention}
+\label{sec:mp}
+
+The high-level interface to multiprecision integers is exported by the header
+file \hdr{<catacomb/mp.h>}.
+
+
+\subsection{The \code{mp} structure}
+\label{sec:mp-struct}
+
+At the high level, a multiprecision integer is represented as an object of
+type \code{mp}.  Most of the time, programs use pointers to \code{mp} objects
+which have been dynamically allocated.  (See \code{mp_build},
+page~\pageref{fn:mp-build}, for the exception to this rule.)
+
+The \code{mp} type is a structure.  Some of the members are considered
+internal to the implementation.  However, the following may be used by client
+programs:
+\begin{description}
+  \def\makelabel#1{\code{#1}\hfil}
+\item[mpw *v, *vl] The base and limit of the multiprecision
+  integer array.
+\item[unsigned f] Various flags describing the current state of the integer.
+\end{description}
+Other members are used for keeping track of how much memory has been
+allocated to the number and for managing the reference counting system.
+These members should not be accessed directly by application code.  None of
+the structure members should be modified directly.
+
+The following flags are defined:
+\begin{description*}
+  \let\makelabel\code
+\item[MP_NEG] The integer is negative.
+\item[MP_BURN] Clear the integer's storage after use.
+\item[MP_CONST] The integer may not be altered or freed.
+\item[MP_UNDEF] The integer currently has no interesting value.
+\item[MP_DESTROYED] The integer has been destroyed.
+\end{description*}
+
+The \code{MP_BURN} flag is inherited by the results of calculations.  Thus,
+setting the flag on a value causes it and all other values calculated from it
+to be destroyed after use.\footnote{%
+  There are currently a small number of places where the \code{MP_BURN} flag
+  may be lost from a result.  These usually result from performing complex
+  calculations with degenerate arguments (e.g., computing a GCD with zero).
+  Fixing this is not a high priority.} %
+
+The \code{MP_DESTROYED} flag is used by \code{mp_destroy}
+(page~\pageref{fn:mp-destroy}) to trap attempts to destroy the same integer
+more than once, which can otherwise lead to confusing bugs.
+
+
+\subsubsection{The \code{MP_LEN} macro}
+\label{fn:MP-LEN}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|ptrdiff_t MP_LEN(const mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Returns}
+
+The number of words used in the representation of the integer $m$.  Note that
+the argument $m$ may be evaluated multiple times by this macro.
+
+
+\subsubsection{The \code{mp_burn} function}
+\label{fn:mp-burn}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|void mp_burn(mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Sets the \code{MP_BURN} flag on the integer $m$.  Whenever memory associated
+with $m$, or results derived from it, is deallocated, it is overwritten with
+zeros to prevent traces being found in swap files or core dumps.
+
+
+\subsection{Multiprecision integer constants}
+\label{sec:mp-const}
+
+There are a few commonly-used multiprecision integer constants provided.  All
+are declared in \hdr{<catacomb/mp.h>}:
+
+\begin{tabular}[C]
+  {| >{\ttfamily}l | Mr @{\extracolsep{0pt plus 10000fill}\hskip\tabcolsep}
+  | c @{\extracolsep{0pt}} | >{\ttfamily}l | Mr |}	   \hlx{c{1,2,4,5}v}
+  \multicolumn{1}{|c|}{\textbf{Constant}} &
+  \multicolumn{1}{c|}{\textbf{Value}} & \qquad &
+  \multicolumn{1}{c|}{\textbf{Constant}} &
+  \multicolumn{1}{c|}{\textbf{Value}}			\\ \hlx{vc{1,2,4,5}v}
+  MP_ZERO	& 0	&& MP_FOUR       & 4 		\\
+  MP_ONE	& 1	&& MP_FIVE       & 5 		\\
+  MP_TWO	& 2	&& MP_TEN        & 10		\\
+  MP_THREE	& 3	&& MP_MONE       & -1		\\ \hlx{vc{1,2,4,5}}
+\end{tabular}
+\goodbreak
+
+All of the multiprecision constants are actually pointers to the real
+values.  All have the \code{MP_CONST} flag set, so they cannot be modified or
+destroyed.
+
+
+\subsection{Creation and destruction}
+\label{sec:mp-create}
+
+Multiprecision integers are reference counted; i.e., each \code{mp} remembers
+how many references there are to it, and is destroyed only when the last
+reference disappears.
+
+New multiprecision integers are created using the functions \code{mp_create}
+(page~\pageref{fn:mp-create}) or \code{mp_build}
+(page~\pageref{fn:mp-build}).  A newly created integer has one reference.
+Additional references are returned by the function \code{mp_copy}
+(page~\pageref{fn:mp-copy}).  A reference may be removed from an integer by
+calling \code{mp_drop} (page~\pageref{fn:mp-drop}).
+
+It isn't usually a good idea to modify an integer to which there are multiple
+references: the owners of the other references will usually react badly if
+their number has changed.  A modifiable copy may be made of an integer using
+the \code{mp_split} function (page~\pageref{fn:mp-split}).  If there are no
+other references, the integer is returned unchanged; otherwise a real copy is
+made and returned, and the number of references to the original is
+decremented by one.
+
+Most of the arithmetic functions allow the caller to state a preferred
+location for the result.  This may either be an existing integer or a
+completely new one, requested by the special value \code{MP_NEW}.  This
+behaviour is provided by the \code{mp_modify} function
+(page~\pageref{fn:mp-modify}).
+
+Note that functions may not actually use the location requested, although if
+it decides not to overwrite the requested destination, a reference to it is
+removed.
+
+
+\subsubsection{The \code{mp_create} function}
+\label{fn:mp-create}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|mp *mp_create(size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Allocates a new multiprecision integer, with at least $\sz$ words of storage
+available for its representation.  The flag \code{MP_UNDEF} is set initially;
+the other flags are cleared.  The integer's array limit pointer \code{vl} is
+set to be $\sz$ higher than the base pointer \code{v}.
+
+\fsec{Returns}
+
+A pointer to a newly allocated multiprecision integer.
+
+\subsubsection{The \code{mp_destroy} function}
+\label{fn:mp-destroy}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|void mp_destroy(mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Destroys the multiprecision integer $m$.  All resouces allocated to the
+integer are freed.  It's most unusual for this to be the function you want:
+most of the time \code{mp_drop} (page~\pageref{fn:mp-drop}) is much more
+useful.
+
+If compiled with debugging support, the \code{mp_destroy} function will abort
+if requested to destroy an integer whose \code{MP_CONST} or
+\code{MP_DESTROYED} flags are set.
+
+\subsubsection{The \code{mp_build} function}
+\label{fn:mp-build}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|void mp_build(mp *|$m$|, mpw *|$v$|, mpw *|$\vl$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Creates a constant multiprecision integer from an array of \code{mpw} words.
+Storage for the \code{mp} object and the array is provided by the user --
+\code{mp_build} allocates no memory.  Conversely, it's safe to dispose of the
+\code{mp} block and \code{mpw} array at any time, as long as there are no
+live references to them.
+
+The \code{mp_build} function is particularly useful for constructing small
+multiprecision integers, and for addressing parts of other integers.  For
+example, if $x$ is a multiprecision integer, then
+\begin{listing}
+mp_build(&m, x->v + 5, x->vl);
+\end{listing}
+sets $m$ equal to $\lfloor x / 2^{5 \cdot \code{MPW_BITS}} \rfloor$ without
+copying any data.\footnote{%
+  This technique is used in the implementation of Barrett reduction
+  (page~\pageref{sec:mp-barrett}), for example.  See the source file
+  \code{mpbarrett.c} for details.} %
+
+\subsubsection{The \code{mp_copy} function and \code{MP_COPY} macro}
+\label{fn:mp-copy}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|mp *mp_copy(mp *|$m$|);| \\
+|mp *MP_COPY(mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mp_copy} adds a reference to the multiprecision integer
+$m$.  The integer will not be destroyed until all references to it have been
+dropped.
+
+The macro \code{MP_COPY} performs exactly the same action as the
+\code{mp_copy} function, except that it uses inline code rather than a
+function call.  It's probably smaller and faster than the function call too.
+
+\fsec{Returns}
+
+The address of the new reference.  This will usually be the same as the
+original argument $m$.
+
+\subsubsection{The \code{mp_drop} function and \code{MP_DROP} macro}
+\label{fn:mp-drop}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|void mp_drop(mp *|$m$|);| \\
+|void MP_DROP(mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mp_drop} removes (`drops') a reference to the
+multiprecision integer $m$.  If there are no more references to the integer,
+it is destroyed, as if by \code{mp_destroy} (page~\pageref{fn:mp-destroy}).
+
+The macro \code{MP_DROP} performs exactly the same action as the
+\code{mp_drop} function, except that it uses inline code rather than a
+function call, so it's slightly faster but uses a little more code.
+
+\subsubsection{The \code{mp_split} function and \code{MP_SPLIT} macro}
+\label{fn:mp-split}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|mp *mp_split(mp *|$m$|);| \\
+|void MP_SPLIT(mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+If the integer $m$ has only one reference, the \code{mp_split} function
+returns $m$ unchanged; otherwise, a copy is made and returned, and the
+reference count of $m$ is decremented.  Thus, either way, \code{mp_split}
+returns a pointer to an integer with the same value as $m$ and a single
+refernce.  Thus, the result of \code{mp_split} is therefore safe to modify.
+
+The macro \code{MP_SPLIT} performs the same action as the \code{mp_split}
+function except that it uses inline code rather than a function call, and it
+modifies its argument in place rather than returning the new reference.  The
+macro \code{MP_SPLIT} may evaluate its argument $m$ multiple times.
+
+\fsec{Returns}
+
+The function \code{mp_split} returns a pointer to a copy of its argument $m$
+which is safe to modify.
+
+\subsubsection{The \code{mp_modify} function and \code{MP_MODIFY} macro}
+\label{fn:mp-modify}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|mp *mp_modify(mp *|$m$|, size_t |$\sz$|);| \\
+|void MP_MODIFY(mp *|$m$|, size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mp_modify} returns a pointer to a suitable destination $d$
+for a result given a suggested destination $m$ and a required size $\sz$.
+The actual behaviour is complicated:
+
+\begin{itemize}
+\item If $m$ is equal to \code{MP_NEW}, or the \code{MP_CONST} flag is set on
+  $m$, then allocate a new integer $d$ of size $\sz$ by calling
+  \code{mp_create} (page~\pageref{fn:mp-create}).
+\item Otherwise, if $m$ has more than one reference, a reference is removed
+  asif by \code{mp_drop} (page~\pageref{fn:mp-drop}) and a new integer $d$ of
+  size $\sz$ is allocated as if by \code{mp_create}.
+\item Otherwise, the integer $m$ is extended to $\sz$ words, as if by calling
+  \code{mp_ensure} (page~\pageref{fn:mp-ensure}), and $d$ is set equal to
+  $m$.
+\end{itemize}
+
+The resulting integer $d$ is returned from the function.
+
+The upshot of all of this is that $d$ has exactly one reference, space for
+$\sz$ words of data, and no particular value set.  Any other references to
+the original integer $m$ (whether counted or not) continue to refer to the
+same value.  The total number of references to $d$ and $m$ after
+\code{mp_modify} returns is equal to the initial number of references to $m$
+before the call.
+
+The macro \code{MP_MODIFY} does the same job using inline code; it sets $m$
+to the new integer $d$ when it's finished.  The macro expands to a fairly
+large chunk of code.
+
+\fsec{Returns}
+
+The function \code{mp_modify} returns the destination integer $d$ as
+described above.
+
+
+\subsection{Resizing multiprecision integers}
+\label{sec:mp-resize}
+
+There are a collection of useful functions and macros for changing the size
+of a multiprecision integer.
+
+\subsubsection{The \code{mp_resize} function and \code{MP_RESIZE} macro}
+\label{fn:mp-resize}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|void mp_resize(mp *|$m$|, size_t |$\sz$|);| \\
+|void MP_RESIZE(mp *|$m$|, size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mp_resize} resizes the array used to hold the value of the
+integer $m$ such that it has space for $\sz$ words.  No check is made to see
+whether $m$'s array is already the correct size.  The integer's array limit
+pointer \code{vl} is set to point $\sz$ words higher than the base pointer
+\code{v}.
+
+The macro \code{MP_RESIZE} performs exactly the same action as the
+\code{mp_resize} function, except that it uses inline code rather than a
+function call.  It's slightly faster but uses quite a lot more code than the
+plain function call.
+
+\subsubsection{The \code{mp_ensure} function and \code{MP_ENSURE} macro}
+\label{fn:mp-ensure}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|void mp_ensure(mp *|$m$|, size_t |$\sz$|);| \\
+|void MP_ENSURE(mp *|$m$|, size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mp_ensure} resizes the integer $m$ (as if by calling
+\code{mp_resize}) if its array is currently smaller than $\sz$ words; if not,
+the array is left alone.  The integer's array limit pointer \code{vl} is set
+to point $\sz$ words higher than the base pointer \code{v}.
+
+The macro \code{MP_ENSURE} performs exactly the same action as the
+\code{mp_ensure} function, except that it uses inline code rather than a
+function call.  It's slightly faster but uses considerably more code.
+
+\subsubsection{The \code{mp_shrink} function and \code{MP_SHRINK} macro}
+\label{fn:mp-shrink}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|void mp_shrink(mp *|$m$|);| \\
+|void MP_SHRINK(mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mp_shrink} shrinks the representation of the integer $m$.
+It lowers the integer's array limit pointer so that the most significant word
+in the representation is nonzero.  This improves the efficiency of arithmetic
+operations on $m$.
+
+Note that shrinking an integer doesn't release any memory.  Use
+\code{mp_minimize} (below) for that.
+
+The macro \code{MP_SHRINK} performs exactly the same operation as
+\code{mp_shrink} using inline code rather than a function call.
+
+\subsubsection{The \code{mp_minimize} function}
+\label{fn:mp-minimize}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mp.h>| \\
+|void mp_minimize(mp *|$m$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Minimizes the amount of memory used by the represenation of the integer $m$.
+This isn't usually useful unless $m$ is going to remain constant for a long
+time, or has become extremely large for some reason.
+
+
+\subsection{Bit-scanning}
+\label{sec:mp-scan}
+
+\subsection{Loading and storing}
+\label{sec:mp-io}
+
+\subsection{Simple arithmetic}
+\label{sec:mp-arith}
+
+\subsection{More advanced algorithms}
+\label{sec:mp-adv}
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "catacomb"
+%%% End: 
diff --git a/manual/mp-mpx.tex b/manual/mp-mpx.tex
new file mode 100644
index 0000000..f775f6b
--- /dev/null
+++ b/manual/mp-mpx.tex
@@ -0,0 +1,635 @@
+%%% -*-latex-*-
+
+\section{Low-level details: MPX}
+\label{sec:mpx}
+
+
+\subsection{Data types for low-level arithmetic}
+
+The header file \hdr{<catacomb/mpw.h>} defines two types, \code{mpw} and
+\code{mpd}, and a collection of macros describing them.
+
+The `multiprecision word' type, \code{mpw}, is an unsigned integer type whose
+representation uses \code{MPW_BITS}.  The largest value representable in an
+\code{mpw} is $\code{MPW_MAX} = 2^{\code{MPW_BITS}} - 1$.
+The value of \code{MPW_BITS} is at least 16.  Note that, on some
+architectures, an \code{mpw} may be capable of representing values larger
+than \code{MPW_MAX}.  The expression \code{MPW($x$)} returns the value of $x
+\bmod \code{MPW_MAX}$ cast to type \code{mpw}.
+
+The `multiprecision doubleword' type, \code{mpd}, is an unsigned integer type
+with at least double the width of \code{mpw}.  Hence, an \code{mpd} is
+capable of representing the product of two \code{mpw}s, and is useful when
+performing multiplications and divisions.  The constants \code{MPD_BITS} and
+\code{MPD_MAX} are defined to be the number of bits used in the
+representation of an \code{mpd} and the largest value representable in an
+\code{mpd} respectively.
+
+A few macros useful for manipulating \code{mpw}s are defined.
+
+\subsubsection{The \code{MPW} macro}
+\label{fn:MPW}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpw.h>| \\
+|mpw MPW(|$x$|);|
+\end{listinglist}
+
+\fsec{Returns}
+
+The value of $x \bmod 2^{\code{MPW_BITS}}$, as an \code{mpw}.  This is a
+frequently used abbreviation for |(mpw)(|$x$| & MPW_MAX)|.
+
+\subsubsection{The \code{MPWS} macro}
+\label{fn:MPWS}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpw.h>| \\
+|size_t MPWS(size_t |$n$|);|
+\end{listinglist}
+
+\fsec{Returns}
+
+The number of bytes occupied by an array of $n$ \code{mpw}s (i.e., $n \cdot
+|sizeof(mpw)|$).
+
+\subsubsection{The \code{MPW_RQ} macro}
+\label{fn:MPW-RQ}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpw.h>| \\
+|size_t MPW_RQ(size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Returns}
+
+The number of \code{mpw}s required to represent a multiprecision integer
+occupying $\sz$ octets in an external representation.
+
+
+\subsection{Low-level multiprecision integer representation}
+
+The low-level multiprecision definitions are exported in the header file
+\hdr{<catacomb/mpx.h>}.  This header includes \hdr{<catacomb/mpw.h>}, so
+there's usually no need to include it explicitly.
+
+A multiprecision integer is represented as an array of \code{mpw}s,
+least-significant first.  The array's bounds are described by a a pair of
+pointers to the array's \emph{base} (i.e., its first element) and
+\emph{limit} (i.e., the location immediately following its last element).
+
+Let $v$ be the base and $\vl$ the limit of a multiprecision integer array.
+The array itself is notated as $v .. \vl$.  The array's size in words may be
+computed as $\vl - v$ in the normal C fashion.  The integer represented by
+the array, denoted $\mp(v .. \vl)$, is defined to be
+\[
+  \mp(v .. \vl) =
+    \sum_{0 \le i < \vl - v}
+        2^{\code{MPW_BITS} \cdot i} v[i]
+\]
+If the array is empty (i.e., $v = \vl$) then the number is zero.  If the
+array is empty, or the final word is nonzero, then the representation is said
+to be \emph{shrunken}.  Shrunken representations are more efficient, since
+the arithmetic algorithms don't need to consider high-order words which make
+no difference to the final result anyway.
+
+Whenever a result is too large to be represented in the memory allocated for
+it, high-order bits are discarded.  Thus, a result written to an array of $k$
+words is reduced modulo $2^{\code{MPW_BITS} \cdot k}$.
+
+
+\subsection{Low-level macros}
+\label{sec:mpx-macro}
+
+The following macros perform various simple operations useful for
+manipulating multiprecision integers at the MPX level.
+
+\subsubsection{The \code{MPX_SHRINK} macro}
+\label{fn:MPX-SHRINK}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|MPX_SHRINK(const mpw *|$v$|, const mpw *|$\vl$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The \code{MPX_SHRINK} macro reduces the limit $\vl$ of a multiprecision
+integer array so that either $v = \vl$ or $\vl[-1] \ne 0$.  The $\vl$
+argument must be an lvalue, since the macro writes the result back when it
+finishes.
+
+\subsubsection{The \code{MPX_BITS} macro}
+\label{fn:MPX-BITS}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|MPX_BITS(unsigned long |$b$|, const mpw *|$v$|, const mpw *|$\vl$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Determines the smallest number of bits which could represent the number
+$\mp(v .. \vl)$, and writes the answer to $b$, which must therefore be an
+lvalue.  The result is zero if the number is zero; otherwise $b$ is the
+largest integer such that
+\[ \mp(v .. \vl) \ge 2^{(b - 1) \bmod \code{MPW_BITS}}.\]
+
+\subsubsection{The \code{MPX_OCTETS} macro}
+\label{fn:MPX-OCTETS}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|MPX_OCTETS(size_t |$o$|,|
+           |const mpw *|$v$|, const mpw *|$\vl$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Determines the smallest number of octets which could represent the number
+$\mp(v .. \vl)$, and writes the answer to $o$, which must therefore be an
+lvalue.  This is useful for determining appropriate buffer sizes for the
+results of \code{mpx_storel} and \code{mpx_storeb}.
+
+The result $o$ can be calculated from the number of bits $b$ reported by
+\code{MPX_BITS} as $o = \lceil b / 8 \rceil$; the algorithm used by
+\code{MPX_OCTETS} is more efficient than this, however.
+
+\subsubsection{The \code{MPX_COPY} macro}
+\label{fn:MPX-COPY}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|MPX_COPY(mpw *|$\dv$|, mpw *|$\dvl$|,|
+         |const mpw *|$\av$|, const mpw *|$\avl$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Copies a multiprecision integer from the source array $\av .. \avl$ to the
+destination array $\dv .. \dvl$.  If the destination array is large enough,
+the result is equal to the source; otherwise, high-order bits are discarded
+as usual.
+
+\subsubsection{The \code{MPX_ZERO} macro}
+\label{fn:MPX-ZERO}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|MPX_ZERO(mpw *|$v$|, mpw *|$\vl$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Sets the integer $\mp(v .. \vl)$ to zero.
+
+
+\subsection{Transfer formats: loading and storing}
+\label{sec:mpx-io}
+
+The MPX layer can translate between internal representations of integers as
+arrays of \code{mpw}s and external formats where integers are stored as
+arrays of octets.  Both little- and big-endian orders are supported.
+
+If $a_0, a_1, \ldots, a_{k - 1}$ is an array of $k$ octets, with
+$0 \le a_i < 256$ for all $0 \le i < k$, then the number $n$ represented by
+$a$ in little-endian byte order is
+\[ n = \sum_{0 \le i < k} 256^i a_i .\]
+Similarly, the number $n'$ represented by $a$ in big-endian byte order is
+\[ n' = \sum_{0 \le i < k} 256^{k - i - 1} a_i .\]
+
+If, in a store operation, the destination octet array is not large enough to
+represent the number to be stored, high-order octets are discarded; hence,
+the number is reduced modulo $2^{8k}$, where $k$ is the size of the
+destination array in octets.
+
+Two useful macros help when working out how much memory is needed for
+translation between internal and transfer formats for multiprecision
+integers.  The macro \code{MPX_OCTETS} (page~\pageref{fn:MPX-OCTETS})
+calculates the smallest octet array size which can represent a multiprecision
+integer.  The macro \code{MPW_RQ} (page~\pageref{fn:MPW-RQ}) calculates the
+smallest \code{mpw} array which can represent a multiprecision integer held
+in an octet array of a particular size.
+
+\subsubsection{The \code{mpx_storel} function}
+\label{fn:mpx-storel}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|void mpx_storel(const mpw *|$v$|, const mpw *|$\vl$|,|
+                |void *|$p$|, size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Stores the number held in the array $v .. \vl$ to the array of $\sz$ octets
+starting at address $p$ in little-endian byte order (i.e., least significant
+byte first).
+
+\subsubsection{The \code{mpx_loadl} function}
+\label{fn:mpx-loadl}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|void mpx_loadl(mpw *|$v$|, mpw *|$\vl$|,|
+               |const void *|$p$|, size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Loads into the array $v .. \vl$ the number represented in the array of $\sz$
+octets starting at address $p$ in little-endian byte order (i.e., least
+significant byte first).
+
+\subsubsection{The \code{mpx_storeb} function}
+\label{fn:mpx-storeb}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|void mpx_storeb(const mpw *|$v$|, const mpw *|$\vl$|,|
+                |void *|$p$|, size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Stores the number held in the array $v .. \vl$ to the array of $\sz$ octets
+starting at address $p$ in big-endian byte order (i.e., least significant
+byte last).
+
+\subsubsection{The \code{mpx_loadb} function}
+\label{fn:mpx-loadb}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|void mpx_loadb(mpw *|$v$|, mpw *|$\vl$|,|
+               |const void *|$p$|, size_t |$\sz$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Loads into the array $v .. \vl$ the number represented in the array of $\sz$
+octets starting at address $p$ in big-endian byte order (i.e., least
+significant byte last).
+
+
+\subsection{Bit shifting operations}
+\label{sec:mpx-shift}
+
+The MPX layer provides functions for efficient multiplication and division of
+multiprecision integers by powers of two, performed simply by shifting the
+binary representation left or right by a number of bits.  Shifts by one bit
+and by a multiple of \code{MPW_BITS} are particularly fast.
+
+There are two shift functions, one for left shifts (multiplications) and one
+for right shifts (divisions).  Each function is passed an array containing
+the number to be shifted, a destination array for the result (which may be
+the source array), and the number of bits to be shifted as an unsigned
+integer.
+
+
+\subsubsection{The \code{mpx_lsl} function}
+\label{fn:mpx-lsl}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\
+|void mpx_lsl(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,| \\
+               \>|const mpw *|$\av$|, const mpw *|$\avl$|, size_t |$n$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+Stores in $\dv .. \dvl$ the result of shifting $\mp(\av .. \avl)$ left by $n$
+bits (i.e., multiplying it by $2^n$).
+
+\subsubsection{The \code{mpx_lsr} function}
+\label{fn:mpx-lsr}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\
+|void mpx_lsr(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,| \\
+               \>|const mpw *|$\av$|, const mpw *|$\avl$|, size_t |$n$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+Stores in $\dv .. \dvl$ the result of shifting $\mp(\av .. \avl)$ right by
+$n$ bits (i.e., dividing it by $2^n$, rounding towards zero).
+
+
+\subsection{Low level arithmetic}
+
+The remaining functions perform standard arithmetic operations on large
+integers.  The form for the arguments is fairly well-standardized:
+destination arrays are given first, followed by the source arrays in the
+conventional order.  (This ordering reflects the usual order in a C
+assignment expression, the \code{strcpy} and \code{memcpy} functions, and the
+order of operands in many assembly languages.)
+
+Some functions allow the destination array to be the same as one (or both)
+source arrays; others forbid this.  Under no circumstances may the
+destination partially overlap the sources.
+
+
+\subsubsection{The \code{mpx_2c} function}
+\label{fn:mpx-2c}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|void mpx_2c(mpw *|$\dv$|, mpw *|$\dvl$|,|
+            |const mpw *|$v$|, const mpw *|$\vl$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Computes the two's complement of the number $\mp(v .. \vl)$ and stores it in
+$\dv .. \dvl$.  The two arrays $v .. \vl$ and $\dv .. \dvl$ may be the same.
+
+\subsubsection{The \code{mpx_ucmp} function and \code{MPX_UCMP} macro}
+\label{fn:mpx-ucmp}
+
+\fsec{Synopsis}
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\
+|int mpx_ucmp(|\=|const mpw *|$\av$|, const mpw *|$\avl$|,| \\
+               \>|const mpw *|$\bv$|, const mpw *|$\bvl$|);| \\
+|int MPX_UCMP(|\=|const mpw *|$\av$|, const mpw *|$\avl$|, |\synt{rel-op}|,|
+ \\            \>|const mpw *|$\bv$|, const mpw *|$\bvl$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mpx_ucmp} performs an unsigned comparison of two unsigned
+multiprecision integers $a$ and $b$, passed in the arrays $\av .. \avl$ and
+$\bv .. \bvl$ respectively.
+
+The macro \code{MPX_UCMP} provides a slightly more readable interface for
+comparing integers.  The \synt{rel-op} may be any C relational operator
+(e.g., |!=|, or |<=|).  The macro tests whether
+$a \mathrel{\synt{rel-op}} b$.
+
+\fsec{Returns}
+
+The function \code{mpx_ucmp} returns a value less then, equal to, or
+greater than zero depending on whether $a$ is less than, equal to or greater
+than $b$.
+
+The macro \code{MPX_UCMP} returns a nonzero result if $a
+\mathrel{\synt{rel-op}} b$ is true, and zero if false.
+
+\subsubsection{The \code{mpx_uadd} function}
+\label{fn:mpx-uadd}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\
+|void mpx_uadd(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,|
+                  |const mpw *|$\av$|, const mpw *|$\avl$|,| \\
+                \>|const mpw *|$\bv$|, const mpw *|$\bvl$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+Adds two multiprecision integers.  The sum of the two arguments
+$\mp(\av .. \avl) + \mp(\bv ..  \bvl)$ is stored in $\dv .. \dvl$.  The
+destination array may be equal to either or both source arrays.\footnote{%
+  Adding a number to itself is a poor way of doubling.  A call to
+  \code{mpx_lsl} (page~\pageref{fn:mpx-lsl}) is much more efficient.} %
+
+\subsubsection{The \code{mpx_uaddn} function and \code{MPX_UADDN} macro}
+\label{fn:mpx-uaddn}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|void mpx_uaddn(mpw *|$\dv$|, mpw *|$\dvl$|, mpw |$n$|);| \\
+|void MPX_UADDN(mpw *|$\dv$|, mpw *|$\dvl$|, mpw |$n$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mpx_uaddn} adds a small integer $n$ (expressed as a single
+\code{mpw}) to the multiprecision integer held in $\dv .. \dvl$.
+
+The macro \code{MPX_UADDN} performs exactly the same operation, but uses
+inline code rather than calling a function.
+
+\subsubsection{The \code{mpx_usub} function}
+\label{fn:mpx-usub}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\
+|void mpx_usub(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,|
+                  |const mpw *|$\av$|, const mpw *|$\avl$|,| \\
+                \>|const mpw *|$\bv$|, const mpw *|$\bvl$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+Subtracts one multiprecision integer from another.  The difference of the two
+arguments $\mp(\av .. \avl) - \mp(\bv .. \bvl)$ is stored in $\dv .. \dvl$.
+The destination array may be equal to either or both source
+arrays.\footnote{% 
+  Subtracting a number from itself is a particularly poor way of clearing an
+  integer to zero.  A call to \code{MPX_ZERO} (page~\pageref{fn:MPX-ZERO}) is
+  much more efficient.} %
+
+Because overly large results are truncated to fit the destination array, if
+the second argument is larger than the first, a two's-complement result is
+obtained.
+
+\subsubsection{The \code{mpx_usubn} function and \code{MPX_USUBN} macro}
+\label{fn:mpx-usubn}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|void mpx_usubn(mpw *|$\dv$|, mpw *|$\dvl$|, mpw |$n$|);| \\
+|void MPX_USUBN(mpw *|$\dv$|, mpw *|$\dvl$|, mpw |$n$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mpx_usubn} subtracts a small integer $n$ (expressed as a
+single \code{mpw}) from the multiprecision integer held in $\dv .. \dvl$.
+
+The macro \code{MPX_USUBN} performs exactly the same operation, but uses
+inline code rather than calling a function.
+
+\subsubsection{The \code{mpx_umul} function}
+\label{fn:mpx-umul}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\
+|void mpx_umul(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,|
+                  |const mpw *|$\av$|, const mpw *|$\avl$|,| \\
+                \>|const mpw *|$\bv$|, const mpw *|$\bvl$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+Multiplies two multiprecision integers.  The product of the two arguments
+$\mp(\av .. \avl) \times \mp(\bv .. \bvl)$ is stored in $\dv .. \dvl$.  The
+destination array may not be equal to either source array.
+
+\subsubsection{The \code{mpx_umuln} function and \code{MPX_UMULN} macro}
+\label{fn:mpx-umuln}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\
+|void mpx_umuln(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,| \\
+                 \> |const mpw *|$\av$|, const mpw *|$\avl$|, mpw |$n$|);| \\
+|void MPX_UMULN(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,| \\
+                 \> |const mpw *|$\av$|, const mpw *|$\avl$|, mpw |$n$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mpx_umuln} multiplies the multiprecision integer passed in
+$\av .. \avl$ by a small integer $n$ (expressed as a single \code{mpw}),
+writing the product $n \cdot \mp(\av .. \avl)$ to the destination array $\dv
+.. \dvl$.  The destination array may be equal to the source array $\av
+.. \avl$.
+
+The macro \code{MPX_UMULN} performs exactly the same operation, but uses
+inline code rather than calling a function.
+
+\subsubsection{The \code{mpx_umlan} function and \code{MPX_UMLAN} macro}
+\label{fn:mpx-umlan}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\*
+|void mpx_umlan(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,| \\*
+                 \> |const mpw *|$\av$|, const mpw *|$\avl$|, mpw |$n$|);| \\
+|void MPX_UMLAN(|\=|mpw *|$\dv$|, mpw *|$\dvl$|,| \\
+                 \> |const mpw *|$\av$|, const mpw *|$\avl$|, mpw |$n$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+The function \code{mpx_umlan} multiplies the multiprecision integer passed in
+$\av .. \avl$ by a small integer $n$ (expressed as a single \code{mpw}), and
+adds it to the value already stored in the destination array $\dv .. \dvl$.
+The destination array may be equal to the source array $\av .. \avl$,
+although this isn't very useful.
+
+The macro \code{MPX_UMLAN} performs exactly the same operation, but uses
+inline code rather than calling a function.
+
+\subsubsection{The \code{mpx_usqr} function}
+\label{fn:mpx-usqr}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+|#include <catacomb/mpx.h>| \\
+|void mpx_usqr(mpw *|$\dv$|, mpw *|$\dvl$|,|
+              |const mpw *|$\av$|, const mpw *|$\avl$|);|
+\end{listinglist}
+
+\fsec{Description}
+
+Squares a multiprecision integer.  The result $\mp(\av .. \avl)^2$ is stored
+in $\dv .. \dvl$.  The destination array may not be equal to the source
+array.
+
+Squaring a number is approximately twice as fast as multiplying a number by
+itself.
+
+\subsubsection{The \code{mpx_udiv} function}
+\label{fn:mpx-udiv}
+
+\fsec{Synopsis}
+
+\begin{listinglist}
+\begin{tabbing}
+|#include <catacomb/mpx.h>| \\
+|void mpx_udiv(|\=|mpw *|$\qv$|, mpw *|$\qvl$|, mpw *|$\rv$|, mpw *|$\rvl$|,|
+\\              \>|const mpw *|$\dv$|, const mpw *|$\dvl$|,|
+                  |mpw *|$\mathit{sv}$|, mpw *|$\mathit{svl}$|);|
+\end{tabbing}
+\end{listinglist}
+
+\fsec{Description}
+
+Calculates the quotient and remainder when one multiprecision integer is
+divided by another.
+
+The calling convention is slightly strange.  The dividend and divisor are
+passed in the arrays $\rv .. \rvl$ and $\dv .. \dvl$ respectively.  The
+quotient is written to the array $\qv .. \qvl$; the remainder is left in $\rv
+.. \rvl$.
+
+The division function requires some workspace.  The `scratch' array
+$\mathit{sv} .. \mathit{svl}$ must be at least one word longer than the
+divisor array $\dv .. \dvl$.  The scratch array'sfinal contents are not
+useful.  Also, the remainder array $\rv .. \rvl$ must have at least two
+words' headroom.
+
+\unverb\|
+Given a dividend $x$ and a divisor $y$, the function calculates a quotient
+$q$ and remainder $r$ such that $q = \lfloor x / y \rfloor$ and $x = qy + r$.
+In particular, this definition implies that $r$ has the same sign as $y$,
+which is a useful property when performing modular reductions.
+\shortverb\|
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "catacomb"
+%%% End: 
diff --git a/manual/mp.tex b/manual/mp.tex
new file mode 100644
index 0000000..ced4dca
--- /dev/null
+++ b/manual/mp.tex
@@ -0,0 +1,34 @@
+%%% -*-latex-*-
+
+\chapter{Multiprecision arithmetic}
+\label{chap:mp}
+
+Most public-key cryptographic systems, and some other cryptographic
+primitives, require arithmetic on large numbers.  Catacomb provides a
+reasonably efficient library of arithmetic functions, designed particularly
+for cryptographic applications.
+
+
+\section{Structure of the Catacomb multiprecision library}
+
+The multiprecision routines in Catacomb are divided into a number of
+logically separate units:
+
+\begin{itemize*}
+\item Very low-level unsigned arithmetic (\unit{mpx}).
+\item Memory management support and allocation hooks (\unit{mparena}).
+\item Standard operations on signed multiprecision integers (\unit{mp}).
+\item I/O support for multiprecision integers (\unit{mptext}, \unit{mpint}).
+\item Number-theoretic algorithms and functions (\unit{mpcrt}).
+\item Modular multiplication and exponentiation functions (\unit{mpmont}).
+\item Prime number searching and testing (\unit{pgen}, \unit{rabin}).
+\end{itemize*}
+
+\input{mp-mpx}
+\input{mp-mp}
+\input{mp-mod}
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: "catacomb"
+%%% End: