From d3f33b9a37f45bc5809af314b1b436b712d59a80 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Thu, 1 Feb 2024 12:33:33 +0000 Subject: [PATCH] base/permute.h, utils/permute.lisp, symm/...: Formalize bit permutations. Add a bunch of Lisp code for messing with permutations. Add a C header file with macros for implementing interesting primitive permutations. Apply these to DES and Keccak-1600 as a demonstration. --- base/Makefile.am | 3 + base/permute.h | 285 +++++++++++++++++++++++++++++++++++++++++++++++++++++ symm/des-base.h | 89 +++++++++++++---- symm/des.c | 11 +++ symm/des3.c | 11 +++ symm/desx.c | 11 +++ symm/keccak1600.c | 47 ++++----- utils/permute.lisp | 280 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 695 insertions(+), 42 deletions(-) create mode 100644 base/permute.h create mode 100644 utils/permute.lisp diff --git a/base/Makefile.am b/base/Makefile.am index 8b7c0fcd..f4c99732 100644 --- a/base/Makefile.am +++ b/base/Makefile.am @@ -42,6 +42,9 @@ libbase_la_SOURCES += arena.c pkginclude_HEADERS += ct.h libbase_la_SOURCES += ct.c ct-test.c +## Bit permutations. +pkginclude_HEADERS += permute.h + ## CPU-specific dispatch. pkginclude_HEADERS += dispatch.h libbase_la_SOURCES += dispatch.c diff --git a/base/permute.h b/base/permute.h new file mode 100644 index 00000000..ef8eb169 --- /dev/null +++ b/base/permute.h @@ -0,0 +1,285 @@ +/* -*-c-*- + * + * Bit permutations + * + * (c) 2024 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software: you can redistribute it and/or modify it + * under the terms of the GNU Library General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb. If not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + */ + +#ifndef CATACOMB_PERMUTE_H +#define CATACOMB_PERMUTE_H + +#ifdef __cplusplus + extern "C" { +#endif + +/*----- Header files ------------------------------------------------------*/ + +#include + +/*----- Macros provided ---------------------------------------------------*/ + +/* --- Theory lesson --- * + * + * It's often useful to rearrange the bits in a word, or a value split across + * two (or more) words, so it's worth taking a moment to consider how this + * might be done efficiently. Throughout this discussion, we use the + * standard bit numbering, where the least significant bit in a word is bit + * zero, with numbering increasing with significance. Equivalently, bit + * %$k$% has the numerical value %$2^k$%. + * + * An essential primitive is the `swizzle', which exchanges two similarly + * arranged but disjoint groups of bits within a word which are separated by + * a constant offset. The groups of bits don't have to be contiguous, but + * they must be identified by shifts of the same mask. + * + * An especially important class of swizzle permutations considers the + * individual bits of bit indices. Permutations of the bits in a word can be + * interpreted as operations on the bits of the indices. For %$i \ge 0$%, + * let %$\mu_i$% be the mask such that bit %$k$% of %$\mu_i$% is set if and + * only if bit %$i$% is clear in %$k$%. Hence + * + * %$\mu_0 = (\ldots 01010101010101010101010101010101)_2 = -1/3$% , + * %$\mu_1 = (\ldots 00110011001100110011001100110011)_2 = -1/5$% , + * %$\mu_2 = (\ldots 00001111000011110000111100001111)_2 = -1/17$% , + * %$\mu_3 = (\ldots 00000000111111110000000011111111)_2 = -1/257$% , + * etc. + * + * In general, the low %$2^i$% bits of %$\mu_i$% are set, the next least + * significant %$2^i$% bits are clear, the next %$2^i$% bits are set, and so + * on. Hence, %$\mu_i \lsl 2^i = \bar{\mu}_i$%, or, in the %$2$%-adic + * numbers %$\Z_2$%, %$2^{2^i} \mu_k = -1 - \mu_i$%, whence, in general, + * %$\mu_i = -1/(2^{2^i} + 1)$%. Let %$x$% be some binary value; now we can + * describe some important swizzles. + * + * * Let %$y=(x\bitand\mu_i)\lsl 2^i \bitor (x\bitand\bar{\mu}_i)\lsr 2^i%, + * or %$y = (x\bitand\mu_i) \lsl 2^k \bitor (x \lsr 2^k)\bitand\mu_i$%. + * This exchanges the two sub-blocks of %$2^i$% bits in each %$2^{i+1}$% + * block in %$x$%. In terms of indices, now the bits at indices in which + * bit %$i$% is set precede those in which bit %$i$% is clear. + * %%\emph{We have inverted index bit %$i$%.}%% + * + * * Suppose that %$i < j$%, and let %$m = \bar{\mu}_i \bitand \mu_j$% and + * %$s = 2^j - 2^i$%; let %$y = (x \bitand m) \lsl s \bitor {}$% + * %$(x \lsr s)\bitand m \bitor (x\bitand \overline{m \bitor m \lsl s$%. + * Now, %$m$% has its bit %$k$% set if and only if bit %$i$% of %$k$% is + * set and bit %$j$% of %$k$% is clear. The related mask %$m \lsl s$% + * has bit %$k + s$% set if %$k% has the same property; but, %$k$% + * will have bit %$i$% set and bit %$j$% clear if and only if bit %$i$% + * is clear and bit %$j$% is set in %$k + s$%. Combined, the mask + * %$m \bitor (m \lsl s)$% selects bits at indices in which bits %$i$% + * and %$j$% differ, so %$\overline{m \bitor (m \lsl s)}$% selects the + * bits at indices where bits %$i$% and %$j$% are equal. + * + * This swizzle therefore exchanges the bits of %$x$% at indices where + * bit %$i$% is set and bit %$j$% is clear with those at indices where + * bit %$j$% is set and %$i$% is clear, leaving alone those bits at + * indices where bits %$i$% and %$j$% are either both clear or both set. + * %%\emph{We have exchanged index bits %$i$% and %$j$%.}%% + * + * * Rounding off this little collection, suppose again that %$i < j$%, and + * let %$m = \mu_i \bitand \mu_j$% and %$s = 2^i + 2^j$%; and again, let + * %$y = (x \bitand m) \lsl s \bitor (x \lsr s) \bitand m \bitor {}$% + * %$(x \bitand \overline{m \bitor m \lsl s$%. Now, %$m$% has its bit + * %$k$% set if and only if bits %$i$% and %$j$% of %$k$% are both clear. + * This swizzle therefore exchanges the bits of %$x$% at indices where + * bits %$i$% and %$j$% are both clear with those at indices where + * bits %$i$% and %$j$% are both set, leaving alone those bits at indices + * where bits %$i$% and %$j$% differ. It takes a little work to (left as + * an exercise) to see that the effect combines the previous two. + * %%\emph{We have exchanged and inverted index bits %$i$% and %$j$%.}%% + * + * Related is the `twizzle', which exchanges similarly arranged groups of + * bits within two different words. This can be seen as a multiprecision + * variant of the swizzle. + * + * The machinery here expects some parameters to have been defined: + * + * * @regty@ should be an unsigned integer type, and + * + * * @REGWD@ should be a power of two such that @regty@ can store at least + * @REGWD@ bits. + */ + +/* We begin with some internal utilities. @CATACOMB__REPLICATE_n_(x)@ + * produces a hexadecimal constant consisting of %$n$% copies of the digits + * @x@. + */ +#define CATACOMB__REPLICATE_16_(x) CATACOMB__REPLICATE_8_(GLUE(x, x)) +#define CATACOMB__REPLICATE_8_(x) CATACOMB__REPLICATE_4_(GLUE(x, x)) +#define CATACOMB__REPLICATE_4_(x) CATACOMB__REPLICATE_2_(GLUE(x, x)) +#define CATACOMB__REPLICATE_2_(x) CATACOMB__REPLICATE_1_(GLUE(x, x)) +#define CATACOMB__REPLICATE_1_(x) GLUE(0x, x) + +/* More internal utilities. @CATACOMB__REPLi_Un(x)@ returns an %$n$%-bit + * hexadecimal constant formed by replicating the %$i$%-bit constant (which + * must have leading zeros) %$n/i$% times. + */ +#define CATACOMB__REPL8_U8 CATACOMB__REPLICATE_1_ +#define CATACOMB__REPL8_U16 CATACOMB__REPLICATE_2_ +#define CATACOMB__REPL8_U32 CATACOMB__REPLICATE_4_ +#define CATACOMB__REPL8_U64 CATACOMB__REPLICATE_8_ +#define CATACOMB__REPL8_U128 CATACOMB__REPLICATE_16_ + +#define CATACOMB__REPL16_U16 CATACOMB__REPLICATE_1_ +#define CATACOMB__REPL16_U32 CATACOMB__REPLICATE_2_ +#define CATACOMB__REPL16_U64 CATACOMB__REPLICATE_4_ +#define CATACOMB__REPL16_U128 CATACOMB__REPLICATE_8_ + +#define CATACOMB__REPL32_U32 CATACOMB__REPLICATE_1_ +#define CATACOMB__REPL32_U64 CATACOMB__REPLICATE_2_ +#define CATACOMB__REPL32_U128 CATACOMB__REPLICATE_4_ + +#define CATACOMB__REPL64_U64 CATACOMB__REPLICATE_1_ +#define CATACOMB__REPL64_U128 CATACOMB__REPLICATE_2_ + +#define CATACOMB__REPL128_U128 CATACOMB__REPLICATE_1_ + +/* Finally, @CATACOMB__REPLi(x)@ returns a hexadecimal constant formed by + * replicating the %$i$%-bit constant (including leading zeros) sufficiently + * many times as to fill a @REGWD@-bit wide register. + */ +#define CATACOMB__REPL8(x) GLUE(CATACOMB__REPL8_U, REGWD)(x) +#define CATACOMB__REPL16(x) GLUE(CATACOMB__REPL16_U, REGWD)(x) +#define CATACOMB__REPL32(x) GLUE(CATACOMB__REPL32_U, REGWD)(x) +#define CATACOMB__REPL64(x) GLUE(CATACOMB__REPL64_U, REGWD)(x) +#define CATACOMB__REPL128(x) GLUE(CATACOMB__REPL128_U, REGWD)(x) + +/* The macro @CATACOMB__IXMASK_Bi(_)@ evaluates to the low @REGWD@ bits of + * the constant %$\mu_i$% defined above. The argument is ignored; it's + * necessary to prevent technical problems with macro expansion + * (specifically, to allow the blue paint on @GLUE@ to be washed off before + * invoking @CATACOMB__REPLi@). + */ +#define CATACOMB__IXMASK_B0(_) CATACOMB__REPL8(55) +#define CATACOMB__IXMASK_B1(_) CATACOMB__REPL8(33) +#define CATACOMB__IXMASK_B2(_) CATACOMB__REPL8(0f) +#define CATACOMB__IXMASK_B3(_) CATACOMB__REPL16(00ff) +#define CATACOMB__IXMASK_B4(_) CATACOMB__REPL32(0000ffff) +#define CATACOMB__IXMASK_B5(_) CATACOMB__REPL64(00000000ffffffff) +#define CATACOMB__IXMASK_B6(_) \ + CATACOMB__REPL128(0000000000000000ffffffffffffffff) + +/* @IXMASK(i)@ returns the low @REGWD@ bits of %$\mu_i$%. The argument @i@ + * must be a decimal integer constant, without leading zeros. + */ +#define IXMASK(i) GLUE(CATACOMB__IXMASK_B, i)(hunoz) + +/* @IXMASK_xy(i, j)@ returns a @REGWD@-bit mask in which bit %$k$% is set if + * bit %$i$% of %$k$% is equal to %$x$% and bit %$j$% of %$k$% is equal to + * %$y$%. The arguments @i@ and @j@ must be decimal integer constants, + * without leading zeros. + */ +#define IXMASK_00(i, j) (IXMASK(i)&IXMASK(j)) +#define IXMASK_01(i, j) (IXMASK(i)&~IXMASK(j)) +#define IXMASK_10(i, j) (~IXMASK(i)&IXMASK(j)) +#define IXMASK_11(i, j) (~IXMASK(i)&~IXMASK(j)) + +/* The general swizzle operation. Exchange the bits in @x@ selected by + * @mask@ with those selected by @mask << shift@. + */ +#define SWIZZLE(x, shift, mask) do { \ + regty _t = ((x) ^ ((x) >> (shift)))&(mask); \ + (x) ^= _t | (_t << (shift)); \ +} while (0) + +/* A swizzle on two words @x@ and @y@, using the same shift, but different + * masks @mask0@ and @mask1@. This is just a simple abbreviation. + */ +#define SWIZZLE_2(x, y, shift, mask0, mask1) do { \ + SWIZZLE(x, shift, mask0); SWIZZLE(y, shift, mask1); \ +} while (0) + +/* A `twizzle', or a swizzle across two words. + * + * The @TWIZZLE_0@ macro exchanges the bits of @x@ and @y selected by + * @mask@. The @TWIZZLE_L@ and @TWIZZLE_R@ macros exchange the bits selected + * by @mask@ in @y@ with the bits in @x@ selected by @mask << shift@ or + * @mask >> shift@ respectively. (The names are from the direction in which + * @x@ is shifted, not the direction the mask is shifted.) + * + * These are used to synthesize swizzles within multiprecision words: if the + * intended shift is %$a w + b$%, where %$w$% is the word width, then %$a$% + * gives the difference between word indices of the words to be processed, + * and %$\abs{b$}% gives the @shift@ argument; use @TWIZZLE_R@ if + * %$b \ge 0$%, @TWIZZLE_L@ if %$b \le 0$% is nonpositive, or @TWIZZLE_0@ if + * %$b = 0$%. (We can easily distinguish which of %$a w \pm b$% or + * %$(a \pm 1) w \mp (w - b)$%, since one kind of shift will keep @mask@ + * within the same word, and the other will shift it out completely.) + */ +#define TWIZZLE_0(x, y, mask) do { \ + regty _t = ((y) ^ ((x)))&(mask); \ + (x) ^= _t; (y) ^= _t; \ +} while (0) +#define TWIZZLE_L(x, y, shift, mask) do { \ + regty _t = ((y) ^ ((x) << (shift)))&(mask); \ + (x) ^= _t >> (shift); (y) ^= _t; \ +} while (0) +#define TWIZZLE_R(x, y, shift, mask) do { \ + regty _t = ((y) ^ ((x) >> (shift)))&(mask); \ + (x) ^= _t << (shift); (y) ^= _t; \ +} while (0) + +/* @SWIZZLE_CPL@ applies a swizzle to @x@ which complements index bit @i@; + * @SWIZZLE_EXCH@ applies a swizzle to exchange index bits @i@ and @j@; and + * @SWIZZLE_XCPL@ applies a swizzle to exchange and invert index bits @i@ and + * @j@. The arguments @i@ and @j@ must be decimal integer constants without + * leading zeros, with %$i \le j$%. (The macros do nothing if %$i = j$%.) + * + * The variants with @2@ in their names act identically on @x@ and @y@, and + * are intended as a simple convenience. + */ +#define SWIZZLE_CPL(x, i) SWIZZLE(x, (1 << (i)), IXMASK(i)) +#define SWIZZLE_EXCH(x, i, j) \ + SWIZZLE(x, (1 << (j)) - (1 << (i)), IXMASK_10(i, j)) +#define SWIZZLE_XCPL(x, i, j) \ + SWIZZLE(x, (1 << (j)) + (1 << (i)), IXMASK_00(i, j)) + +#define SWIZZLE_CPL2(x, y, i) \ + SWIZZLE_2(x, y, (1 << (i)), IXMASK(i), IXMASK(i)) +#define SWIZZLE_EXCH2(x, y, i, j) \ + SWIZZLE_2(x, y, (1 << (j)) - (1 << (i)), \ + IXMASK_10(i, j), IXMASK_10(i, j)) +#define SWIZZLE_XCPL2(x, y, i, j) \ + SWIZZLE_2(x, y, (1 << (j)) + (1 << (i)), \ + IXMASK_00(i, j), IXMASK_00(i, j)) + +/* The @TWIZZLE_EXCH@ and @TWIZZLE_XCPL@ macros act like the similarly named + * @SWIZZLE_...@ macros above, except that (a) they act on two words from a + * multiprecision value, and the @j@ index is implicit in the selection of + * the operands @x@ and @y@. If the word width is %$w$%, and %$2^j = n w$%, + * then %$x$% should be chosen to be %$n$% slots more significant than + * %$y$%. + * + * Note that there is no @TWIZZLE_CPL@, since this would simply involve + * exchanging two entries in an array. + */ +#define TWIZZLE_EXCH(x, y, i) TWIZZLE_L(x, y, (1 << (i)), ~IXMASK(i)) +#define TWIZZLE_XCPL(x, y, i) TWIZZLE_R(x, y, (1 << (i)), IXMASK(i)) + +/*----- That's all, folks -------------------------------------------------*/ + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/symm/des-base.h b/symm/des-base.h index 49bbed7e..7e55e044 100644 --- a/symm/des-base.h +++ b/symm/des-base.h @@ -36,6 +36,10 @@ #include +#ifndef CATACOMB_PERMUTE_H +# include "permute.h" +#endif + /*----- External data -----------------------------------------------------*/ extern const uint32 des_sp[8][64]; @@ -68,32 +72,77 @@ extern const uint32 des_sp[8][64]; * The cryptographically useless initial and final permutations. The initial * permutation also rotates the two block halves left by one place. This is * undone by the inverse permutation at the end. + * + * The initial permutation is traditionally given by the table + * + * 58 50 42 34 26 18 10 2 + * 60 52 44 36 28 20 12 4 + * 62 54 46 38 30 22 14 6 + * 64 56 48 40 32 24 16 8 + * 57 49 41 33 25 17 9 1 + * 59 51 43 35 27 19 11 3 + * 61 53 45 37 29 21 13 5 + * 63 55 47 39 31 23 15 7 + * + * The bit numbering is terrible. If the two halves are X and Y, then the + * numbering starts with the most significant bit of X, which is bit 1, + * working down towards the least significant bit, and then continuing with + * the bits of Y, again in order of decreasing significance. The table + * entries are in this same order, indicating that `bit 1' of the output is + * `bit 58' of the input and so on. + * + * I opt instead to number the bits starting from the least significant bit + * of Y, which is bit 0, up to the most significant bit of X, which is + * bit 63. This means that we need to reverse the table (because we're going + * to read it in the other direction) and subtract each entry from 64 (to + * correct the bit numbering). The resulting table is + * + * 57 49 41 33 25 17 9 1 + * 59 51 43 35 27 19 11 3 + * 61 53 45 37 29 21 13 5 + * 63 55 47 39 31 23 15 7 + * 56 48 40 32 24 16 8 0 + * 58 50 42 34 26 18 10 2 + * 60 52 44 36 28 20 12 4 + * 62 54 46 38 30 22 14 6 + * + * which, interestingly, is /also/ what you get if you just subtract one from + * each of the original table's entries. + * + * If we look at this table in binary, the patterns are much clearer. + * + * 111001 110001 101001 100001 011001 010001 001001 000001 + * 111011 110011 101011 100011 011011 010011 001011 000011 + * 111101 110101 101101 100101 011101 010101 001101 000101 + * 111111 110111 101111 100111 011111 010111 001111 000111 + * 111000 110000 101000 100000 011000 010000 001000 000000 + * 111010 110010 101010 100010 011010 010010 001010 000010 + * 111100 110100 101100 100100 011100 010100 001100 000100 + * 111110 110110 101110 100110 011110 010110 001110 000110 + * + * We can implement this efficiently using our permutation machinery. + * Writing ~k for index bit @k@ inverted, this permutation reflects the index + * transformation given by ~0, 2, 1, ~5, ~4, ~3. The following sequence of + * operations is traditional. Essentially this is an antitranspose -- a + * reflection about the antidiagonal -- followed by a couple of fixup stages. */ -#define DES_IP(x, y) do { \ - uint32 _t; \ - _t = (y ^ (x >> 4)) & 0x0f0f0f0f; y ^= _t; x ^= _t << 4; \ - _t = (x ^ (x >> 18)) & 0x00003333; x ^= _t; x ^= _t << 18; \ - _t = (y ^ (y >> 18)) & 0x00003333; y ^= _t; y ^= _t << 18; \ - _t = (x ^ (x >> 9)) & 0x00550055; x ^= _t; x ^= _t << 9; \ - _t = (y ^ (y >> 9)) & 0x00550055; y ^= _t; y ^= _t << 9; \ - _t = (x ^ (x >> 24)) & 0x000000ff; x ^= _t; x ^= _t << 24; \ - _t = (y ^ (y >> 24)) & 0x000000ff; y ^= _t; y ^= _t << 24; \ - _t = (y ^ (x >> 16)) & 0x0000ffff; y ^= _t; x ^= _t << 16; \ +#define DES_IP(x, y) do { /* 5, 4, 3, 2, 1, 0 */ \ + TWIZZLE_XCPL (x, y, 2); /* ~2, 4, 3, ~5, 1, 0 */ \ + SWIZZLE_XCPL2(x, y, 1, 4); /* ~2, ~1, 3, ~5, ~4, 0 */ \ + SWIZZLE_XCPL2(x, y, 0, 3); /* ~2, ~1, ~0, ~5, ~4, ~3 */ \ + SWIZZLE_XCPL2(x, y, 3, 4); /* ~2, 0, 1, ~5, ~4, ~3 */ \ + TWIZZLE_XCPL (x, y, 4); /* ~0, 2, 1, ~5, ~4, ~3 */ \ x = ROL32(x, 1); y = ROL32(y, 1); \ } while (0) #define DES_IPINV(x, y) do { \ - uint32 _t; \ - x = ROR32(x, 1); y = ROR32(y, 1); \ - _t = (y ^ (x >> 16)) & 0x0000ffff; y ^= _t; x ^= _t << 16; \ - _t = (x ^ (x >> 24)) & 0x000000ff; x ^= _t; x ^= _t << 24; \ - _t = (y ^ (y >> 24)) & 0x000000ff; y ^= _t; y ^= _t << 24; \ - _t = (y ^ (x >> 4)) & 0x0f0f0f0f; y ^= _t; x ^= _t << 4; \ - _t = (x ^ (x >> 18)) & 0x00003333; x ^= _t; x ^= _t << 18; \ - _t = (y ^ (y >> 18)) & 0x00003333; y ^= _t; y ^= _t << 18; \ - _t = (x ^ (x >> 9)) & 0x00550055; x ^= _t; x ^= _t << 9; \ - _t = (y ^ (y >> 9)) & 0x00550055; y ^= _t; y ^= _t << 9; \ + x = ROR32(x, 1); y = ROR32(y, 1); /* ~0, 2, 1, ~5, ~4, ~3 */ \ + TWIZZLE_XCPL (x, y, 4); /* ~2, 0, 1, ~5, ~4, ~3 */ \ + SWIZZLE_XCPL2(x, y, 3, 4); /* ~2, ~1, ~0, ~5, ~4, ~3 */ \ + SWIZZLE_XCPL2(x, y, 0, 3); /* ~2, ~1, 3, ~5, ~4, 0 */ \ + SWIZZLE_XCPL2(x, y, 1, 4); /* ~2, 4, 3, ~5, 1, 0 */ \ + TWIZZLE_XCPL (x, y, 2); /* 5, 4, 3, 2, 1, 0 */ \ } while (0) /* --- @DES_EBLK@, @DES_DBLK@ --- * diff --git a/symm/des.c b/symm/des.c index 06400a15..e751cf0e 100644 --- a/symm/des.c +++ b/symm/des.c @@ -37,6 +37,7 @@ #include "blkc.h" #include "des-base.h" #include "des.h" +#include "permute.h" #include "gcipher.h" /*----- Global variables --------------------------------------------------*/ @@ -258,20 +259,30 @@ void des_init(des_ctx *k, const void *buf, size_t sz) void des_eblk(const des_ctx *k, const uint32 *s, uint32 *d) { +#define REGWD 32 + typedef uint32 regty; + uint32 x = s[0], y = s[1]; DES_IP(x, y); DES_EBLK(k->k, x, y, x, y); DES_IPINV(x, y); d[0] = x, d[1] = y; + +#undef REGWD } void des_dblk(const des_ctx *k, const uint32 *s, uint32 *d) { +#define REGWD 32 + typedef uint32 regty; + uint32 x = s[0], y = s[1]; DES_IP(x, y); DES_DBLK(k->k, x, y, x, y); DES_IPINV(x, y); d[0] = x, d[1] = y; + +#undef REGWD } BLKC_TEST(DES, des) diff --git a/symm/des3.c b/symm/des3.c index dedd9f28..92a6f2af 100644 --- a/symm/des3.c +++ b/symm/des3.c @@ -38,6 +38,7 @@ #include "des-base.h" #include "des.h" #include "des3.h" +#include "permute.h" #include "gcipher.h" /*----- Global variables --------------------------------------------------*/ @@ -92,6 +93,9 @@ void des3_init(des3_ctx *k, const void *buf, size_t sz) void des3_eblk(const des3_ctx *k, const uint32 *s, uint32 *d) { +#define REGWD 32 + typedef uint32 regty; + uint32 x = s[0], y = s[1]; DES_IP(x, y); DES_EBLK(k->a.k, x, y, x, y); @@ -99,10 +103,15 @@ void des3_eblk(const des3_ctx *k, const uint32 *s, uint32 *d) DES_EBLK(k->c.k, x, y, x, y); DES_IPINV(x, y); d[0] = x, d[1] = y; + +#undef REGWD } void des3_dblk(const des3_ctx *k, const uint32 *s, uint32 *d) { +#define REGWD 32 + typedef uint32 regty; + uint32 x = s[0], y = s[1]; DES_IP(x, y); DES_DBLK(k->c.k, x, y, x, y); @@ -110,6 +119,8 @@ void des3_dblk(const des3_ctx *k, const uint32 *s, uint32 *d) DES_DBLK(k->a.k, x, y, x, y); DES_IPINV(x, y); d[0] = x, d[1] = y; + +#undef REGWD } BLKC_TEST(DES3, des3) diff --git a/symm/desx.c b/symm/desx.c index 14115a57..e3737ccb 100644 --- a/symm/desx.c +++ b/symm/desx.c @@ -39,6 +39,7 @@ #include "des.h" #include "desx.h" #include "gcipher.h" +#include "permute.h" /*----- Tables ------------------------------------------------------------*/ @@ -123,6 +124,9 @@ void desx_init(desx_ctx *k, const void *buf, size_t sz) void desx_eblk(const desx_ctx *k, const uint32 *s, uint32 *d) { +#define REGWD 32 + typedef uint32 regty; + uint32 x = s[0], y = s[1]; x ^= k->prea; y ^= k->preb; DES_IP(x, y); @@ -130,10 +134,15 @@ void desx_eblk(const desx_ctx *k, const uint32 *s, uint32 *d) DES_IPINV(x, y); x ^= k->posta; y ^= k->postb; d[0] = x, d[1] = y; + +#undef REGWD } void desx_dblk(const desx_ctx *k, const uint32 *s, uint32 *d) { +#define REGWD 32 + typedef uint32 regty; + uint32 x = s[0], y = s[1]; x ^= k->posta; y ^= k->postb; DES_IP(x, y); @@ -141,6 +150,8 @@ void desx_dblk(const desx_ctx *k, const uint32 *s, uint32 *d) DES_IPINV(x, y); x ^= k->prea; y ^= k->preb; d[0] = x, d[1] = y; + +#undef REGWD } BLKC_TEST(DESX, desx) diff --git a/symm/keccak1600.c b/symm/keccak1600.c index cfbfdefe..a4a6564f 100644 --- a/symm/keccak1600.c +++ b/symm/keccak1600.c @@ -33,6 +33,7 @@ #include #include "keccak1600.h" +#include "permute.h" /* #define KECCAK_DEBUG */ @@ -66,25 +67,22 @@ static lane interlace(kludge64 x) { /* Given a 64-bit string X, return a lane Z containing the even- and * odd-numbered bits of X. - * - * This becomes more manageable if we look at what happens to the bit - * indices: bit i of X becomes bit ROR_6(i, 1) of Z. We can effectively - * swap two bits of the indices by swapping the object bits where those - * index bits differ. Fortunately, this is fairly easy. - * - * We arrange to swap bits between the two halves of X, rather than within - * a half. */ - uint32 x0 = LO64(x), x1 = HI64(x), t; +typedef uint32 regty; +#define REGWD 32 + + uint32 x0 = LO64(x), x1 = HI64(x); lane z; - /* 543210 */ - t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 453210 */ - t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 354210 */ - t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 254310 */ - t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 154320 */ - t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 054321 */ + /* 5, 4, 3, 2, 1, 0 */ + TWIZZLE_EXCH(x1, x0, 4); /* 4, 5, 3, 2, 1, 0 */ + TWIZZLE_EXCH(x1, x0, 3); /* 3, 5, 4, 2, 1, 0 */ + TWIZZLE_EXCH(x1, x0, 2); /* 2, 5, 4, 3, 1, 0 */ + TWIZZLE_EXCH(x1, x0, 1); /* 1, 5, 4, 3, 2, 0 */ + TWIZZLE_EXCH(x1, x0, 0); /* 0, 5, 4, 3, 2, 1 */ z.even = x0; z.odd = x1; return (z); + +#undef REGWD } static kludge64 deinterlace(lane x) @@ -93,15 +91,20 @@ static kludge64 deinterlace(lane x) * to `interlace' above, and the principle is the same */ - uint32 x0 = x.even, x1 = x.odd, t; +typedef uint32 regty; +#define REGWD 32 + + uint32 x0 = x.even, x1 = x.odd; kludge64 z; - /* 054321 */ - t = ((x0 >> 1) ^ x1)&0x55555555; x0 ^= t << 1; x1 ^= t; /* 154320 */ - t = ((x0 >> 2) ^ x1)&0x33333333; x0 ^= t << 2; x1 ^= t; /* 254310 */ - t = ((x0 >> 4) ^ x1)&0x0f0f0f0f; x0 ^= t << 4; x1 ^= t; /* 354210 */ - t = ((x0 >> 8) ^ x1)&0x00ff00ff; x0 ^= t << 8; x1 ^= t; /* 453210 */ - t = ((x0 >> 16) ^ x1)&0x0000ffff; x0 ^= t << 16; x1 ^= t; /* 543210 */ + /* 0, 5, 4, 3, 2, 1 */ + TWIZZLE_EXCH(x1, x0, 0); /* 1, 5, 4, 3, 2, 0 */ + TWIZZLE_EXCH(x1, x0, 1); /* 2, 5, 4, 3, 1, 0 */ + TWIZZLE_EXCH(x1, x0, 2); /* 3, 5, 4, 2, 1, 0 */ + TWIZZLE_EXCH(x1, x0, 3); /* 4, 5, 3, 2, 1, 0 */ + TWIZZLE_EXCH(x1, x0, 4); /* 5, 4, 3, 2, 1, 0 */ SET64(z, x1, x0); return (z); + +#undef REGWD } #define TO_LANE(x) (interlace(x)) diff --git a/utils/permute.lisp b/utils/permute.lisp new file mode 100644 index 00000000..372f5cfc --- /dev/null +++ b/utils/permute.lisp @@ -0,0 +1,280 @@ +;;; -*-lisp-*- + +;;; This file isn't a program as such: rather, it's a collection of handy +;;; functions which can be used in an interactive session. + +;;;-------------------------------------------------------------------------- +;;; General permutation utilities. + +(defun shuffle (v) + "Randomly permute the elements of the vector V. Return V." + (let ((n (length v))) + (do ((k n (1- k))) + ((<= k 1) v) + (let ((i (random k))) + (unless (= i (1- k)) + (rotatef (aref v i) (aref v (1- k)))))))) + +(defun identity-permutation (n) + "Return the do-nothing permutation on N elements." + (let ((v (make-array n :element-type 'fixnum))) + (dotimes (i n v) (setf (aref v i) i)))) + +(defun invert-permutation (p) + "Given a permutation P, return its inverse." + (let* ((n (length p)) (p-inv (make-array n :element-type 'fixnum))) + (dotimes (i n) (setf (aref p-inv (aref p i)) i)) + p-inv)) + +(defun next-permutation (v) + "Adjust V so that it reflects the next permutation in ascending order. + + V should be a vector of real numbers. Returns V if successful, or nil if + there are no more permutations." + + ;; The tail of the vector consists of a sequence ... A, Z, Y, X, ..., where + ;; Z > Y > X ... is in reverse order, and A < Z. The next permutation is + ;; then the smallest out of Z, Y, X, ... which is larger than A, followed + ;; by the remaining elements in ascending order. + ;; + ;; Equivalently, reverse the tail Z, Y, X, ... so we have A, ... X, Y, Z, + ;; and swap A with the next larger element. + + (let ((n (length v))) + (cond ((< n 2) nil) + (t (let* ((k (1- n)) + (x (aref v k))) + (loop (when (zerop k) (return-from next-permutation nil)) + (decf k) + (let ((y (aref v k))) + (when (prog1 (< y x) + (setf x y)) + (return)))) + (do ((i (1+ k) (1+ i)) + (j (1- n) (1- j))) + ((> i j)) + (rotatef (aref v i) (aref v j))) + (do ((i (- n 2) (1- i))) + ((or (<= i k) (< (aref v i) x)) + (rotatef (aref v k) (aref v (1+ i))))) + v))))) + +(defun make-index-mask (w mask-expr) + "Construct a bitmask based on bitwise properties of the bit indices. + + The function returns a W-bit mask in which each bit is set if MASK-EXPR + of true of the bit's index. MASK-EXPR may be one of the following: + + * I -- an integer I is true if bit I of the bit index is set; + * (not EXPR) -- is true if EXPR is false; + * (and EXPR EXPR ...) -- is true if all of the EXPRs are true; and + * (or EXPR EXPR ...) -- is true if any of the EXPRs is true." + + (let ((max-bit (1- (integer-length (1- w)))) + (mask 0)) + (dotimes (i w mask) + (labels ((interpret (expr) + (cond ((and (integerp expr) (<= 0 expr max-bit)) + (logbitp expr i)) + ((and (consp expr) (eq (car expr) 'not) + (null (cddr expr))) + (not (interpret (cadr expr)))) + ((and (consp expr) (eq (car expr) 'and)) + (every #'interpret (cdr expr))) + ((and (consp expr) (eq (car expr) 'or)) + (some #'interpret (cdr expr))) + (t + (error "unknown mask expression ~S" expr))))) + (when (interpret mask-expr) + (setf (ldb (byte 1 i) mask) 1)))))) + +(defun make-permutation-network (w steps) + "Construct a permutation network. + + The integer W gives the number of bits to be acted upon. The STEPS are a + list of instructions of the following forms: + + * (SHIFT . MASK) -- a pair of integers is treated literally; + + * (SHIFT MASK-EXPR) -- the SHIFT is literal, but the MASK-EXPR is + processed by `make-index-mask' to calculate the mask; + + * (:invert I) -- make an instruction which inverts the sense of the + index bit I; + + * (:exchange I J) -- make an instruction which exchanges index bits I + and J; or + + * (:exchange-invert I J) -- make an instruction which exchanges and + inverts index bits I and J. + + The output is a list of primitive (SHIFT . MASK) steps, indicating that + the bits of the input selected by MASK are to be swapped with the bits + selected by (ash MASK SHIFT)." + + (let ((max-mask (1- (ash 1 w))) + (max-shift (1- w)) + (max-bit (1- (integer-length (1- w)))) + (list nil)) + (dolist (step steps) + (cond ((and (consp step) + (integerp (car step)) (<= 0 (car step) max-shift) + (integerp (cdr step)) (<= 0 (cdr step) max-mask)) + (push step list)) + ((and (consp step) + (integerp (car step)) (<= 0 (car step) max-shift) + (null (cddr step))) + (push (cons (car step) (make-index-mask w (cadr step))) list)) + ((and (consp step) + (eq (car step) :invert) + (integerp (cadr step)) (<= 0 (cadr step) max-bit) + (null (cddr step))) + (let ((i (cadr step))) + (push (cons (ash 1 i) (make-index-mask w `(not ,i))) list))) + ((and (consp step) + (eq (car step) :exchange) + (integerp (cadr step)) (integerp (caddr step)) + (<= 0 (cadr step) (caddr step) max-bit) + (null (cdddr step))) + (let ((i (cadr step)) (j (caddr step))) + (push (cons (- (ash 1 j) (ash 1 i)) + (make-index-mask w `(and ,i (not ,j)))) + list))) + ((and (consp step) + (eq (car step) :exchange-invert) + (integerp (cadr step)) (integerp (caddr step)) + (<= 0 (cadr step) (caddr step) max-bit) + (null (cdddr step))) + (let ((i (cadr step)) (j (caddr step))) + (push (cons (+ (ash 1 i) (ash 1 j)) + (make-index-mask w `(and (not ,i) (not ,j)))) + list))) + (t + (error "unknown permutation step ~S" step)))) + (nreverse list))) + +;;;-------------------------------------------------------------------------- +;;; Permutation network diagnostics. + +(defun print-permutation-network (steps &optional (stream *standard-output*)) + "Print a description of the permutation network STEPS to STREAM. + + A permutation network consists of a list of pairs + + (SHIFT . MASK) + + indicating that the bits selected by MASK, and those SHIFT bits to the + left, should be exchanged. + + The output is intended to be human-readable and is subject to change." + + (let ((shiftwd 1) (maskwd 2)) + + ;; Determine suitable print widths for shifts and masks. + (dolist (step steps) + (let ((shift (car step)) (mask (cdr step))) + (let ((swd (1+ (floor (log shift 10)))) + (mwd (ash 1 (- (integer-length (1- (integer-length mask))) + 2)))) + (when (> swd shiftwd) (setf shiftwd swd)) + (when (> mwd maskwd) (setf maskwd mwd))))) + + ;; Print the display. + (pprint-logical-block (stream steps :prefix "(" :suffix ")") + (let ((first t)) + (dolist (step steps) + (let ((shift (car step)) (mask (cdr step))) + + ;; Separate entries with newlines. + (cond (first (setf first nil)) + (t (pprint-newline :mandatory stream))) + + (let ((swaps nil)) + + ;; Determine the list of exchanges implied by the mask. + (dotimes (i (integer-length mask)) + (when (logbitp i mask) + (push (cons i (+ i shift)) swaps))) + (setf swaps (nreverse swaps)) + + ;; Print the entry. + (format stream "~@<(~;~vD #x~(~v,'0X~) ~8I~:@_~W~;)~:>" + shiftwd shift maskwd mask swaps)))))) + + ;; Print a final newline following the close parenthesis. + (terpri stream))) + +(defun demonstrate-permutation-network + (n steps &optional reference (stream *standard-output*)) + "Print, on STREAM, a demonstration of the permutation STEPS. + + Begin, on the left, with the integers from 0 up to N - 1. For each + (SHIFT . MASK) element in STEPS, print an additional column showing the + effect of that step on the vector. If REFERENCE is not nil, then it + should be a vector of length at least N: on the right, print the REFERENCE + vector, showing where the result of the permutation STEPS differs from the + REFERENCE. Return non-nil if the output matches the reference; return nil + if the output doesn't match, or no reference was supplied." + + (let ((v (make-array n))) + + ;; Initialize a vector of lists which will record, for each step in the + ;; permutation network, which value is in that position. The lists are + ;; reversed, so the `current' value is at the front. + (dotimes (i n) (setf (aref v i) (cons i nil))) + + ;; Work through the permutation steps updating the vector. + (dolist (step steps) + (let ((shift (car step)) (mask (cdr step))) + + (dotimes (i n) (push (car (aref v i)) (aref v i))) + + (dotimes (i n) + (when (logbitp i mask) + (rotatef (car (aref v i)) + (car (aref v (+ i shift)))))))) + + ;; Print the result. + (let ((ok (not (null reference)))) + (dotimes (i n) + (let* ((entry (aref v i)) + (final (car entry))) + (format stream "~{ ~7D~}" (reverse entry)) + (when reference + (let* ((want (aref reference i)) + (match (eql final want))) + (format stream " ~:[/=~;==~] ~7D" match want) + (unless match (setf ok nil)))) + (terpri stream))) + (when reference + (format stream "~:[FAIL~;pass~]~%" ok)) + ok))) + +;;;-------------------------------------------------------------------------- +;;; Examples and useful runes. + +#+example +(let* ((ip #(58 50 42 34 26 18 10 2 + 60 52 44 36 28 20 12 4 + 62 54 46 38 30 22 14 6 + 64 56 48 40 32 24 16 8 + 57 49 41 33 25 17 9 1 + 59 51 43 35 27 19 11 3 + 61 53 45 37 29 21 13 5 + 63 55 47 39 31 23 15 7)) + (fixed-ip (map '(vector fixnum) + (lambda (x) (- 64 x)) + (reverse ip))) + (trad-network + (make-permutation-network + 64 ; 5 4 3 2 1 0 + '((:exchange-invert 2 5) ; ~2 4 3 ~5 1 0 + (:exchange-invert 1 4) ; ~2 ~1 3 ~5 ~4 0 + (:exchange-invert 0 3) ; ~2 ~1 ~0 ~5 ~4 ~3 + (:exchange-invert 3 4) ; ~2 0 1 ~5 ~4 ~3 + (:exchange-invert 4 5))))) ; ~0 2 1 ~5 ~4 ~3 + + (fresh-line) + + (print-permutation-network trad-network) + (demonstrate-permutation-network 64 trad-network fixed-ip)) -- 2.11.0