X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/0f00dc4c8eb47e67bc0f148c2dd109f73a451e0a..HEAD:/symm/des-base.h

diff --git a/symm/des-base.h b/symm/des-base.h
index 49bbed7e..325af534 100644
--- a/symm/des-base.h
+++ b/symm/des-base.h
@@ -36,6 +36,10 @@
 
 #include <mLib/bits.h>
 
+#ifndef CATACOMB_PERMUTE_H
+#  include "permute.h"
+#endif
+
 /*----- External data -----------------------------------------------------*/
 
 extern const uint32 des_sp[8][64];
@@ -68,32 +72,89 @@ extern const uint32 des_sp[8][64];
  * The cryptographically useless initial and final permutations.  The initial
  * permutation also rotates the two block halves left by one place.  This is
  * undone by the inverse permutation at the end.
+ *
+ * The initial permutation is traditionally given by the table
+ *
+ *	58 50 42 34 26 18 10  2
+ *	60 52 44 36 28 20 12  4
+ *	62 54 46 38 30 22 14  6
+ *	64 56 48 40 32 24 16  8
+ *	57 49 41 33 25 17  9  1
+ *	59 51 43 35 27 19 11  3
+ *	61 53 45 37 29 21 13  5
+ *	63 55 47 39 31 23 15  7
+ *
+ * The bit numbering is terrible.  If the two halves are X and Y, then the
+ * numbering starts with the most significant bit of X, which is bit 1,
+ * working down towards the least significant bit, and then continuing with
+ * the bits of Y, again in order of decreasing significance.  The table
+ * entries are in this same order, indicating that `bit 1' of the output is
+ * `bit 58' of the input and so on.
+ *
+ * I opt instead to number the bits starting from the least significant bit
+ * of Y, which is bit 0, up to the most significant bit of X, which is
+ * bit 63.  This means that we need to reverse the table (because we're going
+ * to read it in the other direction) and subtract each entry from 64 (to
+ * correct the bit numbering).  The resulting table is
+ *
+ *	57 49 41 33 25 17  9  1
+ *	59 51 43 35 27 19 11  3
+ *	61 53 45 37 29 21 13  5
+ *	63 55 47 39 31 23 15  7
+ *	56 48 40 32 24 16  8  0
+ *	58 50 42 34 26 18 10  2
+ *	60 52 44 36 28 20 12  4
+ *	62 54 46 38 30 22 14  6
+ *
+ * which, interestingly, is /also/ what you get if you just subtract one from
+ * each of the original table's entries.
+ *
+ * If we look at this table in binary, the patterns are much clearer.
+ *
+ *	111001 110001 101001 100001 011001 010001 001001 000001
+ *	111011 110011 101011 100011 011011 010011 001011 000011
+ *	111101 110101 101101 100101 011101 010101 001101 000101
+ *	111111 110111 101111 100111 011111 010111 001111 000111
+ *	111000 110000 101000 100000 011000 010000 001000 000000
+ *	111010 110010 101010 100010 011010 010010 001010 000010
+ *	111100 110100 101100 100100 011100 010100 001100 000100
+ *	111110 110110 101110 100110 011110 010110 001110 000110
+ *
+ * We can implement this efficiently using our permutation machinery.
+ * Writing ~k for index bit @k@ inverted, this permutation reflects the index
+ * transformation given by ~0, 2, 1, ~5, ~4, ~3.  There's a traditional
+ * swizzle sequence for this, which we used to use, namely:
+ *
+ *					//  5,  4,  3,  2,  1,  0
+ *	TWIZZLE_XCPL (x, y, 2);		// ~2,  4,  3, ~5,  1,  0
+ *	SWIZZLE_XCPL2(x, y, 1, 4);	// ~2, ~1,  3, ~5, ~4,  0
+ *	SWIZZLE_XCPL2(x, y, 0, 3);	// ~2, ~1, ~0, ~5, ~4, ~3
+ *	SWIZZLE_XCPL2(x, y, 3, 4);	// ~2,  0,  1, ~5, ~4, ~3
+ *	TWIZZLE_XCPL (x, y, 4);		// ~0,  2,  1, ~5, ~4, ~3
+ *
+ * Essentially this is an antitranspose -- a reflection about the
+ * antidiagonal -- followed by a couple of fixup stages.  But the non-twizzle
+ * steps require more operations, and it's easy to find a sequence which
+ * always acts on the (current) index bit 5, moving it to where it's wanted,
+ * and inverting it if necessary, so we only need twizzles.
  */
 
 #define DES_IP(x, y) do {						\
-  uint32 _t;								\
-  _t = (y ^ (x >> 4)) & 0x0f0f0f0f; y ^= _t; x ^= _t << 4;		\
-  _t = (x ^ (x >> 18)) & 0x00003333; x ^= _t; x ^= _t << 18;		\
-  _t = (y ^ (y >> 18)) & 0x00003333; y ^= _t; y ^= _t << 18;		\
-  _t = (x ^ (x >> 9)) & 0x00550055; x ^= _t; x ^= _t << 9;		\
-  _t = (y ^ (y >> 9)) & 0x00550055; y ^= _t; y ^= _t << 9;		\
-  _t = (x ^ (x >> 24)) & 0x000000ff; x ^= _t; x ^= _t << 24;		\
-  _t = (y ^ (y >> 24)) & 0x000000ff; y ^= _t; y ^= _t << 24;		\
-  _t = (y ^ (x >> 16)) & 0x0000ffff; y ^= _t; x ^= _t << 16;		\
+  TWIZZLE_XCPL(x, y, 2);		/* ~2,  4,  3, ~5,  1,  0 */	\
+  TWIZZLE_XCPL(x, y, 4);		/* ~4,  2,  3, ~5,  1,  0 */	\
+  TWIZZLE_EXCH(x, y, 1);		/*  1,  2,  3, ~5, ~4,  0 */	\
+  TWIZZLE_EXCH(x, y, 3);		/*  3,  2,  1, ~5, ~4,  0 */	\
+  TWIZZLE_XCPL(x, y, 0);		/* ~0,  2,  1, ~5, ~4, ~3 */	\
   x = ROL32(x, 1); y = ROL32(y, 1);					\
 } while (0)
 
 #define DES_IPINV(x, y) do {						\
-  uint32 _t;								\
-  x = ROR32(x, 1); y = ROR32(y, 1);					\
-  _t = (y ^ (x >> 16)) & 0x0000ffff; y ^= _t; x ^= _t << 16;		\
-  _t = (x ^ (x >> 24)) & 0x000000ff; x ^= _t; x ^= _t << 24;		\
-  _t = (y ^ (y >> 24)) & 0x000000ff; y ^= _t; y ^= _t << 24;		\
-  _t = (y ^ (x >> 4)) & 0x0f0f0f0f; y ^= _t; x ^= _t << 4;		\
-  _t = (x ^ (x >> 18)) & 0x00003333; x ^= _t; x ^= _t << 18;		\
-  _t = (y ^ (y >> 18)) & 0x00003333; y ^= _t; y ^= _t << 18;		\
-  _t = (x ^ (x >> 9)) & 0x00550055; x ^= _t; x ^= _t << 9;		\
-  _t = (y ^ (y >> 9)) & 0x00550055; y ^= _t; y ^= _t << 9;		\
+  x = ROR32(x, 1); y = ROR32(y, 1);	/* ~0,  2,  1, ~5, ~4, ~3 */	\
+  TWIZZLE_XCPL(x, y, 0);		/*  3,  2,  1, ~5, ~4,  0 */	\
+  TWIZZLE_EXCH(x, y, 3);		/*  1,  2,  3, ~5, ~4,  0 */	\
+  TWIZZLE_EXCH(x, y, 1);		/* ~4,  2,  3, ~5,  1,  0 */	\
+  TWIZZLE_XCPL(x, y, 4);		/*  3,  2,  1, ~5, ~4,  0 */	\
+  TWIZZLE_XCPL(x, y, 2);		/*  5,  4,  3,  2,  1,  0 */	\
 } while (0)
 
 /* --- @DES_EBLK@, @DES_DBLK@ --- *