X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/0f00dc4c8eb47e67bc0f148c2dd109f73a451e0a..HEAD:/symm/des-base.h diff --git a/symm/des-base.h b/symm/des-base.h index 49bbed7e..325af534 100644 --- a/symm/des-base.h +++ b/symm/des-base.h @@ -36,6 +36,10 @@ #include +#ifndef CATACOMB_PERMUTE_H +# include "permute.h" +#endif + /*----- External data -----------------------------------------------------*/ extern const uint32 des_sp[8][64]; @@ -68,32 +72,89 @@ extern const uint32 des_sp[8][64]; * The cryptographically useless initial and final permutations. The initial * permutation also rotates the two block halves left by one place. This is * undone by the inverse permutation at the end. + * + * The initial permutation is traditionally given by the table + * + * 58 50 42 34 26 18 10 2 + * 60 52 44 36 28 20 12 4 + * 62 54 46 38 30 22 14 6 + * 64 56 48 40 32 24 16 8 + * 57 49 41 33 25 17 9 1 + * 59 51 43 35 27 19 11 3 + * 61 53 45 37 29 21 13 5 + * 63 55 47 39 31 23 15 7 + * + * The bit numbering is terrible. If the two halves are X and Y, then the + * numbering starts with the most significant bit of X, which is bit 1, + * working down towards the least significant bit, and then continuing with + * the bits of Y, again in order of decreasing significance. The table + * entries are in this same order, indicating that `bit 1' of the output is + * `bit 58' of the input and so on. + * + * I opt instead to number the bits starting from the least significant bit + * of Y, which is bit 0, up to the most significant bit of X, which is + * bit 63. This means that we need to reverse the table (because we're going + * to read it in the other direction) and subtract each entry from 64 (to + * correct the bit numbering). The resulting table is + * + * 57 49 41 33 25 17 9 1 + * 59 51 43 35 27 19 11 3 + * 61 53 45 37 29 21 13 5 + * 63 55 47 39 31 23 15 7 + * 56 48 40 32 24 16 8 0 + * 58 50 42 34 26 18 10 2 + * 60 52 44 36 28 20 12 4 + * 62 54 46 38 30 22 14 6 + * + * which, interestingly, is /also/ what you get if you just subtract one from + * each of the original table's entries. + * + * If we look at this table in binary, the patterns are much clearer. + * + * 111001 110001 101001 100001 011001 010001 001001 000001 + * 111011 110011 101011 100011 011011 010011 001011 000011 + * 111101 110101 101101 100101 011101 010101 001101 000101 + * 111111 110111 101111 100111 011111 010111 001111 000111 + * 111000 110000 101000 100000 011000 010000 001000 000000 + * 111010 110010 101010 100010 011010 010010 001010 000010 + * 111100 110100 101100 100100 011100 010100 001100 000100 + * 111110 110110 101110 100110 011110 010110 001110 000110 + * + * We can implement this efficiently using our permutation machinery. + * Writing ~k for index bit @k@ inverted, this permutation reflects the index + * transformation given by ~0, 2, 1, ~5, ~4, ~3. There's a traditional + * swizzle sequence for this, which we used to use, namely: + * + * // 5, 4, 3, 2, 1, 0 + * TWIZZLE_XCPL (x, y, 2); // ~2, 4, 3, ~5, 1, 0 + * SWIZZLE_XCPL2(x, y, 1, 4); // ~2, ~1, 3, ~5, ~4, 0 + * SWIZZLE_XCPL2(x, y, 0, 3); // ~2, ~1, ~0, ~5, ~4, ~3 + * SWIZZLE_XCPL2(x, y, 3, 4); // ~2, 0, 1, ~5, ~4, ~3 + * TWIZZLE_XCPL (x, y, 4); // ~0, 2, 1, ~5, ~4, ~3 + * + * Essentially this is an antitranspose -- a reflection about the + * antidiagonal -- followed by a couple of fixup stages. But the non-twizzle + * steps require more operations, and it's easy to find a sequence which + * always acts on the (current) index bit 5, moving it to where it's wanted, + * and inverting it if necessary, so we only need twizzles. */ #define DES_IP(x, y) do { \ - uint32 _t; \ - _t = (y ^ (x >> 4)) & 0x0f0f0f0f; y ^= _t; x ^= _t << 4; \ - _t = (x ^ (x >> 18)) & 0x00003333; x ^= _t; x ^= _t << 18; \ - _t = (y ^ (y >> 18)) & 0x00003333; y ^= _t; y ^= _t << 18; \ - _t = (x ^ (x >> 9)) & 0x00550055; x ^= _t; x ^= _t << 9; \ - _t = (y ^ (y >> 9)) & 0x00550055; y ^= _t; y ^= _t << 9; \ - _t = (x ^ (x >> 24)) & 0x000000ff; x ^= _t; x ^= _t << 24; \ - _t = (y ^ (y >> 24)) & 0x000000ff; y ^= _t; y ^= _t << 24; \ - _t = (y ^ (x >> 16)) & 0x0000ffff; y ^= _t; x ^= _t << 16; \ + TWIZZLE_XCPL(x, y, 2); /* ~2, 4, 3, ~5, 1, 0 */ \ + TWIZZLE_XCPL(x, y, 4); /* ~4, 2, 3, ~5, 1, 0 */ \ + TWIZZLE_EXCH(x, y, 1); /* 1, 2, 3, ~5, ~4, 0 */ \ + TWIZZLE_EXCH(x, y, 3); /* 3, 2, 1, ~5, ~4, 0 */ \ + TWIZZLE_XCPL(x, y, 0); /* ~0, 2, 1, ~5, ~4, ~3 */ \ x = ROL32(x, 1); y = ROL32(y, 1); \ } while (0) #define DES_IPINV(x, y) do { \ - uint32 _t; \ - x = ROR32(x, 1); y = ROR32(y, 1); \ - _t = (y ^ (x >> 16)) & 0x0000ffff; y ^= _t; x ^= _t << 16; \ - _t = (x ^ (x >> 24)) & 0x000000ff; x ^= _t; x ^= _t << 24; \ - _t = (y ^ (y >> 24)) & 0x000000ff; y ^= _t; y ^= _t << 24; \ - _t = (y ^ (x >> 4)) & 0x0f0f0f0f; y ^= _t; x ^= _t << 4; \ - _t = (x ^ (x >> 18)) & 0x00003333; x ^= _t; x ^= _t << 18; \ - _t = (y ^ (y >> 18)) & 0x00003333; y ^= _t; y ^= _t << 18; \ - _t = (x ^ (x >> 9)) & 0x00550055; x ^= _t; x ^= _t << 9; \ - _t = (y ^ (y >> 9)) & 0x00550055; y ^= _t; y ^= _t << 9; \ + x = ROR32(x, 1); y = ROR32(y, 1); /* ~0, 2, 1, ~5, ~4, ~3 */ \ + TWIZZLE_XCPL(x, y, 0); /* 3, 2, 1, ~5, ~4, 0 */ \ + TWIZZLE_EXCH(x, y, 3); /* 1, 2, 3, ~5, ~4, 0 */ \ + TWIZZLE_EXCH(x, y, 1); /* ~4, 2, 3, ~5, 1, 0 */ \ + TWIZZLE_XCPL(x, y, 4); /* 3, 2, 1, ~5, ~4, 0 */ \ + TWIZZLE_XCPL(x, y, 2); /* 5, 4, 3, 2, 1, 0 */ \ } while (0) /* --- @DES_EBLK@, @DES_DBLK@ --- *