From fe371977a223059f0b28e6edae1458d6c1c6f3a2 Mon Sep 17 00:00:00 2001 From: mdw Date: Tue, 1 Aug 2000 00:28:34 +0000 Subject: [PATCH] Performance improvement: read keys in as 32-bit words and deal them out byte-by-byte. --- skipjack.c | 120 +++++++++++++++++++++++++++++++++---------------------------- skipjack.h | 8 +++-- 2 files changed, 72 insertions(+), 56 deletions(-) diff --git a/skipjack.c b/skipjack.c index 5ee5674..98bb48f 100644 --- a/skipjack.c +++ b/skipjack.c @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: skipjack.c,v 1.2 2000/07/15 20:48:45 mdw Exp $ + * $Id: skipjack.c,v 1.3 2000/08/01 00:28:34 mdw Exp $ * * The Skipjack block cipher * @@ -30,6 +30,10 @@ /*----- Revision history --------------------------------------------------* * * $Log: skipjack.c,v $ + * Revision 1.3 2000/08/01 00:28:34 mdw + * Performance improvement: read keys in as 32-bit words and deal them out + * byte-by-byte. + * * Revision 1.2 2000/07/15 20:48:45 mdw * Remove some useless tests in the G function. * @@ -71,8 +75,16 @@ static octet f[256] = SKIPJACK_S; void skipjack_init(skipjack_ctx *k, const void *buf, size_t sz) { + const octet *b = buf; + uint32 crud; KSZ_ASSERT(skipjack, sz); - memcpy(k->k, buf, sz); + k->ka = LOAD32(b); + k->kb = LOAD32(b + 4); + crud = LOAD16(b + 8); + k->kc = U32((crud << 16) | (k->ka >> 16)); + k->kd = U32((k->ka << 16) | (k->kb >> 16)); + k->ke = U32((k->kb << 16) | crud); + crud = 0; } /* --- @skipjack_eblk@, @skipjack_dblk@ --- * @@ -86,58 +98,58 @@ void skipjack_init(skipjack_ctx *k, const void *buf, size_t sz) * Use: Low-level block encryption and decryption. */ -#define G(x, i) do { \ +#define G(x, k) do { \ octet _x = U8(x >> 8), _y = U8(x); \ - _x ^= f[_y ^ k->k[i++]]; \ - _y ^= f[_x ^ k->k[i++]]; if (i >= 10) i = 0; \ - _x ^= f[_y ^ k->k[i++]]; \ - _y ^= f[_x ^ k->k[i++]]; if (i >= 10) i = 0; \ - x = U16((_x << 8) | _y); \ + _x ^= f[_y ^ U8(k >> 24)]; \ + _y ^= f[_x ^ U8(k >> 16)]; \ + _x ^= f[_y ^ U8(k >> 8)]; \ + _y ^= f[_x ^ U8(k >> 0)]; \ + x = (_x << 8) | _y; \ } while (0) -#define RULE_A(w, x, y, z, n, i) do { \ - G(w, i); z ^= w ^ n++; \ +#define RULE_A(w, x, y, z, n, k) do { \ + G(w, k); z ^= w ^ n++; \ } while (0) -#define RULE_B(w, x, y, z, n, i) do { \ - x ^= w ^ n++; G(w, i); \ +#define RULE_B(w, x, y, z, n, k) do { \ + x ^= w ^ n++; G(w, k); \ } while (0) void skipjack_eblk(const skipjack_ctx *k, const uint32 *s, uint32 *d) { - unsigned i = 0; unsigned n = 1; uint16 w = U16(s[0] >> 16), x = U16(s[0]); uint16 y = U16(s[1] >> 16), z = U16(s[1]); - - RULE_A(w, x, y, z, n, i); RULE_A(z, w, x, y, n, i); - RULE_A(y, z, w, x, n, i); RULE_A(x, y, z, w, n, i); - RULE_A(w, x, y, z, n, i); RULE_A(z, w, x, y, n, i); - RULE_A(y, z, w, x, n, i); RULE_A(x, y, z, w, n, i); - RULE_B(w, x, y, z, n, i); RULE_B(z, w, x, y, n, i); - RULE_B(y, z, w, x, n, i); RULE_B(x, y, z, w, n, i); - RULE_B(w, x, y, z, n, i); RULE_B(z, w, x, y, n, i); - RULE_B(y, z, w, x, n, i); RULE_B(x, y, z, w, n, i); - RULE_A(w, x, y, z, n, i); RULE_A(z, w, x, y, n, i); - RULE_A(y, z, w, x, n, i); RULE_A(x, y, z, w, n, i); - RULE_A(w, x, y, z, n, i); RULE_A(z, w, x, y, n, i); - RULE_A(y, z, w, x, n, i); RULE_A(x, y, z, w, n, i); - RULE_B(w, x, y, z, n, i); RULE_B(z, w, x, y, n, i); - RULE_B(y, z, w, x, n, i); RULE_B(x, y, z, w, n, i); - RULE_B(w, x, y, z, n, i); RULE_B(z, w, x, y, n, i); - RULE_B(y, z, w, x, n, i); RULE_B(x, y, z, w, n, i); + uint32 ka = k->ka, kb = k->kb, kc = k->kc, kd = k->kd, ke = k->ke; + + RULE_A(w, x, y, z, n, ka); RULE_A(z, w, x, y, n, kb); + RULE_A(y, z, w, x, n, kc); RULE_A(x, y, z, w, n, kd); + RULE_A(w, x, y, z, n, ke); RULE_A(z, w, x, y, n, ka); + RULE_A(y, z, w, x, n, kb); RULE_A(x, y, z, w, n, kc); + RULE_B(w, x, y, z, n, kd); RULE_B(z, w, x, y, n, ke); + RULE_B(y, z, w, x, n, ka); RULE_B(x, y, z, w, n, kb); + RULE_B(w, x, y, z, n, kc); RULE_B(z, w, x, y, n, kd); + RULE_B(y, z, w, x, n, ke); RULE_B(x, y, z, w, n, ka); + RULE_A(w, x, y, z, n, kb); RULE_A(z, w, x, y, n, kc); + RULE_A(y, z, w, x, n, kd); RULE_A(x, y, z, w, n, ke); + RULE_A(w, x, y, z, n, ka); RULE_A(z, w, x, y, n, kb); + RULE_A(y, z, w, x, n, kc); RULE_A(x, y, z, w, n, kd); + RULE_B(w, x, y, z, n, ke); RULE_B(z, w, x, y, n, ka); + RULE_B(y, z, w, x, n, kb); RULE_B(x, y, z, w, n, kc); + RULE_B(w, x, y, z, n, kd); RULE_B(z, w, x, y, n, ke); + RULE_B(y, z, w, x, n, ka); RULE_B(x, y, z, w, n, kb); d[0] = ((uint32)w << 16) | (uint32)x; d[1] = ((uint32)y << 16) | (uint32)z; } -#define G_INV(x, i) do { \ +#define G_INV(x, k) do { \ octet _x = U8(x >> 8), _y = U8(x); \ - _y ^= f[_x ^ k->k[--i]]; \ - _x ^= f[_y ^ k->k[--i]]; if (i == 0) i = 10; \ - _y ^= f[_x ^ k->k[--i]]; \ - _x ^= f[_y ^ k->k[--i]]; if (i == 0) i = 10; \ - x = U16((_x << 8) | _y); \ + _y ^= f[_x ^ U8(k >> 0)]; \ + _x ^= f[_y ^ U8(k >> 8)]; \ + _y ^= f[_x ^ U8(k >> 16)]; \ + _x ^= f[_y ^ U8(k >> 24)]; \ + x = (_x << 8) | _y; \ } while (0) #define RULE_A_INV(w, x, y, z, n, i) do { \ @@ -150,27 +162,27 @@ void skipjack_eblk(const skipjack_ctx *k, const uint32 *s, uint32 *d) void skipjack_dblk(const skipjack_ctx *k, const uint32 *s, uint32 *d) { - unsigned i = 8; unsigned n = 33; uint16 w = U16(s[0] >> 16), x = U16(s[0]); uint16 y = U16(s[1] >> 16), z = U16(s[1]); - - RULE_B_INV(w, x, y, z, n, i); RULE_B_INV(x, y, z, w, n, i); - RULE_B_INV(y, z, w, x, n, i); RULE_B_INV(z, w, x, y, n, i); - RULE_B_INV(w, x, y, z, n, i); RULE_B_INV(x, y, z, w, n, i); - RULE_B_INV(y, z, w, x, n, i); RULE_B_INV(z, w, x, y, n, i); - RULE_A_INV(w, x, y, z, n, i); RULE_A_INV(x, y, z, w, n, i); - RULE_A_INV(y, z, w, x, n, i); RULE_A_INV(z, w, x, y, n, i); - RULE_A_INV(w, x, y, z, n, i); RULE_A_INV(x, y, z, w, n, i); - RULE_A_INV(y, z, w, x, n, i); RULE_A_INV(z, w, x, y, n, i); - RULE_B_INV(w, x, y, z, n, i); RULE_B_INV(x, y, z, w, n, i); - RULE_B_INV(y, z, w, x, n, i); RULE_B_INV(z, w, x, y, n, i); - RULE_B_INV(w, x, y, z, n, i); RULE_B_INV(x, y, z, w, n, i); - RULE_B_INV(y, z, w, x, n, i); RULE_B_INV(z, w, x, y, n, i); - RULE_A_INV(w, x, y, z, n, i); RULE_A_INV(x, y, z, w, n, i); - RULE_A_INV(y, z, w, x, n, i); RULE_A_INV(z, w, x, y, n, i); - RULE_A_INV(w, x, y, z, n, i); RULE_A_INV(x, y, z, w, n, i); - RULE_A_INV(y, z, w, x, n, i); RULE_A_INV(z, w, x, y, n, i); + uint32 ka = k->ka, kb = k->kb, kc = k->kc, kd = k->kd, ke = k->ke; + + RULE_B_INV(w, x, y, z, n, kb); RULE_B_INV(x, y, z, w, n, ka); + RULE_B_INV(y, z, w, x, n, ke); RULE_B_INV(z, w, x, y, n, kd); + RULE_B_INV(w, x, y, z, n, kc); RULE_B_INV(x, y, z, w, n, kb); + RULE_B_INV(y, z, w, x, n, ka); RULE_B_INV(z, w, x, y, n, ke); + RULE_A_INV(w, x, y, z, n, kd); RULE_A_INV(x, y, z, w, n, kc); + RULE_A_INV(y, z, w, x, n, kb); RULE_A_INV(z, w, x, y, n, ka); + RULE_A_INV(w, x, y, z, n, ke); RULE_A_INV(x, y, z, w, n, kd); + RULE_A_INV(y, z, w, x, n, kc); RULE_A_INV(z, w, x, y, n, kb); + RULE_B_INV(w, x, y, z, n, ka); RULE_B_INV(x, y, z, w, n, ke); + RULE_B_INV(y, z, w, x, n, kd); RULE_B_INV(z, w, x, y, n, kc); + RULE_B_INV(w, x, y, z, n, kb); RULE_B_INV(x, y, z, w, n, ka); + RULE_B_INV(y, z, w, x, n, ke); RULE_B_INV(z, w, x, y, n, kd); + RULE_A_INV(w, x, y, z, n, kc); RULE_A_INV(x, y, z, w, n, kb); + RULE_A_INV(y, z, w, x, n, ka); RULE_A_INV(z, w, x, y, n, ke); + RULE_A_INV(w, x, y, z, n, kd); RULE_A_INV(x, y, z, w, n, kc); + RULE_A_INV(y, z, w, x, n, kb); RULE_A_INV(z, w, x, y, n, ka); d[0] = ((uint32)w << 16) | (uint32)x; d[1] = ((uint32)y << 16) | (uint32)z; diff --git a/skipjack.h b/skipjack.h index 9a0b3ca..0740a69 100644 --- a/skipjack.h +++ b/skipjack.h @@ -1,6 +1,6 @@ /* -*-c-*- * - * $Id: skipjack.h,v 1.1 2000/07/15 15:39:33 mdw Exp $ + * $Id: skipjack.h,v 1.2 2000/08/01 00:28:34 mdw Exp $ * * The Skipjack block cipher * @@ -30,6 +30,10 @@ /*----- Revision history --------------------------------------------------* * * $Log: skipjack.h,v $ + * Revision 1.2 2000/08/01 00:28:34 mdw + * Performance improvement: read keys in as 32-bit words and deal them out + * byte-by-byte. + * * Revision 1.1 2000/07/15 15:39:33 mdw * The NSA's Skipjack block cipher. * @@ -75,7 +79,7 @@ extern const octet skipjack_keysz[]; /*----- Data structures ---------------------------------------------------*/ typedef struct skipjack_ctx { - octet k[10]; + uint32 ka, kb, kc, kd, ke; } skipjack_ctx; /*----- Functions provided ------------------------------------------------*/ -- 2.11.0