X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/8ae4c946c49862151bf7fd6ae65b116ec62be606..c410f911b7a30fb364c2c599ea679245ccc3c708:/symm/rijndael-x86ish-aesni.S diff --git a/symm/rijndael-x86ish-aesni.S b/symm/rijndael-x86ish-aesni.S index 12d4267f..c22d23a8 100644 --- a/symm/rijndael-x86ish-aesni.S +++ b/symm/rijndael-x86ish-aesni.S @@ -34,14 +34,6 @@ .globl F(rijndael_rcon) ///-------------------------------------------------------------------------- -/// Local utilities. - -// Magic constants for shuffling. -#define ROTL 0x93 -#define ROT2 0x4e -#define ROTR 0x39 - -///-------------------------------------------------------------------------- /// Main code. .arch .aes @@ -165,7 +157,10 @@ FUNC(rijndael_setup_x86ish_aesni) // We'll need the index registers, which belong to the caller in this // ABI. push rsi + .seh_pushreg rsi push rdi + .seh_pushreg rdi + .seh_endprologue // Move arguments to more useful places. mov SI, r8 // key material @@ -229,16 +224,16 @@ FUNC(rijndael_setup_x86ish_aesni) // Fourth word of the cycle, and seven or eight words of key. Do a // byte substitution. movd xmm0, eax - pshufd xmm0, xmm0, ROTL + pshufd xmm0, xmm0, SHUF(2, 1, 0, 3) aeskeygenassist xmm1, xmm0, 0 movd eax, xmm1 jmp 2f // First word of the cycle. This is the complicated piece. 1: movd xmm0, eax - pshufd xmm0, xmm0, ROTR + pshufd xmm0, xmm0, SHUF(0, 3, 2, 1) aeskeygenassist xmm1, xmm0, 0 - pshufd xmm1, xmm1, ROTL + pshufd xmm1, xmm1, SHUF(2, 1, 0, 3) movd eax, xmm1 xor al, [RCON] inc RCON @@ -391,18 +386,14 @@ ENDFUNC .macro encdec op, aes, koff FUNC(rijndael_\op\()_x86ish_aesni) - // Find the magic endianness-swapping table. - ldgot ecx - movdqa xmm5, [INTADDR(endswap_tab, ecx)] - #if CPUFAM_X86 // Arguments come in on the stack, and need to be collected. We // don't have a shortage of registers. -# define K ecx +# define K eax # define SRC edx # define DST edx -# define NR eax +# define NR ecx mov K, [esp + 4] mov SRC, [esp + 8] @@ -424,8 +415,13 @@ ENDFUNC # define SRC rdx # define DST r8 # define NR eax + .seh_endprologue #endif + // Find the magic endianness-swapping table. + ldgot ecx + movdqa xmm5, [INTADDR(endswap_tab, ecx)] + // Initial setup. movdqu xmm0, [SRC] pshufb xmm0, xmm5