X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/28321c96c5c2ea4ee8d9a2b48ad5b63499029d5b..645fcce0830342b644cc16e71e28790c838d9415:/symm/rijndael-x86ish-aesni.S diff --git a/symm/rijndael-x86ish-aesni.S b/symm/rijndael-x86ish-aesni.S index 3dcdfc58..3cca50f7 100644 --- a/symm/rijndael-x86ish-aesni.S +++ b/symm/rijndael-x86ish-aesni.S @@ -34,14 +34,6 @@ .globl F(rijndael_rcon) ///-------------------------------------------------------------------------- -/// Local utilities. - -// Magic constants for shuffling. -#define ROTL 0x93 -#define ROT2 0x4e -#define ROTR 0x39 - -///-------------------------------------------------------------------------- /// Main code. .arch .aes @@ -165,7 +157,10 @@ FUNC(rijndael_setup_x86ish_aesni) // We'll need the index registers, which belong to the caller in this // ABI. push rsi + .seh_pushreg rsi push rdi + .seh_pushreg rdi + .seh_endprologue // Move arguments to more useful places. mov SI, r8 // key material @@ -229,16 +224,16 @@ FUNC(rijndael_setup_x86ish_aesni) // Fourth word of the cycle, and seven or eight words of key. Do a // byte substitution. movd xmm0, eax - pshufd xmm0, xmm0, ROTL + pshufd xmm0, xmm0, SHUF(2, 1, 0, 3) aeskeygenassist xmm1, xmm0, 0 movd eax, xmm1 jmp 2f // First word of the cycle. This is the complicated piece. 1: movd xmm0, eax - pshufd xmm0, xmm0, ROTR + pshufd xmm0, xmm0, SHUF(0, 3, 2, 1) aeskeygenassist xmm1, xmm0, 0 - pshufd xmm1, xmm1, ROTL + pshufd xmm1, xmm1, SHUF(2, 1, 0, 3) movd eax, xmm1 xor al, [RCON] inc RCON @@ -395,10 +390,10 @@ ENDFUNC // Arguments come in on the stack, and need to be collected. We // don't have a shortage of registers. -# define K ecx +# define K eax # define SRC edx # define DST edx -# define NR eax +# define NR ecx mov K, [esp + 4] mov SRC, [esp + 8] @@ -420,6 +415,7 @@ ENDFUNC # define SRC rdx # define DST r8 # define NR eax + .seh_endprologue #endif // Find the magic endianness-swapping table. @@ -436,6 +432,9 @@ ENDFUNC movdqu xmm1, [K] add K, 16 pxor xmm0, xmm1 +#if CPUFAM_X86 + mov DST, [esp + 12] +#endif // Dispatch to the correct code. cmp NR, 10 @@ -513,9 +512,6 @@ ENDFUNC // Unpermute the ciphertext block and store it. pshufb xmm0, xmm5 -#if CPUFAM_X86 - mov DST, [esp + 12] -#endif movdqu [DST], xmm0 // And we're done. @@ -549,6 +545,8 @@ bogus: callext F(abort) ///-------------------------------------------------------------------------- /// Data tables. + RODATA + .align 16 endswap_tab: .byte 3, 2, 1, 0