X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/81bc2bb831d1fea04ea819b573bd6ba9ea08f3cf..981a9e5d5e3af6c06ad8b3f821928852068227e4:/symm/rijndael-x86ish-aesni.S

Summary of this patch (derived from the hunks below):
  * The "External definitions" section is renamed "Preliminaries", and the
    `.arch .aes' and `.text' directives move into it from "Main code".
  * New entry points rijndael_setup_x86ish_aesni_avx and (from the encdec
    macro) rijndael_\op\()_x86ish_aesni_avx are added.  Each executes
    `vzeroupper' and then drops through into the existing non-AVX function
    that immediately follows it.
  * The local SI/DI definitions are removed, and the 32-bit x86 paths name
    the stack and frame registers as SP/BP instead of esp/ebp.  SI, SP and
    BP are still used but are not defined in this diff; presumably they
    come from asm-common.h (to be confirmed).

NOTE(review): line breaks and blank lines were reconstructed so that every
hunk matches the line counts in its `@@' header; tabs and other intra-line
whitespace are a best guess and may differ from the original file.

diff --git a/symm/rijndael-x86ish-aesni.S b/symm/rijndael-x86ish-aesni.S
index e556aa53..ad9236a8 100644
--- a/symm/rijndael-x86ish-aesni.S
+++ b/symm/rijndael-x86ish-aesni.S
@@ -25,20 +25,21 @@
 /// MA 02111-1307, USA.
 
 ///--------------------------------------------------------------------------
-/// External definitions.
+/// Preliminaries.
 
 #include "config.h"
 #include "asm-common.h"
 
+	.arch	.aes
+
 	.extern	F(abort)
 	.extern	F(rijndael_rcon)
 
+	.text
+
 ///--------------------------------------------------------------------------
 /// Main code.
 
-	.arch	.aes
-	.text
-
 /// The AESNI instructions implement a little-endian version of AES, but
 /// Catacomb's internal interface presents as big-endian so as to work better
 /// with things like GCM. We therefore maintain the round keys in
@@ -61,17 +62,20 @@
 ///--------------------------------------------------------------------------
 /// Key setup.
 
-FUNC(rijndael_setup_x86ish_aesni)
+FUNC(rijndael_setup_x86ish_aesni_avx)
+	vzeroupper	// avoid penalty on `legacy' XMM access
+  endprologue
+	// and drop through...
+ENDFUNC
 
-#define SI WHOLE(si)
-#define DI WHOLE(di)
+FUNC(rijndael_setup_x86ish_aesni)
 
 #if CPUFAM_X86
 	// Arguments are on the stack. We'll need to stack the caller's
 	// register veriables, but we'll manage.
 
-# define CTX ebp			// context pointer
-# define BLKSZ [esp + 24]		// block size
+# define CTX BP			// context pointer
+# define BLKSZ [SP + 24]		// block size
 # define KSZ ebx			// key size
 # define NKW edx			// total number of key words
 
@@ -85,15 +89,15 @@ FUNC(rijndael_setup_x86ish_aesni)
 # define BLKOFF edx			// block size in bytes
 
 	// Stack the caller's registers.
-	pushreg	ebp
+	pushreg	BP
 	pushreg	ebx
 	pushreg	esi
 	pushreg	edi
 
 	// Set up our own variables.
-	mov	CTX, [esp + 20]		// context base pointer
-	mov	SI, [esp + 28]		// key material
-	mov	KSZ, [esp + 32]		// key size, in words
+	mov	CTX, [SP + 20]		// context base pointer
+	mov	SI, [SP + 28]		// key material
+	mov	KSZ, [SP + 32]		// key size, in words
 #endif
 
 #if CPUFAM_AMD64 && ABI_SYSV
@@ -323,7 +327,7 @@ FUNC(rijndael_setup_x86ish_aesni)
 	popreg	edi
 	popreg	esi
 	popreg	ebx
-	popreg	ebp
+	popreg	BP
 #endif
 #if CPUFAM_AMD64 && ABI_WIN
 	popreg	rdi
@@ -365,6 +369,12 @@ ENDFUNC
 /// Encrypting and decrypting blocks.
 
 .macro	encdec	op, aes, koff
+  FUNC(rijndael_\op\()_x86ish_aesni_avx)
+	vzeroupper	// avoid XMM penalties
+  endprologue
+	// and drop through...
+  ENDFUNC
+
   FUNC(rijndael_\op\()_x86ish_aesni)
 
 #if CPUFAM_X86
@@ -376,8 +386,8 @@ ENDFUNC
 # define DST edx
 # define NR ecx
 
-	mov	K, [esp + 4]
-	mov	SRC, [esp + 8]
+	mov	K, [SP + 4]
+	mov	SRC, [SP + 8]
 #endif
 
 #if CPUFAM_AMD64 && ABI_SYSV
@@ -415,7 +425,7 @@ ENDFUNC
 	add	K, 16
 	pxor	xmm0, xmm1
 #if CPUFAM_X86
-	mov	DST, [esp + 12]
+	mov	DST, [SP + 12]
 #endif
 
 	// Dispatch to the correct code.