X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/81bc2bb831d1fea04ea819b573bd6ba9ea08f3cf..8f2287ef5c05d496fcb9b012629af007fe56f897:/symm/rijndael-x86ish-aesni.S diff --git a/symm/rijndael-x86ish-aesni.S b/symm/rijndael-x86ish-aesni.S index e556aa53..6d9b3b22 100644 --- a/symm/rijndael-x86ish-aesni.S +++ b/symm/rijndael-x86ish-aesni.S @@ -25,20 +25,21 @@ /// MA 02111-1307, USA. ///-------------------------------------------------------------------------- -/// External definitions. +/// Preliminaries. #include "config.h" #include "asm-common.h" + .arch .aes + .extern F(abort) .extern F(rijndael_rcon) + .text + ///-------------------------------------------------------------------------- /// Main code. - .arch .aes - .text - /// The AESNI instructions implement a little-endian version of AES, but /// Catacomb's internal interface presents as big-endian so as to work better /// with things like GCM. We therefore maintain the round keys in @@ -61,6 +62,12 @@ ///-------------------------------------------------------------------------- /// Key setup. +FUNC(rijndael_setup_x86ish_aesni_avx) + vzeroupper // avoid penalty on `legacy' XMM access + endprologue + // and drop through... +ENDFUNC + FUNC(rijndael_setup_x86ish_aesni) #define SI WHOLE(si) @@ -205,16 +212,16 @@ FUNC(rijndael_setup_x86ish_aesni) // Fourth word of the cycle, and seven or eight words of key. Do a // byte substitution. movd xmm0, eax - pshufd xmm0, xmm0, SHUF(2, 1, 0, 3) + pshufd xmm0, xmm0, SHUF(3, 0, 1, 2) aeskeygenassist xmm1, xmm0, 0 movd eax, xmm1 jmp 2f // First word of the cycle. This is the complicated piece. 1: movd xmm0, eax - pshufd xmm0, xmm0, SHUF(0, 3, 2, 1) + pshufd xmm0, xmm0, SHUF(1, 2, 3, 0) aeskeygenassist xmm1, xmm0, 0 - pshufd xmm1, xmm1, SHUF(2, 1, 0, 3) + pshufd xmm1, xmm1, SHUF(3, 0, 1, 2) movd eax, xmm1 xor al, [RCON] inc RCON @@ -365,6 +372,12 @@ ENDFUNC /// Encrypting and decrypting blocks. .macro encdec op, aes, koff + FUNC(rijndael_\op\()_x86ish_aesni_avx) + vzeroupper // avoid XMM penalties + endprologue + // and drop through... + ENDFUNC + FUNC(rijndael_\op\()_x86ish_aesni) #if CPUFAM_X86