X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/b9b279b4105524d5d4e5dcd389141645d904aa0c..19d642aad1d7d7962c7b3a2a88a9d7fc67dfe306:/symm/rijndael-x86ish-aesni.S diff --git a/symm/rijndael-x86ish-aesni.S b/symm/rijndael-x86ish-aesni.S index a7a1ece3..6d9b3b22 100644 --- a/symm/rijndael-x86ish-aesni.S +++ b/symm/rijndael-x86ish-aesni.S @@ -25,20 +25,21 @@ /// MA 02111-1307, USA. ///-------------------------------------------------------------------------- -/// External definitions. +/// Preliminaries. #include "config.h" #include "asm-common.h" + .arch .aes + .extern F(abort) .extern F(rijndael_rcon) + .text + ///-------------------------------------------------------------------------- /// Main code. - .arch .aes - .text - /// The AESNI instructions implement a little-endian version of AES, but /// Catacomb's internal interface presents as big-endian so as to work better /// with things like GCM. We therefore maintain the round keys in @@ -211,16 +212,16 @@ FUNC(rijndael_setup_x86ish_aesni) // Fourth word of the cycle, and seven or eight words of key. Do a // byte substitution. movd xmm0, eax - pshufd xmm0, xmm0, SHUF(2, 1, 0, 3) + pshufd xmm0, xmm0, SHUF(3, 0, 1, 2) aeskeygenassist xmm1, xmm0, 0 movd eax, xmm1 jmp 2f // First word of the cycle. This is the complicated piece. 1: movd xmm0, eax - pshufd xmm0, xmm0, SHUF(0, 3, 2, 1) + pshufd xmm0, xmm0, SHUF(1, 2, 3, 0) aeskeygenassist xmm1, xmm0, 0 - pshufd xmm1, xmm1, SHUF(2, 1, 0, 3) + pshufd xmm1, xmm1, SHUF(3, 0, 1, 2) movd eax, xmm1 xor al, [RCON] inc RCON