.globl F(rijndael_rcon)
///--------------------------------------------------------------------------
-/// Local utilities.
-
-// Magic constants for shuffling.
-#define ROTL 0x93
-#define ROT2 0x4e
-#define ROTR 0x39
-
-///--------------------------------------------------------------------------
/// Main code.
// Let the assembler accept AES instructions, such as the
// `aeskeygenassist' used further down.
.arch .aes
// We'll need the index registers, which belong to the caller in this
// ABI, so save them on the stack before touching them.
push rsi
// Describe each push to the unwinder as it happens.
+ .seh_pushreg rsi
push rdi
+ .seh_pushreg rdi
// Nothing else is pushed here, so close the unwind description.
+ .seh_endprologue
// Move arguments to more useful places.
// NOTE(review): SI is a register-name macro defined outside this
// excerpt; presumably it names the index register saved above -- confirm.
mov SI, r8 // key material
// Fourth word of the cycle, and seven or eight words of key. Do a
// byte substitution.
//
// `aeskeygenassist' writes the byte-substituted (but unrotated) copy
// of source lane 1 into destination lane 0. So move the word from
// lane 0 up to lane 1 beforehand, and read the answer back out of
// lane 0 afterwards.
movd xmm0, eax
// ROTL is 0x93: lane selectors 2, 1, 0, 3, reading from the top lane
// down, so each lane moves up one place and lane 3 wraps to lane 0.
// NOTE(review): SHUF is defined outside this excerpt; presumably
// SHUF(2, 1, 0, 3) assembles to the same immediate -- confirm.
- pshufd xmm0, xmm0, ROTL
+ pshufd xmm0, xmm0, SHUF(2, 1, 0, 3)
aeskeygenassist xmm1, xmm0, 0
movd eax, xmm1
// The target label `2' lies outside this excerpt.
jmp 2f
// First word of the cycle. This is the complicated piece.
//
// Here the word is rotated as well as substituted. `aeskeygenassist'
// leaves the rotated substitution of source lane 3, XORed with its
// immediate operand, in destination lane 3. The immediate is zero
// here, so the round constant is mixed in by hand afterwards.
1: movd xmm0, eax
// ROTR is 0x39: lane selectors 0, 3, 2, 1, reading from the top lane
// down, so the word in lane 0 lands in lane 3.
// NOTE(review): SHUF is defined outside this excerpt; presumably
// SHUF(0, 3, 2, 1) assembles to the same immediate -- confirm.
- pshufd xmm0, xmm0, ROTR
+ pshufd xmm0, xmm0, SHUF(0, 3, 2, 1)
aeskeygenassist xmm1, xmm0, 0
// Bring lane 3 of the result back down to lane 0 (ROTL is 0x93,
// selectors 2, 1, 0, 3) so that `movd' can extract it.
- pshufd xmm1, xmm1, ROTL
+ pshufd xmm1, xmm1, SHUF(2, 1, 0, 3)
movd eax, xmm1
// XOR the byte at RCON into the low byte of the word, and step RCON
// on by one.
// NOTE(review): RCON is defined outside this excerpt; presumably it
// walks the `rijndael_rcon' table -- confirm.
xor al, [RCON]
inc RCON
// Arguments come in on the stack, and need to be collected. We
// don't have a shortage of registers.
//
// Note that SRC and DST name the same register here, so DST can
// only be loaded once SRC is finished with.
-# define K ecx
+# define K eax
# define SRC edx
# define DST edx
-# define NR eax
+# define NR ecx
// Pick up the first two stack arguments. DST is fetched later,
// from [esp + 12].
mov K, [esp + 4]
mov SRC, [esp + 8]
// NOTE(review): the conditional which separates the stack-argument
// case above from the register-argument case below is not part of
// this excerpt.
# define SRC rdx
# define DST r8
# define NR eax
+ .seh_endprologue
#endif
// Find the magic endianness-swapping table.
// NOTE(review): the instructions which do this are not part of this
// excerpt.

// XOR the 16 bytes at K into the block in xmm0, and advance K past
// them. The load is an unaligned one.
movdqu xmm1, [K]
add K, 16
pxor xmm0, xmm1
// On 32-bit x86, collect the output pointer from the stack. DST
// shares a register with SRC there, so this has to follow the last
// use of SRC.
+#if CPUFAM_X86
+ mov DST, [esp + 12]
+#endif
// Dispatch to the correct code.
// NOTE(review): NR presumably holds the number of rounds; the
// branches which consume these flags are not part of this excerpt.
cmp NR, 10
// Unpermute the ciphertext block and store it.
// NOTE(review): xmm5 presumably holds the endianness-swapping table
// mentioned above -- confirm.
pshufb xmm0, xmm5
-#if CPUFAM_X86
- mov DST, [esp + 12]
-#endif
// An unaligned store: DST needn't be 16-byte aligned.
movdqu [DST], xmm0
// And we're done.
///--------------------------------------------------------------------------
/// Data tables.
+ RODATA
+
.align 16
endswap_tab:
.byte 3, 2, 1, 0