.globl F(rijndael_rcon)
///--------------------------------------------------------------------------
-/// Local utilities.
-
-// Magic constants for shuffling.
-#define ROTL 0x93
-#define ROT2 0x4e
-#define ROTR 0x39
-
-///--------------------------------------------------------------------------
/// Main code.
.arch .aes
// We'll need the index registers, which belong to the caller in this
// ABI.
push rsi
+ .seh_pushreg rsi
push rdi
+ .seh_pushreg rdi
+ .seh_endprologue
// Move arguments to more useful places.
mov SI, r8 // key material
// Fourth word of the cycle, and seven or eight words of key. Do a
// byte substitution.
movd xmm0, eax
- pshufd xmm0, xmm0, ROTL
+ pshufd xmm0, xmm0, SHUF(2, 1, 0, 3)
aeskeygenassist xmm1, xmm0, 0
movd eax, xmm1
jmp 2f
// First word of the cycle. This is the complicated piece.
1: movd xmm0, eax
- pshufd xmm0, xmm0, ROTR
+ pshufd xmm0, xmm0, SHUF(0, 3, 2, 1)
aeskeygenassist xmm1, xmm0, 0
- pshufd xmm1, xmm1, ROTL
+ pshufd xmm1, xmm1, SHUF(2, 1, 0, 3)
movd eax, xmm1
xor al, [RCON]
inc RCON
.macro encdec op, aes, koff
FUNC(rijndael_\op\()_x86ish_aesni)
- // Find the magic endianness-swapping table.
- ldgot ecx
- movdqa xmm5, [INTADDR(endswap_tab, ecx)]
-
#if CPUFAM_X86
// Arguments come in on the stack, and need to be collected. We
// don't have a shortage of registers.
-# define K ecx
+# define K eax
# define SRC edx
# define DST edx
-# define NR eax
+# define NR ecx
mov K, [esp + 4]
mov SRC, [esp + 8]
# define SRC rdx
# define DST r8
# define NR eax
+ .seh_endprologue
#endif
+ // Find the magic endianness-swapping table.
+ ldgot ecx
+ movdqa xmm5, [INTADDR(endswap_tab, ecx)]
+
// Initial setup.
movdqu xmm0, [SRC]
pshufb xmm0, xmm5
movdqu xmm1, [K]
add K, 16
pxor xmm0, xmm1
+#if CPUFAM_X86
+ mov DST, [esp + 12]
+#endif
// Dispatch to the correct code.
cmp NR, 10
// Unpermute the ciphertext block and store it.
pshufb xmm0, xmm5
-#if CPUFAM_X86
- mov DST, [esp + 12]
-#endif
movdqu [DST], xmm0
// And we're done.
0: hlt
jmp 0b
- gotaux ecx
-
///--------------------------------------------------------------------------
/// Data tables.
+ RODATA
+
.align 16
endswap_tab:
.byte 3, 2, 1, 0