FUNC(rijndael_setup_x86ish_aesni)
// NOTE(review): WHOLE(r) presumably names the native-width form of
// register r (the definitions it replaces were esi/edi on x86 and
// rsi/rdi on amd64) -- confirm against the macro's definition.
+#define SI WHOLE(si) // source pointer
+#define DI WHOLE(di) // destination pointer
+
#if CPUFAM_X86
// Arguments are on the stack. We'll need to stack the caller's
// register variables, but we'll manage.
# define CTX ebp // context pointer
# define BLKSZ [esp + 24] // block size
-# define SI esi // source pointer
-# define DI edi // destination pointer
-
# define KSZ ebx // key size
-# define KSZo ebx // ... as address offset
# define NKW edx // total number of key words
# define NKW_NEEDS_REFRESH 1 // ... needs recalculating
# define RCON ecx // round constants table
# define LIM edx // limit pointer
-# define LIMn edx // ... as integer offset from base
# define CYIX edi // index in shift-register cycle
# define NR ecx // number of rounds
# define LRK eax // distance to last key
-# define LRKo eax // ... as address offset
# define BLKOFF edx // block size in bytes
-# define BLKOFFo edx // ... as address offset
// Stack the caller's registers.
push ebp
# define CTX r8 // context pointer
# define BLKSZ r9d // block size
-# define SI rsi // source pointer
-# define DI rdi // destination pointer
-
# define KSZ edx // key size
-# define KSZo rdx // ... as address offset
# define NKW r10d // total number of key words
# define RCON rdi // round constants table
-# define LIMn ecx // limit pointer
-# define LIM rcx // ... as integer offset from base
+# define LIM rcx // limit pointer
# define CYIX r11d // index in shift-register cycle
# define NR ecx // number of rounds
# define LRK eax // distance to last key
-# define LRKo rax // ... as address offset
# define BLKOFF r9d // block size in bytes
-# define BLKOFFo r9 // ... as address offset
// Move arguments to more useful places.
mov CTX, rdi // context base pointer
# define CTX r8 // context pointer
# define BLKSZ edx // block size
-# define SI rsi // source pointer
-# define DI rdi // destination pointer
-
# define KSZ r9d // key size
-# define KSZo r9 // ... as address offset
# define NKW r10d // total number of key words
# define RCON rdi // round constants table
-# define LIMn ecx // limit pointer
-# define LIM rcx // ... as integer offset from base
+# define LIM rcx // limit pointer
# define CYIX r11d // index in shift-register cycle
# define NR ecx // number of rounds
# define LRK eax // distance to last key
-# define LRKo rax // ... as address offset
# define BLKOFF edx // block size in bytes
-# define BLKOFFo rdx // ... as address offset
// We'll need the index registers, which belong to the caller in this
// ABI.
.seh_endprologue
// Move arguments to more useful places.
- mov SI, r8 // key material
+ mov rsi, r8 // key material
mov CTX, rcx // context base pointer
#endif
#if CPUFAM_AMD64 && ABI_SYSV
// We've been lucky. We already have a copy of the context pointer
// in rdi, and the key size in ecx.
- add DI, w
+ add rdi, w // rdi = context pointer + w, as the lea below
#else
lea DI, [CTX + w]
mov ecx, KSZ
#if !NKW_NEEDS_REFRESH
// If we can't keep NKW for later, then we use the same register for
// it and LIM, so this move is unnecessary.
// NOTE(review): DWORD(LIM) presumably names the 32-bit form of LIM,
// standing in for the removed LIMn alias -- confirm.
- mov LIMn, NKW
+ mov DWORD(LIM), NKW
#endif
- sub LIMn, KSZ // offset by the key size
+ sub DWORD(LIM), KSZ // offset by the key size
// Find the round constants.
- ldgot ecx
- leaext RCON, F(rijndael_rcon), ecx
+ ldgot WHOLE(c)
+ leaext RCON, F(rijndael_rcon), WHOLE(c)
// Prepare for the main loop.
lea SI, [CTX + w]
- mov eax, [SI + 4*KSZo - 4] // most recent key word
+ mov eax, [SI + 4*WHOLE(KSZ) - 4] // most recent key word
lea LIM, [SI + 4*LIM] // limit, offset by one key expansion
xor CYIX, CYIX // start of new cycle
// Common tail. Mix in the corresponding word from the previous
// cycle and prepare for the next loop.
2: xor eax, [SI]
- mov [SI + 4*KSZo], eax
+ mov [SI + 4*WHOLE(KSZ)], eax
add SI, 4 // step on by one 4-byte key word
inc CYIX
cmp SI, LIM
sub LRK, BLKSZ
#endif
lea DI, [CTX + wi]
- lea SI, [CTX + w + 4*LRKo] // last round's keys
+ lea SI, [CTX + w + 4*WHOLE(LRK)] // last round's keys
shl BLKOFF, 2 // block size (in bytes now)
// Copy the last encryption round's keys.
movdqu [DI + 16], xmm0
// Update the loop variables and stop if we've finished.
-0: add DI, BLKOFFo
- sub SI, BLKOFFo
+0: add DI, WHOLE(BLKOFF)
+ sub SI, WHOLE(BLKOFF)
sub NR, 1
jbe 9f // taken if the subtraction gave zero or borrowed (NR was <= 1)
// Remove this function's register-name macros.
#undef SI
#undef DI
#undef KSZ
-#undef KSZo
#undef RCON
-#undef LIMn
#undef LIM
#undef NR
#undef LRK
-#undef LRKo
#undef BLKOFF
-#undef BLKOFFo
ENDFUNC