X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/898f32b33bea7f1e26c8ba906facdd5038edcd57..43ea7558955ca453d4687c62d12efcb585124e8a:/symm/rijndael-x86ish-aesni.S diff --git a/symm/rijndael-x86ish-aesni.S b/symm/rijndael-x86ish-aesni.S index 3cca50f7..8090bca6 100644 --- a/symm/rijndael-x86ish-aesni.S +++ b/symm/rijndael-x86ish-aesni.S @@ -63,6 +63,9 @@ FUNC(rijndael_setup_x86ish_aesni) +#define SI WHOLE(si) +#define DI WHOLE(di) + #if CPUFAM_X86 // Arguments are on the stack. We'll need to stack the caller's // register veriables, but we'll manage. @@ -70,23 +73,16 @@ FUNC(rijndael_setup_x86ish_aesni) # define CTX ebp // context pointer # define BLKSZ [esp + 24] // block size -# define SI esi // source pointer -# define DI edi // destination pointer - # define KSZ ebx // key size -# define KSZo ebx // ... as address offset # define NKW edx // total number of key words # define NKW_NEEDS_REFRESH 1 // ... needs recalculating # define RCON ecx // round constants table # define LIM edx // limit pointer -# define LIMn edx // ... as integer offset from base # define CYIX edi // index in shift-register cycle # define NR ecx // number of rounds # define LRK eax // distance to last key -# define LRKo eax // ... as address offset # define BLKOFF edx // block size in bytes -# define BLKOFFo edx // ... as address offset // Stack the caller's registers. push ebp @@ -107,22 +103,15 @@ FUNC(rijndael_setup_x86ish_aesni) # define CTX r8 // context pointer # define BLKSZ r9d // block size -# define SI rsi // source pointer -# define DI rdi // destination pointer - # define KSZ edx // key size -# define KSZo rdx // ... as address offset # define NKW r10d // total number of key words # define RCON rdi // round constants table -# define LIMn ecx // limit pointer -# define LIM rcx // ... as integer offset from base +# define LIM rcx // limit pointer # define CYIX r11d // index in shift-register cycle # define NR ecx // number of rounds # define LRK eax // distance to last key -# define LRKo rax // ... as address offset # define BLKOFF r9d // block size in bytes -# define BLKOFFo r9 // ... as address offset // Move arguments to more useful places. mov CTX, rdi // context base pointer @@ -137,22 +126,15 @@ FUNC(rijndael_setup_x86ish_aesni) # define CTX r8 // context pointer # define BLKSZ edx // block size -# define SI rsi // source pointer -# define DI rdi // destination pointer - # define KSZ r9d // key size -# define KSZo r9 // ... as address offset # define NKW r10d // total number of key words # define RCON rdi // round constants table -# define LIMn ecx // limit pointer -# define LIM rcx // ... as integer offset from base +# define LIM rcx // limit pointer # define CYIX r11d // index in shift-register cycle # define NR ecx // number of rounds # define LRK eax // distance to last key -# define LRKo rax // ... as address offset # define BLKOFF edx // block size in bytes -# define BLKOFFo rdx // ... as address offset // We'll need the index registers, which belong to the caller in this // ABI. @@ -163,7 +145,7 @@ FUNC(rijndael_setup_x86ish_aesni) .seh_endprologue // Move arguments to more useful places. - mov SI, r8 // key material + mov rsi, r8 // key material mov CTX, rcx // context base pointer #endif @@ -172,7 +154,7 @@ FUNC(rijndael_setup_x86ish_aesni) #if CPUFAM_AMD64 && ABI_SYSV // We've been lucky. We already have a copy of the context pointer // in rdi, and the key size in ecx. - add DI, w + add rdi, w #else lea DI, [CTX + w] mov ecx, KSZ @@ -186,17 +168,17 @@ FUNC(rijndael_setup_x86ish_aesni) #if !NKW_NEEDS_REFRESH // If we can't keep NKW for later, then we use the same register for // it and LIM, so this move is unnecessary. - mov LIMn, NKW + mov DWORD(LIM), NKW #endif - sub LIMn, KSZ // offset by the key size + sub DWORD(LIM), KSZ // offset by the key size // Find the round constants. - ldgot ecx - leaext RCON, F(rijndael_rcon), ecx + ldgot WHOLE(c) + leaext RCON, F(rijndael_rcon), WHOLE(c) // Prepare for the main loop. lea SI, [CTX + w] - mov eax, [SI + 4*KSZo - 4] // most recent key word + mov eax, [SI + 4*WHOLE(KSZ) - 4] // most recent key word lea LIM, [SI + 4*LIM] // limit, offset by one key expansion xor CYIX, CYIX // start of new cycle @@ -241,7 +223,7 @@ FUNC(rijndael_setup_x86ish_aesni) // Common tail. Mix in the corresponding word from the previous // cycle and prepare for the next loop. 2: xor eax, [SI] - mov [SI + 4*KSZo], eax + mov [SI + 4*WHOLE(KSZ)], eax add SI, 4 inc CYIX cmp SI, LIM @@ -276,7 +258,7 @@ FUNC(rijndael_setup_x86ish_aesni) sub LRK, BLKSZ #endif lea DI, [CTX + wi] - lea SI, [CTX + w + 4*LRKo] // last round's keys + lea SI, [CTX + w + 4*WHOLE(LRK)] // last round's keys shl BLKOFF, 2 // block size (in bytes now) // Copy the last encryption round's keys. @@ -288,8 +270,8 @@ FUNC(rijndael_setup_x86ish_aesni) movdqu [DI + 16], xmm0 // Update the loop variables and stop if we've finished. -0: add DI, BLKOFFo - sub SI, BLKOFFo +0: add DI, WHOLE(BLKOFF) + sub SI, WHOLE(BLKOFF) sub NR, 1 jbe 9f @@ -368,15 +350,11 @@ endswap_block: #undef SI #undef DI #undef KSZ -#undef KSZo #undef RCON -#undef LIMn #undef LIM #undef NR #undef LRK -#undef LRKo #undef BLKOFF -#undef BLKOFFo ENDFUNC