///--------------------------------------------------------------------------
/// Encrypting and decrypting blocks.
// NOTE(review): this is a patch view -- `-' lines are removed, `+' lines
// are added, and some unchanged lines between them appear to be omitted.
//
// encdec OP, AES, KOFF
//
// Expands to the definition of `rijndael_OP_x86_aesni'.  AES is the round
// instruction mnemonic: it is used as given for every round but the last,
// and with `last' appended for the final round.  KOFF is the offset from
// the context-block pointer at which the round keys to be used begin.
-FUNC(rijndael_eblk_x86_aesni)
+ .macro encdec op, aes, koff
+FUNC(rijndael_\op\()_x86_aesni)
// On entry, we have:
// [esp + 4] points to the context block
// NOTE(review): eax is read below with no visible load; the removed dblk
// code loads it from [esp + 8] first -- presumably elided here; confirm.
movdqu xmm0, [eax]
pshufb xmm0, xmm5
mov eax, [esp + 4]
// edx = address of the round keys selected by KOFF; eax = round count.
- lea edx, [eax + w]
+ lea edx, [eax + \koff]
mov eax, [eax + nr]
// Initial whitening.
// NOTE(review): no whitening instructions are visible in this view; the
// removed dblk code does movdqu/add edx, 16/pxor here -- confirm.
// Dispatch to the correct code.
// Round counts below 10 or above 14 go to `bogus' (defined outside this
// view).  The numeric labels replace the old `erN' names so that the macro
// can be expanded more than once without defining the same symbol twice;
// `Nf' refers to the next definition of `N:' further down.
cmp eax, 10
- je er10
+ je 10f
jb bogus
cmp eax, 14
- je er14
+ je 14f
ja bogus
// Here 10 < eax < 14: equal to 12, below it (11), or above it (13).
cmp eax, 12
- je er12
- jb er11
- jmp er13
+ je 12f
+ jb 11f
+ jmp 13f
.align 2
// Entering at label N performs N - 10 rounds which each step edx on by 16,
// and then falls into the common tail at `10:', which addresses the
// remaining round keys at fixed offsets from edx.
// 14 rounds...
-er14: movdqu xmm1, [edx]
+14: movdqu xmm1, [edx]
add edx, 16
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 13 rounds...
-er13: movdqu xmm1, [edx]
+13: movdqu xmm1, [edx]
add edx, 16
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 12 rounds...
-er12: movdqu xmm1, [edx]
+12: movdqu xmm1, [edx]
add edx, 16
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 11 rounds...
-er11: movdqu xmm1, [edx]
+11: movdqu xmm1, [edx]
add edx, 16
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 10 rounds...
-er10: movdqu xmm1, [edx]
- aesenc xmm0, xmm1
+10: movdqu xmm1, [edx]
+ \aes xmm0, xmm1
// 9 rounds...
movdqu xmm1, [edx + 16]
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 8 rounds...
movdqu xmm1, [edx + 32]
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 7 rounds...
movdqu xmm1, [edx + 48]
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 6 rounds...
movdqu xmm1, [edx + 64]
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 5 rounds...
movdqu xmm1, [edx + 80]
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 4 rounds...
movdqu xmm1, [edx + 96]
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 3 rounds...
movdqu xmm1, [edx + 112]
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// 2 rounds...
movdqu xmm1, [edx + 128]
- aesenc xmm0, xmm1
+ \aes xmm0, xmm1
// Final round...
// `\()' separates the macro argument from the literal suffix, so this
// becomes `aesenclast' or `aesdeclast' according to AES.
movdqu xmm1, [edx + 144]
- aesenclast xmm0, xmm1
+ \aes\()last xmm0, xmm1
// Unpermute the ciphertext block and store it.
// NOTE(review): the removed dblk code stores xmm0 through [esp + 12] before
// returning; no store is visible in this view -- presumably elided; confirm.
// For the `dblk' expansion the block written out is presumably the
// plaintext rather than ciphertext.
pshufb xmm0, xmm5
ret
ENDFUNC
+ .endm
// NOTE(review): everything from here to the matching ENDFUNC is deleted.
// The `encdec dblk, aesdec, wi' expansion that follows defines the same
// symbol, `rijndael_dblk_x86_aesni', with `aesdec'/`aesdeclast' as the round
// instructions and the round keys at offset `wi', matching the code removed
// here.  Compare the two side by side when checking the patch.
-FUNC(rijndael_dblk_x86_aesni)
-
- // On entry, we have:
- // [esp + 4] points to the context block
- // [esp + 8] points to the input data block
- // [esp + 12] points to the output buffer
-
- // Find the magic endianness-swapping table.
- ldgot ecx
- movdqa xmm5, [INTADDR(endswap_tab, ecx)]
-
- // Load the input block and end-swap it. Also, start loading the
- // keys.
- mov eax, [esp + 8]
- movdqu xmm0, [eax]
- pshufb xmm0, xmm5
- mov eax, [esp + 4]
- lea edx, [eax + wi]
- mov eax, [eax + nr]
-
- // Initial whitening.
- movdqu xmm1, [edx]
- add edx, 16
- pxor xmm0, xmm1
-
- // Dispatch to the correct code.
- cmp eax, 10
- je dr10
- jb bogus
- cmp eax, 14
- je dr14
- ja bogus
- cmp eax, 12
- je dr12
- jb dr11
- jmp dr13
-
- .align 2
-
- // 14 rounds...
-dr14: movdqu xmm1, [edx]
- add edx, 16
- aesdec xmm0, xmm1
-
- // 13 rounds...
-dr13: movdqu xmm1, [edx]
- add edx, 16
- aesdec xmm0, xmm1
-
- // 12 rounds...
-dr12: movdqu xmm1, [edx]
- add edx, 16
- aesdec xmm0, xmm1
-
- // 11 rounds...
-dr11: movdqu xmm1, [edx]
- add edx, 16
- aesdec xmm0, xmm1
-
- // 10 rounds...
-dr10: movdqu xmm1, [edx]
- aesdec xmm0, xmm1
-
- // 9 rounds...
- movdqu xmm1, [edx + 16]
- aesdec xmm0, xmm1
-
- // 8 rounds...
- movdqu xmm1, [edx + 32]
- aesdec xmm0, xmm1
-
- // 7 rounds...
- movdqu xmm1, [edx + 48]
- aesdec xmm0, xmm1
-
- // 6 rounds...
- movdqu xmm1, [edx + 64]
- aesdec xmm0, xmm1
-
- // 5 rounds...
- movdqu xmm1, [edx + 80]
- aesdec xmm0, xmm1
-
- // 4 rounds...
- movdqu xmm1, [edx + 96]
- aesdec xmm0, xmm1
-
- // 3 rounds...
- movdqu xmm1, [edx + 112]
- aesdec xmm0, xmm1
-
- // 2 rounds...
- movdqu xmm1, [edx + 128]
- aesdec xmm0, xmm1
-
- // Final round...
- movdqu xmm1, [edx + 144]
- aesdeclast xmm0, xmm1
-
- // Unpermute the ciphertext block and store it.
- pshufb xmm0, xmm5
- mov eax, [esp + 12]
- movdqu [eax], xmm0
-
- // And we're done.
- ret
-
-ENDFUNC
// Instantiate the two entry points from the `encdec' macro:
// `rijndael_eblk_x86_aesni' runs `aesenc'/`aesenclast' over the round keys
// at offset `w'; `rijndael_dblk_x86_aesni' runs `aesdec'/`aesdeclast' over
// the round keys at offset `wi'.
+ encdec eblk, aesenc, w
+ encdec dblk, aesdec, wi
///--------------------------------------------------------------------------
/// Random utilities.