From e297526c6cfe427a9d70204966745651eac50fdb Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Wed, 18 May 2016 10:29:03 +0100 Subject: [PATCH] symm/rijndael-x86-aesni.S: Unify encryption and decryption with a macro. --- symm/rijndael-x86-aesni.S | 164 +++++++++------------------------------------- 1 file changed, 30 insertions(+), 134 deletions(-) diff --git a/symm/rijndael-x86-aesni.S b/symm/rijndael-x86-aesni.S index 13c1ad56..c0cd437a 100644 --- a/symm/rijndael-x86-aesni.S +++ b/symm/rijndael-x86-aesni.S @@ -309,7 +309,8 @@ ENDFUNC ///-------------------------------------------------------------------------- /// Encrypting and decrypting blocks. -FUNC(rijndael_eblk_x86_aesni) + .macro encdec op, aes, koff +FUNC(rijndael_\op\()_x86_aesni) // On entry, we have: // [esp + 4] points to the context block @@ -326,7 +327,7 @@ FUNC(rijndael_eblk_x86_aesni) movdqu xmm0, [eax] pshufb xmm0, xmm5 mov eax, [esp + 4] - lea edx, [eax + w] + lea edx, [eax + \koff] mov eax, [eax + nr] // Initial whitening. @@ -336,77 +337,77 @@ FUNC(rijndael_eblk_x86_aesni) // Dispatch to the correct code. cmp eax, 10 - je er10 + je 10f jb bogus cmp eax, 14 - je er14 + je 14f ja bogus cmp eax, 12 - je er12 - jb er11 - jmp er13 + je 12f + jb 11f + jmp 13f .align 2 // 14 rounds... -er14: movdqu xmm1, [edx] +14: movdqu xmm1, [edx] add edx, 16 - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 13 rounds... -er13: movdqu xmm1, [edx] +13: movdqu xmm1, [edx] add edx, 16 - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 12 rounds... -er12: movdqu xmm1, [edx] +12: movdqu xmm1, [edx] add edx, 16 - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 11 rounds... -er11: movdqu xmm1, [edx] +11: movdqu xmm1, [edx] add edx, 16 - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 10 rounds... -er10: movdqu xmm1, [edx] - aesenc xmm0, xmm1 +10: movdqu xmm1, [edx] + \aes xmm0, xmm1 // 9 rounds... movdqu xmm1, [edx + 16] - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 8 rounds... movdqu xmm1, [edx + 32] - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 7 rounds... 
movdqu xmm1, [edx + 48] - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 6 rounds... movdqu xmm1, [edx + 64] - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 5 rounds... movdqu xmm1, [edx + 80] - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 4 rounds... movdqu xmm1, [edx + 96] - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 3 rounds... movdqu xmm1, [edx + 112] - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // 2 rounds... movdqu xmm1, [edx + 128] - aesenc xmm0, xmm1 + \aes xmm0, xmm1 // Final round... movdqu xmm1, [edx + 144] - aesenclast xmm0, xmm1 + \aes\()last xmm0, xmm1 // Unpermute the ciphertext block and store it. pshufb xmm0, xmm5 @@ -417,115 +418,10 @@ er10: movdqu xmm1, [edx] ret ENDFUNC + .endm -FUNC(rijndael_dblk_x86_aesni) - - // On entry, we have: - // [esp + 4] points to the context block - // [esp + 8] points to the input data block - // [esp + 12] points to the output buffer - - // Find the magic endianness-swapping table. - ldgot ecx - movdqa xmm5, [INTADDR(endswap_tab, ecx)] - - // Load the input block and end-swap it. Also, start loading the - // keys. - mov eax, [esp + 8] - movdqu xmm0, [eax] - pshufb xmm0, xmm5 - mov eax, [esp + 4] - lea edx, [eax + wi] - mov eax, [eax + nr] - - // Initial whitening. - movdqu xmm1, [edx] - add edx, 16 - pxor xmm0, xmm1 - - // Dispatch to the correct code. - cmp eax, 10 - je dr10 - jb bogus - cmp eax, 14 - je dr14 - ja bogus - cmp eax, 12 - je dr12 - jb dr11 - jmp dr13 - - .align 2 - - // 14 rounds... -dr14: movdqu xmm1, [edx] - add edx, 16 - aesdec xmm0, xmm1 - - // 13 rounds... -dr13: movdqu xmm1, [edx] - add edx, 16 - aesdec xmm0, xmm1 - - // 12 rounds... -dr12: movdqu xmm1, [edx] - add edx, 16 - aesdec xmm0, xmm1 - - // 11 rounds... -dr11: movdqu xmm1, [edx] - add edx, 16 - aesdec xmm0, xmm1 - - // 10 rounds... -dr10: movdqu xmm1, [edx] - aesdec xmm0, xmm1 - - // 9 rounds... - movdqu xmm1, [edx + 16] - aesdec xmm0, xmm1 - - // 8 rounds... - movdqu xmm1, [edx + 32] - aesdec xmm0, xmm1 - - // 7 rounds... 
- movdqu xmm1, [edx + 48] - aesdec xmm0, xmm1 - - // 6 rounds... - movdqu xmm1, [edx + 64] - aesdec xmm0, xmm1 - - // 5 rounds... - movdqu xmm1, [edx + 80] - aesdec xmm0, xmm1 - - // 4 rounds... - movdqu xmm1, [edx + 96] - aesdec xmm0, xmm1 - - // 3 rounds... - movdqu xmm1, [edx + 112] - aesdec xmm0, xmm1 - - // 2 rounds... - movdqu xmm1, [edx + 128] - aesdec xmm0, xmm1 - - // Final round... - movdqu xmm1, [edx + 144] - aesdeclast xmm0, xmm1 - - // Unpermute the ciphertext block and store it. - pshufb xmm0, xmm5 - mov eax, [esp + 12] - movdqu [eax], xmm0 - - // And we're done. - ret - -ENDFUNC + encdec eblk, aesenc, w + encdec dblk, aesdec, wi ///-------------------------------------------------------------------------- /// Random utilities. -- 2.11.0