From: Mark Wooding Date: Wed, 18 May 2016 09:29:03 +0000 (+0100) Subject: symm/rijndael-x86-aesni.S: Use xmm5 instead of xmm7. X-Git-Tag: 2.2.3~1^2~22^2~4 X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/8d6ca5549ea58b040ba25dacafeb45a309de57dc symm/rijndael-x86-aesni.S: Use xmm5 instead of xmm7. The only reason is that (stupidly) the Windows 64-bit ABI designates (the bottom 128 bits of) xmm7 as being callee-saved. --- diff --git a/symm/rijndael-x86-aesni.S b/symm/rijndael-x86-aesni.S index eba7b058..13c1ad56 100644 --- a/symm/rijndael-x86-aesni.S +++ b/symm/rijndael-x86-aesni.S @@ -267,7 +267,7 @@ FUNC(rijndael_setup_x86_aesni) // Find the byte-reordering table. ldgot ecx - movdqa xmm7, [INTADDR(endswap_tab, ecx)] + movdqa xmm5, [INTADDR(endswap_tab, ecx)] // Calculate the number of subkey words again. (It's a good job // we've got a fast multiplier.) @@ -295,9 +295,9 @@ FUNC(rijndael_setup_x86_aesni) .align 16 endswap_block: // End-swap ECX words starting at ESI. The end-swapping table is - // already loaded into XMM7; and it's OK to work in 16-byte chunks. + // already loaded into XMM5; and it's OK to work in 16-byte chunks. movdqu xmm1, [esi] - pshufb xmm1, xmm7 + pshufb xmm1, xmm5 movdqu [esi], xmm1 add esi, 16 sub ecx, 4 @@ -318,13 +318,13 @@ FUNC(rijndael_eblk_x86_aesni) // Find the magic endianness-swapping table. ldgot ecx - movdqa xmm7, [INTADDR(endswap_tab, ecx)] + movdqa xmm5, [INTADDR(endswap_tab, ecx)] // Load the input block and end-swap it. Also, start loading the // keys. mov eax, [esp + 8] movdqu xmm0, [eax] - pshufb xmm0, xmm7 + pshufb xmm0, xmm5 mov eax, [esp + 4] lea edx, [eax + w] mov eax, [eax + nr] @@ -409,7 +409,7 @@ er10: movdqu xmm1, [edx] aesenclast xmm0, xmm1 // Unpermute the ciphertext block and store it. - pshufb xmm0, xmm7 + pshufb xmm0, xmm5 mov eax, [esp + 12] movdqu [eax], xmm0 @@ -427,13 +427,13 @@ FUNC(rijndael_dblk_x86_aesni) // Find the magic endianness-swapping table. ldgot ecx - movdqa xmm7, [INTADDR(endswap_tab, ecx)] + movdqa xmm5, [INTADDR(endswap_tab, ecx)] // Load the input block and end-swap it. Also, start loading the // keys. mov eax, [esp + 8] movdqu xmm0, [eax] - pshufb xmm0, xmm7 + pshufb xmm0, xmm5 mov eax, [esp + 4] lea edx, [eax + wi] mov eax, [eax + nr] @@ -518,7 +518,7 @@ dr10: movdqu xmm1, [edx] aesdeclast xmm0, xmm1 // Unpermute the ciphertext block and store it. - pshufb xmm0, xmm7 + pshufb xmm0, xmm5 mov eax, [esp + 12] movdqu [eax], xmm0