X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/609affae0305784d87f2357488fba35699a04098..981a9e5d5e3af6c06ad8b3f821928852068227e4:/symm/salsa20-x86ish-sse2.S diff --git a/symm/salsa20-x86ish-sse2.S b/symm/salsa20-x86ish-sse2.S index 9cbaeff4..ccf912b6 100644 --- a/symm/salsa20-x86ish-sse2.S +++ b/symm/salsa20-x86ish-sse2.S @@ -25,16 +25,24 @@ /// MA 02111-1307, USA. ///-------------------------------------------------------------------------- -/// External definitions. +/// Preliminaries. #include "config.h" #include "asm-common.h" + .text + ///-------------------------------------------------------------------------- /// Main code. - .arch pentium4 - .text +FUNC(salsa20_core_x86ish_avx) + .arch .avx + vzeroupper + endprologue + // drop through... +ENDFUNC + + .arch pentium4 FUNC(salsa20_core_x86ish_sse2) @@ -57,16 +65,16 @@ FUNC(salsa20_core_x86ish_sse2) # define OUT edx # define SAVE0 xmm6 # define SAVE1 xmm7 -# define SAVE2 [esp + 0] -# define SAVE3 [esp + 16] - - pushreg ebp - setfp ebp - sub esp, 32 - mov IN, [ebp + 12] - mov OUT, [ebp + 16] - and esp, ~15 - mov NR, [ebp + 8] +# define SAVE2 [SP + 0] +# define SAVE3 [SP + 16] + + pushreg BP + setfp + stalloc 32 + mov IN, [BP + 12] + mov OUT, [BP + 16] + and SP, ~15 + mov NR, [BP + 8] #endif #if CPUFAM_AMD64 && ABI_SYSV @@ -99,8 +107,8 @@ FUNC(salsa20_core_x86ish_sse2) # define OUT r8 # define SAVE0 xmm6 # define SAVE1 xmm7 -# define SAVE2 [rsp + 32] -# define SAVE3 [rsp + 48] +# define SAVE2 [SP + 32] +# define SAVE3 [SP + 48] stalloc 64 + 8 savexmm xmm6, 0 @@ -293,11 +301,11 @@ FUNC(salsa20_core_x86ish_sse2) // Tidy things up. #if CPUFAM_X86 dropfp - popreg ebp + popreg BP #endif #if CPUFAM_AMD64 && ABI_WIN rstrxmm xmm6, 0 - rsrrxmm xmm7, 16 + rstrxmm xmm7, 16 stfree 64 + 8 #endif