X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/1a517bb3785891ff6940c73af7c5a136d0250ebf..0923a413958b0e778a3f059c76355ab58e5be414:/symm/salsa20-x86ish-sse2.S

Replace the open-coded prologue and epilogue sequences of
salsa20_core_x86ish_sse2 (and the explicit `.seh_*' directives) with the
frame macros `pushreg', `setfp', `stalloc', `savexmm', `endprologue',
`dropfp', `popreg', `rstrxmm' and `stfree'.  `endprologue' now follows
the `#endif' rather than sitting inside the conditional block, and both
saved XMM registers (xmm6, xmm7) are restored with `rstrxmm'.

diff --git a/symm/salsa20-x86ish-sse2.S b/symm/salsa20-x86ish-sse2.S
index a05cb4e4..ca677f17 100644
--- a/symm/salsa20-x86ish-sse2.S
+++ b/symm/salsa20-x86ish-sse2.S
@@ -60,8 +60,8 @@ FUNC(salsa20_core_x86ish_sse2)
 # define SAVE2 [esp + 0]
 # define SAVE3 [esp + 16]
 
-	push	ebp
-	mov	ebp, esp
+	pushreg	ebp
+	setfp	ebp
 	sub	esp, 32
 	mov	IN, [ebp + 12]
 	mov	OUT, [ebp + 16]
@@ -102,15 +102,13 @@ FUNC(salsa20_core_x86ish_sse2)
 # define SAVE2 [rsp + 32]
 # define SAVE3 [rsp + 48]
 
-	sub	rsp, 64 + 8
-	.seh_stackalloc 64 + 8
-	movdqa	[rsp + 0], xmm6
-	.seh_savexmm xmm6, 0
-	movdqa	[rsp + 16], xmm7
-	.seh_savexmm xmm7, 16
-	.seh_endprologue
+	stalloc	64 + 8
+	savexmm	xmm6, 0
+	savexmm	xmm7, 16
 #endif
 
+	endprologue
+
 	// First job is to slurp the matrix into XMM registers. The words
 	// have already been permuted conveniently to make them line up
 	// better for SIMD processing.
@@ -294,13 +292,13 @@ FUNC(salsa20_core_x86ish_sse2)
 	// Tidy things up.
 
 #if CPUFAM_X86
-	mov	esp, ebp
-	pop	ebp
+	dropfp
+	popreg	ebp
 #endif
 #if CPUFAM_AMD64 && ABI_WIN
-	movdqa	xmm6, [rsp + 0]
-	movdqa	xmm7, [rsp + 16]
-	add	rsp, 64 + 8
+	rstrxmm	xmm6, 0
+	rstrxmm	xmm7, 16
+	stfree	64 + 8
 #endif
 
 	// And with that, we're done.