From: Mark Wooding
Date: Thu, 7 Nov 2019 01:51:37 +0000 (+0000)
Subject: math/mpx-mul4-amd64-sse2.S: Save a spill by better register allocation.
X-Git-Tag: 2.6.0~14
X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/f79c8756d054b2979f79d8277affb988c4f39c49

math/mpx-mul4-amd64-sse2.S: Save a spill by better register allocation.

The Windows code doesn't need to spill r12, because we don't need the
`mi' register after we've loaded and expanded the Montgomery factor.
This doesn't save any stack space because we need 16-byte alignment, but
it does avoid saving and restoring the register.
---

diff --git a/math/mpx-mul4-amd64-sse2.S b/math/mpx-mul4-amd64-sse2.S
index 17c4f1ad..da3e6d61 100644
--- a/math/mpx-mul4-amd64-sse2.S
+++ b/math/mpx-mul4-amd64-sse2.S
@@ -1155,7 +1155,7 @@ FUNC(mpxmont_redc4_amd64_sse2)
 	// outer loop dv	r10	rcx
 	// outer loop dv limit	r11	r11
 	// nv base		rdx	r8
-	// nv limit		r9	r12*
+	// nv limit		r9	r10*
 	// n			rcx	r9
 	// c			rcx	r9
 
@@ -1183,14 +1183,13 @@ FUNC(mpxmont_redc4_amd64_sse2)
 # define DV rcx
 # define DVLO r11
 # define NV r8
-# define NVL r12
+# define NVL r10
 # define N r9
 # define C r9d
 
 	pushreg	rbx
 	pushreg	rdi
-	pushreg	r12
-	stalloc	160
+	stalloc	168
 
 	savexmm	xmm6, 0
 	savexmm	xmm7, 16
@@ -1300,8 +1299,7 @@ FUNC(mpxmont_redc4_amd64_sse2)
 	rstrxmm	xmm14, 128
 	rstrxmm	xmm15, 144
 
-	stfree	160
-	popreg	r12
+	stfree	168
 	popreg	rdi
 	popreg	rbx
 #endif