From f79c8756d054b2979f79d8277affb988c4f39c49 Mon Sep 17 00:00:00 2001 From: Mark Wooding Date: Thu, 7 Nov 2019 01:51:37 +0000 Subject: [PATCH] math/mpx-mul4-amd64-sse2.S: Save a spill by better register allocation. The Windows code doesn't need to spill r12, because we don't need the `mi' register after we've loaded and expanded the Montgomery factor. This doesn't save any stack space because we need 16-byte alignment, but it does avoid saving and restoring the register. --- math/mpx-mul4-amd64-sse2.S | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/math/mpx-mul4-amd64-sse2.S b/math/mpx-mul4-amd64-sse2.S index 17c4f1ad..da3e6d61 100644 --- a/math/mpx-mul4-amd64-sse2.S +++ b/math/mpx-mul4-amd64-sse2.S @@ -1155,7 +1155,7 @@ FUNC(mpxmont_redc4_amd64_sse2) // outer loop dv r10 rcx // outer loop dv limit r11 r11 // nv base rdx r8 - // nv limit r9 r12* + // nv limit r9 r10* // n rcx r9 // c rcx r9 @@ -1183,14 +1183,13 @@ FUNC(mpxmont_redc4_amd64_sse2) # define DV rcx # define DVLO r11 # define NV r8 -# define NVL r12 +# define NVL r10 # define N r9 # define C r9d pushreg rbx pushreg rdi - pushreg r12 - stalloc 160 + stalloc 168 savexmm xmm6, 0 savexmm xmm7, 16 @@ -1300,8 +1299,7 @@ FUNC(mpxmont_redc4_amd64_sse2) rstrxmm xmm14, 128 rstrxmm xmm15, 144 - stfree 160 - popreg r12 + stfree 168 popreg rdi popreg rbx #endif -- 2.11.0