From 59aa39f8a0065d8bc3b2a353706f41f0dd20be12 Mon Sep 17 00:00:00 2001
From: Mark Wooding
Date: Thu, 7 Nov 2019 01:54:57 +0000
Subject: [PATCH] math/mpx-mul4-amd64-sse2.S: Improve the end-of-loop condition testing.

Previously, I waited until `rdi' was set up for the next iteration
before comparing it against the limit. But in fact, `DV' already has
the right value, so we can compare earlier.
---
 math/mpx-mul4-amd64-sse2.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/math/mpx-mul4-amd64-sse2.S b/math/mpx-mul4-amd64-sse2.S
index da3e6d61..1c205a73 100644
--- a/math/mpx-mul4-amd64-sse2.S
+++ b/math/mpx-mul4-amd64-sse2.S
@@ -1270,10 +1270,10 @@ FUNC(mpxmont_redc4_amd64_sse2)
         jb      7b
 
         // All done for this iteration. Start the next.
-8:      mov     rdi, DV                 // -> Z = dv[i]
-        mov     rbx, NV                 // -> X = nv[0]
-        cmp     rdi, DVLO               // all done yet?
+        cmp     DV, DVLO                // all done yet?
         jae     9f
+        mov     rdi, DV                 // -> Z = dv[i]
+        mov     rbx, NV                 // -> X = nv[0]
         add     DV, 16
         call    mont4
         add     rdi, 16
-- 
2.11.0