From: Mark Wooding Date: Thu, 7 Nov 2019 01:46:50 +0000 (+0000) Subject: math/mpx-mul4-*-sse2.S (mpxmont_redc4): Fix end-of-outer-loop commentary. X-Git-Tag: 2.6.0~15 X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/6966e7a60a87415d3d02230608a98016c03a7a51 math/mpx-mul4-*-sse2.S (mpxmont_redc4): Fix end-of-outer-loop commentary. * The carry loop is wrong if the destination is an exact multiple of four limbs. Fortunately, it isn't. * The initial pass feeds into the main loop unconditionally, unlike `mpxmont_mul4_...' (from which I think the commentary was uncritically copied), so being at the end of it doesn't tell you anything about whether to start another. And, indeed, we do check the loop-end condition. --- diff --git a/math/mpx-mul4-amd64-sse2.S b/math/mpx-mul4-amd64-sse2.S index a37aba69..17c4f1ad 100644 --- a/math/mpx-mul4-amd64-sse2.S +++ b/math/mpx-mul4-amd64-sse2.S @@ -1260,7 +1260,9 @@ FUNC(mpxmont_redc4_amd64_sse2) cmp rdi, DVL4 jb 0b - // Deal with the tail end. + // Deal with the tail end. Note that the actual destination length + // won't be an exact number of blocks of four, so it's safe to just + // drop through here. 7: add [rdi], C mov C, 0 add rdi, 4 @@ -1268,9 +1270,7 @@ FUNC(mpxmont_redc4_amd64_sse2) cmp rdi, DVL jb 7b - // All done for this iteration. Start the next. (This must have at - // least one follow-on iteration, or we'd not have started this outer - // loop.) + // All done for this iteration. Start the next. 8: mov rdi, DV // -> Z = dv[i] mov rbx, NV // -> X = nv[0] cmp rdi, DVLO // all done yet? diff --git a/math/mpx-mul4-x86-sse2.S b/math/mpx-mul4-x86-sse2.S index 9e1d4782..7e7173fc 100644 --- a/math/mpx-mul4-x86-sse2.S +++ b/math/mpx-mul4-x86-sse2.S @@ -1062,7 +1062,9 @@ FUNC(mpxmont_redc4_x86_sse2) cmp edi, esi jb 0b - // Deal with the tail end. + // Deal with the tail end. 
Note that the actual destination length + // won't be an exact number of blocks of four, so it's safe to just + // drop through here. 7: add [edi], eax mov eax, 0 add edi, 4 @@ -1070,9 +1072,7 @@ FUNC(mpxmont_redc4_x86_sse2) cmp edi, edx jb 7b - // All done for this iteration. Start the next. (This must have at - // least one follow-on iteration, or we'd not have started this outer - // loop.) + // All done for this iteration. Start the next. 8: mov edi, [SP + 0] // -> dv[i - 1] mov ebx, [BP + 28] // -> X = nv[0] lea edx, [SP + 44] // -> space for Y