X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/92edc356a312bc64abca0c30bc03d4b6676f3d39..6966e7a60a87415d3d02230608a98016c03a7a51:/math/mpx-mul4-amd64-sse2.S?ds=sidebyside diff --git a/math/mpx-mul4-amd64-sse2.S b/math/mpx-mul4-amd64-sse2.S index a37aba69..17c4f1ad 100644 --- a/math/mpx-mul4-amd64-sse2.S +++ b/math/mpx-mul4-amd64-sse2.S @@ -1260,7 +1260,9 @@ FUNC(mpxmont_redc4_amd64_sse2) cmp rdi, DVL4 jb 0b - // Deal with the tail end. + // Deal with the tail end. Note that the actual destination length + // won't be an exact number of blocks of four, so it's safe to just + // drop through here. 7: add [rdi], C mov C, 0 add rdi, 4 @@ -1268,9 +1270,7 @@ FUNC(mpxmont_redc4_amd64_sse2) cmp rdi, DVL jb 7b - // All done for this iteration. Start the next. (This must have at - // least one follow-on iteration, or we'd not have started this outer - // loop.) + // All done for this iteration. Start the next. 8: mov rdi, DV // -> Z = dv[i] mov rbx, NV // -> X = nv[0] cmp rdi, DVLO // all done yet?