From: Mark Wooding
Date: Sat, 27 Oct 2018 09:43:24 +0000 (+0100)
Subject: math/mpx-mul4-*-sse2.S (squash): We don't care about the top half of c3 here.
X-Git-Tag: 2.5.0~34
X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/aa80ad5e5a5363c53db3a63793cc10849411c1bf

math/mpx-mul4-*-sse2.S (squash): We don't care about the top half of c3 here.

The previous version of the comment erroneously claimed that the top
half of c3 held y_1; in fact it holds y_2, but we'll clobber it anyway
because the objective is to carry up into y_1, so mark it as don't-care
(like lo).
---

diff --git a/math/mpx-mul4-amd64-sse2.S b/math/mpx-mul4-amd64-sse2.S
index d8f54e1f..84f9e3fe 100644
--- a/math/mpx-mul4-amd64-sse2.S
+++ b/math/mpx-mul4-amd64-sse2.S
@@ -260,7 +260,7 @@
 	// Finally extract the answer.  This complicated dance is better than
 	// storing to memory and loading, because the piecemeal stores
 	// inhibit store forwarding.
-	movdqa	\c3, \t			// (y_0, y_1)
+	movdqa	\c3, \t			// (y_0, ?)
 	movdqa	\lo, \t			// (y^*_0, ?, ?, ?)
 	psrldq	\t, 8			// (y_2, 0)
 	psrlq	\c3, 32			// (floor(y_0/B), ?)
diff --git a/math/mpx-mul4-x86-sse2.S b/math/mpx-mul4-x86-sse2.S
index cdc35967..ee741d21 100644
--- a/math/mpx-mul4-x86-sse2.S
+++ b/math/mpx-mul4-x86-sse2.S
@@ -260,7 +260,7 @@
 	// Finally extract the answer.  This complicated dance is better than
 	// storing to memory and loading, because the piecemeal stores
 	// inhibit store forwarding.
-	movdqa	\c3, \t			// (y_0, y_1)
+	movdqa	\c3, \t			// (y_0, ?)
 	movdqa	\lo, \t			// (y^*_0, ?, ?, ?)
 	psrldq	\t, 8			// (y_2, 0)
 	psrlq	\c3, 32			// (floor(y_0/B), ?)