From aa80ad5e5a5363c53db3a63793cc10849411c1bf Mon Sep 17 00:00:00 2001
From: Mark Wooding <mdw@distorted.org.uk>
Date: Sat, 27 Oct 2018 10:43:24 +0100
Subject: [PATCH] math/mpx-mul4-*-sse2.S (squash): We don't care about the top
 half of c3 here.

The previous version of the comment erroneously claimed that the top
half of c3 held y_1.  In fact it holds y_2, but that value is about to
be clobbered anyway, because the objective here is to carry up into
y_1.  So mark the top half of c3 as don't-care, as with lo.
---
 math/mpx-mul4-amd64-sse2.S | 2 +-
 math/mpx-mul4-x86-sse2.S   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/math/mpx-mul4-amd64-sse2.S b/math/mpx-mul4-amd64-sse2.S
index d8f54e1f..84f9e3fe 100644
--- a/math/mpx-mul4-amd64-sse2.S
+++ b/math/mpx-mul4-amd64-sse2.S
@@ -260,7 +260,7 @@
 	// Finally extract the answer.  This complicated dance is better than
 	// storing to memory and loading, because the piecemeal stores
 	// inhibit store forwarding.
-	movdqa	\c3, \t			// (y_0, y_1)
+	movdqa	\c3, \t			// (y_0, ?)
 	movdqa	\lo, \t			// (y^*_0, ?, ?, ?)
 	psrldq	\t, 8			// (y_2, 0)
 	psrlq	\c3, 32			// (floor(y_0/B), ?)
diff --git a/math/mpx-mul4-x86-sse2.S b/math/mpx-mul4-x86-sse2.S
index cdc35967..ee741d21 100644
--- a/math/mpx-mul4-x86-sse2.S
+++ b/math/mpx-mul4-x86-sse2.S
@@ -260,7 +260,7 @@
 	// Finally extract the answer.  This complicated dance is better than
 	// storing to memory and loading, because the piecemeal stores
 	// inhibit store forwarding.
-	movdqa	\c3, \t			// (y_0, y_1)
+	movdqa	\c3, \t			// (y_0, ?)
 	movdqa	\lo, \t			// (y^*_0, ?, ?, ?)
 	psrldq	\t, 8			// (y_2, 0)
 	psrlq	\c3, 32			// (floor(y_0/B), ?)
-- 
2.11.0