+ mov NR, [ebp + 8]
+#endif
+
+#if CPUFAM_AMD64 && ABI_SYSV
+ // SysV AMD64: NR, IN and OUT simply name the first three argument
+ // registers where they arrive.  Every XMM register is caller-saved
+ // here, so all four SAVEn slots are registers: nothing is spilled.

+# define NR edi
+# define IN rsi
+# define OUT rdx
+# define SAVE0 xmm5
+# define SAVE1 xmm6
+# define SAVE2 xmm7
+# define SAVE3 xmm8
+#endif
+
+#if CPUFAM_AMD64 && ABI_WIN
+ // Win64: the arguments arrive in rcx, rdx and r8 rather than in the
+ // SysV registers (and everyone else is saner).
+ //
+ // The Windows ABI makes xmm6 and up callee-saved, so SAVE1--SAVE3
+ // can't just sit in xmm6--xmm8 as they do for SysV.  We only need
+ // to keep a copy of the input for the feedforward at the end, so
+ // those three slots live on the stack instead: 48 bytes for the
+ // slots, plus 8 more so that rsp ends up 16-byte aligned.

+# define NR ecx
+# define IN rdx
+# define OUT r8
+# define SAVE0 xmm5
+# define SAVE1 [rsp + 0]
+# define SAVE2 [rsp + 16]
+# define SAVE3 [rsp + 32]

+ sub rsp, 48 + 8
+#endif