// u v = SUM_{0<=i,j<n} u_i v_j t^{i+j}
//
// Suppose instead that we're given ũ = SUM_{0<=i<n} u_{n-i-1} t^i
- // and ṽ = SUM_{0<=j<n} v_{n-j-1} t^j, so the bits are backwards.
+ // and ṽ = SUM_{0<=j<n} v_{n-j-1} t^j, so the bits are backwards.
// Then
//
- // ũ ṽ = SUM_{0<=i,j<n} u_{n-i-1} v_{n-j-1} t^{i+j}
+ // ũ ṽ = SUM_{0<=i,j<n} u_{n-i-1} v_{n-j-1} t^{i+j}
// = SUM_{0<=i,j<n} u_i v_j t^{2n-2-(i+j)}
//
// which is almost the bit-reversal of u v, only it's shifted right
setfp
mov A, [SP + 8]
mov K, [SP + 12]
+ stalloc 16
and SP, ~15
- sub SP, 16
#endif
#if CPUFAM_AMD64 && ABI_WIN
stalloc 3*16 + 8
setfp
mov A, [SP + 8]
mov K, [SP + 12]
- and SP, ~15
+ stalloc 16
ldgot ecx
- sub SP, 16
+ and SP, ~15
#endif
#if CPUFAM_AMD64 && ABI_WIN
stalloc 3*16 + 8