Also, prefer aligning afterwards, except where that won't work.
pushreg esi
pushreg edi
setfp
pushreg esi
pushreg edi
setfp
endprologue
// Prepare for the first iteration.
endprologue
// Prepare for the first iteration.
pushreg esi
pushreg edi
setfp
pushreg esi
pushreg edi
setfp
endprologue
// Establish the expanded operands.
endprologue
// Establish the expanded operands.
pushreg edi
setfp
and SP, ~15
pushreg edi
setfp
and SP, ~15
endprologue
// Establish the expanded operands and the blocks-of-4 dv limit.
endprologue
// Establish the expanded operands and the blocks-of-4 dv limit.
pushreg esi
pushreg edi
setfp
pushreg esi
pushreg edi
setfp
endprologue
mov eax, \n
mov [SP + 104], eax
endprologue
mov eax, \n
mov [SP + 104], eax
mov IN, [BP + 12]
mov OUT, [BP + 16]
and SP, ~15
mov IN, [BP + 12]
mov OUT, [BP + 16]
and SP, ~15
setfp
mov A, [SP + 8]
mov K, [SP + 12]
setfp
mov A, [SP + 8]
mov K, [SP + 12]
#endif
#if CPUFAM_AMD64 && ABI_WIN
stalloc 3*16 + 8
#endif
#if CPUFAM_AMD64 && ABI_WIN
stalloc 3*16 + 8
setfp
mov A, [SP + 8]
mov K, [SP + 12]
setfp
mov A, [SP + 8]
mov K, [SP + 12]
#endif
#if CPUFAM_AMD64 && ABI_WIN
stalloc 3*16 + 8
#endif
#if CPUFAM_AMD64 && ABI_WIN
stalloc 3*16 + 8
mov IN, [BP + 12]
mov OUT, [BP + 16]
and SP, ~15
mov IN, [BP + 12]
mov OUT, [BP + 16]
and SP, ~15