// Fragment of what appears to be a test driver for `mmul4`: set up, call
// the routine, then store two vector registers and the carry state.
// NOTE(review): `testtop`, `testtail` and `testcarryout` are macros defined
// outside this view; what they expand to cannot be confirmed from here.
testtop r11
// Call the routine being exercised.
call mmul4
testtail
// NOTE(review): the leading `+` on the next two lines looks like a
// unified-diff "added line" marker rather than assembler syntax -- confirm
// whether this text is a patch hunk before trying to assemble it.
// Permute the four 32-bit lanes of xmm10 and of xmm11 in place.  The lane
// order is selected by SHUF(0, 2, 1, 3); SHUF is defined elsewhere, so the
// exact permutation is not verifiable here (presumably it exchanges the two
// middle lanes -- TODO confirm against the macro).
+ pshufd xmm10, xmm10, SHUF(0, 2, 1, 3)
+ pshufd xmm11, xmm11, SHUF(0, 2, 1, 3)
// Store the two permuted registers to the 32 bytes starting at r10;
// movdqu is used, so no alignment of r10 is required.
movdqu [r10 + 0], xmm10
movdqu [r10 + 16], xmm11
testcarryout
// Fragment of what appears to be a test driver for `mmla4`: set up, call
// the routine, then store two vector registers and the carry state.
// NOTE(review): `testtop`, `testtail` and `testcarryout` are macros defined
// outside this view; what they expand to cannot be confirmed from here.
testtop r11
// Call the routine being exercised.
call mmla4
testtail
// NOTE(review): the leading `+` on the next two lines looks like a
// unified-diff "added line" marker rather than assembler syntax -- confirm
// whether this text is a patch hunk before trying to assemble it.
// Permute the four 32-bit lanes of xmm10 and of xmm11 in place.  The lane
// order is selected by SHUF(0, 2, 1, 3); SHUF is defined elsewhere, so the
// exact permutation is not verifiable here (presumably it exchanges the two
// middle lanes -- TODO confirm against the macro).
+ pshufd xmm10, xmm10, SHUF(0, 2, 1, 3)
+ pshufd xmm11, xmm11, SHUF(0, 2, 1, 3)
// Store the two permuted registers to the 32 bytes starting at r10;
// movdqu is used, so no alignment of r10 is required.
movdqu [r10 + 0], xmm10
movdqu [r10 + 16], xmm11
testcarryout
// Fragment of what appears to be a test driver for `mont4`: set up, call
// the routine, then store two vector registers and the carry state.
// NOTE(review): `testtop`, `testtail` and `testcarryout` are macros defined
// outside this view; what they expand to cannot be confirmed from here.
// NOTE(review): `testtop` is invoked with no operand here, whereas the
// neighbouring `mmul4`/`mmla4` sequences pass `r11` -- presumably the
// macro's parameter is optional; verify against its definition.
testtop
// Call the routine being exercised.
call mont4
testtail
// NOTE(review): the leading `+` on the next two lines looks like a
// unified-diff "added line" marker rather than assembler syntax -- confirm
// whether this text is a patch hunk before trying to assemble it.
// Permute the four 32-bit lanes of xmm10 and of xmm11 in place.  The lane
// order is selected by SHUF(0, 2, 1, 3); SHUF is defined elsewhere, so the
// exact permutation is not verifiable here (presumably it exchanges the two
// middle lanes -- TODO confirm against the macro).
+ pshufd xmm10, xmm10, SHUF(0, 2, 1, 3)
+ pshufd xmm11, xmm11, SHUF(0, 2, 1, 3)
// Store the two permuted registers to the 32 bytes starting at r10;
// movdqu is used, so no alignment of r10 is required.
movdqu [r10 + 0], xmm10
movdqu [r10 + 16], xmm11
testcarryout