X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/6ecc0b8facfd2f1f13abc03b0f2013112af3430b..6a024d24d97cb5d42c0091571735475b849f59f4:/math/mpx-mul4-x86-sse2.S

diff --git a/math/mpx-mul4-x86-sse2.S b/math/mpx-mul4-x86-sse2.S
index 14052fd0..f6c81673 100644
--- a/math/mpx-mul4-x86-sse2.S
+++ b/math/mpx-mul4-x86-sse2.S
@@ -1068,7 +1068,7 @@ ENDFUNC
 	mov [ebx + ecx*8 + 4], edx
 .endm
 
-.macro testprologue
+.macro testprologue n
 	pushreg ebp
 	pushreg ebx
 	pushreg esi
@@ -1077,11 +1077,14 @@ ENDFUNC
 	and esp, ~15
 	sub esp, 3*32 + 4*4
 	endprologue
+	mov eax, \n
+	mov [esp + 104], eax
 	// vars:
 	// esp + 0 = v expanded
 	// esp + 32 = y expanded
 	// esp + 64 = ? expanded
 	// esp + 96 = cycles
+	// esp + 104 = count
 .endm
 
 .macro testepilogue
@@ -1139,8 +1142,8 @@ ENDFUNC
   .endif
 .endm
 
-.macro testtail cyv, n
-	cystore esp + 96, \cyv, \n
+.macro testtail cyv
+	cystore esp + 96, \cyv, esp + 104
 	jnz 0b
 .endm
 
@@ -1152,60 +1155,60 @@ ENDFUNC
 .endm
 
 FUNC(test_dmul4)
-	testprologue
+	testprologue [ebp + 44]
 	testldcarry [ebp + 24]
 	testexpand [ebp + 36], [ebp + 40]
 	mov edi, [ebp + 20]
 	testtop [ebp + 28], [ebp + 32]
 	call dmul4
-	testtail [ebp + 48], [ebp + 44]
+	testtail [ebp + 48]
 	testcarryout [ebp + 24]
 	testepilogue
 ENDFUNC
 
 FUNC(test_dmla4)
-	testprologue
+	testprologue [ebp + 44]
 	testldcarry [ebp + 24]
 	testexpand [ebp + 36], [ebp + 40]
 	mov edi, [ebp + 20]
 	testtop [ebp + 28], [ebp + 32]
 	call dmla4
-	testtail [ebp + 48], [ebp + 44]
+	testtail [ebp + 48]
 	testcarryout [ebp + 24]
 	testepilogue
 ENDFUNC
 
 FUNC(test_mul4)
-	testprologue
+	testprologue [ebp + 36]
 	testldcarry [ebp + 24]
 	testexpand nil, [ebp + 32]
 	mov edi, [ebp + 20]
 	testtop nil, [ebp + 28]
 	call mul4
-	testtail [ebp + 40], [ebp + 36]
+	testtail [ebp + 40]
 	testcarryout [ebp + 24]
 	testepilogue
 ENDFUNC
 
 FUNC(test_mla4)
-	testprologue
+	testprologue [ebp + 36]
 	testldcarry [ebp + 24]
 	testexpand nil, [ebp + 32]
 	mov edi, [ebp + 20]
 	testtop nil, [ebp + 28]
 	call mla4
-	testtail [ebp + 40], [ebp + 36]
+	testtail [ebp + 40]
 	testcarryout [ebp + 24]
 	testepilogue
 ENDFUNC
 
 FUNC(test_mmul4)
-	testprologue
+	testprologue [ebp + 48]
 	testexpand [ebp + 40], [ebp + 44]
 	mov edi, [ebp + 20]
 	testtop [ebp + 32], [ebp + 36], mont
 	call mmul4
-	testtail [ebp + 52], [ebp + 48]
+	testtail [ebp + 52]
 	mov edi, [ebp + 28]
 	movdqa xmm0, [esp + 64]
 	movdqa xmm1, [esp + 80]
@@ -1216,12 +1219,12 @@ FUNC(test_mmul4)
 ENDFUNC
 
 FUNC(test_mmla4)
-	testprologue
+	testprologue [ebp + 48]
 	testexpand [ebp + 40], [ebp + 44]
 	mov edi, [ebp + 20]
 	testtop [ebp + 32], [ebp + 36], mont
 	call mmla4
-	testtail [ebp + 52], [ebp + 48]
+	testtail [ebp + 52]
 	mov edi, [ebp + 28]
 	movdqa xmm0, [esp + 64]
 	movdqa xmm1, [esp + 80]
@@ -1232,12 +1235,12 @@ FUNC(test_mmla4)
 ENDFUNC
 
 FUNC(test_mont4)
-	testprologue
+	testprologue [ebp + 40]
 	testexpand nil, [ebp + 36]
 	mov edi, [ebp + 20]
 	testtop nil, [ebp + 32], mont
 	call mont4
-	testtail [ebp + 44], [ebp + 40]
+	testtail [ebp + 44]
 	mov edi, [ebp + 28]
 	movdqa xmm0, [esp + 64]
 	movdqa xmm1, [esp + 80]