///--------------------------------------------------------------------------
/// Bulk multipliers.
+FUNC(mpx_umul4_x86_avx)
+ .arch .avx  // let the assembler accept the AVX mnemonic below
+ vzeroupper  // zero the upper halves of the YMM registers; presumably to avoid AVX/SSE transition stalls in the SSE2 code -- confirm
+ endprologue
+ // and drop through into mpx_umul4_x86_sse2, which follows this stub...
+ .arch pentium4  // select pentium4 as the assembler target for the code after this stub
+ENDFUNC
+
FUNC(mpx_umul4_x86_sse2)
// void mpx_umul4_x86_sse2(mpw *dv, const mpw *av, const mpw *avl,
// const mpw *bv, const mpw *bvl);
ENDFUNC
+FUNC(mpxmont_mul4_x86_avx)
+ .arch .avx  // let the assembler accept the AVX mnemonic below
+ vzeroupper  // zero the upper halves of the YMM registers; presumably to avoid AVX/SSE transition stalls in the SSE2 code -- confirm
+ endprologue
+ // and drop through into mpxmont_mul4_x86_sse2, which follows this stub...
+ .arch pentium4  // select pentium4 as the assembler target for the code after this stub
+ENDFUNC
+
FUNC(mpxmont_mul4_x86_sse2)
// void mpxmont_mul4_x86_sse2(mpw *dv, const mpw *av, const mpw *bv,
// const mpw *nv, size_t n, const mpw *mi);
ENDFUNC
+FUNC(mpxmont_redc4_x86_avx)
+ .arch .avx  // let the assembler accept the AVX mnemonic below
+ vzeroupper  // zero the upper halves of the YMM registers; presumably to avoid AVX/SSE transition stalls in the SSE2 code -- confirm
+ endprologue
+ // and drop through into mpxmont_redc4_x86_sse2, which follows this stub...
+ .arch pentium4  // select pentium4 as the assembler target for the code after this stub
+ENDFUNC
+
FUNC(mpxmont_redc4_x86_sse2)
// void mpxmont_redc4_x86_sse2(mpw *dv, mpw *dvl, const mpw *nv,
// size_t n, const mpw *mi);
mov [ebx + ecx*8 + 4], edx
.endm
// Shared prologue used at the top of each test_* entry point: saves
// registers, aligns the stack, reserves the scratch area described under
// `vars' below, and records the caller-supplied count operand \n in the
// slot at esp + 104.  Clobbers eax.
-.macro testprologue
+.macro testprologue n
pushreg ebp
pushreg ebx
pushreg esi
and esp, ~15  // clear the low four bits of esp: 16-byte alignment
sub esp, 3*32 + 4*4  // 112 bytes: three 32-byte areas plus 16 bytes for cycles/count
endprologue
+ mov eax, \n  // \n may be a memory operand, so it is copied via eax
+ mov [esp + 104], eax  // this is the slot testtail passes to cystore
// vars:
// esp + 0 = v expanded
// esp + 32 = y expanded
// esp + 64 = ? expanded
// esp + 96 = cycles
+ // esp + 104 = count
.endm
.macro testepilogue
.endif
.endm
// Tail of the test loop: passes the cycles slot (esp + 96), the operand
// \cyv and the count slot (esp + 104, filled in by testprologue) to
// cystore, then jumps back to the nearest preceding local label `0'
// unless ZF is set.
// NOTE(review): cystore and the `0:' label are defined outside this view;
// ZF is presumably left by cystore's handling of the count -- confirm.
-.macro testtail cyv, n
- cystore esp + 96, \cyv, \n
+.macro testtail cyv
+ cystore esp + 96, \cyv, esp + 104
jnz 0b
.endm
.endm
// Test wrapper around dmul4: sets up state with the test* macros, calls
// dmul4 between testtop and testtail, then writes the carry back out.
FUNC(test_dmul4)
- testprologue
+ testprologue [ebp + 44]  // count operand; testprologue copies it to esp + 104
testldcarry [ebp + 24]
testexpand [ebp + 36], [ebp + 40]
mov edi, [ebp + 20]  // presumably the destination pointer for dmul4 -- TODO confirm
testtop [ebp + 28], [ebp + 32]
call dmul4  // the routine under test
- testtail [ebp + 48], [ebp + 44]
+ testtail [ebp + 48]  // count is no longer passed here; testtail uses esp + 104
testcarryout [ebp + 24]  // same operand as testldcarry above
testepilogue
ENDFUNC
// Test wrapper around dmla4: sets up state with the test* macros, calls
// dmla4 between testtop and testtail, then writes the carry back out.
FUNC(test_dmla4)
- testprologue
+ testprologue [ebp + 44]  // count operand; testprologue copies it to esp + 104
testldcarry [ebp + 24]
testexpand [ebp + 36], [ebp + 40]
mov edi, [ebp + 20]  // presumably the destination pointer for dmla4 -- TODO confirm
testtop [ebp + 28], [ebp + 32]
call dmla4  // the routine under test
- testtail [ebp + 48], [ebp + 44]
+ testtail [ebp + 48]  // count is no longer passed here; testtail uses esp + 104
testcarryout [ebp + 24]  // same operand as testldcarry above
testepilogue
ENDFUNC
// Test wrapper around mul4: sets up state with the test* macros (passing
// `nil' as the first testexpand/testtop operand), calls mul4 between
// testtop and testtail, then writes the carry back out.
FUNC(test_mul4)
- testprologue
+ testprologue [ebp + 36]  // count operand; testprologue copies it to esp + 104
testldcarry [ebp + 24]
testexpand nil, [ebp + 32]
mov edi, [ebp + 20]  // presumably the destination pointer for mul4 -- TODO confirm
testtop nil, [ebp + 28]
call mul4  // the routine under test
- testtail [ebp + 40], [ebp + 36]
+ testtail [ebp + 40]  // count is no longer passed here; testtail uses esp + 104
testcarryout [ebp + 24]  // same operand as testldcarry above
testepilogue
ENDFUNC
// Test wrapper around mla4: sets up state with the test* macros (passing
// `nil' as the first testexpand/testtop operand), calls mla4 between
// testtop and testtail, then writes the carry back out.
FUNC(test_mla4)
- testprologue
+ testprologue [ebp + 36]  // count operand; testprologue copies it to esp + 104
testldcarry [ebp + 24]
testexpand nil, [ebp + 32]
mov edi, [ebp + 20]  // presumably the destination pointer for mla4 -- TODO confirm
testtop nil, [ebp + 28]
call mla4  // the routine under test
- testtail [ebp + 40], [ebp + 36]
+ testtail [ebp + 40]  // count is no longer passed here; testtail uses esp + 104
testcarryout [ebp + 24]  // same operand as testldcarry above
testepilogue
ENDFUNC
FUNC(test_mmul4)
- testprologue
+ testprologue [ebp + 48]
testexpand [ebp + 40], [ebp + 44]
mov edi, [ebp + 20]
testtop [ebp + 32], [ebp + 36], mont
call mmul4
- testtail [ebp + 52], [ebp + 48]
+ testtail [ebp + 52]
mov edi, [ebp + 28]
movdqa xmm0, [esp + 64]
movdqa xmm1, [esp + 80]
ENDFUNC
FUNC(test_mmla4)
- testprologue
+ testprologue [ebp + 48]
testexpand [ebp + 40], [ebp + 44]
mov edi, [ebp + 20]
testtop [ebp + 32], [ebp + 36], mont
call mmla4
- testtail [ebp + 52], [ebp + 48]
+ testtail [ebp + 52]
mov edi, [ebp + 28]
movdqa xmm0, [esp + 64]
movdqa xmm1, [esp + 80]
ENDFUNC
FUNC(test_mont4)
- testprologue
+ testprologue [ebp + 40]
testexpand nil, [ebp + 36]
mov edi, [ebp + 20]
testtop nil, [ebp + 32], mont
call mont4
- testtail [ebp + 44], [ebp + 40]
+ testtail [ebp + 44]
mov edi, [ebp + 28]
movdqa xmm0, [esp + 64]
movdqa xmm1, [esp + 80]