X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/2eed595914ae2ec6f26832d9c55c226220a8c6b5..a1a9ee0a7240087e202a7855e470573de0e59c09:/math/mpx-mul4-amd64-sse2.S diff --git a/math/mpx-mul4-amd64-sse2.S b/math/mpx-mul4-amd64-sse2.S index 03e466c7..bd8ff2f9 100644 --- a/math/mpx-mul4-amd64-sse2.S +++ b/math/mpx-mul4-amd64-sse2.S @@ -319,7 +319,6 @@ INTFUNC(carryprop) movdqu [rdi], xmm0 ret - ENDFUNC INTFUNC(dmul4) @@ -357,7 +356,6 @@ INTFUNC(dmul4) movdqu [rdi], xmm6 ret - ENDFUNC INTFUNC(dmla4) @@ -398,7 +396,6 @@ INTFUNC(dmla4) movdqu [rdi], xmm6 ret - ENDFUNC INTFUNC(mul4zc) @@ -429,7 +426,6 @@ INTFUNC(mul4zc) movdqu [rdi], xmm6 ret - ENDFUNC INTFUNC(mul4) @@ -462,7 +458,6 @@ INTFUNC(mul4) movdqu [rdi], xmm6 ret - ENDFUNC INTFUNC(mla4zc) @@ -498,7 +493,6 @@ INTFUNC(mla4zc) movdqu [rdi], xmm6 ret - ENDFUNC INTFUNC(mla4) @@ -533,7 +527,6 @@ INTFUNC(mla4) movdqu [rdi], xmm6 ret - ENDFUNC INTFUNC(mmul4) @@ -557,7 +550,6 @@ INTFUNC(mmul4) mulcore xmm4, 0, xmm8, xmm9, xmm12, xmm13, xmm14, xmm15 propout xmm7, lo, xmm12, xmm13 jmp 5f - ENDFUNC INTFUNC(mmla4) @@ -575,10 +567,10 @@ INTFUNC(mmla4) movdqu xmm4, [rax] #if ABI_WIN stalloc 48 + 8 // space for the carries -# define STKTMP(i) [rsp + i] +# define STKTMP(i) [SP + i] #endif #if ABI_SYSV -# define STKTMP(i) [rsp + i - 48 - 8] // use red zone +# define STKTMP(i) [SP + i - 48 - 8] // use red zone #endif endprologue @@ -744,7 +736,6 @@ INTFUNC(mont4) // And, with that, we're done. movdqu [rdi], xmm6 ret - ENDFUNC ///-------------------------------------------------------------------------- @@ -783,7 +774,6 @@ FUNC(mpx_umul4_amd64_sse2) endprologue mov DV, rdi - #endif #if ABI_WIN @@ -811,8 +801,7 @@ FUNC(mpx_umul4_amd64_sse2) endprologue mov rdi, DV - mov BVL, [rsp + 224] - + mov BVL, [SP + 224] #endif // Prepare for the first iteration. @@ -878,7 +867,6 @@ FUNC(mpx_umul4_amd64_sse2) #endif #if ABI_WIN - rstrxmm xmm6, 0 rstrxmm xmm7, 16 rstrxmm xmm8, 32 @@ -893,7 +881,6 @@ FUNC(mpx_umul4_amd64_sse2) stfree 160 + 8 popreg rdi popreg rbx - #endif ret @@ -946,7 +933,6 @@ FUNC(mpxmont_mul4_amd64_sse2) endprologue mov DV, rdi - #endif #if ABI_WIN @@ -978,9 +964,8 @@ FUNC(mpxmont_mul4_amd64_sse2) endprologue mov rdi, DV - mov N, [rsp + 224] - mov MI, [rsp + 232] - + mov N, [SP + 224] + mov MI, [SP + 232] #endif // Establish the expanded operands. @@ -1062,7 +1047,6 @@ FUNC(mpxmont_mul4_amd64_sse2) #endif #if ABI_WIN - rstrxmm xmm6, 0 rstrxmm xmm7, 16 rstrxmm xmm8, 32 @@ -1078,7 +1062,6 @@ FUNC(mpxmont_mul4_amd64_sse2) popreg r12 popreg rdi popreg rbx - #endif ret @@ -1134,7 +1117,6 @@ FUNC(mpxmont_redc4_amd64_sse2) // c rcx r9 #if ABI_SYSV - # define DVL rax # define DVL4 rsi # define MI r8 @@ -1149,11 +1131,9 @@ FUNC(mpxmont_redc4_amd64_sse2) endprologue mov DV, rdi - #endif #if ABI_WIN - # define DVL rax # define DVL4 rdx # define MI r10 @@ -1183,8 +1163,7 @@ FUNC(mpxmont_redc4_amd64_sse2) endprologue mov rdi, DV - mov MI, [rsp + 224] - + mov MI, [SP + 224] #endif // Establish the expanded operands and the blocks-of-4 dv limit. @@ -1267,7 +1246,6 @@ FUNC(mpxmont_redc4_amd64_sse2) #endif #if ABI_WIN - rstrxmm xmm6, 0 rstrxmm xmm7, 16 rstrxmm xmm8, 32 @@ -1283,7 +1261,6 @@ FUNC(mpxmont_redc4_amd64_sse2) popreg r12 popreg rdi popreg rbx - #endif ret @@ -1329,7 +1306,7 @@ ENDFUNC # define ARG8 STKARG(4) # define STKARG_OFFSET 224 #endif -#define STKARG(i) [rsp + STKARG_OFFSET + 8*(i)] +#define STKARG(i) [SP + STKARG_OFFSET + 8*(i)] // sysv win // dmul smul mmul mont dmul smul mmul mont @@ -1548,6 +1525,16 @@ FUNC(test_mul4) testepilogue ENDFUNC +FUNC(test_mul4zc) + testprologue smul + testldcarry + testtop nil + call mul4zc + testtail + testcarryout + testepilogue +ENDFUNC + FUNC(test_mla4) testprologue smul testldcarry @@ -1558,6 +1545,16 @@ FUNC(test_mla4) testepilogue ENDFUNC +FUNC(test_mla4zc) + testprologue smul + testldcarry + testtop nil + call mla4zc + testtail + testcarryout + testepilogue +ENDFUNC + FUNC(test_mmul4) testprologue mmul testtop r11