/// MA 02111-1307, USA.
///--------------------------------------------------------------------------
/// Preliminaries.
#include "config.h"
#include "asm-common.h"
.arch pentium4

.text
///--------------------------------------------------------------------------
movdqu [rdi], xmm0
ret
-
ENDFUNC
INTFUNC(dmul4)
	// Flush the accumulated output through the destination pointer
	// and return to the caller.
	// NOTE(review): only the tail of this routine is visible here --
	// confirm the full body against the upstream source.
movdqu [rdi], xmm6
ret
ENDFUNC
INTFUNC(dmla4)
	// Flush the accumulated output through the destination pointer
	// and return to the caller.
	// NOTE(review): only the tail of this routine is visible here --
	// confirm the full body against the upstream source.
movdqu [rdi], xmm6
ret
ENDFUNC
INTFUNC(mul4zc)
	// Flush the accumulated output through the destination pointer
	// and return to the caller.
	// NOTE(review): only the tail of this routine is visible here --
	// confirm the full body against the upstream source.
movdqu [rdi], xmm6
ret
ENDFUNC
INTFUNC(mul4)
	// Flush the accumulated output through the destination pointer
	// and return to the caller.
	// NOTE(review): only the tail of this routine is visible here --
	// confirm the full body against the upstream source.
movdqu [rdi], xmm6
ret
ENDFUNC
INTFUNC(mla4zc)
	// Flush the accumulated output through the destination pointer
	// and return to the caller.
	// NOTE(review): only the tail of this routine is visible here --
	// confirm the full body against the upstream source.
movdqu [rdi], xmm6
ret
ENDFUNC
INTFUNC(mla4)
	// Flush the accumulated output through the destination pointer
	// and return to the caller.
	// NOTE(review): only the tail of this routine is visible here --
	// confirm the full body against the upstream source.
movdqu [rdi], xmm6
ret
ENDFUNC
INTFUNC(mmul4)
	// First multiplication step, then join the shared tail.
	// `mulcore'/`propout' are macros defined elsewhere in this file;
	// label `5' (the jump target) is likewise outside this fragment --
	// NOTE(review): confirm against the upstream source.
mulcore xmm4, 0, xmm8, xmm9, xmm12, xmm13, xmm14, xmm15
propout xmm7, lo, xmm12, xmm13
jmp 5f
ENDFUNC
INTFUNC(mmla4)
	// Load the 128-bit operand pointed to by rax, and set up
	// ABI-specific temporary stack slots for the carries: Win64 has no
	// red zone, so allocate explicitly; SysV uses the 128-byte red zone.
	// Uses the asm-common `SP' alias rather than raw rsp so the macros
	// stay correct if a frame register is in use.
movdqu xmm4, [rax]
#if ABI_WIN
stalloc 48 + 8 // space for the carries
# define STKTMP(i) [SP + i]
#endif
#if ABI_SYSV
# define STKTMP(i) [SP + i - 48 - 8] // use red zone
#endif
endprologue
// And, with that, we're done.
// NOTE(review): the middle of this routine appears elided in this
// fragment -- confirm the full body against the upstream source.
movdqu [rdi], xmm6
ret
ENDFUNC
///--------------------------------------------------------------------------
endprologue
mov DV, rdi
#endif
#if ABI_WIN
endprologue
mov rdi, DV
mov BVL, [SP + 224]
#endif
// Prepare for the first iteration.
#endif
#if ABI_WIN
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
stfree 160 + 8
popreg rdi
popreg rbx
#endif
ret
endprologue
mov DV, rdi
#endif
#if ABI_WIN
endprologue
mov rdi, DV
mov N, [SP + 224]
mov MI, [SP + 232]
#endif
// Establish the expanded operands.
#endif
#if ABI_WIN
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
popreg r12
popreg rdi
popreg rbx
#endif
ret
// c rcx r9
#if ABI_SYSV
# define DVL rax
# define DVL4 rsi
# define MI r8
endprologue
mov DV, rdi
#endif
#if ABI_WIN
# define DVL rax
# define DVL4 rdx
# define MI r10
endprologue
mov rdi, DV
mov MI, [SP + 224]
#endif
// Establish the expanded operands and the blocks-of-4 dv limit.
#endif
#if ABI_WIN
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
popreg r12
popreg rdi
popreg rbx
#endif
ret
# define ARG6 STKARG(2)
# define ARG7 STKARG(3)
# define ARG8 STKARG(4)
# define STKARG_OFFSET 224
#endif
#define STKARG(i) [SP + STKARG_OFFSET + 8*(i)]
// sysv win
// dmul smul mmul mont dmul smul mmul mont
mov rbx, r8
movdqu xmm8, [r9]
movdqu xmm10, [rax]
mov r8d, STKARG(1)
mov r9, STKARG(2)
mov r10, rdx
mov r11, rcx
.ifeqs "\mode", "mont"
mov rbx, rcx
movdqu xmm8, [r8]
mov r8d, r9d
mov r9, STKARG(0)
mov r10, rdx
mov rcx, rsi
mov rbx, r9
movdqu xmm8, [r10]
movdqu xmm10, [r11]
mov r11, r8
mov r8d, STKARG(2)
mov r9, STKARG(3)
.endif
.ifeqs "\mode", "smul"
mov rdi, rcx
mov rcx, rdx
mov rbx, r8
movdqu xmm10, [r9]
mov r8d, STKARG(0)
mov r9, STKARG(1)
.endif
.ifeqs "\mode", "mmul"
mov rbx, STKARG(0)
movdqu xmm8, [r10]
movdqu xmm10, [r11]
mov r10, r8
mov r11, r9
mov r8d, STKARG(3)
mov r9, STKARG(4)
.endif
.ifeqs "\mode", "mont"
mov r10, STKARG(0)
mov rcx, rdx
mov rbx, r9
movdqu xmm8, [r10]
mov r10, r8
mov r8d, STKARG(1)
mov r9, STKARG(2)
.endif
#endif
testepilogue
ENDFUNC
FUNC(test_mul4zc)
	// Drive `mul4zc' through the shared test harness macros:
	// set up a single-multiply frame, load the carries, run the
	// timing loop around the call, and write the carries back out.
testprologue smul
testldcarry
testtop nil
call mul4zc
testtail
testcarryout
testepilogue
ENDFUNC

FUNC(test_mla4)
	// Drive `mla4' through the shared test harness macros.
	// NOTE(review): unlike the sibling tests there is no
	// testtop/call/testtail/testcarryout sequence here -- the middle
	// of this function looks elided; confirm against upstream.
testprologue smul
testldcarry
testepilogue
ENDFUNC
FUNC(test_mla4zc)
	// Drive `mla4zc' through the shared test harness macros:
	// set up a single-multiply frame, load the carries, run the
	// timing loop around the call, and write the carries back out.
testprologue smul
testldcarry
testtop nil
call mla4zc
testtail
testcarryout
testepilogue
ENDFUNC

FUNC(test_mmul4)
testprologue mmul
testtop r11