~mdw
/
catacomb
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
math/: Delete some unnecessary blank lines.
[catacomb]
/
math
/
mpx-mul4-amd64-sse2.S
diff --git
a/math/mpx-mul4-amd64-sse2.S
b/math/mpx-mul4-amd64-sse2.S
index
64460ca
..
bd8ff2f
100644
(file)
--- a/
math/mpx-mul4-amd64-sse2.S
+++ b/
math/mpx-mul4-amd64-sse2.S
@@
-25,15
+25,13
@@
/// MA 02111-1307, USA.
///--------------------------------------------------------------------------
/// MA 02111-1307, USA.
///--------------------------------------------------------------------------
-///
External definition
s.
+///
Preliminarie
s.
#include "config.h"
#include "asm-common.h"
#include "config.h"
#include "asm-common.h"
-///--------------------------------------------------------------------------
-/// Prologue.
-
.arch pentium4
.arch pentium4
+
.text
///--------------------------------------------------------------------------
.text
///--------------------------------------------------------------------------
@@
-321,7
+319,6
@@
INTFUNC(carryprop)
movdqu [rdi], xmm0
ret
movdqu [rdi], xmm0
ret
-
ENDFUNC
INTFUNC(dmul4)
ENDFUNC
INTFUNC(dmul4)
@@
-359,7
+356,6
@@
INTFUNC(dmul4)
movdqu [rdi], xmm6
ret
movdqu [rdi], xmm6
ret
-
ENDFUNC
INTFUNC(dmla4)
ENDFUNC
INTFUNC(dmla4)
@@
-400,7
+396,6
@@
INTFUNC(dmla4)
movdqu [rdi], xmm6
ret
movdqu [rdi], xmm6
ret
-
ENDFUNC
INTFUNC(mul4zc)
ENDFUNC
INTFUNC(mul4zc)
@@
-431,7
+426,6
@@
INTFUNC(mul4zc)
movdqu [rdi], xmm6
ret
movdqu [rdi], xmm6
ret
-
ENDFUNC
INTFUNC(mul4)
ENDFUNC
INTFUNC(mul4)
@@
-464,7
+458,6
@@
INTFUNC(mul4)
movdqu [rdi], xmm6
ret
movdqu [rdi], xmm6
ret
-
ENDFUNC
INTFUNC(mla4zc)
ENDFUNC
INTFUNC(mla4zc)
@@
-500,7
+493,6
@@
INTFUNC(mla4zc)
movdqu [rdi], xmm6
ret
movdqu [rdi], xmm6
ret
-
ENDFUNC
INTFUNC(mla4)
ENDFUNC
INTFUNC(mla4)
@@
-535,7
+527,6
@@
INTFUNC(mla4)
movdqu [rdi], xmm6
ret
movdqu [rdi], xmm6
ret
-
ENDFUNC
INTFUNC(mmul4)
ENDFUNC
INTFUNC(mmul4)
@@
-559,7
+550,6
@@
INTFUNC(mmul4)
mulcore xmm4, 0, xmm8, xmm9, xmm12, xmm13, xmm14, xmm15
propout xmm7, lo, xmm12, xmm13
jmp 5f
mulcore xmm4, 0, xmm8, xmm9, xmm12, xmm13, xmm14, xmm15
propout xmm7, lo, xmm12, xmm13
jmp 5f
-
ENDFUNC
INTFUNC(mmla4)
ENDFUNC
INTFUNC(mmla4)
@@
-577,10
+567,10
@@
INTFUNC(mmla4)
movdqu xmm4, [rax]
#if ABI_WIN
stalloc 48 + 8 // space for the carries
movdqu xmm4, [rax]
#if ABI_WIN
stalloc 48 + 8 // space for the carries
-# define STKTMP(i) [
rsp
+ i]
+# define STKTMP(i) [
SP
+ i]
#endif
#if ABI_SYSV
#endif
#if ABI_SYSV
-# define STKTMP(i) [
rsp + i - 48 - 8]
// use red zone
+# define STKTMP(i) [
SP + i - 48 - 8]
// use red zone
#endif
endprologue
#endif
endprologue
@@
-746,7
+736,6
@@
INTFUNC(mont4)
// And, with that, we're done.
movdqu [rdi], xmm6
ret
// And, with that, we're done.
movdqu [rdi], xmm6
ret
-
ENDFUNC
///--------------------------------------------------------------------------
ENDFUNC
///--------------------------------------------------------------------------
@@
-785,7
+774,6
@@
FUNC(mpx_umul4_amd64_sse2)
endprologue
mov DV, rdi
endprologue
mov DV, rdi
-
#endif
#if ABI_WIN
#endif
#if ABI_WIN
@@
-813,8
+801,7
@@
FUNC(mpx_umul4_amd64_sse2)
endprologue
mov rdi, DV
endprologue
mov rdi, DV
- mov BVL, [rsp + 224]
-
+ mov BVL, [SP + 224]
#endif
// Prepare for the first iteration.
#endif
// Prepare for the first iteration.
@@
-880,7
+867,6
@@
FUNC(mpx_umul4_amd64_sse2)
#endif
#if ABI_WIN
#endif
#if ABI_WIN
-
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
@@
-895,7
+881,6
@@
FUNC(mpx_umul4_amd64_sse2)
stfree 160 + 8
popreg rdi
popreg rbx
stfree 160 + 8
popreg rdi
popreg rbx
-
#endif
ret
#endif
ret
@@
-948,7
+933,6
@@
FUNC(mpxmont_mul4_amd64_sse2)
endprologue
mov DV, rdi
endprologue
mov DV, rdi
-
#endif
#if ABI_WIN
#endif
#if ABI_WIN
@@
-980,9
+964,8
@@
FUNC(mpxmont_mul4_amd64_sse2)
endprologue
mov rdi, DV
endprologue
mov rdi, DV
- mov N, [rsp + 224]
- mov MI, [rsp + 232]
-
+ mov N, [SP + 224]
+ mov MI, [SP + 232]
#endif
// Establish the expanded operands.
#endif
// Establish the expanded operands.
@@
-1064,7
+1047,6
@@
FUNC(mpxmont_mul4_amd64_sse2)
#endif
#if ABI_WIN
#endif
#if ABI_WIN
-
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
@@
-1080,7
+1062,6
@@
FUNC(mpxmont_mul4_amd64_sse2)
popreg r12
popreg rdi
popreg rbx
popreg r12
popreg rdi
popreg rbx
-
#endif
ret
#endif
ret
@@
-1136,7
+1117,6
@@
FUNC(mpxmont_redc4_amd64_sse2)
// c rcx r9
#if ABI_SYSV
// c rcx r9
#if ABI_SYSV
-
# define DVL rax
# define DVL4 rsi
# define MI r8
# define DVL rax
# define DVL4 rsi
# define MI r8
@@
-1151,11
+1131,9
@@
FUNC(mpxmont_redc4_amd64_sse2)
endprologue
mov DV, rdi
endprologue
mov DV, rdi
-
#endif
#if ABI_WIN
#endif
#if ABI_WIN
-
# define DVL rax
# define DVL4 rdx
# define MI r10
# define DVL rax
# define DVL4 rdx
# define MI r10
@@
-1185,8
+1163,7
@@
FUNC(mpxmont_redc4_amd64_sse2)
endprologue
mov rdi, DV
endprologue
mov rdi, DV
- mov MI, [rsp + 224]
-
+ mov MI, [SP + 224]
#endif
// Establish the expanded operands and the blocks-of-4 dv limit.
#endif
// Establish the expanded operands and the blocks-of-4 dv limit.
@@
-1269,7
+1246,6
@@
FUNC(mpxmont_redc4_amd64_sse2)
#endif
#if ABI_WIN
#endif
#if ABI_WIN
-
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
rstrxmm xmm6, 0
rstrxmm xmm7, 16
rstrxmm xmm8, 32
@@
-1285,7
+1261,6
@@
FUNC(mpxmont_redc4_amd64_sse2)
popreg r12
popreg rdi
popreg rbx
popreg r12
popreg rdi
popreg rbx
-
#endif
ret
#endif
ret
@@
-1329,9
+1304,9
@@
ENDFUNC
# define ARG6 STKARG(2)
# define ARG7 STKARG(3)
# define ARG8 STKARG(4)
# define ARG6 STKARG(2)
# define ARG7 STKARG(3)
# define ARG8 STKARG(4)
-# define STKARG_OFFSET
40
+# define STKARG_OFFSET
224
#endif
#endif
-#define STKARG(i) [
rsp
+ STKARG_OFFSET + 8*(i)]
+#define STKARG(i) [
SP
+ STKARG_OFFSET + 8*(i)]
// sysv win
// dmul smul mmul mont dmul smul mmul mont
// sysv win
// dmul smul mmul mont dmul smul mmul mont
@@
-1386,7
+1361,7
@@
ENDFUNC
mov rbx, r8
movdqu xmm8, [r9]
movdqu xmm10, [rax]
mov rbx, r8
movdqu xmm8, [r9]
movdqu xmm10, [rax]
- mov r8, STKARG(1)
+ mov r8
d
, STKARG(1)
mov r9, STKARG(2)
mov r10, rdx
mov r11, rcx
mov r9, STKARG(2)
mov r10, rdx
mov r11, rcx
@@
-1395,7
+1370,7
@@
ENDFUNC
.ifeqs "\mode", "mont"
mov rbx, rcx
movdqu xmm8, [r8]
.ifeqs "\mode", "mont"
mov rbx, rcx
movdqu xmm8, [r8]
- mov r8
, r9
+ mov r8
d, r9d
mov r9, STKARG(0)
mov r10, rdx
mov rcx, rsi
mov r9, STKARG(0)
mov r10, rdx
mov rcx, rsi
@@
-1423,16
+1398,16
@@
ENDFUNC
mov rbx, r9
movdqu xmm8, [r10]
movdqu xmm10, [r11]
mov rbx, r9
movdqu xmm8, [r10]
movdqu xmm10, [r11]
- mov r8, STKARG(2)
- mov r9, STKARG(3)
mov r11, r8
mov r11, r8
+ mov r8d, STKARG(2)
+ mov r9, STKARG(3)
.endif
.ifeqs "\mode", "smul"
mov rdi, rcx
mov rcx, rdx
mov rbx, r8
movdqu xmm10, [r9]
.endif
.ifeqs "\mode", "smul"
mov rdi, rcx
mov rcx, rdx
mov rbx, r8
movdqu xmm10, [r9]
- mov r8, STKARG(0)
+ mov r8
d
, STKARG(0)
mov r9, STKARG(1)
.endif
.ifeqs "\mode", "mmul"
mov r9, STKARG(1)
.endif
.ifeqs "\mode", "mmul"
@@
-1443,10
+1418,10
@@
ENDFUNC
mov rbx, STKARG(0)
movdqu xmm8, [r10]
movdqu xmm10, [r11]
mov rbx, STKARG(0)
movdqu xmm8, [r10]
movdqu xmm10, [r11]
- mov r8, STKARG(3)
- mov r9, STKARG(4)
mov r10, r8
mov r11, r9
mov r10, r8
mov r11, r9
+ mov r8d, STKARG(3)
+ mov r9, STKARG(4)
.endif
.ifeqs "\mode", "mont"
mov r10, STKARG(0)
.endif
.ifeqs "\mode", "mont"
mov r10, STKARG(0)
@@
-1454,9
+1429,9
@@
ENDFUNC
mov rcx, rdx
mov rbx, r9
movdqu xmm8, [r10]
mov rcx, rdx
mov rbx, r9
movdqu xmm8, [r10]
- mov r8, STKARG(1)
- mov r9, STKARG(2)
mov r10, r8
mov r10, r8
+ mov r8d, STKARG(1)
+ mov r9, STKARG(2)
.endif
#endif
.endif
#endif
@@
-1550,6
+1525,16
@@
FUNC(test_mul4)
testepilogue
ENDFUNC
testepilogue
ENDFUNC
+FUNC(test_mul4zc)
+ testprologue smul
+ testldcarry
+ testtop nil
+ call mul4zc
+ testtail
+ testcarryout
+ testepilogue
+ENDFUNC
+
FUNC(test_mla4)
testprologue smul
testldcarry
FUNC(test_mla4)
testprologue smul
testldcarry
@@
-1560,6
+1545,16
@@
FUNC(test_mla4)
testepilogue
ENDFUNC
testepilogue
ENDFUNC
+FUNC(test_mla4zc)
+ testprologue smul
+ testldcarry
+ testtop nil
+ call mla4zc
+ testtail
+ testcarryout
+ testepilogue
+ENDFUNC
+
FUNC(test_mmul4)
testprologue mmul
testtop r11
FUNC(test_mmul4)
testprologue mmul
testtop r11