math/mpx-mul4-x86-sse2.S: Use `SHUF' instead of hardwired constants.
[catacomb] / base / asm-common.h
CommitLineData
1a0c09c4
MW
1/// -*- mode: asm; asm-comment-char: ?/ -*-
2///
3/// Fancy SIMD implementation of Salsa20
4///
5/// (c) 2015 Straylight/Edgeware
6///
7
8///----- Licensing notice ---------------------------------------------------
9///
10/// This file is part of Catacomb.
11///
12/// Catacomb is free software; you can redistribute it and/or modify
13/// it under the terms of the GNU Library General Public License as
14/// published by the Free Software Foundation; either version 2 of the
15/// License, or (at your option) any later version.
16///
17/// Catacomb is distributed in the hope that it will be useful,
18/// but WITHOUT ANY WARRANTY; without even the implied warranty of
19/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20/// GNU Library General Public License for more details.
21///
22/// You should have received a copy of the GNU Library General Public
23/// License along with Catacomb; if not, write to the Free
24/// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25/// MA 02111-1307, USA.
26
27///--------------------------------------------------------------------------
28/// General definitions.
29
f8e509a9
MW
// Literal-pool support built on numbered `.text' subsections.  Code is
// assembled into subsection `.L$_subsec' and literals into subsection
// `.L$_subsec + 1'.
30// Some useful variables.
31 .L$_subsec = 0
32
33// Literal pools done the hard way.
//
// _LIT switches to the literal subsection and _ENDLIT switches back to the
// code subsection.  _LTORG advances `.L$_subsec' by two and selects the new
// code subsection, so later literals go into a fresh pool.
34#define _LIT .text .L$_subsec + 1
35#define _ENDLIT .text .L$_subsec
6c54cbd3 36#define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
f8e509a9 37
1a0c09c4
MW
38// Announcing an external function.
//
// FUNC(name) makes F(name) global, applies TYPE_FUNC, and defines a
// one-shot assembler macro `ENDFUNC' which expands to _ENDFUNC(name), so
// the matching end marker needs no argument.  It then runs FUNC_PREHOOK,
// emits the label F(name), and runs FUNC_POSTHOOK.
39#define FUNC(name) \
40 .globl F(name); \
41 TYPE_FUNC(name); \
8a1aa284 42 .macro ENDFUNC; _ENDFUNC(name); .endm; \
1a0c09c4
MW
43 FUNC_PREHOOK(name); \
44F(name): \
45 FUNC_POSTHOOK(name)
46
47// Marking the end of a function.
//
// Reached through the `ENDFUNC' macro which FUNC defines.  Purges that
// macro (so the next FUNC can define it again), applies SIZE_OBJ and
// ENDFUNC_HOOK, and finally starts a new literal pool with _LTORG.
48#define _ENDFUNC(name) \
49 .purgem ENDFUNC; \
50 SIZE_OBJ(name); \
f8e509a9 51 ENDFUNC_HOOK(name); \
6c54cbd3 52 _LTORG
1a0c09c4 53
8ae4c946
MW
54// Make a helper function, if necessary.
//
// Usage: AUXFN(name) <body> ENDAUXFN.  The body is assembled only if the
// symbol `.L$_auxfn_def.name' is not yet defined, so repeated uses emit the
// helper once per assembly.  The helper goes into `.text' subsection 7128,
// is aligned by FUNC_PREHOOK, and is labelled with the bare `name' (no
// F() decoration and no `.globl').  ENDAUXFN returns to the current code
// subsection, marks the helper as defined, and closes the `.ifndef'.
55#define AUXFN(name) \
56 .ifndef .L$_auxfn_def.name; \
57 .text 7128; \
58 .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
59 FUNC_PREHOOK(name); \
60name:
61#define _ENDAUXFN_TAIL(name) \
62 .purgem _ENDAUXFN; \
63 .text .L$_subsec; \
64 .L$_auxfn_def.name = 1
65#define ENDAUXFN _ENDAUXFN; .endif
66
1a0c09c4
MW
67///--------------------------------------------------------------------------
68/// ELF-specific hacking.
69
// On ELF targets: request position-independent sequences (WANT_PIC) when
// the compiler driver defines __PIC__ or __PIE__, and provide the symbol
// type and size annotations used by FUNC and _ENDFUNC.
70#if __ELF__
71
72#if __PIC__ || __PIE__
73# define WANT_PIC 1
74#endif
75
// Mark `name' as a function symbol.
76#define TYPE_FUNC(name) .type name, STT_FUNC
77
// Record the size of `name' as the distance from its label to here.
78#define SIZE_OBJ(name) .size name, . - name
79
80#endif
81
82///--------------------------------------------------------------------------
0f23f75f
MW
83/// Windows-specific hacking.
84
// On 32-bit x86 Windows, external names get a leading underscore.  Other
// configurations fall through to the default F(name) defined at the end of
// this file.
85#if ABI_WIN
1a0c09c4
MW
86
87#if CPUFAM_X86
0f23f75f
MW
88# define F(name) _##name
89#endif
90
91#endif
92
93///--------------------------------------------------------------------------
94/// x86- and amd64-specific hacking.
95///
96/// It's (slightly) easier to deal with both of these in one go.
97
98#if CPUFAM_X86 || CPUFAM_AMD64
1a0c09c4
MW
99
100// Set the function hooks.
// Functions (and AUXFN helpers) start on a 16-byte boundary.
101#define FUNC_PREHOOK(_) .balign 16
102
f71dd54d
MW
103// On Windows, arrange to install stack-unwinding data.
// FUNC opens an SEH procedure right after the label, and ENDFUNC closes it.
104#if CPUFAM_AMD64 && ABI_WIN
105# define FUNC_POSTHOOK(name) .seh_proc name
106# define ENDFUNC_HOOK(_) .seh_endproc
107// Procedures are expected to invoke `.seh_setframe' if necessary, and
108// `.seh_pushreg' and friends, and `.seh_endprologue'.
109#endif
110
1a0c09c4
MW
111// Don't use the wretched AT&T syntax. It's festooned with pointless
112// punctuation, and all of the data movement is backwards. Ugh!
113 .intel_syntax noprefix
114
115// Call external subroutine at ADDR, possibly via PLT.
// With WANT_PIC the call goes through `ADDR@PLT'; otherwise it is a plain
// direct call.
8a1aa284 116.macro callext addr
1a0c09c4
MW
117#if WANT_PIC
118 call \addr@PLT
119#else
120 call \addr
121#endif
8a1aa284 122.endm
1a0c09c4
MW
123
124// Do I need to arrange a spare GOT register?
// NEED_GOT is set only for position-independent 32-bit x86 code.  GOTREG is
// the default register used by `ldgot', `leaext' and INTADDR when the caller
// does not name one.
125#if WANT_PIC && CPUFAM_X86
126# define NEED_GOT 1
127#endif
128#define GOTREG ebx // Not needed in AMD64 so don't care.
129
130// Maybe load GOT address into GOT.
//
// Expands to nothing unless building position-independent 32-bit x86 code.
// Otherwise it emits (once per register, via AUXFN) a helper `_ldgot.GOT'
// which copies the word at the top of the stack -- its own return address,
// since it is entered by `call' -- into GOT and returns.  The macro then
// calls the helper and adds `offset _GLOBAL_OFFSET_TABLE_' to the result.
8a1aa284 131.macro ldgot got=GOTREG
0f23f75f 132#if WANT_PIC && CPUFAM_X86
8ae4c946 133 AUXFN(_ldgot.\got)
1a0c09c4
MW
134 mov \got, [esp]
135 ret
8ae4c946
MW
136 ENDAUXFN
137 call _ldgot.\got
138 add \got, offset _GLOBAL_OFFSET_TABLE_
1a0c09c4 139#endif
8a1aa284 140.endm
1a0c09c4
MW
141
142// Load address of external symbol ADDR into REG, maybe using GOT.
//
// PIC, x86:      load the address from the GOT slot `[GOT + ADDR@GOT]'.
// PIC, AMD64:    load the address from `ADDR@GOTPCREL[rip]'.
// non-PIC x86:   move the absolute address as an immediate.
// non-PIC AMD64: form the address rip-relative with `lea'.
// GOT is only referenced in the PIC x86 case.
8a1aa284 143.macro leaext reg, addr, got=GOTREG
1a0c09c4 144#if WANT_PIC
0f23f75f 145# if CPUFAM_X86
1a0c09c4 146 mov \reg, [\got + \addr@GOT]
0f23f75f
MW
147# endif
148# if CPUFAM_AMD64
149 mov \reg, \addr@GOTPCREL[rip]
150# endif
1a0c09c4 151#else
0f23f75f 152# if CPUFAM_X86
1a0c09c4 153 mov \reg, offset \addr
0f23f75f
MW
154# endif
155# if CPUFAM_AMD64
156 lea \reg, \addr[rip]
157# endif
1a0c09c4 158#endif
8a1aa284 159.endm
1a0c09c4
MW
160
161// Address expression (possibly using a base register, and a displacement)
162// referring to ADDR, which is within our module, maybe using GOT.
//
// Use as INTADDR(addr) or INTADDR(addr, got).  INTADDR appends GOTREG and a
// dummy argument and INTADDR__0 keeps only the first two, so GOTREG is the
// default when no GOT register is given.  The expansion is the text that
// goes inside the brackets of a memory operand:
//   AMD64:        addr + rip
//   x86, PIC:     got + addr@GOTOFF
//   x86, non-PIC: addr
163#define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
164#define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
0f23f75f
MW
165#if CPUFAM_AMD64
166# define INTADDR__1(addr, got) addr + rip
167#elif WANT_PIC
1a0c09c4
MW
168# define INTADDR__1(addr, got) got + addr@GOTOFF
169#else
170# define INTADDR__1(addr, got) addr
171#endif
172
a13b5730
MW
173// Permutations for SIMD instructions. SHUF(D, C, B, A) is an immediate,
174// suitable for use in `pshufd' or `shufps', which copies element D
175// (0 <= D < 4) of the source to element 3 of the destination, element C to
176// element 2, element B to element 1, and element A to element 0.
//
// Each selector occupies two bits, with A in the least significant pair.
// For example, SHUF(3, 2, 1, 0) = 0xe4 is the identity permutation and
// SHUF(0, 1, 2, 3) = 0x1b reverses the element order.
177#define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
178
1a0c09c4
MW
179#endif
180
17de5b2e
MW
181#if CPUFAM_X86
182
// First stage of the debug-print macros (`msg', `reg', `xmmreg', ...).
// Saves the flags, eax, ecx, edx and ebp (five dwords, so the original
// esp is ebp + 20 afterwards and the saved ebp is at [ebp]), then carves a
// 16-byte-aligned, 512-byte area below the frame and dumps the
// FPU/MMX/SSE state into it with `fxsave'.  On exit esp points at that
// save area.  _reg.3 undoes all of this.
183.macro _reg.0
184 // Stash GP registers and establish temporary stack frame.
185 pushfd
186 push eax
187 push ecx
188 push edx
189 push ebp
190 mov ebp, esp
191 and esp, ~15
192 sub esp, 512
193 fxsave [esp]
194.endm
195
// Second and third stages of the debug-print macros.  Both are empty in
// this (x86) implementation; the callers invoke them between _reg.0 and
// _reg.3.
// NOTE(review): presumably placeholders kept for a fixed calling pattern --
// confirm.
196.macro _reg.1
197.endm
198
199.macro _reg.2
200.endm
201
// Final stage of the debug-print macros.  Pushes the address of the format
// string ";; FMT\n" on top of whatever arguments the caller has already
// stacked, calls printf, and then restores the state saved by _reg.0.
//
// The string is emitted inline in the instruction stream and jumped over.
// The save area address is recomputed from ebp with the same arithmetic
// _reg.0 used, so the amount of argument space pushed in between does not
// matter.
// NOTE(review): the string address is absolute and printf is called
// directly rather than through `callext'; presumably acceptable because
// this is debugging scaffolding -- confirm before using in PIC builds.
202.macro _reg.3 fmt
203 // Print FMT and the other established arguments.
204 lea eax, .L$_reg$msg.\@
205 push eax
206 call printf
207 jmp .L$_reg$cont.\@
208.L$_reg$msg.\@:
209 .ascii ";; \fmt\n\0"
210.L$_reg$cont.\@:
 // Locate the fxsave area again and reload the FPU/MMX/SSE state.
211 mov eax, ebp
212 and eax, ~15
213 sub eax, 512
214 fxrstor [eax]
 // Drop the temporary frame and restore registers in reverse push order.
215 mov esp, ebp
216 pop ebp
217 pop edx
218 pop ecx
219 pop eax
220 popfd
221.endm
222
// Debugging aid: print the literal text MSG (prefixed with ";; " and
// followed by a newline) via printf, with the registers saved by _reg.0
// restored afterwards.
223.macro msg msg
224 _reg.0
225 _reg.1
226 _reg.2
227 _reg.3 "\msg"
228.endm
229
// Debugging aid: print MSG and the value of 32-bit register R in hex.
//
// esp and ebp are changed by _reg.0, so their original values are
// recovered specially: the entry esp is ebp + 20 (five dwords were pushed),
// and the entry ebp is the dword saved at [ebp].  Any other register is
// pushed directly.
230.macro reg r, msg
231 _reg.0
232 .ifeqs "\r", "esp"
233 lea eax, [ebp + 20]
234 push eax
235 .else
236 .ifeqs "\r", "ebp"
237 push [ebp]
238 .else
239 push \r
240 .endif
241 .endif
242 _reg.1
243 _reg.2
244 _reg.3 "\msg: \r = %08x"
245.endm
246
// Debugging aid: print MSG and the four 32-bit elements of SSE register R
// in hex, most significant element first.
//
// The value is copied into xmm0 and its elements reversed, so that element
// 3 lands at the lowest stack address and is the first variadic argument
// printf consumes.  esp is 16-byte aligned after _reg.0 and stays so after
// reserving 16 bytes, which the aligned `movdqa' store relies on.  xmm0 is
// reloaded by the `fxrstor' in _reg.3.
247.macro xmmreg r, msg
248 _reg.0
249 _reg.1
250 _reg.2
251 movdqu xmm0, \r
252 pshufd xmm0, xmm0, SHUF(0, 1, 2, 3)
253 sub esp, 16
254 movdqa [esp], xmm0
255 _reg.3 "\msg: \r = %08x %08x %08x %08x"
256.endm
257
// Debugging aid: print MSG and the two 32-bit halves of MMX register R in
// hex, high half first.
//
// `pshufw' with SHUF(1, 0, 3, 2) exchanges the two pairs of 16-bit words,
// i.e. swaps the 32-bit halves, so the high half is stored at the lower
// address and is the first variadic argument printf consumes.  R is
// modified in place here; the `fxrstor' in _reg.3 reloads the state that
// _reg.0 saved.
258.macro mmreg r, msg
259 _reg.0
260 _reg.1
261 _reg.2
262 pshufw \r, \r, SHUF(1, 0, 3, 2)
263 sub esp, 8
264 movq [esp], \r
265 _reg.3 "\msg: \r = %08x %08x"
266.endm
267
// Debugging aid: print MSG and x87 register st(I) in decimal (`%.20Lg').
//
// The value is not read from the live FPU.  The FPU is reset with `finit'
// and the 80-bit value is loaded from the image `fxsave' wrote in _reg.0,
// at offset 32 + 16*I from esp (which still points at the save area here).
// It is then stored into 12 bytes of stack as printf's long double
// argument.
268.macro freg i, msg
269 _reg.0
270 _reg.1
271 _reg.2
272 finit
273 fldt [esp + 32 + 16*\i]
274 sub esp, 12
275 fstpt [esp]
276 _reg.3 "\msg: st(\i) = %.20Lg"
277.endm
278
// Debugging aid: print MSG and x87 register st(I) in hexadecimal
// floating-point notation (`%La').
//
// Same sequence as `freg', differing only in the format string: the FPU is
// reset with `finit', the 80-bit value is loaded from offset 32 + 16*I of
// the `fxsave' image left by _reg.0, and it is stored into 12 bytes of
// stack as printf's long double argument.
279.macro fxreg i, msg
280 _reg.0
281 _reg.1
282 _reg.2
283 finit
284 fldt [esp + 32 + 16*\i]
285 sub esp, 12
286 fstpt [esp]
287 _reg.3 "\msg: st(\i) = %La"
288.endm
289
290#endif
291
1a0c09c4 292///--------------------------------------------------------------------------
61bd904b
MW
293/// ARM-specific hacking.
294
59d86860 295#if CPUFAM_ARMEL
61bd904b 296
9f6eb05d
MW
297// ARM/Thumb mode things. Use ARM by default.
//
// ARM and THUMB select the instruction set and record in `.L$_pcoff' the
// offset (8 for ARM, 4 for Thumb) which `ldgot' and `leaextq' subtract
// when they build pc-relative literals.
298#define ARM .arm; .L$_pcoff = 8
299#define THUMB .thumb; .L$_pcoff = 4
300 ARM
301
61bd904b
MW
302// Set the function hooks.
// Functions start on a 4-byte boundary, and the end of each function emits
// the assembler's own literal pool with `.ltorg'.
303#define FUNC_PREHOOK(_) .balign 4
304#define ENDFUNC_HOOK(name) .ltorg
305
306// Call external subroutine at ADDR, possibly via PLT.
// COND is an optional condition-code suffix appended to `bl'.  With
// WANT_PIC the target is written as `ADDR(PLT)'.
8a1aa284 307.macro callext addr, cond=
61bd904b
MW
308#if WANT_PIC
309 bl\cond \addr(PLT)
310#else
311 bl\cond \addr
312#endif
8a1aa284 313.endm
61bd904b
MW
314
315// Do I need to arrange a spare GOT register?
// On ARM a GOT register is needed whenever building position-independent
// code.  GOTREG is the default register used by `ldgot' and `leaext'.
316#if WANT_PIC
317# define NEED_GOT 1
318#endif
319#define GOTREG r9
320
321// Maybe load GOT address into GOT.
//
// Expands to nothing unless WANT_PIC is set.  COND is an optional
// condition-code suffix applied to both instructions.  The literal holds
// the distance from the value pc has at the `add' instruction (its label
// plus `.L$_pcoff') to _GLOBAL_OFFSET_TABLE_, so adding pc yields the GOT
// address.  The literal itself is placed in the literal subsection via
// _LIT/_ENDLIT.
8a1aa284 322.macro ldgot cond=, got=GOTREG
61bd904b 323#if WANT_PIC
adca2a18
MW
324 ldr\cond \got, .L$_ldgot$\@
325.L$_ldgot_pc$\@:
2d03a881 326 add\cond \got, pc, \got
8a1aa284 327 _LIT
adca2a18
MW
328 .balign 4
329.L$_ldgot$\@:
9f6eb05d 330 .word _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
8a1aa284 331 _ENDLIT
61bd904b 332#endif
8a1aa284 333.endm
61bd904b
MW
334
335// Load address of external symbol ADDR into REG, maybe using GOT.
//
// With WANT_PIC: load the `ADDR(GOT)' offset from a literal placed via
// _LIT/_ENDLIT, then load the address from the GOT slot at `[GOT, REG]'.
// GOT must already hold the GOT address (see `ldgot').  Without WANT_PIC:
// use the assembler's `ldr REG, =ADDR' form.  COND is an optional
// condition-code suffix applied to every instruction.
8a1aa284 336.macro leaext reg, addr, cond=, got=GOTREG
61bd904b 337#if WANT_PIC
adca2a18 338 ldr\cond \reg, .L$_leaext$\@
2d03a881 339 ldr\cond \reg, [\got, \reg]
8a1aa284 340 _LIT
adca2a18
MW
341 .balign 4
342.L$_leaext$\@:
343 .word \addr(GOT)
8a1aa284 344 _ENDLIT
61bd904b 345#else
2d03a881 346 ldr\cond \reg, =\addr
61bd904b 347#endif
8a1aa284 348.endm
61bd904b 349
0c53ac58 350// Load address of external symbol ADDR into REG directly.
//
// Unlike `leaext', this needs no GOT register.  With WANT_PIC the literal
// holds a `GOT_PREL' reference to ADDR, adjusted by the distance from the
// value pc has at the labelled instruction (label plus `.L$_pcoff') to
// the literal itself, so that pc + literal addresses the GOT slot.  When
// `.L$_pcoff' is 8 a single pc-relative `ldr' fetches the slot; otherwise
// pc is added first and the slot is loaded in a second instruction.
// Without WANT_PIC this is just `ldr REG, =ADDR'.  COND is an optional
// condition-code suffix.
8a1aa284 351.macro leaextq reg, addr, cond=
0c53ac58
MW
352#if WANT_PIC
353 ldr\cond \reg, .L$_leaextq$\@
354.L$_leaextq_pc$\@:
9f6eb05d 355 .if .L$_pcoff == 8
0c53ac58 356 ldr\cond \reg, [pc, \reg]
9f6eb05d
MW
357 .else
358 add\cond \reg, pc
359 ldr\cond \reg, [\reg]
360 .endif
8a1aa284 361 _LIT
0c53ac58
MW
362 .balign 4
363.L$_leaextq$\@:
9f6eb05d 364 .word \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
8a1aa284 365 _ENDLIT
0c53ac58
MW
366#else
367 ldr\cond \reg, =\addr
368#endif
8a1aa284 369.endm
0c53ac58 370
61bd904b
MW
371#endif
372
373///--------------------------------------------------------------------------
1a0c09c4
MW
374/// Final stuff.
375
376// Default values for the various hooks.
// Anything the platform sections above did not define becomes a no-op, so
// FUNC and _ENDFUNC can use every hook unconditionally.
377#ifndef FUNC_PREHOOK
378# define FUNC_PREHOOK(name)
379#endif
380#ifndef FUNC_POSTHOOK
381# define FUNC_POSTHOOK(name)
382#endif
383#ifndef ENDFUNC_HOOK
384# define ENDFUNC_HOOK(name)
385#endif
386
// By default external names are used undecorated.
387#ifndef F
388# define F(name) name
389#endif
390
// Symbol type and size annotations are omitted where not defined above.
391#ifndef TYPE_FUNC
392# define TYPE_FUNC(name)
393#endif
394
395#ifndef SIZE_OBJ
396# define SIZE_OBJ(name)
397#endif
398
399///----- That's all, folks --------------------------------------------------