base/asm-common.h: Add some general C preprocessor utilities.
[catacomb] / base / asm-common.h
CommitLineData
1a0c09c4
MW
1/// -*- mode: asm; asm-comment-char: ?/ -*-
2///
/// Common definitions for assembler source files
4///
5/// (c) 2015 Straylight/Edgeware
6///
7
8///----- Licensing notice ---------------------------------------------------
9///
10/// This file is part of Catacomb.
11///
12/// Catacomb is free software; you can redistribute it and/or modify
13/// it under the terms of the GNU Library General Public License as
14/// published by the Free Software Foundation; either version 2 of the
15/// License, or (at your option) any later version.
16///
17/// Catacomb is distributed in the hope that it will be useful,
18/// but WITHOUT ANY WARRANTY; without even the implied warranty of
19/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20/// GNU Library General Public License for more details.
21///
22/// You should have received a copy of the GNU Library General Public
23/// License along with Catacomb; if not, write to the Free
24/// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25/// MA 02111-1307, USA.
26
27///--------------------------------------------------------------------------
28/// General definitions.
29
898f32b3
MW
// Preprocessor hacks.
//
// STRINGY(x) macro-expands X and then turns the result into a string
// literal.  GLUE(x, y) macro-expands X and Y and then pastes the results
// together into a single token.  Each goes through an inner helper because
// the `#' and `##' operators suppress expansion of their operands.  _EMPTY
// expands to nothing at all.
#define STRINGY(x) _STRINGY(x)
#define _STRINGY(x) #x
#define GLUE(x, y) _GLUE(x, y)
#define _GLUE(x, y) x##y
#define _EMPTY
36
f8e509a9
MW
// Assembly-time state.
//
// `.L$_subsec' holds the number of the `.text' subsection which is
// currently receiving code.
	.L$_subsec = 0

// Literal pools done the hard way.
//
// Wrap literal data in `_LIT' ... `_ENDLIT': `_LIT' switches to text
// subsection `.L$_subsec + 1', and `_ENDLIT' switches back to subsection
// `.L$_subsec'.  `_LTORG' steps `.L$_subsec' on by two and selects the new
// code subsection, so that subsequent code and literals use a fresh pair of
// subsection numbers.
#define _LIT .text .L$_subsec + 1
#define _ENDLIT .text .L$_subsec
#define _LTORG \
	.L$_subsec = .L$_subsec + 2; \
	.text .L$_subsec
f8e509a9 44
645fcce0
MW
// ELF section types.
//
// _SECTTY(ty) spells the section-type argument for `.section': the type
// name is prefixed with `%' when building for ARM, and with `@' otherwise.
#if __ELF__
#  if CPUFAM_ARMEL
#    define _SECTTY(ty) %ty
#  else
#    define _SECTTY(ty) @ty
#  endif
#endif

// Section selection.
//
// TEXT resumes the current code subsection and DATA selects `.data'.
// RODATA selects `.rdata' on Windows and `.rodata' on ELF; anywhere else it
// simply falls back to TEXT.
#define TEXT .text .L$_subsec
#define DATA .data
#if ABI_WIN
#  define RODATA .section .rdata, "dr"
#elif __ELF__
#  define RODATA .section .rodata, "a", _SECTTY(progbits)
#else
#  define RODATA TEXT
#endif
64
1a0c09c4
MW
// Announcing an external function.
//
// FUNC(name) exports the symbol F(name), applies the object format's
// function-type annotation, runs the pre-hook, plants the label, and then
// runs the post-hook.  Along the way it defines a macro `ENDFUNC' which
// remembers NAME, so the function body is closed with a bare `ENDFUNC'.
#define FUNC(name) \
	.globl	F(name); \
	TYPE_FUNC(name); \
	.macro ENDFUNC; _ENDFUNC(name); .endm; \
	FUNC_PREHOOK(name); \
F(name): \
	FUNC_POSTHOOK(name)

// Marking the end of a function.
//
// This is the body of the `ENDFUNC' macro set up by FUNC.  It retires
// `ENDFUNC' itself (so that the next FUNC can define it afresh), records the
// function's size, runs the end-of-function hook, and finally starts a new
// code/literal subsection pair.
#define _ENDFUNC(name) \
	.purgem	ENDFUNC; \
	SIZE_OBJ(name); \
	ENDFUNC_HOOK(name); \
	_LTORG
1a0c09c4 80
8ae4c946
MW
// Make a helper function, if necessary.
//
// Usage is `AUXFN(name)', then the helper's body, then `ENDAUXFN'.  The
// body is assembled only if the symbol `.L$_auxfn_def.NAME' hasn't been
// defined yet, so repeated requests for the same helper produce one copy.
// The helper is placed in text subsection 7128, after the pre-hook has been
// run.  `ENDAUXFN' returns to the current code subsection, defines the
// marker symbol, and closes the conditional.
#define AUXFN(name) \
	.ifndef .L$_auxfn_def.name; \
	.text	7128; \
	.macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
	FUNC_PREHOOK(name); \
name:
#define _ENDAUXFN_TAIL(name) \
	.purgem	_ENDAUXFN; \
	.text	.L$_subsec; \
	.L$_auxfn_def.name = 1
#define ENDAUXFN _ENDAUXFN; .endif
93
1a0c09c4
MW
94///--------------------------------------------------------------------------
95/// ELF-specific hacking.
96
#if __ELF__

// Notice when the compiler driver has asked for position-independent code.
#if __PIC__ || __PIE__
#  define WANT_PIC 1
#endif

// Symbol annotations: mark NAME as a function, and record its size as the
// distance from its label to the current location.
#define TYPE_FUNC(name) .type name, STT_FUNC
#define SIZE_OBJ(name) .size name, . - name

#endif
108
109///--------------------------------------------------------------------------
0f23f75f
MW
110/// Windows-specific hacking.
111
#if ABI_WIN

// On 32-bit x86, external function labels carry a leading underscore.
#if CPUFAM_X86
#  define F(name) _##name
#endif

#endif
119
120///--------------------------------------------------------------------------
121/// x86- and amd64-specific hacking.
122///
123/// It's (slightly) easier to deal with both of these in one go.
124
125#if CPUFAM_X86 || CPUFAM_AMD64
1a0c09c4
MW
126
// Set the function hooks.  Every function starts on a 16-byte boundary.
#define FUNC_PREHOOK(_) .balign 16

// On Windows, arrange to install stack-unwinding data: each function is
// bracketed by `.seh_proc' and `.seh_endproc'.
#if CPUFAM_AMD64 && ABI_WIN
#  define FUNC_POSTHOOK(name) .seh_proc name
#  define ENDFUNC_HOOK(_) .seh_endproc
// Procedures are expected to invoke `.seh_setframe' if necessary, and
// `.seh_pushreg' and friends, and `.seh_endprologue'.
#endif

// Don't use the wretched AT&T syntax.  It's festooned with pointless
// punctuation, and all of the data movement is backwards.  Ugh!
	.intel_syntax noprefix
141
// Call external subroutine at ADDR, possibly via PLT.
//
// A plain `call' is used unless position-independent code is wanted, in
// which case the call goes through the subroutine's PLT entry.
.macro	callext	addr
#if !WANT_PIC
	call	\addr
#else
	call	\addr@PLT
#endif
.endm
1a0c09c4
MW
150
// Do I need to arrange a spare GOT register?  Only for position-independent
// code on 32-bit x86.
#if WANT_PIC && CPUFAM_X86
#  define NEED_GOT 1
#endif
#define GOTREG ebx			// Not needed in AMD64 so don't care.

// Maybe load GOT address into GOT.
//
// When a GOT register is needed, this makes (at most once per register) a
// little helper `_ldgot.GOT' which copies its own return address from the
// top of the stack into GOT.  The macro calls the helper and then adds
// `offset _GLOBAL_OFFSET_TABLE_' to GOT.  Otherwise the macro expands to
// nothing.
.macro	ldgot	got=GOTREG
#if WANT_PIC && CPUFAM_X86
	AUXFN(_ldgot.\got)
	mov	\got, [esp]
	ret
	ENDAUXFN
	call	_ldgot.\got
	add	\got, offset _GLOBAL_OFFSET_TABLE_
#endif
.endm
1a0c09c4
MW
168
// Load address of external symbol ADDR into REG, maybe using GOT.
//
//   * x86, PIC: fetch the address from ADDR's GOT slot, relative to GOT.
//   * x86, non-PIC: load the address as an immediate.
//   * AMD64, PIC: fetch the address from ADDR's GOT slot, relative to `rip'.
//   * AMD64, non-PIC: compute the address relative to `rip'.
.macro	leaext	reg, addr, got=GOTREG
#if CPUFAM_X86
#  if WANT_PIC
	mov	\reg, [\got + \addr@GOT]
#  else
	mov	\reg, offset \addr
#  endif
#endif
#if CPUFAM_AMD64
#  if WANT_PIC
	mov	\reg, \addr@GOTPCREL[rip]
#  else
	lea	\reg, \addr[rip]
#  endif
#endif
.endm
1a0c09c4
MW
187
// Address expression (possibly using a base register, and a displacement)
// referring to ADDR, which is within our module, maybe using GOT.
//
// Write `INTADDR(addr)' or `INTADDR(addr, got)'.  The variadic shuffle
// appends GOTREG after the caller's arguments, so GOTREG is what lands in
// the GOT position when the caller gives only ADDR.  The result is
// `ADDR + rip' on AMD64, `GOT + ADDR@GOTOFF' in 32-bit position-independent
// code, and plain ADDR otherwise.
#define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
#define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
#if CPUFAM_AMD64
#  define INTADDR__1(addr, got) addr + rip
#elif WANT_PIC
#  define INTADDR__1(addr, got) got + addr@GOTOFF
#else
#  define INTADDR__1(addr, got) addr
#endif
199
a13b5730
MW
// Permutations for SIMD instructions.  SHUF(D, C, B, A) is an immediate,
// suitable for use in `pshufd' or `shufps', which copies element D
// (0 <= D < 4) of the source to element 3 of the destination, element C to
// element 2, element B to element 1, and element A to element 0.  Each
// selector occupies two bits of the result, with A in the least significant
// pair.
#define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
205
1a0c09c4
MW
206#endif
207
17de5b2e
MW
208#if CPUFAM_X86
209
// Debugging-dump machinery, stage 0: save state and make a scratch frame.
//
// Afterwards, `ebp' points at the saved registers:
//
//	[ebp +  0]	caller's ebp
//	[ebp +  4]	caller's edx
//	[ebp +  8]	caller's ecx
//	[ebp + 12]	caller's eax
//	[ebp + 16]	caller's flags
//	 ebp + 20	caller's esp
//
// and `esp' points at a 16-byte-aligned, 512-byte `fxsave' image of the
// floating-point and SIMD state.
.macro	_reg.0
	// Stash GP registers and establish temporary stack frame.
	pushfd
	push	eax
	push	ecx
	push	edx
	push	ebp
	mov	ebp, esp

	// Carve out an aligned save area and fill it in.
	and	esp, ~15
	sub	esp, 512
	fxsave	[esp]
.endm

// Stages 1 and 2: nothing to do here.
.macro	_reg.1
.endm

.macro	_reg.2
.endm
228
// Debugging-dump machinery, final stage: print, then put everything back.
//
// The values to be printed must already be on the stack.  The format
// string, wrapped as `;; FMT' plus a newline, is assembled inline and
// jumped over.
.macro	_reg.3	fmt
	// Print FMT and the other established arguments.
	lea	eax, .L$_reg$msg.\@
	push	eax
	call	printf
	jmp	.L$_reg$cont.\@
.L$_reg$msg.\@:
	.ascii	";; \fmt\n\0"
.L$_reg$cont.\@:

	// Find the `fxsave' image again, by repeating the calculation from
	// stage 0, and reload the floating-point and SIMD state from it.
	mov	eax, ebp
	and	eax, ~15
	sub	eax, 512
	fxrstor	[eax]

	// Discard the frame and recover the general-purpose registers and
	// flags, in the reverse of the order in which they were saved.
	mov	esp, ebp
	pop	ebp
	pop	edx
	pop	ecx
	pop	eax
	popfd
.endm
249
// Print the message MSG, preserving all registers.
.macro	msg	msg
	_reg.0
	_reg.1
	_reg.2
	_reg.3	"\msg"
.endm

// Print MSG and the value of general-purpose register R in hex, preserving
// all registers.
//
// By the time the value is pushed, stage 0 has already changed `esp' and
// `ebp', so those two are special: the caller's `esp' is reconstructed as
// `ebp + 20' (just above the five saved words), and the caller's `ebp' is
// fetched from its save slot at `[ebp]'.
.macro	reg	r, msg
	_reg.0
  .ifeqs "\r", "esp"
	lea	eax, [ebp + 20]
	push	eax
  .else
    .ifeqs "\r", "ebp"
	push	[ebp]
    .else
	push	\r
    .endif
  .endif
	_reg.1
	_reg.2
	_reg.3	"\msg: \r = %08x"
.endm
273
// Print MSG and the four 32-bit elements of SSE register R in hex,
// preserving all registers.
//
// The shuffle (selector 0x1b) reverses the order of the elements, so that
// element 3 ends up lowest in memory and is therefore printed first.
.macro	xmmreg	r, msg
	_reg.0
	_reg.1
	_reg.2
	movdqu	xmm0, \r
	pshufd	xmm0, xmm0, 0x1b
	sub	esp, 16
	movdqa	[esp], xmm0
	_reg.3	"\msg: \r = %08x %08x %08x %08x"
.endm

// Print MSG and the two 32-bit halves of MMX register R in hex, preserving
// all registers.
//
// The shuffle (selector 0x4e) exchanges the two halves of R in place, so
// that the upper half is printed first; R gets its old value back when the
// final stage reloads the saved state.
.macro	mmreg	r, msg
	_reg.0
	_reg.1
	_reg.2
	pshufw	\r, \r, 0x4e
	sub	esp, 8
	movq	[esp], \r
	_reg.3	"\msg: \r = %08x %08x"
.endm
294
// Print MSG and the value of floating-point stack register I in decimal,
// preserving all registers.
//
// The value is read from the `fxsave' image that stage 0 left at `esp':
// register slots there begin at offset 32 and are 16 bytes apart.  It is
// loaded into a freshly initialized FPU and stored back out as a 12-byte
// argument for `printf'.
.macro	freg	i, msg
	_reg.0
	_reg.1
	_reg.2
	finit
	fldt	[esp + 32 + 16*\i]
	sub	esp, 12
	fstpt	[esp]
	_reg.3	"\msg: st(\i) = %.20Lg"
.endm

// As `freg', but print the value in hexadecimal floating-point format.
.macro	fxreg	i, msg
	_reg.0
	_reg.1
	_reg.2
	finit
	fldt	[esp + 32 + 16*\i]
	sub	esp, 12
	fstpt	[esp]
	_reg.3	"\msg: st(\i) = %La"
.endm
316
317#endif
318
1a0c09c4 319///--------------------------------------------------------------------------
61bd904b
MW
320/// ARM-specific hacking.
321
59d86860 322#if CPUFAM_ARMEL
61bd904b 323
9f6eb05d
MW
// ARM/Thumb mode things.  Use ARM by default.
//
// Each mode switch also sets `.L$_pcoff', the amount subtracted in
// pc-relative literal calculations to allow for `pc' reading ahead of the
// current instruction: 8 in ARM mode, and 4 in Thumb mode.
#define ARM .arm; .L$_pcoff = 8
#define THUMB .thumb; .L$_pcoff = 4
	ARM

// Set the function hooks.  Functions start on a 4-byte boundary, and the
// assembler's own literal pool is dumped at the end of each one.
#define FUNC_PREHOOK(_) .balign 4
#define ENDFUNC_HOOK(name) .ltorg
332
// Call external subroutine at ADDR, possibly via PLT.
//
// COND is an optional condition-code suffix for the branch.
.macro	callext	addr, cond=
#if !WANT_PIC
	bl\cond	\addr
#else
	bl\cond	\addr(PLT)
#endif
.endm
61bd904b
MW
341
// Do I need to arrange a spare GOT register?  Yes, whenever the code is to
// be position-independent.
#if WANT_PIC
#  define NEED_GOT 1
#endif
#define GOTREG r9

// Maybe load GOT address into GOT.
//
// In position-independent code, a literal word holds the distance from the
// value `pc' has at the `add' instruction to the GOT; loading that word and
// adding `pc' leaves the GOT's address in GOT.  COND is an optional
// condition-code suffix applied to both instructions.  Otherwise this
// expands to nothing.
.macro	ldgot	cond=, got=GOTREG
#if WANT_PIC
	ldr\cond	\got, .L$_ldgot$\@
.L$_ldgot_pc$\@:
	add\cond	\got, pc, \got
	_LIT
	.balign	4
.L$_ldgot$\@:
	.word	_GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
	_ENDLIT
#endif
.endm
61bd904b
MW
361
// Load address of external symbol ADDR into REG, maybe using GOT.
//
// In position-independent code, a literal word holds the offset of ADDR's
// GOT slot; REG first receives that offset and then the slot's contents,
// fetched relative to the GOT register.  Otherwise the address comes from
// the assembler's literal pool.  COND is an optional condition-code suffix.
.macro	leaext	reg, addr, cond=, got=GOTREG
#if !WANT_PIC
	ldr\cond	\reg, =\addr
#else
	ldr\cond	\reg, .L$_leaext$\@
	ldr\cond	\reg, [\got, \reg]
	_LIT
	.balign	4
.L$_leaext$\@:
	.word	\addr(GOT)
	_ENDLIT
#endif
.endm
61bd904b 376
// Load address of external symbol ADDR into REG directly.
//
// No GOT register is involved.  In position-independent code, a literal
// word holds a pc-relative offset to ADDR's GOT slot, adjusted for the
// position of the instruction which applies `pc'.  When `.L$_pcoff' is 8
// (ARM mode), a single load indexed by `pc' fetches the slot; otherwise
// `pc' is added into REG first and the slot is then loaded through REG.
// Without position-independence, the address comes from the assembler's
// literal pool.  COND is an optional condition-code suffix.
.macro	leaextq	reg, addr, cond=
#if !WANT_PIC
	ldr\cond	\reg, =\addr
#else
	ldr\cond	\reg, .L$_leaextq$\@
.L$_leaextq_pc$\@:
  .if .L$_pcoff == 8
	ldr\cond	\reg, [pc, \reg]
  .else
	add\cond	\reg, pc
	ldr\cond	\reg, [\reg]
  .endif
	_LIT
	.balign	4
.L$_leaextq$\@:
	.word	\addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
	_ENDLIT
#endif
.endm
0c53ac58 397
61bd904b
MW
398#endif
399
400///--------------------------------------------------------------------------
1a0c09c4
MW
401/// Final stuff.
402
// Default values for the various hooks.  Anything which the
// platform-specific sections above left undefined expands to nothing.
#if !defined(FUNC_PREHOOK)
#  define FUNC_PREHOOK(name)
#endif
#if !defined(FUNC_POSTHOOK)
#  define FUNC_POSTHOOK(name)
#endif
#if !defined(ENDFUNC_HOOK)
#  define ENDFUNC_HOOK(name)
#endif

// By default, function labels are spelled exactly as given.
#if !defined(F)
#  define F(name) name
#endif

// By default, there are no symbol type or size annotations.
#if !defined(TYPE_FUNC)
#  define TYPE_FUNC(name)
#endif

#if !defined(SIZE_OBJ)
#  define SIZE_OBJ(name)
#endif
425
1aa5bfa8
MW
// Unless the including file has asked for an executable stack by defining
// `WANT_EXECUTABLE_STACK', emit an empty `.note.GNU-stack' section with no
// flags.  The GNU toolchain reads this as a declaration that the object
// doesn't need its stack to be executable; leaving the section out would
// make the linker assume the opposite.  The `.pushsection'/`.popsection'
// pair leaves the current section unchanged.
#if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
	.pushsection .note.GNU-stack, "", _SECTTY(progbits)
	.popsection
#endif
430
1a0c09c4 431///----- That's all, folks --------------------------------------------------