base/asm-common.h: Include `.note.GNU-stack' section on ELF targets.
[catacomb] / base / asm-common.h
1 /// -*- mode: asm; asm-comment-char: ?/ -*-
2 ///
/// Common definitions for assembler source files
4 ///
5 /// (c) 2015 Straylight/Edgeware
6 ///
7
8 ///----- Licensing notice ---------------------------------------------------
9 ///
10 /// This file is part of Catacomb.
11 ///
12 /// Catacomb is free software; you can redistribute it and/or modify
13 /// it under the terms of the GNU Library General Public License as
14 /// published by the Free Software Foundation; either version 2 of the
15 /// License, or (at your option) any later version.
16 ///
17 /// Catacomb is distributed in the hope that it will be useful,
18 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
19 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 /// GNU Library General Public License for more details.
21 ///
22 /// You should have received a copy of the GNU Library General Public
23 /// License along with Catacomb; if not, write to the Free
24 /// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 /// MA 02111-1307, USA.
26
27 ///--------------------------------------------------------------------------
28 /// General definitions.
29
// Number of the text subsection currently receiving code.  The literal pool
// belonging to that code is assembled into the next subsection up.
        .L$_subsec = 0

// Literal pools done the hard way.
//
//   _LIT       switch to the literal-pool subsection (`.L$_subsec + 1');
//   _ENDLIT    switch back to the code subsection (`.L$_subsec');
//   _LTORG     step `.L$_subsec' on by two, so that later code and its
//              literals land in a fresh pair of subsections, and switch to
//              the new code subsection.
#define _LIT            .text .L$_subsec + 1
#define _ENDLIT         .text .L$_subsec
#define _LTORG          .L$_subsec = .L$_subsec + 2; .text .L$_subsec
37
// Announce an external function NAME.
//
// The (possibly decorated) symbol `F(NAME)' is made global and passed
// through `TYPE_FUNC'; then `FUNC_PREHOOK' runs, the entry label is placed,
// and `FUNC_POSTHOOK' runs.  A throwaway `ENDFUNC' macro is defined which
// remembers NAME, so that the function can be closed without repeating it.
#define FUNC(name)                                                      \
        .globl  F(name);                                                \
        TYPE_FUNC(name);                                                \
        .macro  ENDFUNC;                                                \
                _ENDFUNC(name);                                         \
        .endm;                                                          \
        FUNC_PREHOOK(name);                                             \
F(name):                                                                \
        FUNC_POSTHOOK(name)
46
// Mark the end of the function NAME.
//
// This is the body of the `ENDFUNC' macro set up by `FUNC': it discards
// that macro (so the next `FUNC' can define a new one), applies `SIZE_OBJ'
// and `ENDFUNC_HOOK', and finally starts a fresh pair of code/literal
// subsections with `_LTORG'.
#define _ENDFUNC(name)                                                  \
        .purgem ENDFUNC;                                                \
        SIZE_OBJ(name);                                                 \
        ENDFUNC_HOOK(name);                                             \
        _LTORG
53
// Make a helper function, if necessary.
//
// `AUXFN(NAME)' ... `ENDAUXFN' brackets the body of a helper function.  The
// body is assembled only the first time: the symbol `.L$_auxfn_def.NAME' is
// set once the helper has been emitted, and the whole lot is skipped (via
// `.ifndef') if it is already defined.  The helper lives in its own text
// subsection, `_AUXFN_SUBSEC', out of line from the code which asks for it;
// `ENDAUXFN' switches back to the current code subsection.

// Text subsection in which helper functions are collected.
#define _AUXFN_SUBSEC 7128

#define AUXFN(name)                                                     \
        .ifndef .L$_auxfn_def.name;                                     \
        .text   _AUXFN_SUBSEC;                                          \
        .macro  _ENDAUXFN;                                              \
                _ENDAUXFN_TAIL(name);                                   \
        .endm;                                                          \
        FUNC_PREHOOK(name);                                             \
name:

// Tail of a helper function: drop the one-shot `_ENDAUXFN' macro, return to
// the code subsection, and record that NAME has now been defined.
#define _ENDAUXFN_TAIL(name)                                            \
        .purgem _ENDAUXFN;                                              \
        .text   .L$_subsec;                                             \
        .L$_auxfn_def.name = 1

// Close a helper function, and the `.ifndef' opened by `AUXFN'.
#define ENDAUXFN _ENDAUXFN; .endif
66
67 ///--------------------------------------------------------------------------
68 /// ELF-specific hacking.
69
#if __ELF__

// Position-independent code is wanted if the compiler driver says we're
// building either PIC or PIE.
#if __PIC__ || __PIE__
#  define WANT_PIC 1
#endif

// Tell the assembler that NAME is a function.
#define TYPE_FUNC(name)         .type name, STT_FUNC

// Record the size of NAME: the distance from its label to here.
#define SIZE_OBJ(name)          .size name, . - name

#endif
81
82 ///--------------------------------------------------------------------------
83 /// Windows-specific hacking.
84
// On 32-bit x86 Windows, external symbol names carry a leading underscore.
#if ABI_WIN && CPUFAM_X86
#  define F(name) _##name
#endif
92
93 ///--------------------------------------------------------------------------
94 /// x86- and amd64-specific hacking.
95 ///
96 /// It's (slightly) easier to deal with both of these in one go.
97
98 #if CPUFAM_X86 || CPUFAM_AMD64
99
// Function hooks: start every function on a 16-byte boundary.
#define FUNC_PREHOOK(_)         .balign 16

// On 64-bit Windows, bracket each function with the directives which
// produce stack-unwinding data.  Procedures are expected to invoke
// `.seh_setframe' if necessary, and `.seh_pushreg' and friends, and
// `.seh_endprologue'.
#if CPUFAM_AMD64 && ABI_WIN
#  define FUNC_POSTHOOK(name)   .seh_proc name
#  define ENDFUNC_HOOK(_)       .seh_endproc
#endif

// Don't use the wretched AT&T syntax.  It's festooned with pointless
// punctuation, and all of the data movement is backwards.  Ugh!
        .intel_syntax noprefix
114
// Call the external subroutine at ADDR.  In position-independent code the
// call goes via the PLT; otherwise it is a plain direct call.
        .macro  callext addr
#if !WANT_PIC
        call    \addr
#else
        call    \addr@PLT
#endif
        .endm
123
// Do I need to arrange a spare GOT register?  Only position-independent
// 32-bit x86 code does: `NEED_GOT' is set in exactly that case.
#if WANT_PIC && CPUFAM_X86
#  define NEED_GOT 1
#endif

// The default register for holding the GOT address.  (Not needed in AMD64,
// so don't care what it's set to there.)
#define GOTREG ebx
129
// Maybe load the GOT address into the register GOT (default `GOTREG').
//
// Only position-independent 32-bit x86 code emits anything.  The first use
// for a given register creates the helper `_ldgot.GOT', which copies its own
// return address from the top of the stack into GOT and returns.  Calling
// it therefore leaves the address of the following `add' in GOT, and that
// `add' applies `offset _GLOBAL_OFFSET_TABLE_' to it.
// NOTE(review): this relies on the assembler treating that operand as the
// GOT's displacement from the instruction -- confirm against the binutils
// documentation.
        .macro  ldgot got=GOTREG
#if WANT_PIC && CPUFAM_X86
        // Out-of-line helper: fetch the caller's return address.
        AUXFN(_ldgot.\got)
        mov     \got, [esp]
        ret
        ENDAUXFN

        // Collect our own address, and convert it into the GOT's.
        call    _ldgot.\got
        add     \got, offset _GLOBAL_OFFSET_TABLE_
#endif
        .endm
141
// Load the address of the external symbol ADDR into REG.
//
//   * 32-bit x86, PIC: fetch the symbol's GOT entry, using the GOT address
//     held in the register GOT (default `GOTREG');
//   * 32-bit x86, non-PIC: load the absolute address as an immediate;
//   * AMD64, PIC: fetch the GOT entry, addressed relative to `rip';
//   * AMD64, non-PIC: compute the address directly, relative to `rip'.
        .macro  leaext reg, addr, got=GOTREG
#if CPUFAM_X86
#  if WANT_PIC
        mov     \reg, [\got + \addr@GOT]
#  else
        mov     \reg, offset \addr
#  endif
#endif
#if CPUFAM_AMD64
#  if WANT_PIC
        mov     \reg, \addr@GOTPCREL[rip]
#  else
        lea     \reg, \addr[rip]
#  endif
#endif
        .endm
160
// Address expression (possibly using a base register, and a displacement)
// referring to ADDR, which is within our module, maybe using GOT.
//
// Usage: `INTADDR(addr)' or `INTADDR(addr, got)'.  The argument list is
// padded with `GOTREG' and a dummy, and `INTADDR__0' keeps the first two
// items, so that GOT defaults to `GOTREG' when it is omitted.
//
//   * AMD64: ADDR relative to `rip';
//   * otherwise, PIC: ADDR's offset from the GOT, added to the register GOT;
//   * otherwise: just ADDR, as an absolute address.
#define INTADDR(...)                    INTADDR__0(__VA_ARGS__, GOTREG, dummy)
#define INTADDR__0(addr, got, ...)      INTADDR__1(addr, got)
#if CPUFAM_AMD64
#  define INTADDR__1(addr, got)         addr + rip
#elif WANT_PIC
#  define INTADDR__1(addr, got)         got + addr@GOTOFF
#else
#  define INTADDR__1(addr, got)         addr
#endif
172
// Permutations for SIMD instructions.  SHUF(D, C, B, A) is an immediate,
// suitable for use in `pshufd' or `shufps', which copies element D
// (0 <= D < 4) of the source to element 3 of the destination, element C to
// element 2, element B to element 1, and element A to element 0.
//
// The four selectors are packed as base-4 digits, with D most significant.
#define SHUF(d, c, b, a) ((((d)*4 + (c))*4 + (b))*4 + (a))
178
179 #endif
180
181 #if CPUFAM_X86
182
183 .macro _reg.0
184 // Stash GP registers and establish temporary stack frame.
185 pushfd
186 push eax
187 push ecx
188 push edx
189 push ebp
190 mov ebp, esp
191 and esp, ~15
192 sub esp, 512
193 fxsave [esp]
194 .endm
195
196 .macro _reg.1
197 .endm
198
199 .macro _reg.2
200 .endm
201
202 .macro _reg.3 fmt
203 // Print FMT and the other established arguments.
204 lea eax, .L$_reg$msg.\@
205 push eax
206 call printf
207 jmp .L$_reg$cont.\@
208 .L$_reg$msg.\@:
209 .ascii ";; \fmt\n\0"
210 .L$_reg$cont.\@:
211 mov eax, ebp
212 and eax, ~15
213 sub eax, 512
214 fxrstor [eax]
215 mov esp, ebp
216 pop ebp
217 pop edx
218 pop ecx
219 pop eax
220 popfd
221 .endm
222
223 .macro msg msg
224 _reg.0
225 _reg.1
226 _reg.2
227 _reg.3 "\msg"
228 .endm
229
230 .macro reg r, msg
231 _reg.0
232 .ifeqs "\r", "esp"
233 lea eax, [ebp + 20]
234 push eax
235 .else
236 .ifeqs "\r", "ebp"
237 push [ebp]
238 .else
239 push \r
240 .endif
241 .endif
242 _reg.1
243 _reg.2
244 _reg.3 "\msg: \r = %08x"
245 .endm
246
247 .macro xmmreg r, msg
248 _reg.0
249 _reg.1
250 _reg.2
251 movdqu xmm0, \r
252 pshufd xmm0, xmm0, 0x1b
253 sub esp, 16
254 movdqa [esp], xmm0
255 _reg.3 "\msg: \r = %08x %08x %08x %08x"
256 .endm
257
258 .macro mmreg r, msg
259 _reg.0
260 _reg.1
261 _reg.2
262 pshufw \r, \r, 0x4e
263 sub esp, 8
264 movq [esp], \r
265 _reg.3 "\msg: \r = %08x %08x"
266 .endm
267
268 .macro freg i, msg
269 _reg.0
270 _reg.1
271 _reg.2
272 finit
273 fldt [esp + 32 + 16*\i]
274 sub esp, 12
275 fstpt [esp]
276 _reg.3 "\msg: st(\i) = %.20Lg"
277 .endm
278
279 .macro fxreg i, msg
280 _reg.0
281 _reg.1
282 _reg.2
283 finit
284 fldt [esp + 32 + 16*\i]
285 sub esp, 12
286 fstpt [esp]
287 _reg.3 "\msg: st(\i) = %La"
288 .endm
289
290 #endif
291
292 ///--------------------------------------------------------------------------
293 /// ARM-specific hacking.
294
295 #if CPUFAM_ARMEL
296
// ARM/Thumb mode things.  Each macro selects the instruction set and records
// in `.L$_pcoff' the allowance made for the value read from `pc' when
// computing pc-relative offsets: 8 in ARM mode, 4 in Thumb mode.  Use ARM
// by default.
#define ARM             .arm; .L$_pcoff = 8
#define THUMB           .thumb; .L$_pcoff = 4
        ARM
301
// Function hooks: start every function on a 4-byte boundary, and dump the
// assembler's literal pool at the end of each one.
#define FUNC_PREHOOK(_)         .balign 4
#define ENDFUNC_HOOK(name)      .ltorg
305
// Call the external subroutine at ADDR, subject to the optional condition
// code COND.  In position-independent code the call goes via the PLT;
// otherwise it is a plain `bl'.
        .macro  callext addr, cond=
#if !WANT_PIC
        bl\cond \addr
#else
        bl\cond \addr(PLT)
#endif
        .endm
314
// Do I need to arrange a spare GOT register?  On ARM, yes, whenever we're
// building position-independent code.
#if WANT_PIC
#  define NEED_GOT 1
#endif

// The default register for holding the GOT address.
#define GOTREG r9
320
// Maybe load the GOT address into the register GOT (default `GOTREG'),
// subject to the optional condition code COND.
//
// Only position-independent code emits anything.  A word in the literal
// pool holds the GOT's displacement from the `add' instruction, less the
// `.L$_pcoff' allowance for the value read from `pc'; it is loaded into
// GOT, and `pc' is then added to it.
        .macro  ldgot cond=, got=GOTREG
#if WANT_PIC
        ldr\cond \got, .L$_ldgot_lit$\@
.L$_ldgot_ref$\@:
        add\cond \got, pc, \got

        // The literal-pool entry.
        _LIT
        .balign 4
.L$_ldgot_lit$\@:
        .word   _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_ref$\@ - .L$_pcoff
        _ENDLIT
#endif
        .endm
334
// Load the address of the external symbol ADDR into REG, subject to the
// optional condition code COND.
//
// In position-independent code, a literal-pool word `ADDR(GOT)' is loaded
// into REG and used as an offset from the register GOT (default `GOTREG',
// which must already hold the GOT address) to fetch the symbol's address.
// Otherwise the address is simply loaded with `ldr REG, =ADDR'.
        .macro  leaext reg, addr, cond=, got=GOTREG
#if WANT_PIC
        ldr\cond \reg, .L$_leaext_lit$\@
        ldr\cond \reg, [\got, \reg]

        // The literal-pool entry.
        _LIT
        .balign 4
.L$_leaext_lit$\@:
        .word   \addr(GOT)
        _ENDLIT
#else
        ldr\cond \reg, =\addr
#endif
        .endm
349
// Load the address of the external symbol ADDR into REG directly, i.e.,
// without needing a GOT register; COND is an optional condition code.
//
// In position-independent code, a literal-pool word holds `ADDR(GOT_PREL)',
// adjusted by the distance from the pc-relative instruction to the literal
// and by the `.L$_pcoff' allowance.  That word is loaded into REG and then
// used as an offset from `pc' to fetch the symbol's address: as a single
// indexed load when `.L$_pcoff' is 8 (ARM mode), or as an `add' followed by
// a load otherwise.  Non-PIC code just uses `ldr REG, =ADDR'.
        .macro  leaextq reg, addr, cond=
#if WANT_PIC
        ldr\cond \reg, .L$_leaextq_lit$\@
.L$_leaextq_ref$\@:
        .if     .L$_pcoff == 8
          ldr\cond \reg, [pc, \reg]
        .else
          add\cond \reg, pc
          ldr\cond \reg, [\reg]
        .endif

        // The literal-pool entry.
        _LIT
        .balign 4
.L$_leaextq_lit$\@:
        .word   \addr(GOT_PREL) + (. - .L$_leaextq_ref$\@ - .L$_pcoff)
        _ENDLIT
#else
        ldr\cond \reg, =\addr
#endif
        .endm
370
371 #endif
372
373 ///--------------------------------------------------------------------------
374 /// Final stuff.
375
// Default values for the various hooks.  Anything which the platform- and
// CPU-specific sections above didn't set expands to nothing.
#ifndef FUNC_PREHOOK
#  define FUNC_PREHOOK(name)
#endif
#ifndef FUNC_POSTHOOK
#  define FUNC_POSTHOOK(name)
#endif
#ifndef ENDFUNC_HOOK
#  define ENDFUNC_HOOK(name)
#endif

// By default, external symbol names aren't decorated at all.
#ifndef F
#  define F(name) name
#endif

// By default, there's no symbol type or size information to record.
#ifndef TYPE_FUNC
#  define TYPE_FUNC(name)
#endif

#ifndef SIZE_OBJ
#  define SIZE_OBJ(name)
#endif
398
// Mark the stack as non-executable.
//
// On ELF targets, emit an empty `.note.GNU-stack' section (no flags, so in
// particular not executable) into every object which includes this header.
// The GNU toolchain reads this as a declaration that the object doesn't need
// an executable stack.  A source file which really does need one can define
// `WANT_EXECUTABLE_STACK' before including this header to suppress the note.
//
// The section type is spelt `@progbits' on most targets, but `@' starts a
// comment on ARM, where `%progbits' is used instead.  `_SECTTY' hides the
// difference; a definition is supplied here in case nothing earlier has
// provided one.
#if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
#  ifndef _SECTTY
#    if CPUFAM_ARMEL
#      define _SECTTY(ty) %ty
#    else
#      define _SECTTY(ty) @ty
#    endif
#  endif
        .pushsection .note.GNU-stack, "", _SECTTY(progbits)
        .popsection
#endif
403
404 ///----- That's all, folks --------------------------------------------------