base/ct.c: Better constant-time algorithms from /Hacker's Delight/.
[catacomb] / base / asm-common.h
CommitLineData
1a0c09c4
MW
1/// -*- mode: asm; asm-comment-char: ?/ -*-
2///
3/// Common definitions for assembler source files
4///
5/// (c) 2015 Straylight/Edgeware
6///
7
8///----- Licensing notice ---------------------------------------------------
9///
10/// This file is part of Catacomb.
11///
12/// Catacomb is free software; you can redistribute it and/or modify
13/// it under the terms of the GNU Library General Public License as
14/// published by the Free Software Foundation; either version 2 of the
15/// License, or (at your option) any later version.
16///
17/// Catacomb is distributed in the hope that it will be useful,
18/// but WITHOUT ANY WARRANTY; without even the implied warranty of
19/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20/// GNU Library General Public License for more details.
21///
22/// You should have received a copy of the GNU Library General Public
23/// License along with Catacomb; if not, write to the Free
24/// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25/// MA 02111-1307, USA.
26
27///--------------------------------------------------------------------------
28/// General definitions.
29
f8e509a9
MW
30// Some useful variables.
// .L$_subsec is the number of the text subsection which TEXT selects for
// code.  It starts at zero and is advanced by _LTORG.
31 .L$_subsec = 0
32
33// Literal pools done the hard way.
// _LIT switches to text subsection .L$_subsec + 1, and _ENDLIT switches back
// to subsection .L$_subsec, so anything written between the two lands in the
// odd-numbered subsection next to the current code subsection.  _LTORG adds
// 2 to .L$_subsec and selects the new subsection, so that later code and
// literals use a fresh pair of subsections.
34#define _LIT .text .L$_subsec + 1
35#define _ENDLIT .text .L$_subsec
6c54cbd3 36#define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
f8e509a9 37
645fcce0
MW
38// ELF section types.
// _SECTTY(ty) spells the section-type operand for `.section': it expands to
// `%ty' when CPUFAM_ARMEL is set and to `@ty' otherwise.  It is only defined
// when __ELF__ is set.
// NOTE(review): presumably `%' is needed because `@' begins a comment in ARM
// assembler syntax -- confirm against the GNU as manual.
39#if __ELF__
40# if CPUFAM_ARMEL
41# define _SECTTY(ty) %ty
42# else
43# define _SECTTY(ty) @ty
44# endif
45#endif
46
47// Section selection.
// TEXT selects the current code subsection (.L$_subsec) of `.text'.  RODATA
// selects a section for read-only data: `.rdata' with flags "dr" under
// ABI_WIN, `.rodata' with flags "a" and type progbits on ELF, and otherwise
// it simply falls back to TEXT.  DATA selects `.data'.
48#define TEXT .text .L$_subsec
49#if ABI_WIN
50# define RODATA .section .rdata, "dr"
51#elif __ELF__
52# define RODATA .section .rodata, "a", _SECTTY(progbits)
53#else
54# define RODATA TEXT
55#endif
56#define DATA .data
57
1a0c09c4
MW
58// Announcing an external function.
// FUNC(name) makes F(name) global, applies TYPE_FUNC(name), defines a
// temporary assembler macro ENDFUNC which expands to _ENDFUNC(name), runs
// FUNC_PREHOOK(name), emits the label F(name), and then runs
// FUNC_POSTHOOK(name).  The function body follows, and is closed by writing
// a bare ENDFUNC, which needs no argument because the name is captured here.
59#define FUNC(name) \
60 .globl F(name); \
61 TYPE_FUNC(name); \
8a1aa284 62 .macro ENDFUNC; _ENDFUNC(name); .endm; \
1a0c09c4
MW
63 FUNC_PREHOOK(name); \
64F(name): \
65 FUNC_POSTHOOK(name)
66
67// Marking the end of a function.
// _ENDFUNC(name) is the expansion of the ENDFUNC macro set up by FUNC.  It
// removes the ENDFUNC macro again (so the next FUNC can redefine it), applies
// SIZE_OBJ(name), runs ENDFUNC_HOOK(name), and finishes with _LTORG so that
// the following code starts in a fresh pair of text subsections.
68#define _ENDFUNC(name) \
69 .purgem ENDFUNC; \
70 SIZE_OBJ(name); \
f8e509a9 71 ENDFUNC_HOOK(name); \
6c54cbd3 72 _LTORG
1a0c09c4 73
8ae4c946
MW
74// Make a helper function, if necessary.
// AUXFN(name) ... ENDAUXFN brackets the body of a local helper.  The body is
// assembled only while the guard symbol .L$_auxfn_def.name is undefined, so
// repeated uses in one assembly produce a single copy.  The helper is placed
// in text subsection 7128, after FUNC_PREHOOK(name), under the plain label
// `name' (not F(name), and not made global).
75#define AUXFN(name) \
76 .ifndef .L$_auxfn_def.name; \
77 .text 7128; \
78 .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
79 FUNC_PREHOOK(name); \
80name:
// _ENDAUXFN_TAIL(name) is the expansion of the temporary _ENDAUXFN macro: it
// removes that macro, returns to the current code subsection, and defines
// the guard symbol so that later AUXFN(name) uses are skipped.
81#define _ENDAUXFN_TAIL(name) \
82 .purgem _ENDAUXFN; \
83 .text .L$_subsec; \
84 .L$_auxfn_def.name = 1
// ENDAUXFN ends the helper body and closes the `.ifndef' opened by AUXFN.
85#define ENDAUXFN _ENDAUXFN; .endif
86
1a0c09c4
MW
87///--------------------------------------------------------------------------
88/// ELF-specific hacking.
89
// Definitions which apply only when targetting ELF.
90#if __ELF__
91
// WANT_PIC is set when the compiler driver announces position-independent
// code (__PIC__) or a position-independent executable (__PIE__).
92#if __PIC__ || __PIE__
93# define WANT_PIC 1
94#endif
95
// TYPE_FUNC(name) marks the symbol `name' as a function (STT_FUNC).
96#define TYPE_FUNC(name) .type name, STT_FUNC
97
// SIZE_OBJ(name) records the size of `name' as the distance from its label
// to the current location, so it must be used immediately after the object.
98#define SIZE_OBJ(name) .size name, . - name
99
100#endif
101
102///--------------------------------------------------------------------------
0f23f75f
MW
103/// Windows-specific hacking.
104
// With ABI_WIN on CPUFAM_X86, F(name) prefixes the symbol name with an
// underscore.  Other configurations get the identity definition of F from
// the defaults at the end of this file.
105#if ABI_WIN
1a0c09c4
MW
106
107#if CPUFAM_X86
0f23f75f
MW
108# define F(name) _##name
109#endif
110
111#endif
112
113///--------------------------------------------------------------------------
114/// x86- and amd64-specific hacking.
115///
116/// It's (slightly) easier to deal with both of these in one go.
117
118#if CPUFAM_X86 || CPUFAM_AMD64
1a0c09c4
MW
119
120// Set the function hooks.
// FUNC_PREHOOK is run by FUNC and AUXFN just before the entry label, so
// functions and helpers start on a 16-byte boundary.  The argument is unused.
121#define FUNC_PREHOOK(_) .balign 16
122
f71dd54d
MW
123// On Windows, arrange to install stack-unwinding data.
// FUNC_POSTHOOK runs just after the entry label and opens the SEH procedure
// description; ENDFUNC_HOOK runs from ENDFUNC and closes it.
124#if CPUFAM_AMD64 && ABI_WIN
125# define FUNC_POSTHOOK(name) .seh_proc name
126# define ENDFUNC_HOOK(_) .seh_endproc
127// Procedures are expected to invoke `.seh_setframe' if necessary, and
128// `.seh_pushreg' and friends, and `.seh_endprologue'.
129#endif
130
1a0c09c4
MW
131// Don't use the wretched AT&T syntax. It's festooned with pointless
132// punctuation, and all of the data movement is backwards. Ugh!
// This directive is emitted directly, so it affects every x86 or AMD64 source
// file which includes this header: operands are written destination first
// and registers carry no `%' prefix.
133 .intel_syntax noprefix
134
135// Call external subroutine at ADDR, possibly via PLT.
// With WANT_PIC the call goes through the `@PLT' form of ADDR; otherwise it
// is a plain direct call.
8a1aa284 136.macro callext addr
1a0c09c4
MW
137#if WANT_PIC
138 call \addr@PLT
139#else
140 call \addr
141#endif
8a1aa284 142.endm
1a0c09c4
MW
143
144// Do I need to arrange a spare GOT register?
// NEED_GOT is set only for position-independent 32-bit x86 code.  GOTREG is
// the default register used to hold the GOT address by the macros below.
145#if WANT_PIC && CPUFAM_X86
146# define NEED_GOT 1
147#endif
148#define GOTREG ebx // Not needed in AMD64 so don't care.
149
150// Maybe load GOT address into GOT.
// GOT names the destination register and defaults to GOTREG.  The macro
// expands to nothing unless both WANT_PIC and CPUFAM_X86 are set.
8a1aa284 151.macro ldgot got=GOTREG
0f23f75f 152#if WANT_PIC && CPUFAM_X86
// Define, at most once per register, a helper `_ldgot.GOT' which copies its
// own return address from the top of the stack into GOT and returns.
8ae4c946 153 AUXFN(_ldgot.\got)
1a0c09c4
MW
154 mov \got, [esp]
155 ret
8ae4c946
MW
156 ENDAUXFN
// Calling the helper leaves the address of the following instruction in GOT;
// adding the _GLOBAL_OFFSET_TABLE_ offset then turns that into the GOT
// address.
157 call _ldgot.\got
158 add \got, offset _GLOBAL_OFFSET_TABLE_
1a0c09c4 159#endif
8a1aa284 160.endm
1a0c09c4
MW
161
162// Load address of external symbol ADDR into REG, maybe using GOT.
// GOT names the register holding the GOT address (default GOTREG); it is only
// used for position-independent 32-bit x86 code.  Four cases:
//
//   * PIC, x86: load the address from the GOT slot for ADDR, relative to GOT.
//   * PIC, AMD64: load the address from ADDR's GOT slot, addressed relative
//     to rip.
//   * non-PIC, x86: move the absolute address of ADDR into REG.
//   * non-PIC, AMD64: form the address of ADDR relative to rip with `lea'.
8a1aa284 163.macro leaext reg, addr, got=GOTREG
1a0c09c4 164#if WANT_PIC
0f23f75f 165# if CPUFAM_X86
1a0c09c4 166 mov \reg, [\got + \addr@GOT]
0f23f75f
MW
167# endif
168# if CPUFAM_AMD64
169 mov \reg, \addr@GOTPCREL[rip]
170# endif
1a0c09c4 171#else
0f23f75f 172# if CPUFAM_X86
1a0c09c4 173 mov \reg, offset \addr
0f23f75f
MW
174# endif
175# if CPUFAM_AMD64
176 lea \reg, \addr[rip]
177# endif
1a0c09c4 178#endif
8a1aa284 179.endm
1a0c09c4
MW
180
181// Address expression (possibly using a base register, and a displacement)
182// referring to ADDR, which is within our module, maybe using GOT.
// Use as INTADDR(addr) or INTADDR(addr, got).  The variadic front end appends
// GOTREG, so that INTADDR__0 picks up GOTREG as the GOT register when the
// caller supplies only the address; the trailing `dummy' keeps the variadic
// part of INTADDR__0 non-empty.
183#define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
184#define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
// INTADDR__1 builds the actual expression: rip-relative on AMD64 (GOT is
// ignored), GOT-register-relative via `@GOTOFF' for position-independent x86,
// and the plain symbol otherwise.
0f23f75f
MW
185#if CPUFAM_AMD64
186# define INTADDR__1(addr, got) addr + rip
187#elif WANT_PIC
1a0c09c4
MW
188# define INTADDR__1(addr, got) got + addr@GOTOFF
189#else
190# define INTADDR__1(addr, got) addr
191#endif
192
a13b5730
MW
193// Permutations for SIMD instructions. SHUF(D, C, B, A) is an immediate,
194// suitable for use in `pshufd' or `shufps', which copies element D
195// (0 <= D < 4) of the source to element 3 of the destination, element C to
196// element 2, element B to element 1, and element A to element 0.
// Each selector occupies a two-bit field of the immediate: D in bits 6-7,
// C in bits 4-5, B in bits 2-3, and A in bits 0-1.
197#define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
198
1a0c09c4
MW
199#endif
200
17de5b2e
MW
201#if CPUFAM_X86
202
// First stage of the debugging-print macros below.  Saves flags, eax, ecx,
// edx and ebp (20 bytes in all), points ebp at the saved registers, then
// aligns esp down to a 16-byte boundary and reserves 512 bytes there, into
// which `fxsave' writes the floating-point and SIMD state.  On exit esp
// points at that save area; _reg.3 undoes all of this.
203.macro _reg.0
204 // Stash GP registers and establish temporary stack frame.
205 pushfd
206 push eax
207 push ecx
208 push edx
209 push ebp
210 mov ebp, esp
211 and esp, ~15
212 sub esp, 512
213 fxsave [esp]
214.endm
215
// The second and third stages are empty here; the printing macros invoke
// them between _reg.0 and _reg.3.
216.macro _reg.1
217.endm
218
219.macro _reg.2
220.endm
221
// Final stage of the debugging-print macros.  FMT is a printf format string;
// any values it consumes must already have been pushed by the caller of this
// macro.  The text is emitted inline as `;; FMT' followed by a newline and a
// terminating zero byte, and the code jumps over it.
// NOTE(review): the message address is taken absolutely and printf is called
// directly rather than through `callext', so this looks unsuitable for
// position-independent builds -- confirm before using it there.
222.macro _reg.3 fmt
223 // Print FMT and the other established arguments.
224 lea eax, .L$_reg$msg.\@
225 push eax
226 call printf
227 jmp .L$_reg$cont.\@
228.L$_reg$msg.\@:
229 .ascii ";; \fmt\n\0"
230.L$_reg$cont.\@:
// Recompute the address of the save area exactly as _reg.0 derived it from
// the frame pointer, and restore the floating-point and SIMD state.
231 mov eax, ebp
232 and eax, ~15
233 sub eax, 512
234 fxrstor [eax]
// Discard everything pushed since _reg.0 set up the frame, then restore the
// saved registers and flags in the reverse of the order they were pushed.
235 mov esp, ebp
236 pop ebp
237 pop edx
238 pop ecx
239 pop eax
240 popfd
241.endm
242
// Print the fixed message MSG through printf, with no further arguments.
// The registers and state saved by _reg.0 are restored by _reg.3.
243.macro msg msg
244 _reg.0
245 _reg.1
246 _reg.2
247 _reg.3 "\msg"
248.endm
249
// Print `MSG: R = ' followed by the value of the 32-bit register R as eight
// hex digits.  R is compared textually, so it must be spelled exactly `esp'
// or `ebp' for the special cases to apply.
250.macro reg r, msg
251 _reg.0
// esp and ebp were changed by _reg.0, so their original values must be
// recovered: the original esp is 20 bytes above the frame pointer (five
// 4-byte pushes), and the original ebp is the word the frame pointer
// addresses.  Any other register is pushed directly.
252 .ifeqs "\r", "esp"
253 lea eax, [ebp + 20]
254 push eax
255 .else
256 .ifeqs "\r", "ebp"
257 push [ebp]
258 .else
259 push \r
260 .endif
261 .endif
262 _reg.1
263 _reg.2
264 _reg.3 "\msg: \r = %08x"
265.endm
266
// Print `MSG: R = ' followed by the four 32-bit lanes of the SIMD register R
// as hex words.  xmm0 is used as scratch after the state has been saved by
// _reg.0; it is restored by _reg.3.
267.macro xmmreg r, msg
268 _reg.0
269 _reg.1
270 _reg.2
// The immediate 0x1b reverses the order of the four lanes, so that lane 3 of
// R lands at the lowest address and is printed first.
271 movdqu xmm0, \r
272 pshufd xmm0, xmm0, 0x1b
// esp is 16-byte aligned here (see _reg.0), so the aligned store is safe.
273 sub esp, 16
274 movdqa [esp], xmm0
275 _reg.3 "\msg: \r = %08x %08x %08x %08x"
276.endm
277
// Print `MSG: R = ' followed by the two 32-bit halves of the MMX register R
// as hex words.  R itself is shuffled in place; this happens after _reg.0
// has saved the state, which _reg.3 restores.
278.macro mmreg r, msg
279 _reg.0
280 _reg.1
281 _reg.2
// The immediate 0x4e exchanges the upper and lower pairs of 16-bit words, so
// the upper 32-bit half lands at the lowest address and is printed first.
282 pshufw \r, \r, 0x4e
283 sub esp, 8
284 movq [esp], \r
285 _reg.3 "\msg: \r = %08x %08x"
286.endm
287
// Print `MSG: st(I) = ' followed by a long-double value in `%.20Lg' format.
// The value is read back from the save area written by _reg.0, at offset
// 32 + 16*I, rather than from the live register stack.
288.macro freg i, msg
289 _reg.0
290 _reg.1
291 _reg.2
// Reinitialize the FPU so that the load below has a free register; the
// saved state is put back by _reg.3.
292 finit
293 fldt [esp + 32 + 16*\i]
// Store the value in a 12-byte argument slot for printf.
294 sub esp, 12
295 fstpt [esp]
296 _reg.3 "\msg: st(\i) = %.20Lg"
297.endm
298
// Print `MSG: st(I) = ' followed by a long-double value in `%La' (hexadecimal
// floating-point) format.  Apart from the format, this is the same sequence
// as `freg': the value is read from the save area written by _reg.0, at
// offset 32 + 16*I.
299.macro fxreg i, msg
300 _reg.0
301 _reg.1
302 _reg.2
// Reinitialize the FPU so that the load below has a free register; the
// saved state is put back by _reg.3.
303 finit
304 fldt [esp + 32 + 16*\i]
// Store the value in a 12-byte argument slot for printf.
305 sub esp, 12
306 fstpt [esp]
307 _reg.3 "\msg: st(\i) = %La"
308.endm
309
310#endif
311
1a0c09c4 312///--------------------------------------------------------------------------
61bd904b
MW
313/// ARM-specific hacking.
314
59d86860 315#if CPUFAM_ARMEL
61bd904b 316
9f6eb05d
MW
317// ARM/Thumb mode things. Use ARM by default.
// ARM and THUMB select the instruction set and record in .L$_pcoff the
// offset which the pc-relative literal computations in `ldgot' and `leaextq'
// subtract: 8 in ARM mode and 4 in Thumb mode.  The bare ARM below is an
// invocation, not a definition: it puts every including file in ARM mode.
318#define ARM .arm; .L$_pcoff = 8
319#define THUMB .thumb; .L$_pcoff = 4
320 ARM
321
61bd904b
MW
322// Set the function hooks.
// Functions and helpers start on a 4-byte boundary, and ENDFUNC flushes the
// assembler literal pool with `.ltorg'.  The `name' argument is unused.
323#define FUNC_PREHOOK(_) .balign 4
324#define ENDFUNC_HOOK(name) .ltorg
325
326// Call external subroutine at ADDR, possibly via PLT.
// COND is an optional condition-code suffix appended to the `bl' mnemonic;
// it defaults to empty.  With WANT_PIC the target is written ADDR(PLT).
8a1aa284 327.macro callext addr, cond=
61bd904b
MW
328#if WANT_PIC
329 bl\cond \addr(PLT)
330#else
331 bl\cond \addr
332#endif
8a1aa284 333.endm
61bd904b
MW
334
335// Do I need to arrange a spare GOT register?
// NEED_GOT is set for any position-independent ARM build.  GOTREG is the
// default register used to hold the GOT address by the macros below.
336#if WANT_PIC
337# define NEED_GOT 1
338#endif
339#define GOTREG r9
340
341// Maybe load GOT address into GOT.
// COND is an optional condition-code suffix for the generated instructions;
// GOT names the destination register and defaults to GOTREG.  The macro
// expands to nothing unless WANT_PIC is set.
8a1aa284 342.macro ldgot cond=, got=GOTREG
61bd904b 343#if WANT_PIC
// Load a pc-relative offset from the literal below, and add the pc to it.
adca2a18
MW
344 ldr\cond \got, .L$_ldgot$\@
345.L$_ldgot_pc$\@:
2d03a881 346 add\cond \got, pc, \got
// The literal is the distance from the `add' instruction to the GOT, less
// .L$_pcoff to allow for the pc reading ahead of the current instruction.
8a1aa284 347 _LIT
adca2a18
MW
348 .balign 4
349.L$_ldgot$\@:
9f6eb05d 350 .word _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
8a1aa284 351 _ENDLIT
61bd904b 352#endif
8a1aa284 353.endm
61bd904b
MW
354
355// Load address of external symbol ADDR into REG, maybe using GOT.
// COND is an optional condition-code suffix; GOT names the register holding
// the GOT address (default GOTREG) and must already have been loaded, e.g.
// by `ldgot'.  With WANT_PIC, REG first receives the literal ADDR(GOT) and is
// then used as an index from GOT to load the address itself.  Without
// WANT_PIC the address comes from the assembler literal pool.
8a1aa284 356.macro leaext reg, addr, cond=, got=GOTREG
61bd904b 357#if WANT_PIC
adca2a18 358 ldr\cond \reg, .L$_leaext$\@
2d03a881 359 ldr\cond \reg, [\got, \reg]
8a1aa284 360 _LIT
adca2a18
MW
361 .balign 4
362.L$_leaext$\@:
363 .word \addr(GOT)
8a1aa284 364 _ENDLIT
61bd904b 365#else
2d03a881 366 ldr\cond \reg, =\addr
61bd904b 367#endif
8a1aa284 368.endm
61bd904b 369
0c53ac58 370// Load address of external symbol ADDR into REG directly.
// Unlike `leaext', this needs no GOT register: with WANT_PIC it loads a
// pc-relative literal into REG and uses it to fetch the address.  COND is an
// optional condition-code suffix.  Without WANT_PIC the address comes from
// the assembler literal pool.
8a1aa284 371.macro leaextq reg, addr, cond=
0c53ac58
MW
372#if WANT_PIC
373 ldr\cond \reg, .L$_leaextq$\@
374.L$_leaextq_pc$\@:
// In ARM mode (.L$_pcoff == 8) a single load indexed from the pc suffices;
// otherwise the pc is added to REG first and the load done separately.
9f6eb05d 375 .if .L$_pcoff == 8
0c53ac58 376 ldr\cond \reg, [pc, \reg]
9f6eb05d
MW
377 .else
378 add\cond \reg, pc
379 ldr\cond \reg, [\reg]
380 .endif
// The literal is ADDR(GOT_PREL) plus the distance from the instruction at
// .L$_leaextq_pc to this word, less .L$_pcoff for the pc read-ahead.
8a1aa284 381 _LIT
0c53ac58
MW
382 .balign 4
383.L$_leaextq$\@:
9f6eb05d 384 .word \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
8a1aa284 385 _ENDLIT
0c53ac58
MW
386#else
387 ldr\cond \reg, =\addr
388#endif
8a1aa284 389.endm
0c53ac58 390
61bd904b
MW
391#endif
392
393///--------------------------------------------------------------------------
1a0c09c4
MW
394/// Final stuff.
395
396// Default values for the various hooks.
// Any hook which the platform-specific sections above did not define expands
// to nothing, so FUNC and ENDFUNC can use all of them unconditionally.
397#ifndef FUNC_PREHOOK
398# define FUNC_PREHOOK(name)
399#endif
400#ifndef FUNC_POSTHOOK
401# define FUNC_POSTHOOK(name)
402#endif
403#ifndef ENDFUNC_HOOK
404# define ENDFUNC_HOOK(name)
405#endif
406
// By default an external symbol name is used as written.
407#ifndef F
408# define F(name) name
409#endif
410
// Outside ELF there is no symbol type or size to record.
411#ifndef TYPE_FUNC
412# define TYPE_FUNC(name)
413#endif
414
415#ifndef SIZE_OBJ
416# define SIZE_OBJ(name)
417#endif
418
1aa5bfa8
MW
// Mark the stack as non-executable.  An empty `.note.GNU-stack' section with
// no flags tells the linker that this object does not need an executable
// stack.  Emit it on ELF targets unless the including file has explicitly
// asked for an executable stack by defining WANT_EXECUTABLE_STACK before
// including this header.  (The test used to be inverted, which left ordinary
// objects unmarked and marked only the ones that wanted an executable stack.)
#if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
	.pushsection .note.GNU-stack, "", _SECTTY(progbits)
	.popsection
#endif
423
1a0c09c4 424///----- That's all, folks --------------------------------------------------