1 /// -*- mode: asm; asm-comment-char: ?/ -*-
3 /// Fancy SIMD implementation of Salsa20
5 /// (c) 2015 Straylight/Edgeware
8 ///----- Licensing notice ---------------------------------------------------
10 /// This file is part of Catacomb.
12 /// Catacomb is free software; you can redistribute it and/or modify
13 /// it under the terms of the GNU Library General Public License as
14 /// published by the Free Software Foundation; either version 2 of the
15 /// License, or (at your option) any later version.
17 /// Catacomb is distributed in the hope that it will be useful,
18 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
19 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 /// GNU Library General Public License for more details.
22 /// You should have received a copy of the GNU Library General Public
23 /// License along with Catacomb; if not, write to the Free
24 /// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 /// MA 02111-1307, USA.
27 ///--------------------------------------------------------------------------
28 /// General definitions.
30 // Preprocessor hacks.
//
// STRINGY(x) expands to a string literal holding the macro-expansion of X;
// GLUE(x, y) pastes the macro-expansions of X and Y together into a single
// token.  Each one goes through a helper (`_STRINGY', `_GLUE') because `#'
// and `##' act on their operands unexpanded: the extra level of macro call
// is what gets the arguments expanded first.
31 #define STRINGY(x) _STRINGY(x)
32 #define _STRINGY(x) #x
33 #define GLUE(x, y) _GLUE(x, y)
34 #define _GLUE(x, y) x##y
37 // Some useful variables.
40 // Literal pools done the hard way.
//
// These switch between numbered subsections of `.text', using the
// assembler symbol `.L$_subsec' as the number of the current code
// subsection:
//
//   _LIT     continue in subsection `.L$_subsec + 1';
//   _ENDLIT  go back to subsection `.L$_subsec';
//   _LTORG   add two to `.L$_subsec' and continue in the subsection it
//            now names, leaving the old `.L$_subsec + 1' behind.
//
// NOTE(review): the initial value of `.L$_subsec' is set outside these
// definitions -- confirm where.
41 #define _LIT .text .L$_subsec + 1
42 #define _ENDLIT .text .L$_subsec
43 #define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
48 # define _SECTTY(ty) %ty
50 # define _SECTTY(ty) @ty
55 #define TEXT .text .L$_subsec
57 # define RODATA .section .rdata, "dr"
59 # define RODATA .section .rodata, "a", _SECTTY(progbits)
65 // Announcing an external function.
69 .macro ENDFUNC; _ENDFUNC(name); .endm; \
74 // Marking the end of a function.
75 #define _ENDFUNC(name) \
81 // Make a helper function, if necessary.
83 .ifndef .L$_auxfn_def.name; \
85 .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
88 #define _ENDAUXFN_TAIL(name) \
91 .L$_auxfn_def.name = 1
92 #define ENDAUXFN _ENDAUXFN; .endif
94 ///--------------------------------------------------------------------------
95 /// ELF-specific hacking.
99 #if __PIC__ || __PIE__
103 #define TYPE_FUNC(name) .type name, STT_FUNC
105 #define SIZE_OBJ(name) .size name, . - name
109 ///--------------------------------------------------------------------------
110 /// Windows-specific hacking.
115 # define F(name) _##name
120 ///--------------------------------------------------------------------------
121 /// x86- and amd64-specific hacking.
123 /// It's (slightly) easier to deal with both of these in one go.
125 #if CPUFAM_X86 || CPUFAM_AMD64
127 // Set the function hooks.
128 #define FUNC_PREHOOK(_) .balign 16
130 // On Windows, arrange to install stack-unwinding data.
131 #if CPUFAM_AMD64 && ABI_WIN
132 # define FUNC_POSTHOOK(name) .seh_proc name
133 # define ENDFUNC_HOOK(_) .seh_endproc
134 // Procedures are expected to invoke `.seh_setframe' if necessary, and
135 // `.seh_pushreg' and friends, and `.seh_endprologue'.
138 // Don't use the wretched AT&T syntax. It's festooned with pointless
139 // punctuation, and all of the data movement is backwards. Ugh!
140 .intel_syntax noprefix
142 // Call external subroutine at ADDR, possibly via PLT.
151 // Do I need to arrange a spare GOT register?
152 #if WANT_PIC && CPUFAM_X86
155 #define GOTREG ebx // Not needed in AMD64 so don't care.
157 // Maybe load GOT address into GOT.
158 .macro ldgot got
=GOTREG
159 #if WANT_PIC && CPUFAM_X86
165 add \got
, offset _GLOBAL_OFFSET_TABLE_
169 // Load address of external symbol ADDR into REG, maybe using GOT.
170 .macro leaext reg
, addr
, got
=GOTREG
173 mov
\reg
, [\got
+ \addr@GOT
]
176 mov
\reg
, \addr@GOTPCREL
[rip
]
180 mov
\reg
, offset
\addr
188 // Address expression (possibly using a base register, and a displacement)
189 // referring to ADDR, which is within our module, maybe using GOT.
//
// Usage: INTADDR(addr) or INTADDR(addr, got).  INTADDR appends `GOTREG,
// dummy' to whatever arguments it was given, and INTADDR__0 passes only the
// first two of the result on to INTADDR__1: so a caller who leaves out GOT
// gets GOTREG, and `dummy' ensures that the variadic tail of INTADDR__0 is
// never empty.
190 #define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
191 #define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
193 # define INTADDR__1(addr, got) addr + rip
195 # define INTADDR__1(addr, got) got + addr@GOTOFF
197 # define INTADDR__1(addr, got) addr
200 // Permutations for SIMD instructions. SHUF(D, C, B, A) is an immediate,
201 // suitable for use in `pshufd' or `shufps', which copies element D
202 // (0 <= D < 4) of the source to element 3 of the destination, element C to
203 // element 2, element B to element 1, and element A to element 0.
// (With `shufps', elements 0 and 1 of the result are selected from the
// destination operand rather than from the source.)
//
// The value packs the four two-bit selectors as D:C:B:A, most significant
// first: e.g., SHUF(0, 1, 2, 3) = 0x1b, which with `pshufd' reverses the
// order of the elements.
204 #define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
211 // Stash GP registers and establish temporary stack frame.
230 // Print FMT and the other established arguments.
231 lea eax
, .L$_reg$msg
.\@
271 _reg
.3 "\msg: \r = %08x"
279 pshufd xmm0
, xmm0
, 0x1b
282 _reg
.3 "\msg: \r = %08x %08x %08x %08x"
292 _reg
.3 "\msg: \r = %08x %08x"
300 fldt
[esp
+ 32 + 16*\i
]
303 _reg
.3 "\msg: st(\i) = %.20Lg"
311 fldt
[esp
+ 32 + 16*\i
]
314 _reg
.3 "\msg: st(\i) = %La"
319 ///--------------------------------------------------------------------------
320 /// ARM-specific hacking.
324 // ARM/Thumb mode things. Use ARM by default.
//
// Each of these selects an instruction set and sets `.L$_pcoff' to match:
// 8 for ARM, 4 for Thumb.  The macros in this file which build pc-relative
// literal words subtract `.L$_pcoff', to allow for how far ahead of the
// current instruction a read of `pc' is in that instruction set.
325 #define ARM .arm; .L$_pcoff = 8
326 #define THUMB .thumb; .L$_pcoff = 4
329 // Set the function hooks.
330 #define FUNC_PREHOOK(_) .balign 4
331 #define ENDFUNC_HOOK(name) .ltorg
333 // Call external subroutine at ADDR, possibly via PLT.
334 .macro callext addr
, cond
=
342 // Do I need to arrange a spare GOT register?
348 // Maybe load GOT address into GOT.
349 .macro ldgot cond
=, got
=GOTREG
351 ldr\cond \got
, .L$_ldgot$\@
353 add\cond \got
, pc
, \got
357 .word _GLOBAL_OFFSET_TABLE_
- .L$_ldgot_pc$\@
- .L$_pcoff
362 // Load address of external symbol ADDR into REG, maybe using GOT.
363 .macro leaext reg
, addr
, cond
=, got
=GOTREG
365 ldr\cond
\reg
, .L$_leaext$\@
366 ldr\cond
\reg
, [\got
, \reg
]
373 ldr\cond
\reg
, =\addr
377 // Load address of external symbol ADDR into REG directly.
378 .macro leaextq reg
, addr
, cond
=
380 ldr\cond
\reg
, .L$_leaextq$\@
383 ldr\cond
\reg
, [pc
, \reg
]
386 ldr\cond
\reg
, [\reg
]
391 .word
\addr
(GOT_PREL
) + (. - .L$_leaextq_pc$\@
- .L$_pcoff
)
394 ldr\cond
\reg
, =\addr
400 ///--------------------------------------------------------------------------
403 // Default values for the various hooks.
405 # define FUNC_PREHOOK(name)
407 #ifndef FUNC_POSTHOOK
408 # define FUNC_POSTHOOK(name)
411 # define ENDFUNC_HOOK(name)
415 # define F(name) name
419 # define TYPE_FUNC(name)
423 # define SIZE_OBJ(name)
426 #if __ELF__ && defined(WANT_EXECUTABLE_STACK)
427 .pushsection
.note
.GNU
-stack
, "", _SECTTY(progbits
)
431 ///----- That's all, folks --------------------------------------------------