/// MA 02111-1307, USA.
///--------------------------------------------------------------------------
-/// External definitions.
+/// Preliminaries.
#include "config.h"
#include "asm-common.h"
- .globl F(abort)
- .globl F(rijndael_rcon)
+ .arch armv8-a
+ .fpu crypto-neon-fp-armv8
+
+ .extern F(abort)
+ .extern F(rijndael_rcon)
+
+ .text
///--------------------------------------------------------------------------
/// Main code.
- .arch armv8-a
- .fpu crypto-neon-fp-armv8
-
/// The ARM crypto extension implements a little-endian version of AES
/// (though the manual doesn't actually spell this out and you have to
/// experiment), but Catacomb's internal interface presents as big-endian so
// r2 = pointer to key material
// r3 = key size in words
- stmfd sp!, {r4-r9, r14}
+ pushreg r4-r9, r14
// The initial round key material is taken directly from the input
// key, so copy it over. Unfortunately, the key material is not
mla r2, r1, r7, r1 // total key size in words
leaextq r5, rijndael_rcon // round constants
sub r8, r2, r3 // minus what we've copied already
- veor q1, q1 // all-zero register for the key
+ vmov.i32 q1, #0 // all-zero register for the key
add r8, r9, r8, lsl #2 // limit of the key buffer
mov r12, #0 // position in current cycle
// First word of the cycle. Byte substitution, rotation, and round
// constant.
1: ldrb r14, [r5], #1 // next round constant
- ldr r6, [r9, -r3, lsl #2]
vdup.32 q0, r4
aese.8 q0, q1 // effectively, just SubBytes
vmov.32 r4, d0[0]
bl endswap_block
// All done.
-9: ldmfd sp!, {r4-r9, pc}
+9: popreg r4-r9, pc
-endswap_block:
+ENDFUNC
+
+INTFUNC(endswap_block)
// End-swap R2 words starting at R1. R1 is clobbered; R2 is not: it is
// copied into R4, so R4 is overwritten, as is Q0 (D0/D1).
// It's OK to work in 16-byte chunks.
+
mov r4, r2
0: vldmia r1, {d0, d1}
vrev32.8 q0, q0