X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/blobdiff_plain/1a517bb3785891ff6940c73af7c5a136d0250ebf..8f2287ef5c05d496fcb9b012629af007fe56f897:/symm/rijndael-arm-crypto.S diff --git a/symm/rijndael-arm-crypto.S b/symm/rijndael-arm-crypto.S index 4d7312d4..1df81d97 100644 --- a/symm/rijndael-arm-crypto.S +++ b/symm/rijndael-arm-crypto.S @@ -25,20 +25,22 @@ /// MA 02111-1307, USA. ///-------------------------------------------------------------------------- -/// External definitions. +/// Preliminaries. #include "config.h" #include "asm-common.h" + .arch armv8-a + .fpu crypto-neon-fp-armv8 + .extern F(abort) .extern F(rijndael_rcon) + .text + ///-------------------------------------------------------------------------- /// Main code. - .arch armv8-a - .fpu crypto-neon-fp-armv8 - /// The ARM crypto extension implements a little-endian version of AES /// (though the manual doesn't actually spell this out and you have to /// experiment), but Catacomb's internal interface presents as big-endian so @@ -70,7 +72,7 @@ FUNC(rijndael_setup_arm_crypto) // r2 = pointer to key material // r3 = key size in words - stmfd sp!, {r4-r9, r14} + pushreg r4-r9, r14 // The initial round key material is taken directly from the input // key, so copy it over. Unfortunately, the key material is not @@ -104,7 +106,7 @@ FUNC(rijndael_setup_arm_crypto) mla r2, r1, r7, r1 // total key size in words leaextq r5, rijndael_rcon // round constants sub r8, r2, r3 // minus what we've copied already - veor q1, q1 // all-zero register for the key + vmov.i32 q1, #0 // all-zero register for the key add r8, r9, r8, lsl #2 // limit of the key buffer mov r12, #0 // position in current cycle @@ -128,7 +130,6 @@ FUNC(rijndael_setup_arm_crypto) // First word of the cycle. Byte substitution, rotation, and round // constant. 1: ldrb r14, [r5], #1 // next round constant - ldr r6, [r9, -r3, lsl #2] vdup.32 q0, r4 aese.8 q0, q1 // effectively, just SubBytes vmov.32 r4, d0[0] @@ -209,7 +210,7 @@ FUNC(rijndael_setup_arm_crypto) bl endswap_block // All done. -9: ldmfd sp!, {r4-r9, pc} +9: popreg r4-r9, pc ENDFUNC