// the byte substitution.
dup v0.4s, w14
aese v0.16b, v1.16b // effectively, just SubBytes
- mov w14, v0.4s[0]
+ mov w14, v0.s[0]
b 2f
// First word of the cycle. Byte substitution, rotation, and round constant.
1: ldrb w13, [x5], #1 // next round constant
dup v0.4s, w14
aese v0.16b, v1.16b // effectively, just SubBytes
- mov w14, v0.4s[0]
+ mov w14, v0.s[0]
eor w14, w13, w14, ror #8
// Common ending: mix in the word from the previous cycle and store.
sub x6, x6, #1
cmp x8, x3
cbz x6, 9f
- csel x8, x8, xzr, cc
+ cmov.cs x8, xzr
b 0b
// Next job is to construct the decryption keys. The keys for the