stp x23, x24, [sp, #48]
stp x25, x26, [sp, #64]
stp x27, x28, [sp, #80]
- str x1, [sp, #96]
-
- mov x16, x0
-
- ldr x17, [x1, #128]
+ str x1, [sp, #104]
+
+ ldp x29, x30, [x1, #224]
+ msr nzcv, x30
+ mov x30, x0
+ ldp x27, x28, [x1, #208]
+ ldp x25, x26, [x1, #192]
+ ldp x23, x24, [x1, #176]
+ ldp x21, x22, [x1, #160]
+ ldp x19, x20, [x1, #144]
+ ldp x16, x17, [x1, #128]
ldp x14, x15, [x1, #112]
ldp x12, x13, [x1, #96]
ldp x10, x11, [x1, #80]
ldp x4, x5, [x1, #32]
ldp x2, x3, [x1, #16]
ldp x0, x1, [x1, #0]
- msr nzcv, x17
-
- blr x16
-
- ldr x16, [sp, #96]
- mrs x17, nzcv
- str x17, [x16, #128]
- stp x14, x15, [x16, #112]
- stp x12, x13, [x16, #96]
- stp x10, x11, [x16, #80]
- stp x8, x9, [x16, #64]
- stp x6, x7, [x16, #48]
- stp x4, x5, [x16, #32]
- stp x2, x3, [x16, #16]
- stp x0, x1, [x16, #0]
+
+ blr x30
+
+ ldr x30, [sp, #104]
+ stp x27, x28, [x30, #208]
+ stp x25, x26, [x30, #192]
+ stp x23, x24, [x30, #176]
+ stp x21, x22, [x30, #160]
+ stp x19, x20, [x30, #144]
+ stp x16, x17, [x30, #128]
+ stp x14, x15, [x30, #112]
+ stp x12, x13, [x30, #96]
+ stp x10, x11, [x30, #80]
+ stp x8, x9, [x30, #64]
+ stp x6, x7, [x30, #48]
+ stp x4, x5, [x30, #32]
+ stp x2, x3, [x30, #16]
+ stp x0, x1, [x30, #0]
+ mov x0, x30
+ mrs x30, nzcv
+ stp x29, x30, [x0, #224]
ldp x19, x20, [sp, #16]
ldp x21, x22, [sp, #32]
proc x26
+ // a bad way to rotate a right by 7 places
+
#if defined(__x86_64__)
- notimpl
+ mov rbx, rax
+ ror rbx, 7 // better
+
+ mov rdx, rax // d' = a
+ shr rax, 7 // a' = a >> 7
+ shl rdx, 0x39 // d' = a << 57 (64 - 7)
+ or rax, rdx // a' = a >>> 7
#elif defined(__i386__)
- notimpl
+ mov ebx, eax
+ ror ebx, 7 // better
+
+ mov edx, eax // d' = a
+ shr eax, 7 // a' = a >> 7
+ shl edx, 0x19 // d' = a << 25 (32 - 7; NOT 0x39 — 32-bit counts mask to 5 bits)
+ or eax, edx // a' = a >>> 7
#elif defined(__arm__)
- notimpl
+ mov r1, r0, ror #7 // easy way
+
+ // even the hard way is fairly easy on arm
+ mov r3, r0, lsl #25 // r3 = a << (32 - 7)
+ orr r0, r3, r0, lsr #7 // hard way: a' = a >>> 7
#elif defined(__aarch64__)
- notimpl
+ ror x1, x0, #7 // easy way
+
+ // even the hard way is fairly easy on arm
+ lsl x3, x0, #57 // x3 = a << (64 - 7)
+ orr x0, x3, x0, lsr #7 // hard way: a' = a >>> 7
#else
notimpl
#endif
+ ret
+
endproc
proc x27
+ // shift a right by c places, in two halves:
+ // a >>= floor(c/2); a >>= ceil(c/2) — the two counts sum to c.
+ // NOTE(review): splitting presumably also makes c == operand width
+ // behave as a full shift-out (a single shift count is masked by the
+ // CPU) — confirm against the exercise statement.
+
#if defined(__x86_64__)
- notimpl
+ mov ch, cl // c' = [c, c]
+ inc ch // c' = [c, c + 1]
+ shr ch, 1 // ch = ceil(c/2)
+ shr cl, 1 // c' = [floor(c/2), ceil(c/2)]
+ shr rax, cl // a >>= floor(c/2)
+ xchg ch, cl // swap the two half-counts
+ shr rax, cl // a >>= ceil(c/2); total shift = c
#elif defined(__i386__)
- notimpl
+ mov ch, cl // c' = [c, c]
+ inc ch // c' = [c, c + 1]
+ shr ch, 1 // ch = ceil(c/2)
+ shr cl, 1 // c' = [floor(c/2), ceil(c/2)]
+ shr eax, cl // a >>= floor(c/2)
+ xchg ch, cl // swap the two half-counts
+ shr eax, cl // a >>= ceil(c/2); total shift = c
#elif defined(__arm__)
- notimpl
+ // it would be clearer and more efficient to say: `mov r12, r2, lsr
+ // #1; sub r2, r2, r12', but that's not the lesson this exercise is
+ // trying to teach.
+ add r12, r2, #1 // r12 = c + 1
+ mov r2, r2, lsr #1 // r2 = floor(c/2)
+ mov r12, r12, lsr #1 // r12 = ceil(c/2)
+ mov r0, r0, lsr r2 // a >>= floor(c/2)
+ mov r0, r0, lsr r12 // a >>= ceil(c/2); total shift = c
#elif defined(__aarch64__)
- notimpl
+ add w16, w2, #1 // w16 = c + 1
+ lsr w2, w2, #1 // w2 = floor(c/2)
+ lsr w16, w16, #1 // w16 = ceil(c/2)
+ lsr x0, x0, x2 // a >>= floor(c/2)
+ lsr x0, x0, x16 // a >>= ceil(c/2); total shift = c
#else
notimpl
#endif
+ ret
+
endproc
proc x28