xchg.S: Another couple of (easy) exercises.

author Mark Wooding <mdw@distorted.org.uk>

Mon, 19 Oct 2020 04:35:37 +0000 (05:35 +0100)

committer Mark Wooding <mdw@distorted.org.uk>

Mon, 19 Oct 2020 04:35:37 +0000 (05:35 +0100)
author Mark Wooding <mdw@distorted.org.uk>
Mon, 19 Oct 2020 04:35:37 +0000 (05:35 +0100)
committer Mark Wooding <mdw@distorted.org.uk>
Mon, 19 Oct 2020 04:35:37 +0000 (05:35 +0100)
diff --git a/xchg.S b/xchg.S

index c99e0a6..40c5ba9 100644 (file)
--- a/xchg.S
+++ b/xchg.S
@@ -2750,50 +2750,101 @@ endproc
  
  proc   x26
  
+       // a bad way to rotate a right by 7 places
+
  #if defined(__x86_64__)
  
-       notimpl
+       mov     rbx, rax
+       ror     rbx, 7                  // better
+
+       mov     rdx, rax                // d' = a
+       shr     rax, 7                  // a' = a >> 7
+       shl     rdx, 0x39               // d' = a << 57
+       or      rax, rdx                // a' = a >>> 7
  
  #elif defined(__i386__)
  
-       notimpl
+       mov     ebx, eax
+       ror     ebx, 7                  // better
+
+       mov     edx, eax                // d' = a
+       shr     eax, 7                  // a' = a >> 7
+       shl     edx, 0x39               // d' = a << 57
+       or      eax, edx                // a' = a >>> 7
  
  #elif defined(__arm__)
  
-       notimpl
+       mov     r1, r0, ror #7          // easy way
+
+       // even the hard way is fairly easy on arm
+       mov     r3, r0, lsl #25
+       orr     r0, r3, r0, lsr #7      // hard way
  
  #elif defined(__aarch64__)
  
-       notimpl
+       ror     x1, x0, #7              // easy way
+
+       // even the hard way is fairly easy on arm
+       lsl     x3, x0, #57
+       orr     x0, x3, x0, lsr #7      // hard way
  
  #else
         notimpl
  #endif
  
+       ret
+
  endproc
  
  proc   x27
  
+       // shift a right by c places, in two halves: floor(c/2), ceil(c/2)
+
  #if defined(__x86_64__)
  
-       notimpl
+       mov     ch, cl                  // c' = [c, c]
+       inc     ch                      // c' = [c, c + 1]
+       shr     ch, 1                   // c' = [c, ceil(c/2)]
+       shr     cl, 1                   // c' = [floor(c/2), ceil(c/2)]
+       shr     rax, cl                 // first half: a' = a >> floor(c/2)
+       xchg    ch, cl                  // c' = [ceil(c/2), floor(c/2)]
+       shr     rax, cl                 // second half: ceil(c/2) more
  
  #elif defined(__i386__)
  
-       notimpl
+       mov     ch, cl                  // c' = [c, c]
+       inc     ch                      // c' = [c, c + 1]
+       shr     ch, 1                   // c' = [c, ceil(c/2)]
+       shr     cl, 1                   // c' = [floor(c/2), ceil(c/2)]
+       shr     eax, cl                 // first half: a' = a >> floor(c/2)
+       xchg    ch, cl                  // c' = [ceil(c/2), floor(c/2)]
+       shr     eax, cl                 // second half: ceil(c/2) more
  
  #elif defined(__arm__)
  
-       notimpl
+       // it would be clearer and more efficient to say: `mov r12, r2, lsr
+       // #1; sub r2, r2, r12', but that's not the lesson this exercise is
+       // trying to teach.
+       add     r12, r2, #1             // r12 = c + 1
+       mov     r2, r2, lsr #1          // r2 = floor(c/2)
+       mov     r12, r12, lsr #1        // r12 = ceil(c/2)
+       mov     r0, r0, lsr r2          // first half of the shift
+       mov     r0, r0, lsr r12         // second half of the shift
  
  #elif defined(__aarch64__)
  
-       notimpl
+       add     w16, w2, #1             // w16 = c + 1
+       lsr     w2, w2, #1              // w2 = floor(c/2)
+       lsr     w16, w16, #1            // w16 = ceil(c/2)
+       lsr     x0, x0, x2              // first half of the shift
+       lsr     x0, x0, x16             // second half of the shift
  
  #else
         notimpl
  #endif
  
+       ret
+
  endproc
  
  proc   x28
author	Mark Wooding <mdw@distorted.org.uk>
	Mon, 19 Oct 2020 04:35:37 +0000 (05:35 +0100)
committer	Mark Wooding <mdw@distorted.org.uk>
	Mon, 19 Oct 2020 04:35:37 +0000 (05:35 +0100)