## Base definitions for assembler source.
EXTRA_DIST += asm-common.h
+## Register-dump debugging support.  Only built when `ASM_DEBUG' is set:
+## the common C driver, plus the assembler backend for the CPU family.
+if ASM_DEBUG
+libbase_la_SOURCES += regdump.c regdump.h
+## 32-bit x86 and AMD64 share a single backend source file.
+if CPUFAM_X86
+libbase_la_SOURCES += regdump-x86ish.S
+endif
+if CPUFAM_AMD64
+libbase_la_SOURCES += regdump-x86ish.S
+endif
+if CPUFAM_ARMEL
+libbase_la_SOURCES += regdump-arm.S
+endif
+if CPUFAM_ARM64
+libbase_la_SOURCES += regdump-arm64.S
+endif
+endif
+
###----- That's all, folks --------------------------------------------------
#endif
-#if CPUFAM_X86
-
-.macro _reg.0
- // Stash GP registers and establish temporary stack frame.
- pushfd
- push eax
- push ecx
- push edx
- push ebp
- mov ebp, esp
- and esp, ~15
- sub esp, 512
- fxsave [esp]
-.endm
-
-.macro _reg.1
-.endm
-
-.macro _reg.2
-.endm
-
-.macro _reg.3 fmt
- // Print FMT and the other established arguments.
- lea eax, .L$_reg$msg.\@
- push eax
- call printf
- jmp .L$_reg$cont.\@
-.L$_reg$msg.\@:
- .ascii ";; \fmt\n\0"
-.L$_reg$cont.\@:
- mov eax, ebp
- and eax, ~15
- sub eax, 512
- fxrstor [eax]
- mov esp, ebp
- pop ebp
- pop edx
- pop ecx
- pop eax
- popfd
-.endm
-
-.macro msg msg
- _reg.0
- _reg.1
- _reg.2
- _reg.3 "\msg"
-.endm
-
-.macro reg r, msg
- _reg.0
- .ifeqs "\r", "esp"
- lea eax, [ebp + 20]
- push eax
- .else
- .ifeqs "\r", "ebp"
- push [ebp]
- .else
- push \r
- .endif
- .endif
- _reg.1
- _reg.2
- _reg.3 "\msg: \r = %08x"
-.endm
-
-.macro xmmreg r, msg
- _reg.0
- _reg.1
- _reg.2
- movdqu xmm0, \r
- pshufd xmm0, xmm0, 0x1b
- sub esp, 16
- movdqa [esp], xmm0
- _reg.3 "\msg: \r = %08x %08x %08x %08x"
-.endm
-
-.macro mmreg r, msg
- _reg.0
- _reg.1
- _reg.2
- pshufw \r, \r, 0x4e
- sub esp, 8
- movq [esp], \r
- _reg.3 "\msg: \r = %08x %08x"
-.endm
-
-.macro freg i, msg
- _reg.0
- _reg.1
- _reg.2
- finit
- fldt [esp + 32 + 16*\i]
- sub esp, 12
- fstpt [esp]
- _reg.3 "\msg: st(\i) = %.20Lg"
-.endm
-
-.macro fxreg i, msg
- _reg.0
- _reg.1
- _reg.2
- finit
- fldt [esp + 32 + 16*\i]
- sub esp, 12
- fstpt [esp]
- _reg.3 "\msg: st(\i) = %La"
-.endm
-
-#endif
-
///--------------------------------------------------------------------------
/// ARM-specific hacking.
--- /dev/null
+/// -*- mode: asm; asm-comment-char: ?/ -*-
+///
+/// Register dump and debugging for 32-bit ARM
+///
+/// (c) 2019 Straylight/Edgeware
+///
+
+///----- Licensing notice ---------------------------------------------------
+///
+/// This file is part of Catacomb.
+///
+/// Catacomb is free software: you can redistribute it and/or modify it
+/// under the terms of the GNU Library General Public License as published
+/// by the Free Software Foundation; either version 2 of the License, or
+/// (at your option) any later version.
+///
+/// Catacomb is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+///
+/// You should have received a copy of the GNU Library General Public
+/// License along with Catacomb. If not, write to the Free Software
+/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+/// USA.
+
+///--------------------------------------------------------------------------
+/// Preliminaries.
+
+#include "config.h"
+#include "asm-common.h"
+#include "regdump.h"
+
+ .arch armv7-a
+ .fpu neon
+
+ .text
+
+///--------------------------------------------------------------------------
+/// Main code.
+
+FUNC(regdump_gpsave)
+ endprologue
+ // On entry, r13 should point to `REGDUMP_GPSIZE' bytes of
+ // word-aligned storage to be the general-purpose save area, with r12
+ // and r14 already saved. On exit, the initial registers are saved
+ // in this space, and modified: r4 points to the general-purpose save
+ // area, r6 holds the focus address (possibly already saved), r0
+ // contains the number of bytes required in the extended save area,
+ // and other general-purpose registers are clobbered or used to
+ // communicate with `regdump_xtsave' below. Doing anything other
+ // than lowering the stack pointer and calling `regdump_xtsave' is
+ // not recommended.
+
+ // Save the easy registers.
+ // (r12 and r14 are the caller's job, per the entry conditions, so
+ // only r0--r11 are stored here.) From now on r4 is the save-area
+ // base.
+ stmia r13, {r0-r11}
+ mov r4, r13
+
+ // Determine the previous stack pointer and save it.
+ // It is taken to be the address just past the save area, and goes
+ // in slot 13 (the r13 slot).
+ add r0, r4, #REGDUMP_GPSIZE
+ str r0, [r4, #13*4]
+
+ // Capture the status flags and return address. If the return
+ // address has its low bit set, then the caller was in Thumb state:
+ // clear the bit from the reconstructed PC, and set the corresponding
+ // CPSR bit.
+ // `bic' does not update the flags, so the `orrne' still acts on the
+ // result of the `tst'. The PC goes in slot 15.
+ mrs r0, cpsr
+ tst r14, #1
+ bic r1, r14, #1
+ orrne r0, r0, #0x00000020
+ str r0, [r13, #4*REGIX_CPSR]
+ str r1, [r13, #15*4]
+
+ // Load the focus address and save it as r6.
+ ldr r6, [r4, #4*REGIX_ADDR]
+
+ // Determine the extended save area size.
+ // NOTE(review): the initial `8 + 8' is presumably the register map
+ // plus slack for the 8-byte round-up done in `regdump_xtsave' --
+ // confirm against `regmap_size'.
+ //
+ // The conditional chain adds the D16 size only if `REGF_VFP' is set,
+ // and then tests `REGF_D32' only in that case; if VFP is absent the
+ // flags still say `eq' and neither `addne' executes.
+ ldgot
+ mov r0, #8 + 8
+ leaext r12, regdump__flags
+ ldr r12, [r12]
+ tst r12, #REGF_VFP
+ addne r0, r0, #REGDUMP_FPSIZE_D16
+ tstne r12, #REGF_D32
+ addne r0, r0, #REGDUMP_FPSIZE_D32 - REGDUMP_FPSIZE_D16
+
+ // Done.
+ bx r14
+
+ENDFUNC
+
+FUNC(regdump_gprstr)
+ endprologue
+ // On entry, r4 points to a general-purpose save area, established by
+ // `regdump_gpsave'. On exit, the general-purpose registers (other
+ // than r13 and r14) are restored to their original values.
+
+ // Restore the processor flags.
+ // Only the flags (`f') and status (`s') fields of the CPSR are
+ // written; r0 is scratch here because it is reloaded just below.
+ ldr r0, [r4, #4*REGIX_CPSR]
+ msr cpsr_fs, r0
+
+ // Load the easy registers.
+ // The base register r4 is itself in the list, so it too is replaced
+ // by its saved value; the save-area pointer is gone after this.
+ ldmia r4, {r0-r12}
+
+ // Done.
+ bx r14
+
+ENDFUNC
+
+FUNC(regdump_xtsave)
+ endprologue
+ // On entry, r13 points to an extended save area, of size determined
+ // by `regdump_gpsave' above. On exit, the save area is filled in
+ // and a handy map placed at its base.
+
+ // Set up the map/extended save area pointer.
+ // r5 is r13 rounded up to the next multiple of 8.
+ add r5, r13, #7
+ bic r5, r5, #7
+
+ // Start by filling in the easy part of the map.
+ // r4 is the general-purpose save-area pointer left by
+ // `regdump_gpsave'.
+ str r4, [r5, #regmap_gp]
+
+ // Fetch the flags explaining what to do.
+ ldgot
+ leaext r12, regdump__flags
+ ldr r12, [r12]
+
+ // Figure out whether there are VFP/NEON registers.
+ // The map's `fp' entry is null if there are none, or else the
+ // address immediately following the map. None of `moveq', `addne'
+ // or `str' alters the flags, so the `beq' still tests `REGF_VFP'.
+ tst r12, #REGF_VFP
+ moveq r3, #0
+ addne r3, r5, #regmap_size
+ str r3, [r5, #regmap_fp]
+ beq 9f
+
+ // Get the FP status register.
+ // Stored in the first word of the FP area; the pointer then
+ // advances by 8 bytes, leaving the next word unused.
+ vmrs r0, fpscr
+ str r0, [r3], #8
+
+ // At least the first 16.
+ vstmia r3!, {d0-d15}
+
+ // Maybe the other 16 too.
+ tst r12, #REGF_D32
+ vstmiane r3!, {d16-d31}
+
+ // Done.
+9: bx r14
+
+ENDFUNC
+
+FUNC(regdump_xtrstr)
+ endprologue
+ // On entry, r5 points to a register-save map. On exit, the extended
+ // registers are restored from the save area, r4 (pointing to the
+ // general-purpose save area) is preserved, and the other general
+ // registers are clobbered.
+
+ // Fetch the flags explaining what to do.
+ ldgot
+ leaext r12, regdump__flags
+ ldr r12, [r12]
+
+ // Figure out if there are VFP/NEON registers.
+ // If not, there is nothing to restore: leave immediately.
+ tst r12, #REGF_VFP
+ beq 9f
+ ldr r3, [r5, #regmap_fp]
+
+ // Load the FP status register.
+ // This mirrors the layout written by `regdump_xtsave': one word of
+ // FPSCR, then skip to offset 8 for the register contents.
+ ldr r0, [r3], #8
+ vmsr fpscr, r0
+
+ // Load the first 16 registers.
+ vldmia r3!, {d0-d15}
+
+ // And maybe the other 16.
+ tst r12, #REGF_D32
+ vldmiane r3!, {d16-d31}
+
+ // Done.
+9: bx r14
+
+ENDFUNC
+
+///----- That's all, folks --------------------------------------------------
--- /dev/null
+/// -*- mode: asm; asm-comment-char: ?/ -*-
+///
+/// Register dump and debugging for 64-bit ARM
+///
+/// (c) 2019 Straylight/Edgeware
+///
+
+///----- Licensing notice ---------------------------------------------------
+///
+/// This file is part of Catacomb.
+///
+/// Catacomb is free software: you can redistribute it and/or modify it
+/// under the terms of the GNU Library General Public License as published
+/// by the Free Software Foundation; either version 2 of the License, or
+/// (at your option) any later version.
+///
+/// Catacomb is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+///
+/// You should have received a copy of the GNU Library General Public
+/// License along with Catacomb. If not, write to the Free Software
+/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+/// USA.
+
+///--------------------------------------------------------------------------
+/// Preliminaries.
+
+#include "config.h"
+#include "asm-common.h"
+#include "regdump.h"
+
+ .arch armv8-a
+
+ .text
+
+///--------------------------------------------------------------------------
+/// Main code.
+
+FUNC(regdump_gpsave)
+ endprologue
+ // On entry, sp should point to `REGDUMP_GPSIZE' bytes of
+ // doubleword-aligned storage to be the general-purpose save area,
+ // with x16, x17, and x30 already saved. On exit, the initial
+ // registers are saved in this space, and modified: x20 points to the
+ // general-purpose save area, x22 holds the focus address (possibly
+ // already saved), x0 contains the number of bytes required in the
+ // extended save area, and other general-purpose registers are
+ // clobbered or used to communicate with `regdump_xtsave' below.
+ // Doing anything other than lowering the stack pointer and calling
+ // `regdump_xtsave' is not recommended.
+
+ // Save the easy registers.
+ // Register xN lives at offset 8*N. The pair at offset 128 (x16,
+ // x17) and the slot at offset 240 (x30) are deliberately skipped:
+ // the caller has filled those in already.
+ stp x0, x1, [sp, #0]
+ stp x2, x3, [sp, #16]
+ stp x4, x5, [sp, #32]
+ stp x6, x7, [sp, #48]
+ stp x8, x9, [sp, #64]
+ stp x10, x11, [sp, #80]
+ stp x12, x13, [sp, #96]
+ stp x14, x15, [sp, #112]
+ stp x18, x19, [sp, #144]
+ stp x20, x21, [sp, #160]
+ stp x22, x23, [sp, #176]
+ stp x24, x25, [sp, #192]
+ stp x26, x27, [sp, #208]
+ stp x28, x29, [sp, #224]
+
+ // x20 is the save-area base from here on; its old value was stored
+ // above.
+ mov x20, sp
+
+ // Determine the previous stack pointer and save it.
+ // It is taken to be the address just past the save area, and goes
+ // in slot 31.
+ add x0, x20, #REGDUMP_GPSIZE
+ str x0, [x20, #31*8]
+
+ // Capture the status flags.
+ // Stored as a full doubleword.
+ mrs x0, nzcv
+ str x0, [x20, #8*REGIX_NZCV]
+
+ // Set the return address as our PC.
+ str x30, [x20, #8*REGIX_PC]
+
+ // Load the focus address and save it as x22.
+ ldr x22, [x20, #8*REGIX_ADDR]
+
+ // Determine the extended save area size.
+ // Unlike 32-bit ARM there is no run-time probing here: the size is
+ // the constant `REGDUMP_FPSIZE'.
+ mov x0, #REGDUMP_FPSIZE
+
+ // Done.
+ ret
+
+ENDFUNC
+
+FUNC(regdump_gprstr)
+ endprologue
+ // On entry, x20 points to a general-purpose save area, established
+ // by `regdump_gpsave'. On exit, the general-purpose registers
+ // (other than x30 and sp) are restored to their original values.
+
+ // Restore the processor flags. The slot was written as a full
+ // doubleword by `regdump_gpsave', so read it back at the same width
+ // rather than relying on the low word being first in memory.
+ ldr x0, [x20, #8*REGIX_NZCV]
+ msr nzcv, x0
+
+ // Load the easy registers. Address the save area through x20, as
+ // the entry conditions promise, rather than assuming that sp still
+ // points at it.
+ ldp x0, x1, [x20, #0]
+ ldp x2, x3, [x20, #16]
+ ldp x4, x5, [x20, #32]
+ ldp x6, x7, [x20, #48]
+ ldp x8, x9, [x20, #64]
+ ldp x10, x11, [x20, #80]
+ ldp x12, x13, [x20, #96]
+ ldp x14, x15, [x20, #112]
+ ldp x16, x17, [x20, #128]
+ ldp x18, x19, [x20, #144]
+ ldp x22, x23, [x20, #176]
+ ldp x24, x25, [x20, #192]
+ ldp x26, x27, [x20, #208]
+ ldp x28, x29, [x20, #224]
+
+ // Finally reload x20 itself (together with x21). This has to come
+ // last because x20 is the base register for every load above. A
+ // load pair without writeback may name its own base register.
+ ldp x20, x21, [x20, #160]
+
+ // Done.
+ ret
+
+ENDFUNC
+
+FUNC(regdump_xtsave)
+ endprologue
+ // On entry, sp points to an extended save area, of size determined
+ // by `regdump_gpsave' above. On exit, the save area is filled in
+ // and a handy map placed at its base.
+
+ // Set up the map/extended save area pointer.
+ mov x21, sp
+
+ // Start by filling in the easy part of the map.
+ // x0 is the address just past the map, where the FP state goes.
+ // The pair store writes x20 (general-purpose save area) and x0 to
+ // the map's first two doublewords.
+ // NOTE(review): this assumes `regmap_gp' = 0 and `regmap_fp' = 8 --
+ // confirm against `regdump.h'.
+ add x0, x21, #regmap_size
+ stp x20, x0, [x21]
+
+ // Get the FP status register.
+ // FPSR and FPCR are stored as two adjacent words, and then x0
+ // advances past them to the start of the vector registers.
+ mrs x1, fpsr
+ mrs x2, fpcr
+ stp w1, w2, [x0], #8
+
+ // Store the SIMD registers.
+ // Register qN lives at offset 16*N from x0.
+ stp q0, q1, [x0, #0]
+ stp q2, q3, [x0, #32]
+ stp q4, q5, [x0, #64]
+ stp q6, q7, [x0, #96]
+ stp q8, q9, [x0, #128]
+ stp q10, q11, [x0, #160]
+ stp q12, q13, [x0, #192]
+ stp q14, q15, [x0, #224]
+ stp q16, q17, [x0, #256]
+ stp q18, q19, [x0, #288]
+ stp q20, q21, [x0, #320]
+ stp q22, q23, [x0, #352]
+ stp q24, q25, [x0, #384]
+ stp q26, q27, [x0, #416]
+ stp q28, q29, [x0, #448]
+ stp q30, q31, [x0, #480]
+
+ // Done.
+ ret
+
+ENDFUNC
+
+FUNC(regdump_xtrstr)
+ endprologue
+ // On entry, x21 points to a register-save map. On exit, the
+ // extended registers are restored from the save area, x20 (pointing
+ // to the general-purpose save area) is preserved, and the other
+ // general registers are clobbered.
+
+ // Find the FP save area through the map.
+ ldr x0, [x21, #regmap_fp]
+
+ // Load the FP status and control registers.
+ // Two adjacent words, FPSR first; x0 then advances past them to the
+ // vector registers. The word loads zero the upper halves of x1 and
+ // x2 before they are written to the system registers.
+ ldp w1, w2, [x0], #8
+ msr fpsr, x1
+ msr fpcr, x2
+
+ // Load the SIMD registers.
+ // Register qN lives at offset 16*N from x0.
+ ldp q0, q1, [x0, #0]
+ ldp q2, q3, [x0, #32]
+ ldp q4, q5, [x0, #64]
+ ldp q6, q7, [x0, #96]
+ ldp q8, q9, [x0, #128]
+ ldp q10, q11, [x0, #160]
+ ldp q12, q13, [x0, #192]
+ ldp q14, q15, [x0, #224]
+ ldp q16, q17, [x0, #256]
+ ldp q18, q19, [x0, #288]
+ ldp q20, q21, [x0, #320]
+ ldp q22, q23, [x0, #352]
+ ldp q24, q25, [x0, #384]
+ ldp q26, q27, [x0, #416]
+ ldp q28, q29, [x0, #448]
+ ldp q30, q31, [x0, #480]
+
+ // Done.
+ ret
+
+ENDFUNC
+
+///----- That's all, folks --------------------------------------------------
--- /dev/null
+/// -*- mode: asm; asm-comment-char: ?/ -*-
+///
+/// Register dump and debugging for x86
+///
+/// (c) 2019 Straylight/Edgeware
+///
+
+///----- Licensing notice ---------------------------------------------------
+///
+/// This file is part of Catacomb.
+///
+/// Catacomb is free software: you can redistribute it and/or modify it
+/// under the terms of the GNU Library General Public License as published
+/// by the Free Software Foundation; either version 2 of the License, or
+/// (at your option) any later version.
+///
+/// Catacomb is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+///
+/// You should have received a copy of the GNU Library General Public
+/// License along with Catacomb. If not, write to the Free Software
+/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+/// USA.
+
+///--------------------------------------------------------------------------
+/// Preliminaries.
+
+#include "config.h"
+#include "asm-common.h"
+#include "regdump.h"
+
+ .text
+ .arch pentium4
+ .arch .xsave
+
+///--------------------------------------------------------------------------
+/// Main code.
+
+FUNC(regdump_gpsave)
+ endprologue
+ // On entry, r/esp should point to a return address and
+ // `REGDUMP_GPSIZE' bytes of word-aligned storage to be the
+ // general-purpose save area, with flags saved in the bottom word,
+ // r/eax saved in the fourth, and (on 32-bit x86) ebx in the fifth.
+ // On exit, the initial registers are saved in this space, and
+ // modified: r/ebp points to the general-purpose save area, ecx
+ // contains the number of bytes required in the extended save area,
+ // ebx is preserved on 32-bit x86, and other general-purpose
+ // registers are clobbered or used to communicate with
+ // `regdump_xtsave' below. Doing anything other than lowering the
+ // stack pointer and calling `regdump_xtsave' is not recommended.
+
+ // Other code will insist that df is clear.
+ cld
+
+ // Save r/ebp and establish it pointing to the save area.
+ // The save area begins one word above the stack pointer, just past
+ // the return address.
+ mov [R_sp(r) + WORDSZ + REGIX_BP*WORDSZ], R_bp(r)
+ lea R_bp(r), [R_sp(r) + WORDSZ]
+
+ // Save the other easy general-purpose registers.
+ // (On 32-bit x86 the caller has already stored ebx.)
+#if !CPUFAM_X86
+ mov [R_bp(r) + REGIX_BX*WORDSZ], R_b(r)
+#endif
+ mov [R_bp(r) + REGIX_CX*WORDSZ], R_c(r)
+ mov [R_bp(r) + REGIX_DX*WORDSZ], R_d(r)
+ mov [R_bp(r) + REGIX_SI*WORDSZ], R_si(r)
+ mov [R_bp(r) + REGIX_DI*WORDSZ], R_di(r)
+#if CPUFAM_AMD64
+ mov [R_bp(r) + REGIX_R8*WORDSZ], R_r8(r)
+ mov [R_bp(r) + REGIX_R9*WORDSZ], R_r9(r)
+ mov [R_bp(r) + REGIX_R10*WORDSZ], R_r10(r)
+ mov [R_bp(r) + REGIX_R11*WORDSZ], R_r11(r)
+ mov [R_bp(r) + REGIX_R12*WORDSZ], R_r12(r)
+ mov [R_bp(r) + REGIX_R13*WORDSZ], R_r13(r)
+ mov [R_bp(r) + REGIX_R14*WORDSZ], R_r14(r)
+ mov [R_bp(r) + REGIX_R15*WORDSZ], R_r15(r)
+#endif
+
+ // Determine the previous stack pointer and save it.
+ // It is taken to be the address just past the save area, plus a
+ // further 128 bytes on AMD64 with the System V ABI.
+ // NOTE(review): the extra 128 is presumably the ABI's red zone,
+ // which the caller must have skipped -- confirm against the macros
+ // in `regdump.h'.
+#if CPUFAM_AMD64 && ABI_SYSV
+ lea R_a(r), [R_bp(r) + 128 + REGDUMP_GPSIZE]
+#else
+ lea R_a(r), [R_bp(r) + REGDUMP_GPSIZE]
+#endif
+ mov [R_bp(r) + REGIX_SP*WORDSZ], R_a(r)
+
+ // Collect the return address and save it as r/eip.
+ // Nothing has been pushed yet, so the return address is still at
+ // the top of the stack.
+ mov R_a(r), [R_sp(r)]
+ mov [R_bp(r) + REGIX_IP*WORDSZ], R_a(r)
+
+ // Save the segment registers.
+ // They are packed as 16-bit values immediately after the
+ // general-purpose slots.
+ lea R_a(r), [R_bp(r) + REGIX_GPLIM*WORDSZ]
+ mov [R_a(r) + 2*REGIX_CS], cs
+ mov [R_a(r) + 2*REGIX_DS], ds
+ mov [R_a(r) + 2*REGIX_SS], ss
+ mov [R_a(r) + 2*REGIX_ES], es
+ mov [R_a(r) + 2*REGIX_FS], fs
+ mov [R_a(r) + 2*REGIX_GS], gs
+
+ // Determine the extended save area size. Preserve ebx on 32-bit x86
+ // here, because the caller needs it for PLT-indirect calls.
+ //
+ // `cpuid' leaf 1, ecx bit 26, decides between the `xsave' and
+ // `fxsave' layouts.
+ // NOTE(review): bit 26 reports that the processor implements
+ // `xsave'; operating-system enablement is reported separately in
+ // bit 27 (OSXSAVE) -- confirm that testing bit 26 alone is intended.
+#if CPUFAM_X86
+ push ebx
+#endif
+ mov eax, 0x01
+ cpuid
+ test ecx, 1 << 26
+ je 1f
+
+ // `cpuid' leaf 0x0d, subleaf 0: take the save-area size from ecx,
+ // and add room for the map and for rounding up to a 64-byte
+ // boundary.
+ mov eax, 0x0d
+ mov ecx, 0x00
+ cpuid
+ add ecx, regmap_size + 64 // map + align
+ jmp 8f
+
+1: mov ecx, 512 + regmap_size + 16 // fxsave + map + align
+
+ // Done.
+8:
+#if CPUFAM_X86
+ pop ebx
+#endif
+ ret
+
+ENDFUNC
+
+FUNC(regdump_gprstr)
+ endprologue
+ // On entry, r/ebp points to a general-purpose save area, established
+ // by `regdump_gpsave'. On exit, the general-purpose registers
+ // (other than the stack pointer) are restored to their original
+ // values.
+ //
+ // The flags word in the save area is not touched here, and no
+ // instruction below modifies the flags.
+
+ // We assume nobody actually fiddled with the segment registers. So
+ // just the actual integer registers to do.
+ mov R_a(r), [R_bp(r) + REGIX_AX*WORDSZ]
+ mov R_b(r), [R_bp(r) + REGIX_BX*WORDSZ]
+ mov R_c(r), [R_bp(r) + REGIX_CX*WORDSZ]
+ mov R_d(r), [R_bp(r) + REGIX_DX*WORDSZ]
+ mov R_si(r), [R_bp(r) + REGIX_SI*WORDSZ]
+ mov R_di(r), [R_bp(r) + REGIX_DI*WORDSZ]
+#if CPUFAM_AMD64
+ mov R_r8(r), [R_bp(r) + REGIX_R8*WORDSZ]
+ mov R_r9(r), [R_bp(r) + REGIX_R9*WORDSZ]
+ mov R_r10(r), [R_bp(r) + REGIX_R10*WORDSZ]
+ mov R_r11(r), [R_bp(r) + REGIX_R11*WORDSZ]
+ mov R_r12(r), [R_bp(r) + REGIX_R12*WORDSZ]
+ mov R_r13(r), [R_bp(r) + REGIX_R13*WORDSZ]
+ mov R_r14(r), [R_bp(r) + REGIX_R14*WORDSZ]
+ mov R_r15(r), [R_bp(r) + REGIX_R15*WORDSZ]
+#endif
+ // r/ebp is the base for every load above, so it must go last.
+ mov R_bp(r), [R_bp(r) + REGIX_BP*WORDSZ]
+
+ // Done.
+ ret
+
+ENDFUNC
+
+#ifdef CPUFAM_AMD64
+# define fxsave fxsave64
+# define fxrstor fxrstor64
+# define xsave xsave64
+# define xrstor xrstor64
+#endif
+
+FUNC(regdump_xtsave)
+ endprologue
+ // On entry, r/esp points to a return address and extended save area,
+ // of size determined by `regdump_gpsave' above. On exit, the save
+ // area is filled in and a handy map placed at its base, the x87
+ // floating-point state is reset, r/ebp is left pointing to the
+ // register map, ebx is preserved on 32-bit x86, and the other
+ // general registers are clobbered.
+
+ // Start by filling in the easy parts of the map. The map begins one
+ // word above the stack pointer, just past the return address; the
+ // incoming r/ebp (the general-purpose save area) is recorded in it
+ // before r/ebp is repointed at the map.
+ mov [R_sp(r) + WORDSZ + regmap_gp], R_bp(r)
+ lea R_bp(r), [R_sp(r) + WORDSZ]
+
+ // No AVX state unless we discover otherwise below.
+ xor eax, eax // clears rax too on amd64
+ mov [R_bp(r) + regmap_avx], R_a(r)
+
+ // Find out whether we use `xsave'. (Preserve ebx.)
+ //
+ // From here on, the stack pointer is one word lower on 32-bit x86
+ // because of the push, so all further addresses are computed from
+ // r/ebp, which holds the map base on every target.
+#if CPUFAM_X86
+ push ebx
+#endif
+ mov eax, 0x01
+ cpuid
+ test ecx, 1 << 26
+ je 5f
+
+ // We have the `xsave' machinery. Select the base address: the
+ // first 64-byte boundary at or beyond the end of the map.
+ lea R_si(r), [R_bp(r) + regmap_size + 63]
+ and R_si(r), ~63
+ mov [R_bp(r) + regmap_fx], R_si(r)
+
+ // Clear out the header area: 16 doublewords (64 bytes) starting 512
+ // bytes into the save area.
+ xor eax, eax
+ lea R_di(r), [R_si(r) + 512]
+ mov ecx, 16
+ rep stosd
+
+ // Save the registers. The mask in edx:eax requests state
+ // components 0, 1 and 2.
+ mov eax, 0x00000007
+ xor edx, edx
+ xsave [R_si(r)]
+
+ // Establish the AVX pointer, if available. Bit 2 of the header's
+ // first word says whether component 2 was written; if so, `cpuid'
+ // leaf 13, subleaf 2, reports its offset within the area in ebx.
+ test dword ptr [R_si(r) + 512], 4 // = xstate_bv
+ je 8f
+
+ mov eax, 13
+ mov ecx, 2
+ cpuid
+ add R_b(r), R_si(r)
+ mov [R_bp(r) + regmap_avx], R_b(r)
+
+ jmp 8f
+
+ // We have only `fxsave'. Set the base address: the first 16-byte
+ // boundary at or beyond the end of the map.
+5: lea R_si(r), [R_bp(r) + regmap_size + 15]
+ and R_si(r), ~15
+ mov [R_bp(r) + regmap_fx], R_si(r)
+
+ // Save the registers.
+ fxsave [R_si(r)]
+
+ // Clear the x87 state; otherwise it can cause trouble later.
+8: fninit
+
+ // Done.
+#if CPUFAM_X86
+ pop ebx
+#endif
+ ret
+
+ENDFUNC
+
+FUNC(regdump_xtrstr)
+ endprologue
+ // On entry, r/ebp points to a register-save map. On exit, the
+ // extended registers are restored from the save area; r/ebp is left
+ // pointing to the general-purpose save area, ebx is preserved on
+ // 32-bit x86, and the other general registers are clobbered.
+
+ // Find the extended register dump.
+ // This is fetched before the `cpuid' below, which does not alter
+ // r/esi.
+ mov R_si(r), [R_bp(r) + regmap_fx]
+
+ // Probe to find out whether we have `xsave'.
+ // This is the same test as `regdump_xtsave' makes, so the matching
+ // restore instruction is chosen.
+#if CPUFAM_X86
+ push ebx
+#endif
+ mov eax, 0x01
+ cpuid
+ test ecx, 1 << 26
+ je 1f
+
+ // We have the `xsave' machinery.
+ // The mask in edx:eax requests state components 0, 1 and 2.
+ mov eax, 0x00000007
+ xor edx, edx
+ xrstor [R_si(r)]
+ jmp 8f
+
+ // We must fake it up.
+1: fxrstor [R_si(r)]
+
+ // Done.
+ // Swap the map pointer in r/ebp for the general-purpose save-area
+ // pointer recorded in the map.
+8: mov R_bp(r), [R_bp(r) + regmap_gp]
+#if CPUFAM_X86
+ pop ebx
+#endif
+ ret
+
+ENDFUNC
+
+///----- That's all, folks --------------------------------------------------
--- /dev/null
+/* -*-c-*-
+ *
+ * Register dumping and other diagnostic tools for assembler code
+ *
+ * (c) 2016 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of Catacomb.
+ *
+ * Catacomb is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * Catacomb is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with Catacomb; if not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA.
+ */
+
+/*----- Header files ------------------------------------------------------*/
+
+#include "config.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <mLib/bits.h>
+#include <mLib/macros.h>
+
+#include "dispatch.h"
+#include "regdump.h"
+
+/*----- Low-level printing ------------------------------------------------*/
+
+/* Currently these are good for all of our targets. */
+#define STEP_8 1
+#define TY_HEX_8 uint8
+#define P_HEX_8 "0x%02x"
+#define TY_UNSGN_8 uint8
+#define P_UNSGN_8 "%3u"
+#define PV_CHR_8 " `%c'"
+#define PV_HEX_8 " %02x"
+#define PV_UNSGN_8 "%4u"
+
+#define STEP_16 2
+#define TY_HEX_16 uint16
+#define P_HEX_16 "0x%04x"
+#define TY_UNSGN_16 uint16
+#define P_UNSGN_16 "%5u"
+#define TY_SGN_16 int16
+#define P_SGN_16 "%6d"
+#define PV_HEX_16 " 0x%04x"
+#define PV_UNSGN_16 "%9u"
+#define PV_SGN_16 "%9d"
+
+#define STEP_32 4
+#define TY_HEX_32 uint32
+#define P_HEX_32 "0x%08x"
+#define TY_UNSGN_32 uint32
+#define P_UNSGN_32 "%10u"
+#define TY_SGN_32 int32
+#define P_SGN_32 "%11d"
+#define TY_FLT_32 float
+#define P_FLT_32 "%15.9g"
+#define PV_HEX_32 " 0x%08x"
+#define PV_UNSGN_32 "%19u"
+#define PV_SGN_32 "%19d"
+#define PV_FLT_32 "%19.9g"
+
+#if ULONG_MAX >> 31 > 0xffffffff
+# define PL64 "l"
+#else
+# define PL64 "ll"
+#endif
+#define STEP_64 8
+#define TY_HEX_64 uint64
+#define P_HEX_64 "0x%016"PL64"x"
+#define TY_UNSGN_64 uint64
+#define P_UNSGN_64 "%20"PL64"u"
+#define TY_SGN_64 int64
+#define P_SGN_64 "%20"PL64"d"
+#define TY_FLT_64 double
+#define P_FLT_64 "%24.17g"
+#define PV_HEX_64 " 0x%016"PL64"x"
+#define PV_UNSGN_64 "%39"PL64"u"
+#define PV_SGN_64 "%39"PL64"d"
+#define PV_FLT_64 "%39.17g"
+
+#if CPUFAM_X86
+# define STEP_80 12
+#endif
+#if CPUFAM_AMD64
+# define STEP_80 16
+#endif
+#define TY_FLT_80 long double
+#define P_FLT_80 "%29.21Lg"
+#define PV_FLT_80 P_FLT_80
+
+#if CPUFAM_X86 || CPUFAM_AMD64
+# define ARCH_FORMATS(_) \
+ _(80, FLT)
+#endif
+#ifndef ARCH_FORMATS
+# define ARCH_FORMATS(_)
+#endif
+
+#define FORMATS(_) \
+ ARCH_FORMATS(_) \
+ _(64, HEX) _(64, FLT) _(64, UNSGN) _(64, SGN) \
+ _(32, HEX) _(32, FLT) _(32, UNSGN) _(32, SGN) \
+ _(16, HEX) _(16, UNSGN) _(16, SGN) \
+ _(8, HEX) _(8, CHR) _(8, UNSGN)
+
+/* Cursor state shared by the @dump_...@ formatting functions. */
+struct fmtinfo {
+ const unsigned char *p;		/* next byte of the value to print */
+ unsigned wd, f;			/* bits printed so far; flags */
+#define FMTF_VECTOR 1u		/* use the wide @PV_...@ formats */
+};
+
+/* Define @dump_FMT_W@: read one @TY_FMT_W@ item from the cursor, print it
+ * using @PV_FMT_W@ if @FMTF_VECTOR@ is set and @P_FMT_W@ otherwise, and then
+ * advance the cursor by @STEP_W@ bytes and the bit count by @8*STEP_W@.
+ *
+ * Note that the macro argument @fmt@ is also substituted for the generated
+ * function's parameter name, so that parameter ends up being called @HEX@,
+ * @FLT@, etc.; this is consistent within each generated function.
+ */
+#define FMTFUNC_STD(w, fmt) \
+ static void dump_##fmt##_##w(struct fmtinfo *fmt) \
+ { \
+ TY_##fmt##_##w x = *(const TY_##fmt##_##w *)fmt->p; \
+ \
+ if (fmt->f&FMTF_VECTOR) printf(PV_##fmt##_##w, x); \
+ else printf(P_##fmt##_##w, x); \
+ fmt->p += STEP_##w; fmt->wd += 8*STEP_##w; \
+ }
+
+#define FMTFUNC_HEX(w) FMTFUNC_STD(w, HEX)
+#define FMTFUNC_UNSGN(w) FMTFUNC_STD(w, UNSGN)
+#define FMTFUNC_SGN(w) FMTFUNC_STD(w, SGN)
+#define FMTFUNC_FLT(w) FMTFUNC_STD(w, FLT)
+#define FMTFUNC_CHR(w)
+
+/* Print the byte at the cursor as a character: quoted if it is in the
+ * range 32--126, and as a hex escape otherwise.  Either way the output is
+ * four characters wide.  Advance the cursor by one byte (eight bits).
+ */
+static void dump_CHR_8(struct fmtinfo *fmt)
+{
+  unsigned char ch = *fmt->p;
+
+  if (32 <= ch && ch <= 126) printf(" `%c'", ch);
+  else printf("\\x%02x", ch);
+  fmt->p++;
+  fmt->wd += 8;
+}
+
+#define FMTFUNC(w, fmt) FMTFUNC_##fmt(w)
+FORMATS(FMTFUNC)
+#undef FMTFUNC
+
+/* Table of formatting functions, one entry per item in @FORMATS@ and in
+ * the same order.  Each entry pairs the format-and-width flag combination
+ * with the function which prints it; the table ends with an all-zero entry.
+ */
+static const struct fmttab {
+ uint32 mask;
+ void (*fmt)(struct fmtinfo *);
+} fmttab[] = {
+#define FMTTAB(wd, fmt) { REGF_##fmt | REGF_##wd, dump_##fmt##_##wd },
+ FORMATS(FMTTAB)
+#undef FMTTAB
+ { 0, 0 }
+};
+
+/*----- Common subroutines ------------------------------------------------*/
+
+/* --- @regwd@ --- *
+ *
+ * Arguments: @uint32 f@ = format control word; see @REGF_...@
+ *
+ * Returns: The actual width of the operand, in bits.
+ *
+ * Use: If the operand is a vector (the @REGF_WDMASK@ field is
+ * nonzero) then return the width it denotes; otherwise, return
+ * the largest width implied by the @REGF_TYMASK@ field.
+ */
+
+static unsigned regwd(uint32 f)
+{
+  /* The @REGF_WDMASK@ field holds the base-2 logarithm of the width. */
+  unsigned wd = 1 << ((f&REGF_WDMASK) >> REGF_WDSHIFT);
+
+  /* A zero field decodes as 1, meaning `no explicit width': fall back to
+   * the widest of the type-width flags which is set.
+   */
+  if (wd > 1) return (wd);
+  else if (f&REGF_80) return (80);
+  else if (f&REGF_64) return (64);
+  else if (f&REGF_32) return (32);
+  else if (f&REGF_16) return (16);
+  else if (f&REGF_8) return (8);
+  else { assert(0); return (1); }
+}
+
+/* --- @regname@ --- *
+ *
+ * Arguments: @char *buf@ = pointer to output buffer
+ * @uint32 f@ = format control word; see @REGF_...@
+ *
+ * Returns: Pointer to name string.
+ *
+ * Use: Return a pointer to the name of the register implied by @f@,
+ * or null if there is no register. Systematic register names
+ * can be built in the provided buffer.
+ */
+
+static const char *regname(char *buf, uint32 f)
+{
+  unsigned wd = regwd(f);		/* operand width in bits */
+  unsigned src = f&REGF_SRCMASK;	/* which register file */
+  unsigned ix = (f&REGF_IXMASK) >> REGF_IXSHIFT; /* index within it */
+  char *p = buf;
+
+  switch (src) {
+
+    case REGSRC_ABS:
+      /* An absolute address isn't a register, so there's no name. */
+      return (0);
+
+#if CPUFAM_X86 || CPUFAM_AMD64
+    case REGSRC_GP:
+      if (ix == REGIX_FLAGS) {
+	/* `flags', `eflags', or `rflags' according to width. */
+	if (wd == 64) *p++ = 'r';
+	else if (wd == 32) *p++ = 'e';
+	else if (wd != 16) assert(0);
+	p += sprintf(p, "flags");
+#if CPUFAM_AMD64
+      } else if (REGIX_R8 <= ix && ix <= REGIX_R15) {
+	/* Numbered registers take a width suffix. */
+	p += sprintf(p, "r%u", ix - REGIX_R8 + 8);
+	switch (wd) {
+	  case 64: break;
+	  case 32: *p++ = 'd'; break;
+	  case 16: *p++ = 'w'; break;
+	  case 8: *p++ = 'l'; break;
+	  default: assert(0);
+	}
+#endif
+      } else {
+	/* Traditional registers: a width prefix, the stem, and then a
+	 * suffix which depends on whether the stem is one letter
+	 * (@shortreg@: `ax', `al') or two (@longreg@: `si', `sil').
+	 */
+	if (wd == 64) *p++ = 'r';
+	else if (wd == 32) *p++ = 'e';
+	switch (ix) {
+	  case REGIX_IP: *p++ = 'i'; *p++ = 'p'; goto longreg;
+	  case REGIX_AX: *p++ = 'a'; goto shortreg;
+	  case REGIX_BX: *p++ = 'b'; goto shortreg;
+	  case REGIX_CX: *p++ = 'c'; goto shortreg;
+	  case REGIX_DX: *p++ = 'd'; goto shortreg;
+	  case REGIX_SI: *p++ = 's'; *p++ = 'i'; goto longreg;
+	  case REGIX_DI: *p++ = 'd'; *p++ = 'i'; goto longreg;
+	  case REGIX_BP: *p++ = 'b'; *p++ = 'p'; goto longreg;
+	  case REGIX_SP: *p++ = 's'; *p++ = 'p'; goto longreg;
+	  default: assert(0);
+	}
+	if (0) {
+	shortreg:
+	  switch (wd) {
+	    case 64:
+	    case 32:
+	    case 16: *p++ = 'x'; break;
+	    case 8: *p++ = 'l'; break;
+	    default: assert(0);
+	  }
+	} else {
+	longreg:
+	  switch (wd) {
+	    case 64:
+	    case 32:
+	    case 16: break;
+	    case 8: *p++ = 'l'; break;
+	    default: assert(0);
+	  }
+	}
+      }
+      *p++ = 0;
+      return (buf);
+
+    case REGSRC_SEG:
+      assert(wd == 16);
+      switch (ix) {
+	case REGIX_CS: sprintf(buf, "cs"); break;
+	case REGIX_DS: sprintf(buf, "ds"); break;
+	case REGIX_SS: sprintf(buf, "ss"); break;
+	case REGIX_ES: sprintf(buf, "es"); break;
+	case REGIX_FS: sprintf(buf, "fs"); break;
+	case REGIX_GS: sprintf(buf, "gs"); break;
+	default: assert(0);
+      }
+      return (buf);
+
+    case REGSRC_STMMX:
+      /* The same storage is an x87 stack slot when viewed as an 80-bit
+       * value, and an MMX register otherwise.
+       */
+      if (ix == REGIX_FPFLAGS) return (0);
+      if (f&REGF_80) sprintf(buf, "st(%u)", ix);
+      else sprintf(buf, "mm%u", ix);
+      return (buf);
+
+    case REGSRC_SIMD:
+      if (ix == REGIX_FPFLAGS) return (0);
+      switch (wd) {
+	case 32: case 64: case 128: sprintf(buf, "xmm%u", ix); break;
+	case 256: sprintf(buf, "ymm%u", ix); break;
+	default: assert(0);
+      }
+      return (buf);
+#endif
+
+#if CPUFAM_ARMEL
+    case REGSRC_GP:
+      if (ix == REGIX_CPSR) sprintf(buf, "cpsr");
+      else if (ix == 15) sprintf(buf, "pc");
+      else sprintf(buf, "r%u", ix);
+      return (buf);
+    case REGSRC_FP:
+      if (ix == REGIX_FPSCR) sprintf(buf, "fpscr");
+      else {
+	/* The width selects the register-bank prefix. */
+	switch (wd) {
+	  case 32: *p++ = 's'; break;
+	  case 64: *p++ = 'd'; break;
+	  case 128: *p++ = 'q'; break;
+	  default: assert(0);
+	}
+	p += sprintf(p, "%u", ix);
+	*p++ = 0;
+      }
+      return (buf);
+#endif
+
+#if CPUFAM_ARM64
+    case REGSRC_GP:
+      if (ix == REGIX_PC) sprintf(buf, "pc");
+      else if (ix == REGIX_NZCV) sprintf(buf, "nzcv");
+      else if (ix == 31 && wd == 64) sprintf(buf, "sp");
+      else {
+	switch (wd) {
+	  case 32: *p++ = 'w'; break;
+	  case 64: *p++ = 'x'; break;
+	  default: assert(0);
+	}
+	p += sprintf(p, "%u", ix);
+	*p++ = 0;
+      }
+      return (buf);
+    case REGSRC_FP:
+      if (ix == REGIX_FPFLAGS) sprintf(buf, "fpflags");
+      else {
+	/* An explicit vector width means the whole `v' register;
+	 * otherwise name the scalar view of the appropriate size.
+	 */
+	if (f&REGF_WDMASK)
+	  *p++ = 'v';
+	else switch (wd) {
+	  case 8: *p++ = 'b'; break;
+	  case 16: *p++ = 'h'; break;
+	  case 32: *p++ = 's'; break;
+	  case 64: *p++ = 'd'; break;
+	  default: assert(0);
+	}
+	p += sprintf(p, "%u", ix);
+	*p++ = 0;
+      }
+      return (buf);
+#endif
+
+    default:
+      assert(0);
+      return ("???");
+  }
+}
+
+/*----- x86 and AMD64 -----------------------------------------------------*/
+
+#if CPUFAM_X86 || CPUFAM_AMD64
+
+#if CPUFAM_X86
+# define P_HEX_GP "0x%08x"
+# define GP(gp) (gp).u32
+#endif
+#if CPUFAM_AMD64
+# define P_HEX_GP "0x%016"PL64"x"
+# define GP(gp) (gp).u64
+#endif
+
+/* Nothing needs setting up at run time on x86 or AMD64. */
+void regdump_init(void)
+{
+}
+
+/* --- @dump_flags@ --- *
+ *
+ * Arguments:	@const char *lbl@ = label to print, or null
+ *		@const char *reg@ = register name to print, or null
+ *		@gpreg f@ = the flags register value
+ *
+ * Returns:	---
+ *
+ * Use:		Prints the flags value in hex, followed by two lines which
+ *		decode the individual status and system flag bits: each
+ *		single-bit flag is shown as `+' (set) or `-' (clear) before
+ *		its name, and the I/O privilege level is shown as a number.
+ */
+
+static void dump_flags(const char *lbl, const char *reg, gpreg f)
+{
+  printf(";; ");
+  if (lbl) printf("%s: ", lbl);
+  if (reg) printf("%s = ", reg);
+  printf(""P_HEX_GP"\n", GP(f));
+  printf(";;\t\tstatus: %ccf %cpf %caf %czf %csf %cdf %cof\n",
+	 (GP(f) >>  0)&1u ? '+' : '-',
+	 (GP(f) >>  2)&1u ? '+' : '-',
+	 (GP(f) >>  4)&1u ? '+' : '-',
+	 (GP(f) >>  6)&1u ? '+' : '-',
+	 (GP(f) >>  7)&1u ? '+' : '-',
+	 (GP(f) >> 10)&1u ? '+' : '-',
+	 (GP(f) >> 11)&1u ? '+' : '-');
+  printf(";;\t\tsystem: %ctf %cif iopl=%d %cnt "
+	 "%crf %cvm %cac %cvif %cvip %cid\n",
+	 (GP(f) >>  8)&1u ? '+' : '-',
+	 (GP(f) >>  9)&1u ? '+' : '-',
+	 /* IOPL is a two-bit field occupying bits 12 and 13. */
+	 (int)((GP(f) >> 12)&3u),
+	 (GP(f) >> 14)&1u ? '+' : '-',
+	 (GP(f) >> 16)&1u ? '+' : '-',
+	 (GP(f) >> 17)&1u ? '+' : '-',
+	 (GP(f) >> 18)&1u ? '+' : '-',
+	 (GP(f) >> 19)&1u ? '+' : '-',
+	 (GP(f) >> 20)&1u ? '+' : '-',
+	 (GP(f) >> 21)&1u ? '+' : '-');
+}
+
+static const char
+ *pcmap[] = { "sgl", "???", "dbl", "ext" },
+ *rcmap[] = { "nr", "-∞", "+∞", "0" };
+
+static void dump_fpflags(const char *lbl, const struct fxsave *fx)
+{
+ unsigned top = (fx->fsw >> 11)&7u;
+ unsigned tag = fx->ftw;
+ int skip = lbl ? strlen(lbl) + 2 : 0;
+
+ printf(";; ");
+ if (lbl) printf("%s: ", lbl);
+
+ printf(" fcw = 0x%04x: "
+ "%cim %cdm %czm %com %cum %cpm pc=%s rc=%s %cx\n",
+ fx->fcw,
+ (fx->fcw >> 0)&1u ? '+' : '-',
+ (fx->fcw >> 1)&1u ? '+' : '-',
+ (fx->fcw >> 2)&1u ? '+' : '-',
+ (fx->fcw >> 3)&1u ? '+' : '-',
+ (fx->fcw >> 4)&1u ? '+' : '-',
+ (fx->fcw >> 5)&1u ? '+' : '-',
+ pcmap[(fx->fcw >> 8)&3u],
+ rcmap[(fx->fcw >> 10)&3u],
+ (fx->fcw >> 12)&1u ? '+' : '-');
+ printf(";; %*s fsw = 0x%04x: "
+ "%cie %cde %cze %coe %cue %cpe %csf %ces %cc0 %cc1 %cc2 %cc3 "
+ "top=%d %cb\n",
+ skip, "",
+ fx->fsw,
+ (fx->fsw >> 0)&1u ? '+' : '-',
+ (fx->fsw >> 1)&1u ? '+' : '-',
+ (fx->fsw >> 2)&1u ? '+' : '-',
+ (fx->fsw >> 3)&1u ? '+' : '-',
+ (fx->fsw >> 4)&1u ? '+' : '-',
+ (fx->fsw >> 5)&1u ? '+' : '-',
+ (fx->fsw >> 6)&1u ? '+' : '-',
+ (fx->fsw >> 7)&1u ? '+' : '-',
+ (fx->fsw >> 8)&1u ? '+' : '-',
+ (fx->fsw >> 9)&1u ? '+' : '-',
+ (fx->fsw >> 10)&1u ? '+' : '-',
+ (fx->fsw >> 14)&1u ? '+' : '-',
+ top,
+ (fx->fsw >> 15)&1u ? '+' : '-');
+ printf(";; %*s ftw = 0x%02x\n", skip, "", tag);
+}
+
+static void dump_mxflags(const char *lbl, const struct fxsave *fx)
+{
+ printf(";; ");
+ if (lbl) printf("%s: ", lbl);
+
+ printf(" mxcsr = 0x%08x\n"
+ ";;\t\tmask = %cim %cdm %czm %com %cum %cpm\n"
+ ";;\t\t exc = %cie %cde %cze %coe %cue %cpe\n"
+ ";;\t\tmisc = %cdaz %cftz rc=%s\n",
+ fx->mxcsr,
+ (fx->mxcsr >> 7)&1u ? '+' : '-',
+ (fx->mxcsr >> 8)&1u ? '+' : '-',
+ (fx->mxcsr >> 9)&1u ? '+' : '-',
+ (fx->mxcsr >> 10)&1u ? '+' : '-',
+ (fx->mxcsr >> 11)&1u ? '+' : '-',
+ (fx->mxcsr >> 12)&1u ? '+' : '-',
+ (fx->mxcsr >> 0)&1u ? '+' : '-',
+ (fx->mxcsr >> 1)&1u ? '+' : '-',
+ (fx->mxcsr >> 2)&1u ? '+' : '-',
+ (fx->mxcsr >> 3)&1u ? '+' : '-',
+ (fx->mxcsr >> 4)&1u ? '+' : '-',
+ (fx->mxcsr >> 5)&1u ? '+' : '-',
+ (fx->mxcsr >> 6)&1u ? '+' : '-',
+ (fx->mxcsr >> 15)&1u ? '+' : '-',
+ rcmap[(fx->mxcsr >> 13)&3u]);
+}
+
+/* @REGF_GPWD@ is the type flag used when dumping a whole general-purpose
+ * register: @REGF_32@ on x86, @REGF_64@ on AMD64.
+ */
+#if CPUFAM_X86
+# define REGF_GPWD REGF_32
+#endif
+#if CPUFAM_AMD64
+# define REGF_GPWD REGF_64
+#endif
+
+/* Dump the general-purpose registers, the instruction pointer, the
+ * segment registers and the flags word recorded in @map@, each group
+ * under its own heading line.
+ */
+void regdump_gp(const struct regmap *map)
+{
+  const uint32 gpword = REGF_GPWD | REGSRC_GP;
+  unsigned ix;
+
+  fputs(";; General-purpose registers:\n", stdout);
+  ix = REGIX_AX;
+  while (ix < REGIX_GPLIM) {
+    regdump(map, 0, REGF_HEX | REGF_UNSGN | REGF_SGN | gpword | ix);
+    ix++;
+  }
+  /* The instruction pointer is shown in hex only. */
+  regdump(map, 0, REGF_HEX | gpword | REGIX_IP);
+
+  fputs(";; Segment registers:\n", stdout);
+  for (ix = 0; ix != REGIX_SEGLIM; ix++)
+    regdump(map, 0, REGF_HEX | REGF_16 | REGSRC_SEG | ix);
+
+  /* No format bits: @regdump@ decodes the flags word bit by bit. */
+  fputs(";; Flags:\n", stdout);
+  regdump(map, 0, gpword | REGIX_FLAGS);
+}
+
+/* Dump the eight x87/MMX registers recorded in @map@, followed by the
+ * decoded floating-point control, status and tag words.
+ */
+void regdump_fp(const struct regmap *map)
+{
+  const struct fxsave *fx = map->fx;
+  unsigned stktop = (fx->fsw >> 11)&7u;
+  unsigned tagword = fx->ftw;
+  unsigned ix;
+
+  fputs(";; Floating-point/MMX registers:\n", stdout);
+
+  /* With `top' zero and every tag bit set, show each register as a
+   * 64-bit vector of integers.
+   */
+  if (stktop == 0 && tagword == 0xff) {
+    for (ix = 0; ix < 8; ix++) {
+      regdump(map, 0,
+              REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_CHR |
+              REGF_32 | REGF_16 | REGF_8 |
+              REGSRC_STMMX | ix | (6 << REGF_WDSHIFT));
+    }
+  }
+
+  /* If any tag bit is set, show each register as an 80-bit float. */
+  if (tagword != 0) {
+    for (ix = 0; ix < 8; ix++)
+      regdump(map, 0, REGF_FLT | REGF_80 | REGSRC_STMMX | ix);
+  }
+
+  fputs(";; Floating-point state:\n", stdout);
+  dump_fpflags(0, fx);
+}
+
+/* Dump every SIMD register recorded in @map@ in all of the supported
+ * element sizes and formats, followed by the decoded @mxcsr@ word.  The
+ * registers are shown 256 bits wide if @map->avx@ is non-null, and 128
+ * bits wide otherwise.
+ */
+void regdump_simd(const struct regmap *map)
+{
+  unsigned f = REGF_HEX | REGF_FLT | REGF_UNSGN | REGF_SGN | REGF_CHR |
+               REGF_64 | REGF_32 | REGF_16 | REGF_8 |
+               REGSRC_SIMD;
+  unsigned i;
+
+  /* The width field holds log_2 of the register width in bits.  The
+   * constants are unsigned because 8 << 28 is not representable in a
+   * signed @int@.
+   */
+  if (map->avx) f |= 8u << REGF_WDSHIFT;
+  else f |= 7u << REGF_WDSHIFT;
+
+  printf(";; SSE/AVX registers:\n");
+  for (i = 0; i < N(map->fx->xmm); i++)
+    regdump(map, 0, f | i);
+
+  printf(";; SSE/AVX floating-point state:\n");
+  dump_mxflags(0, map->fx);
+}
+
+#endif
+
+/*----- ARM32 -------------------------------------------------------------*/
+
+#if CPUFAM_ARMEL
+
+/* Feature bits (@REGF_VFP@, @REGF_D32@) filled in by @regdump_init@. */
+unsigned regdump__flags = 0;
+
+/* Query the CPU-feature machinery and record in @regdump__flags@ which of
+ * the optional floating-point features are available.
+ */
+void regdump_init(void)
+{
+  unsigned found = 0;
+
+  if (cpu_feature_p(CPUFEAT_ARM_VFP)) found |= REGF_VFP;
+  if (cpu_feature_p(CPUFEAT_ARM_D32)) found |= REGF_D32;
+  regdump__flags |= found;
+}
+
+static void dump_flags(const char *lbl, unsigned f)
+{
+ static const char
+ *modetab[] = { "?00", "?01", "?02", "?03", "?04", "?05", "?06", "?07",
+ "?08", "?09", "?10", "?11", "?12", "?13", "?14", "?15",
+ "usr", "fiq", "irq", "svc", "?20", "?21", "mon", "abt",
+ "?24", "?25", "hyp", "und", "?28", "?29", "?30", "sys" },
+ *condtab[] = { "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" };
+
+ printf(";; ");
+ if (lbl) printf("%s: ", lbl);
+ printf(" cpsr = 0x%08x\n", f);
+ printf(";;\t\tuser: %cn %cz %cc %cv %cq ge=%c%c%c%c\n",
+ (f >> 31)&1u ? '+' : '-',
+ (f >> 30)&1u ? '+' : '-',
+ (f >> 29)&1u ? '+' : '-',
+ (f >> 28)&1u ? '+' : '-',
+ (f >> 27)&1u ? '+' : '-',
+ (f >> 19)&1u ? '1' : '0',
+ (f >> 18)&1u ? '1' : '0',
+ (f >> 17)&1u ? '1' : '0',
+ (f >> 16)&1u ? '1' : '0');
+ printf(";;\t\tsystem: %cj it=%s:%c%c%c%c %ce %ca %ci %cf %ct m=%s\n",
+ (f >> 24)&1u ? '+' : '-',
+ condtab[(f >> 12)&15u],
+ (f >> 11)&1u ? '1' : '0',
+ (f >> 10)&1u ? '1' : '0',
+ (f >> 26)&1u ? '1' : '0',
+ (f >> 25)&1u ? '1' : '0',
+ (f >> 9)&1u ? '+' : '-',
+ (f >> 8)&1u ? '+' : '-',
+ (f >> 7)&1u ? '+' : '-',
+ (f >> 6)&1u ? '+' : '-',
+ (f >> 5)&1u ? '+' : '-',
+ modetab[(f >> 0)&31u]);
+}
+
+static void dump_fpflags(const char *lbl, unsigned f)
+{
+ static const char *rcmap[] = { "nr", "+∞", "-∞", "0" };
+
+ printf(";; ");
+ if (lbl) printf("%s: ", lbl);
+ printf(" fpscr = 0x%08x\n", f);
+ printf(";;\t\tcond: %cn %cz %cc %cv %cqc\n",
+ (f >> 31)&1u ? '+' : '-',
+ (f >> 30)&1u ? '+' : '-',
+ (f >> 29)&1u ? '+' : '-',
+ (f >> 28)&1u ? '+' : '-',
+ (f >> 27)&1u ? '+' : '-');
+ printf(";;\t\ttrap: %cide %cixe %cufe %cofe %cdze %cioe\n",
+ (f >> 15)&1u ? '+' : '-',
+ (f >> 12)&1u ? '+' : '-',
+ (f >> 11)&1u ? '+' : '-',
+ (f >> 10)&1u ? '+' : '-',
+ (f >> 9)&1u ? '+' : '-',
+ (f >> 8)&1u ? '+' : '-');
+ printf(";;\t\terror: %cide %cixe %cufe %cofe %cdze %cioe\n",
+ (f >> 7)&1u ? '+' : '-',
+ (f >> 4)&1u ? '+' : '-',
+ (f >> 3)&1u ? '+' : '-',
+ (f >> 2)&1u ? '+' : '-',
+ (f >> 1)&1u ? '+' : '-',
+ (f >> 0)&1u ? '+' : '-');
+ printf(";;\t\tcontrol: %cahp %cdn %cfz rm=%s str=%d len=%d\n",
+ (f >> 26)&1u ? '+' : '-',
+ (f >> 25)&1u ? '+' : '-',
+ (f >> 24)&1u ? '+' : '-',
+ rcmap[(f >> 22)&3u],
+ (f >> 20)&3u,
+ (f >> 16)&7u);
+}
+
+/* Dump the sixteen general-purpose registers recorded in @map@, followed
+ * by the decoded status word.
+ */
+void regdump_gp(const struct regmap *map)
+{
+  const uint32 gpword = REGF_32 | REGSRC_GP;
+  unsigned ix = 0;
+
+  fputs(";; General-purpose registers:\n", stdout);
+  while (ix < 16) {
+    regdump(map, 0, REGF_HEX | REGF_UNSGN | REGF_SGN | gpword | ix);
+    ix++;
+  }
+
+  /* No format bits: @regdump@ decodes the status word bit by bit. */
+  fputs(";; Flags:\n", stdout);
+  regdump(map, 0, gpword | REGIX_CPSR);
+}
+
+/* Dump the floating-point/SIMD registers recorded in @map@ as 64-bit
+ * vectors, followed by the decoded status/control word.  If
+ * @regdump__flags@ lacks @REGF_VFP@, only a notice is printed.  Thirty-two
+ * registers are shown if @REGF_D32@ is set, and sixteen otherwise.
+ */
+void regdump_fp(const struct regmap *map)
+{
+  unsigned i, n;
+
+  if (!(regdump__flags&REGF_VFP)) {
+    printf(";; Floating-point and SIMD not available\n");
+    return;
+  }
+
+  printf(";; Floating-point/SIMD registers:\n");
+  if (regdump__flags&REGF_D32) n = 32;
+  else n = 16;
+  for (i = 0; i < n; i++)
+    regdump(map, 0,
+            REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_FLT | REGF_CHR |
+            REGF_64 | REGF_32 | REGF_16 | REGF_8 |
+            REGSRC_SIMD | i | (6 << REGF_WDSHIFT));
+
+  printf(";; Floating-point state:\n");
+  dump_fpflags(0, map->fp->fpscr);
+}
+
+/* Nothing is printed here on this target; @regdump_fp@ dumps the
+ * `Floating-point/SIMD registers'.
+ */
+void regdump_simd(const struct regmap *map)
+{
+  /* Deliberately empty. */
+}
+
+#endif
+
+/*----- ARM64 -------------------------------------------------------------*/
+
+#if CPUFAM_ARM64
+
+/* No initialization is performed on this target. */
+void regdump_init(void)
+{
+  /* Deliberately empty. */
+}
+
+static void dump_flags(const char *lbl, unsigned f)
+{
+ printf(";; ");
+ if (lbl) printf("%s: ", lbl);
+ printf(" nzcv = 0x%08x\n", f);
+ printf(";;\t\tuser: %cn %cz %cc %cv\n",
+ (f >> 31)&1u ? '+' : '-',
+ (f >> 30)&1u ? '+' : '-',
+ (f >> 29)&1u ? '+' : '-',
+ (f >> 28)&1u ? '+' : '-');
+}
+
+static void dump_fpflags(const char *lbl, const struct fpsave *fp)
+{
+ static const char *rcmap[] = { "nr", "+∞", "-∞", "0" };
+ int skip = lbl ? strlen(lbl) + 2 : 0;
+
+ printf(";; ");
+ if (lbl) printf("%s: ", lbl);
+ printf(" fpsr = 0x%08x\n", fp->fpsr);
+ printf(";;\t\tcond: %cn %cz %cc %cv %cqc\n",
+ (fp->fpsr >> 31)&1u ? '+' : '-',
+ (fp->fpsr >> 30)&1u ? '+' : '-',
+ (fp->fpsr >> 29)&1u ? '+' : '-',
+ (fp->fpsr >> 28)&1u ? '+' : '-',
+ (fp->fpsr >> 27)&1u ? '+' : '-');
+ printf(";;\t\terror: %cidc %cixc %cufc %cofc %cdzc %cioc\n",
+ (fp->fpsr >> 7)&1u ? '+' : '-',
+ (fp->fpsr >> 4)&1u ? '+' : '-',
+ (fp->fpsr >> 3)&1u ? '+' : '-',
+ (fp->fpsr >> 2)&1u ? '+' : '-',
+ (fp->fpsr >> 1)&1u ? '+' : '-',
+ (fp->fpsr >> 0)&1u ? '+' : '-');
+ printf(";; %*s fpcr = 0x%08x\n", skip, "", fp->fpcr);
+ printf(";;\t\ttrap: %cide %cixe %cufe %cofe %cdze %cioe\n",
+ (fp->fpcr >> 15)&1u ? '+' : '-',
+ (fp->fpcr >> 12)&1u ? '+' : '-',
+ (fp->fpcr >> 11)&1u ? '+' : '-',
+ (fp->fpcr >> 10)&1u ? '+' : '-',
+ (fp->fpcr >> 9)&1u ? '+' : '-',
+ (fp->fpcr >> 8)&1u ? '+' : '-');
+ printf(";;\t\tcontrol: %cahp %cdn %cfz rm=%s str=%d len=%d\n",
+ (fp->fpcr >> 26)&1u ? '+' : '-',
+ (fp->fpcr >> 25)&1u ? '+' : '-',
+ (fp->fpcr >> 24)&1u ? '+' : '-',
+ rcmap[(fp->fpcr >> 22)&3u],
+ (fp->fpcr >> 20)&3u,
+ (fp->fpcr >> 16)&7u);
+}
+
+/* Dump the thirty-two general-purpose register slots recorded in @map@,
+ * then the program counter, then the decoded flags word.
+ */
+void regdump_gp(const struct regmap *map)
+{
+  const uint32 gpword = REGF_64 | REGSRC_GP;
+  unsigned ix = 0;
+
+  fputs(";; General-purpose registers:\n", stdout);
+  while (ix < 32) {
+    regdump(map, 0, REGF_HEX | REGF_UNSGN | REGF_SGN | gpword | ix);
+    ix++;
+  }
+  /* The program counter is shown in hex only. */
+  regdump(map, 0, REGF_HEX | gpword | REGIX_PC);
+
+  /* No format bits: @regdump@ decodes the flags word bit by bit. */
+  fputs(";; Flags:\n", stdout);
+  regdump(map, 0, REGSRC_GP | REGF_32 | REGIX_NZCV);
+}
+
+/* Dump the thirty-two floating-point/SIMD registers recorded in @map@ as
+ * 128-bit vectors, followed by the decoded status and control words.
+ */
+void regdump_fp(const struct regmap *map)
+{
+  const uint32 vecfmt =
+    REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_FLT | REGF_CHR |
+    REGF_64 | REGF_32 | REGF_16 | REGF_8 |
+    REGSRC_SIMD | (7 << REGF_WDSHIFT);
+  unsigned ix = 0;
+
+  fputs(";; Floating-point/SIMD registers:\n", stdout);
+  while (ix < 32) {
+    regdump(map, 0, vecfmt | ix);
+    ix++;
+  }
+
+  fputs(";; Floating-point state:\n", stdout);
+  dump_fpflags(0, map->fp);
+}
+
+/* Nothing is printed here on this target; @regdump_fp@ dumps the
+ * `Floating-point/SIMD registers'.
+ */
+void regdump_simd(const struct regmap *map)
+{
+  /* Deliberately empty. */
+}
+
+#endif
+
+/*----- The main entry point ----------------------------------------------*/
+
+/* --- @regdump@ --- *
+ *
+ * Arguments: @const void *base@ = pointer to base structure, corresponding
+ * to the @REGF_SRCMASK@ part of @f@
+ * @const char *lbl@ = label to print
+ * @uint32 f@ = format control word; see @REGF_...@
+ *
+ * Returns: ---
+ *
+ * Use: Dump a register value, or chunk of memory.
+ *
+ * This function is not usually called directly; instead, use
+ * the `reg' or `mem' assembler macros.
+ */
+
+void regdump(const void *base, const char *lbl, uint32 f)
+{
+  /* Split the register index and vector width out of the format word.
+   * The width field holds log_2 of the width in bits, so a scalar (zero)
+   * gives @wd == 1@.
+   */
+  unsigned ix = (f&REGF_IXMASK) >> REGF_IXSHIFT;
+  unsigned wd = 1 << ((f&REGF_WDMASK) >> REGF_WDSHIFT);
+  unsigned fmt, ty;
+  uint32 fmtbit, tybit;
+  const void *p;
+  char regbuf[8]; const char *reg = regname(regbuf, f);
+  const struct regmap *map;
+  const struct fmttab *tab;
+  struct fmtinfo fi;
+  int firstp = 1;
+  int skip;
+  size_t n;
+
+#if CPUFAM_X86 || CPUFAM_AMD64
+  union vreg vr;
+#endif
+
+  /* Right-justify a short register name in a seven-character field.  This
+   * rewrites @regbuf@ in place, so it presumably relies on @regname@
+   * having returned @regbuf@ itself -- TODO confirm against @regname@.
+   */
+  if (reg) {
+    n = strlen(reg);
+    if (n < 7) {
+      memmove(regbuf + 7 - n, reg, n + 1);
+      memset(regbuf, ' ', 7 - n);
+    }
+  }
+
+  /* Find the value to print, according to the source field.  The flags
+   * and floating-point-state pseudo-registers are handed off to their
+   * dedicated decoders, which print everything themselves.
+   */
+  switch (f&REGF_SRCMASK) {
+    case REGSRC_ABS:
+      p = base;
+      break;
+
+#if CPUFAM_X86 || CPUFAM_AMD64
+    case REGSRC_GP:
+      map = (const struct regmap *)base;
+      if (ix == REGIX_FLAGS && !(f&REGF_FMTMASK))
+        { dump_flags(lbl, reg, map->gp->gp[REGIX_FLAGS]); return; }
+      p = &map->gp->gp[ix];
+      break;
+    case REGSRC_SEG:
+      map = (const struct regmap *)base;
+      assert(wd == 1); assert((f&REGF_TYMASK) == REGF_16);
+      p = &map->gp->seg[ix];
+      break;
+    case REGSRC_STMMX:
+      map = (const struct regmap *)base;
+      if (ix == REGIX_FPFLAGS)
+        { assert(!(f&REGF_FMTMASK)); dump_fpflags(lbl, map->fx); return; }
+      /* A register whose tag bit is clear is reported as `dead'. */
+      if (!((map->fx->ftw << ix)&128u)) {
+        printf(";; ");
+        if (lbl) printf("%s: ", lbl);
+        if (reg) printf("%s = ", reg);
+        printf(" dead\n");
+        return;
+      }
+      p = &map->fx->stmmx[ix];
+      break;
+    case REGSRC_SIMD:
+      map = (const struct regmap *)base;
+      if (ix == REGIX_FPFLAGS)
+        { assert(!(f&REGF_FMTMASK)); dump_mxflags(lbl, map->fx); return; }
+      if (wd <= 128)
+        p = &map->fx->xmm[ix];
+      else {
+        /* The two halves of a 256-bit register are saved separately, so
+         * glue them together in a temporary.
+         */
+        vr.v128[0] = map->fx->xmm[ix];
+        vr.v128[1] = map->avx->ymmh[ix];
+        assert(wd == 256);
+        p = &vr;
+      }
+      break;
+#endif
+
+#if CPUFAM_ARMEL
+    case REGSRC_GP:
+      map = (const struct regmap *)base;
+      if (ix == REGIX_CPSR && !(f&REGF_FMTMASK))
+        { dump_flags(lbl, map->gp->r[REGIX_CPSR].u32); return; }
+      p = &map->gp->r[ix];
+      break;
+    case REGSRC_FP:
+    case REGSRC_SIMD:
+      map = (const struct regmap *)base;
+      if (ix == REGIX_FPSCR) {
+        assert(!(f&REGF_FMTMASK));
+        dump_fpflags(lbl, map->fp->fpscr);
+        return;
+      }
+      switch (regwd(f)) {
+        case 32: p = &map->fp->u.s[ix]; break;
+        case 64: p = &map->fp->u.d[ix]; break;
+        case 128: p = &map->fp->u.q[ix]; break;
+        default: assert(0);
+      }
+      break;
+#endif
+
+#if CPUFAM_ARM64
+    case REGSRC_GP:
+      map = (const struct regmap *)base;
+      if (ix == REGIX_NZCV && !(f&REGF_FMTMASK))
+        { dump_flags(lbl, map->gp->r[REGIX_NZCV].u64); return; }
+      p = &map->gp->r[ix];
+      break;
+    case REGSRC_FP:
+    case REGSRC_SIMD:
+      map = (const struct regmap *)base;
+      if (ix == REGIX_FPFLAGS)
+        { assert(!(f&REGF_FMTMASK)); dump_fpflags(lbl, map->fp); return; }
+      p = &map->fp->v[ix];
+      break;
+#endif
+
+    default:
+      assert(0);
+  }
+
+  /* Width of the label and register name, for lining up continuation
+   * lines when a vector is printed in several formats.
+   */
+  skip = (lbl ? strlen(lbl) + 2 : 0) + (reg ? strlen(reg) : 0);
+  fi.f = 0; if (wd > 1) fi.f |= FMTF_VECTOR;
+
+  /* Walk every requested (size, format) pair, one bit of each at a time;
+   * pairs with no entry in @fmttab@ are silently skipped.
+   */
+  for (ty = (f&REGF_TYMASK) >> REGF_TYSHIFT,
+         tybit = 1 << REGF_TYSHIFT;
+       ty;
+       ty >>= 1, tybit <<= 1) {
+    if (!(ty&1u)) continue;
+
+    for (fmt = (f&REGF_FMTMASK) >> REGF_FMTSHIFT,
+           fmtbit = 1 << REGF_FMTSHIFT;
+         fmt;
+         fmt >>= 1, fmtbit <<= 1) {
+
+      if (!(fmt&1u)) continue;
+
+      for (tab = fmttab; tab->mask; tab++)
+        if (tab->mask == (fmtbit | tybit)) goto found;
+      continue;
+    found:
+
+      /* The first rendering carries the label and register name.  Later
+       * ones start a new, indented line for vectors, or carry on along
+       * the same line for scalars.
+       */
+      if (firstp) {
+        printf(";;");
+        if (lbl) printf(" %s:", lbl);
+        if (reg) printf(" %s =", reg);
+        firstp = 0;
+      } else if (wd > 1)
+        printf("\n;; %*s =", skip, "");
+      else
+        fputs(" =", stdout);
+
+      /* Call the formatter until @fi.wd@ reaches the register width. */
+      fi.p = p; fi.wd = 0;
+      while (fi.wd < wd) { putchar(' '); tab->fmt(&fi); }
+    }
+  }
+  putchar('\n');
+}
+
+/*----- Other random utilities --------------------------------------------*/
+
+/* --- @regdump_freshline@ --- *
+ *
+ * Arguments: ---
+ *
+ * Returns: ---
+ *
+ * Use: Begin a fresh line of output.
+ */
+
+void regdump_freshline(void)
+{
+  /* Write a single newline to standard output. */
+  putc('\n', stdout);
+}
+
+/*----- That's all, folks -------------------------------------------------*/
--- /dev/null
+/* -*-c-*-
+ *
+ * Register dump and debugging support
+ *
+ * (c) 2019 Straylight/Edgeware
+ */
+
+/*----- Licensing notice --------------------------------------------------*
+ *
+ * This file is part of Catacomb.
+ *
+ * Catacomb is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Library General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Catacomb is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with Catacomb. If not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ */
+
+#ifndef CATACOMB_REGDUMP_H
+#define CATACOMB_REGDUMP_H
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+/*----- Header files ------------------------------------------------------*/
+
+#include "config.h"
+
+#ifndef ENABLE_ASM_DEBUG
+# error "Assembler-level debug disabled by `configure' script."
+#endif
+
+#if __ASSEMBLER__
+# include "asm-common.h"
+#else
+# include <float.h>
+# include <mLib/bits.h>
+#endif
+
+/*----- Random utilities --------------------------------------------------*/
+
+/* Iteration helpers.  Each applies the macro @_@ to a run of consecutive
+ * integers: @DO8@ to 0--7, @DOHI8@ to 8--15, @DO16@ to 0--15, and @DO32@ to
+ * 0--31.
+ */
+#define DO8(_) \
+ _(0) _(1) _(2) _(3) _(4) _(5) _(6) _(7)
+#define DOHI8(_) \
+ _(8) _(9) _(10) _(11) _(12) _(13) _(14) _(15)
+
+#define DO16(_) DO8(_) DOHI8(_)
+
+#define DO32(_) \
+ DO16(_) \
+ _(16) _(17) _(18) _(19) _(20) _(21) _(22) _(23) \
+ _(24) _(25) _(26) _(27) _(28) _(29) _(30) _(31)
+
+/*----- Common data structures --------------------------------------------*/
+
+#if !__ASSEMBLER__
+
+/* The following are good on our assembler targets. */
+typedef signed char int8;
+typedef short int16;
+typedef int int32;
+#if LONG_MAX >> 31 > 0x7fffffff
+ typedef long int64;
+#else
+ typedef long long int64;
+#endif
+typedef float float32;
+typedef double float64;
+typedef long double float80;
+
+#if CPUFAM_X86 || CPUFAM_ARMEL
+# define PTR32 void *p;
+# define PTR64
+#endif
+#if CPUFAM_AMD64 || CPUFAM_ARM64
+# define PTR32
+# define PTR64 void *p;
+#endif
+
+#define SIMD_COMMON(wd) \
+ uint8 u8[wd/8]; \
+ int8 i8[wd/8]; \
+ uint16 u16[wd/16]; \
+ int16 i16[wd/16]; \
+ uint32 u32[wd/32]; \
+ int32 i32[wd/32]; \
+ uint64 u64[wd/64]; \
+ int64 i64[wd/64]; \
+ float32 f32[wd/32]; \
+ float64 f64[wd/64]
+
+union gp32 { uint32 u32; int32 i32; PTR32 };
+union gp64 { uint64 u64; int64 i64; PTR64 };
+
+#endif
+
+/*----- Format word layout ------------------------------------------------*/
+
+#define REGF_IXMASK 0x000000ff
+#define REGF_IXSHIFT 0
+/* The index into the vector indicated by `REGF_SRCMASK', if applicable. */
+
+#define REGF_FMTMASK 0x0000ff00
+#define REGF_FMTSHIFT 8
+#define REGF_HEX 0x00000100
+#define REGF_CHR 0x00000200
+#define REGF_FLT 0x00000400
+#define REGF_UNSGN 0x00000800
+#define REGF_SGN 0x00001000
+/* How to format the value(s) found. */
+
+#define REGF_TYMASK 0x00ff0000
+#define REGF_TYSHIFT 16
+#define REGF_80 0x00010000
+#define REGF_64 0x00020000
+#define REGF_32 0x00040000
+#define REGF_16 0x00080000
+#define REGF_8 0x00100000
+/* Size of the value(s) to dump. */
+
+#define REGF_SRCMASK 0x0f000000
+#define REGSRC_ABS 0x01000000 /* absolute address */
+#define REGSRC_GP 0x02000000 /* general-purpose register */
+#define REGSRC_FP 0x03000000 /* floating-point register */
+#define REGSRC_SIMD 0x04000000 /* SIMD vector register */
+#define REGSRC_STMMX 0x05000000 /* x86-specific: x87/MMX register */
+#define REGSRC_SEG 0x06000000 /* x86-specific: segment register */
+/* Where to find the values. */
+
+#define REGF_WDMASK 0xf0000000
+#define REGF_WDSHIFT 28
+/* If we're to print a scalar, this is zero; otherwise, log_2 of the vector
+ * register width, in bits.
+ */
+
+/*----- x86 and AMD64 -----------------------------------------------------*/
+
+#if CPUFAM_X86 || CPUFAM_AMD64
+
+#define REGIX_FLAGS 0
+#define REGIX_IP 1
+#define REGIX_ADDR 2
+#define REGIX_AX 3
+#define REGIX_BX 4
+#define REGIX_CX 5
+#define REGIX_DX 6
+#define REGIX_SI 7
+#define REGIX_DI 8
+#define REGIX_BP 9
+#define REGIX_SP 10
+#if CPUFAM_X86
+# define REGIX_GPLIM 11
+#endif
+#if CPUFAM_AMD64
+# define REGIX_R8 11
+# define REGIX_R9 12
+# define REGIX_R10 13
+# define REGIX_R11 14
+# define REGIX_R12 15
+# define REGIX_R13 16
+# define REGIX_R14 17
+# define REGIX_R15 18
+# define REGIX_GPLIM 19
+#endif
+
+#define REGIX_CS 0
+#define REGIX_DS 1
+#define REGIX_SS 2
+#define REGIX_ES 3
+#define REGIX_FS 4
+#define REGIX_GS 5
+#define REGIX_SEGLIM 6
+
+#define REGIX_FPFLAGS 255
+
+#if !__ASSEMBLER__
+
+#if CPUFAM_X86
+typedef union gp32 gpreg;
+#endif
+#if CPUFAM_AMD64
+typedef union gp64 gpreg;
+#endif
+
+struct gpsave {
+ gpreg gp[REGIX_GPLIM];
+ uint16 seg[REGIX_SEGLIM];
+};
+
+union stmmx {
+ SIMD_COMMON(64);
+#if FLT_RADIX == 2 && LDBL_MANT_DIG == 64
+ long double f80;
+#endif
+unsigned char _pad[16];
+};
+
+union xmm { SIMD_COMMON(128); };
+union ymm { SIMD_COMMON(256); };
+union vreg { union xmm v128[2]; union ymm v256; };
+
+struct fxsave {
+ unsigned short fcw;
+ unsigned short fsw;
+ unsigned char ftw;
+ unsigned char _res0;
+ unsigned short fop;
+#if CPUFAM_X86
+ unsigned int fpu_ip;
+ unsigned short fpu_cs;
+ unsigned short _res1;
+ unsigned int fpu_dp;
+ unsigned short fpu_ds;
+ unsigned short _res2;
+#endif
+#if CPUFAM_AMD64
+ unsigned long long fpu_ip;
+ unsigned long long fpu_dp;
+#endif
+ unsigned int mxcsr;
+ unsigned int mxcsr_mask;
+
+ union stmmx stmmx[8];
+
+#if CPUFAM_X86
+ union xmm xmm[8];
+ unsigned char _pad0[8*16];
+#endif
+#if CPUFAM_AMD64
+ union xmm xmm[16];
+#endif
+
+ unsigned char _pad1[96];
+};
+
+struct xsave_avx {
+#if CPUFAM_X86
+ union xmm ymmh[8];
+ unsigned char _pad0[8*16];
+#endif
+#if CPUFAM_AMD64
+ union xmm ymmh[16];
+#endif
+};
+
+struct regmap {
+ struct gpsave *gp;
+ struct fxsave *fx;
+ struct xsave_avx *avx;
+};
+
+#else
+
+ .extern regdump_gpsave
+ .extern regdump_xtsave
+ .extern regdump_xtrstr
+ .extern regdump_gprstr
+
+ regmap_gp = 0*WORDSZ
+ regmap_fx = 1*WORDSZ
+ regmap_avx = 2*WORDSZ
+ regmap_size = 3*WORDSZ
+
+#define REGDEF_GPX86_COMMON(rn, RN) \
+ regsrc.e##rn = REGSRC_GP | REGIX_##RN; \
+ regty.e##rn = REGF_32; \
+ regfmt.e##rn = REGF_HEX; \
+ regsrc.r##rn = REGSRC_GP | REGIX_##RN; \
+ regty.r##rn = REGF_64; \
+ regfmt.r##rn = REGF_HEX
+
+#define REGDEF_GPX86_ABCD(rn, RN) \
+ regsrc.rn##hl = (4 << REGF_WDSHIFT) | REGSRC_GP | REGIX_##RN##X; \
+ regty.rn##hl = REGF_8; \
+ regfmt.rn##hl = REGF_HEX; \
+ regsrc.rn##l = REGSRC_GP | REGIX_##RN##X; \
+ regty.rn##l = REGF_8; \
+ regfmt.rn##l = REGF_HEX; \
+ regsrc.rn##x = REGSRC_GP | REGIX_##RN##X; \
+ regty.rn##x = REGF_16; \
+ regfmt.rn##x = REGF_HEX; \
+ REGDEF_GPX86_COMMON(rn##x, RN##X)
+REGDEF_GPX86_ABCD(a, A)
+REGDEF_GPX86_ABCD(b, B)
+REGDEF_GPX86_ABCD(c, C)
+REGDEF_GPX86_ABCD(d, D)
+
+ // The flags registers have no default format, so that the C side
+ // decodes them bit by bit rather than printing a number.
+ regsrc.eflags = REGSRC_GP | REGIX_FLAGS
+ regty.eflags = REGF_32
+ regfmt.eflags = 0
+
+#if CPUFAM_AMD64
+ regsrc.rflags = REGSRC_GP | REGIX_FLAGS
+ regty.rflags = REGF_64
+ regfmt.rflags = 0
+#endif
+
+#define REGDEF_GPX86_XP(rn, RN) \
+ regsrc.rn##l = REGSRC_GP | REGIX_##RN; \
+ regty.rn##l = REGF_8; \
+ regfmt.rn##l = REGF_HEX; \
+ regsrc.rn = REGSRC_GP | REGIX_##RN; \
+ regty.rn = REGF_16; \
+ regfmt.rn = REGF_HEX; \
+ REGDEF_GPX86_COMMON(rn, RN)
+REGDEF_GPX86_XP(ip, IP)
+REGDEF_GPX86_XP(si, SI)
+REGDEF_GPX86_XP(di, DI)
+REGDEF_GPX86_XP(bp, BP)
+REGDEF_GPX86_XP(sp, SP)
+
+#if CPUFAM_AMD64
+# define REGDEF_GPAMD64(i) \
+ regsrc.r##i##b = REGSRC_GP | REGIX_R##i; \
+ regty.r##i##b = REGF_8; \
+ regfmt.r##i##b = REGF_HEX; \
+ regsrc.r##i##w = REGSRC_GP | REGIX_R##i; \
+ regty.r##i##w = REGF_16; \
+ regfmt.r##i##w = REGF_HEX; \
+ regsrc.r##i##d = REGSRC_GP | REGIX_R##i; \
+ regty.r##i##d = REGF_32; \
+ regfmt.r##i##d = REGF_HEX; \
+ regsrc.r##i = REGSRC_GP | REGIX_R##i; \
+ regty.r##i = REGF_64; \
+ regfmt.r##i = REGF_HEX;
+ DOHI8(REGDEF_GPAMD64)
+#endif
+
+#define REGDEF_SEG(rn, RN) \
+ regsrc.rn = REGSRC_SEG | REGIX_##RN; \
+ regty.rn = REGF_16; \
+ regfmt.rn = REGF_HEX
+REGDEF_SEG(ss, SS)
+REGDEF_SEG(cs, CS)
+REGDEF_SEG(ds, DS)
+REGDEF_SEG(es, ES)
+REGDEF_SEG(fs, FS)
+REGDEF_SEG(gs, GS)
+
+#define REGDEF_STMMX(i) \
+ regsrc.st##i = REGSRC_STMMX | i; \
+ regty.st##i = REGF_80; \
+ regfmt.st##i = REGF_FLT; \
+ regsrc.mm##i = (6 << REGF_WDSHIFT) | REGSRC_STMMX | i; \
+ regty.mm##i = REGF_16; \
+ regfmt.mm##i = REGF_HEX;
+DO8(REGDEF_STMMX)
+
+#define REGDEF_SIMD(i) \
+ regsrc.xmm##i = (7 << REGF_WDSHIFT) | REGSRC_SIMD | i; \
+ regty.xmm##i = REGF_32; \
+ regfmt.xmm##i = REGF_HEX; \
+ regsrc.ymm##i = (8 << REGF_WDSHIFT) | REGSRC_SIMD | i; \
+ regty.ymm##i = REGF_32; \
+ regfmt.ymm##i = REGF_HEX;
+DO8(REGDEF_SIMD)
+#if CPUFAM_AMD64
+ DOHI8(REGDEF_SIMD)
+#endif
+
+ REGDUMP_GPSIZE = REGIX_GPLIM*WORDSZ + REGIX_SEGLIM*2
+
+# if CPUFAM_AMD64 && ABI_SYSV
+ REGDUMP_SPADJ = REGDUMP_GPSIZE + WORDSZ + 128
+# else
+ REGDUMP_SPADJ = REGDUMP_GPSIZE + WORDSZ
+# endif
+
+// _saveregs [ADDR]
+//
+// Save the machine state before calling into the C dump functions.  If
+// ADDR is given, its effective address is computed with `lea' and stored
+// in the `addr' slot of the general-purpose save area.  The user-level
+// macros in this file always follow this with `_rstrregs'.
+.macro _saveregs addr=nil
+ // Save the registers, leaving r/ebp pointing to the register map.
+
+ // Stash r/eax. This is bletcherous: hope we don't get a signal in
+ // the next few instructions.
+ mov [R_sp(r) - REGDUMP_SPADJ + (REGIX_AX - 1)*WORDSZ], R_a(r)
+
+ .ifnes "\addr", "nil"
+ // Collect the effective address for the following dump, leaving it
+ // in the `addr' slot of the dump.
+ lea R_a(r), \addr
+ mov [R_sp(r) - REGDUMP_SPADJ + (REGIX_ADDR - 1)*WORDSZ], R_a(r)
+ .endif
+
+ // Make space for the register save area. On AMD64 with System/V
+ // ABI, also skip the red zone. Use `lea' here to preserve the
+ // flags.
+ lea R_sp(r), [R_sp(r) - REGDUMP_SPADJ]
+
+ // Save flags and general-purpose registers. On 32-bit x86, we save
+ // ebx here and establish a GOT pointer here for the benefit of the
+ // PLT-indirect calls made later on.
+ pushf
+# if CPUFAM_X86
+ mov [esp + 4*REGIX_BX], ebx
+ ldgot
+# endif
+ callext regdump_gpsave
+
+ // Make space for the extended registers.
+ // NOTE(review): the size is taken from r/ecx, presumably left there
+ // by `regdump_gpsave' -- confirm against its definition.
+ sub R_sp(r), R_c(r)
+ callext regdump_xtsave
+
+ // Prepare for calling back into C. On 32-bit x86, leave space for
+ // the arguments and set up the GOT pointer; on AMD64 Windows, leave
+ // the `shadow space' for the called-function's arguments. Also,
+ // forcibly align the stack pointer to a 16-byte boundary.
+# if CPUFAM_X86
+ sub esp, 16
+# elif ABI_WIN
+ sub rsp, 32
+# endif
+ and R_sp(r), ~15
+.endm
+
+// _rstrregs
+//
+// Undo `_saveregs': restore the extended and general-purpose registers and
+// the flags, and release the stack space.
+.macro _rstrregs
+ // Restore registers.
+
+ // We assume r/ebp still points to the register map.
+ callext regdump_xtrstr
+ mov R_sp(r), R_bp(r)
+ callext regdump_gprstr
+ popf
+ // Use `lea', as in `_saveregs', so as not to disturb the flags just
+ // restored.
+ lea R_sp(r), [R_sp(r) + REGDUMP_SPADJ]
+.endm
+
+// _regbase
+//
+// Set the first argument for the following C call to the value of r/ebp:
+// the first stack slot on 32-bit x86, rdi under the System V ABI, rcx on
+// Windows.
+.macro _regbase
+# if CPUFAM_X86
+ mov [esp + 0], ebp
+# elif ABI_SYSV
+ mov rdi, rbp
+# elif ABI_WIN
+ mov rcx, rbp
+# endif
+.endm
+
+// _membase
+//
+// Set the first argument for the following C call to the address recorded
+// in the `addr' slot of the general-purpose save area, which is found
+// through the register map at r/ebp.  Clobbers r/eax.
+.macro _membase
+ mov R_a(r), [R_bp(r) + regmap_gp]
+# if CPUFAM_X86
+ mov eax, [eax + REGIX_ADDR*WORDSZ]
+ mov [esp + 0], eax
+# elif ABI_SYSV
+ mov rdi, [rax + REGIX_ADDR*WORDSZ]
+# elif ABI_WIN
+ mov rcx, [rax + REGIX_ADDR*WORDSZ]
+# endif
+.endm
+
+// _reglbl MSG
+//
+// Set the second argument for the following C call to the address of a
+// NUL-terminated copy of MSG, or to a null pointer if MSG is empty.  The
+// string is emitted between `_LIT' and `_ENDLIT' (defined elsewhere).  On
+// 32-bit x86 this clobbers eax when MSG is nonempty.
+.macro _reglbl msg
+ .ifeqs "\msg", ""
+# if CPUFAM_X86
+ mov dword ptr [esp + 4], 0
+# elif ABI_SYSV
+ xor esi, esi
+# elif ABI_WIN
+ xor edx, edx
+# endif
+ .else
+# if CPUFAM_X86
+ lea eax, [INTADDR(.L$_reglbl$\@)]
+ mov [esp + 4], eax
+# elif ABI_SYSV
+ lea rsi, [INTADDR(.L$_reglbl$\@)]
+# elif ABI_WIN
+ lea rdx, [INTADDR(.L$_reglbl$\@)]
+# endif
+ _LIT
+.L$_reglbl$\@:
+ .asciz "\msg"
+ _ENDLIT
+ .endif
+.endm
+
+// _regfmt ARG
+//
+// Set the third argument for the following C call to the 32-bit format
+// word ARG: the third stack slot on 32-bit x86, edx under the System V
+// ABI, r8d on Windows.
+.macro _regfmt arg
+# if CPUFAM_X86
+ mov dword ptr [esp + 8], \arg
+# elif ABI_SYSV
+ mov edx, \arg
+# elif ABI_WIN
+ mov r8d, \arg
+# endif
+.endm
+
+#endif
+
+#endif
+
+/*----- ARM32 -------------------------------------------------------------*/
+
+#if CPUFAM_ARMEL
+
+#if !__ASSEMBLER__
+extern unsigned regdump__flags;
+#endif
+#define REGF_VFP 1u
+#define REGF_D32 2u
+
+#define REGIX_CPSR 16
+#define REGIX_ADDR 17
+#define REGIX_GPLIM 18
+
+#define REGIX_FPSCR 255
+
+#if !__ASSEMBLER__
+
+union neon64 { SIMD_COMMON(64); };
+union neon128 { SIMD_COMMON(128); };
+
+struct gpsave { union gp32 r[REGIX_GPLIM]; };
+
+struct fpsave {
+ unsigned fpscr;
+ unsigned _pad0;
+ union {
+ float32 s[32];
+ union neon64 d[32];
+ union neon128 q[16];
+ } u;
+};
+
+struct regmap {
+ struct gpsave *gp;
+ struct fpsave *fp;
+};
+
+#else
+
+ .extern regdump_gpsave
+ .extern regdump_xtsave
+ .extern regdump_xtrstr
+ .extern regdump_gprstr
+
+ regmap_gp = 0
+ regmap_fp = 4
+ regmap_size = 8
+
+#define REGDEF_GP(i) \
+ regsrc.r##i = REGSRC_GP | i; \
+ regty.r##i = REGF_32; \
+ regfmt.r##i = REGF_HEX;
+DO16(REGDEF_GP)
+
+ regsrc.cpsr = REGSRC_GP | REGIX_CPSR
+ regty.cpsr = REGF_32
+ regfmt.cpsr = 0
+
+#define REGDEF_NEONS(i) \
+ regsrc.s##i = REGSRC_FP | i; \
+ regty.s##i = REGF_32; \
+ regfmt.s##i = REGF_FLT;
+DO32(REGDEF_NEONS)
+
+#define REGDEF_NEOND(i) \
+ regsrc.d##i = (6 << REGF_WDSHIFT) | REGSRC_FP | i; \
+ regty.d##i = REGF_32; \
+ regfmt.d##i = REGF_HEX;
+DO32(REGDEF_NEOND)
+
+#define REGDEF_NEONQ(i) \
+ regsrc.q##i = (7 << REGF_WDSHIFT) | REGSRC_FP | i; \
+ regty.q##i = REGF_32; \
+ regfmt.q##i = REGF_HEX;
+DO16(REGDEF_NEONQ)
+
+ regsrc.fpscr = REGSRC_FP | REGIX_FPSCR
+ regty.fpscr = REGF_32
+ regfmt.fpscr = 0
+
+ REGDUMP_GPSIZE = 4*REGIX_GPLIM
+ REGDUMP_FPSIZE_D16 = 8 + 16*8
+ REGDUMP_FPSIZE_D32 = 8 + 32*8
+
+// _saveregs [BASE], [OFF]
+//
+// Save the machine state before calling into the C dump functions.  If
+// either argument is given, an address is computed -- `adrl' of OFF when
+// BASE is omitted, otherwise BASE + OFF -- and stored in the `addr' slot
+// of the general-purpose save area.
+.macro _saveregs base=nil, off=#0
+ // Save the registers, leaving r4 pointing to the register map.
+ // NOTE(review): `_regbase' passes r5, not r4, to the C functions,
+ // while `_rstrregs' reloads r13 from r4 -- confirm which register
+ // holds what against `regdump_gpsave' and `regdump_xtsave'.
+
+ // Stash r14. This is bletcherous: hope we don't get a signal in
+ // the next few instructions.
+ str r14, [r13, #-REGDUMP_GPSIZE + 14*4]
+
+ .ifnes "\base,\off", "nil,#0"
+ // Collect the effective address for the following dump, leaving it
+ // in the `addr' slot of the dump.
+ .ifeqs "\base", "nil"
+ adrl r14, \off
+ .else
+ add r14, \base, \off
+ .endif
+ str r14, [r13, #-REGDUMP_GPSIZE + 4*REGIX_ADDR]
+ .endif
+
+ // Make space for the register save area.
+ sub r13, r13, #REGDUMP_GPSIZE
+
+ // Save flags and general-purpose registers.
+ str r12, [r13, #4*12]
+ bl regdump_gpsave
+
+ // Make space for the extended registers.
+ // NOTE(review): the size is taken from r0, presumably left there by
+ // `regdump_gpsave' -- confirm against its definition.
+ sub r13, r13, r0
+ bl regdump_xtsave
+
+ // Prepare for calling back into C.
+ // Round the stack pointer down to a 16-byte boundary, going via r0.
+ ldgot
+ mov r0, r13
+ bic r0, r0, #15
+ mov r13, r0
+.endm
+
+// _rstrregs
+//
+// Undo `_saveregs': restore the extended and general-purpose registers,
+// reload r14 from its slot in the save area, and release the save area.
+.macro _rstrregs
+ // Restore registers.
+
+ // We assume r4 still points to the register map.
+ bl regdump_xtrstr
+ mov r13, r4
+ bl regdump_gprstr
+ ldr r14, [r13, #14*4]
+ add r13, r13, #REGDUMP_GPSIZE
+.endm
+
+// _regbase
+//
+// Set the first argument (r0) for the following C call from r5 --
+// presumably the register-map pointer set up by the save routines; confirm
+// against `regdump_gpsave'/`regdump_xtsave'.
+.macro _regbase
+ mov r0, r5
+.endm
+
+// _membase
+//
+// Set the first argument (r0) for the following C call from r6 --
+// presumably the address collected by `_saveregs'; confirm against
+// `regdump_gpsave'/`regdump_xtsave'.
+.macro _membase
+ mov r0, r6
+.endm
+
+// _reglbl MSG
+//
+// Set the second argument (r1) for the following C call to the address of
+// a NUL-terminated copy of MSG, emitted between `_LIT' and `_ENDLIT'
+// (defined elsewhere) and followed by padding to a 4-byte boundary.
+// NOTE(review): unlike the x86 version, an empty MSG yields a pointer to
+// an empty string rather than a null pointer.
+.macro _reglbl msg
+ adrl r1, .L$_reglbl$\@
+ _LIT
+.L$_reglbl$\@:
+ .asciz "\msg"
+ .balign 4
+ _ENDLIT
+.endm
+
+// _regfmt ARG
+//
+// Set the third argument (r2) for the following C call to the 32-bit
+// format word ARG, loading the low halfword with `movw' and the high
+// halfword with `movt'.
+.macro _regfmt arg
+ movw r2, #\arg&0xffff
+ movt r2, #(\arg >> 16)&0xffff
+.endm
+
+#endif
+
+#endif
+
+/*----- ARM64 -------------------------------------------------------------*/
+
+#if CPUFAM_ARM64
+
+#define REGIX_NZCV 32
+#define REGIX_PC 33
+#define REGIX_ADDR 34
+#define REGIX_GPLIM 36
+
+#define REGIX_FPFLAGS 255
+
+#if !__ASSEMBLER__
+
+union v128 { SIMD_COMMON(128); };
+
+struct gpsave { union gp64 r[REGIX_GPLIM]; };
+
+struct fpsave {
+ unsigned fpsr, fpcr;
+ union v128 v[32];
+};
+
+struct regmap {
+ struct gpsave *gp;
+ struct fpsave *fp;
+};
+
+#else
+
+ .extern regdump_gpsave
+ .extern regdump_xtsave
+ .extern regdump_xtrstr
+ .extern regdump_gprstr
+
+ regmap_gp = 0
+ regmap_fp = 8
+ regmap_size = 16
+
+#define REGDEF_GP(i) \
+ regsrc.x##i = REGSRC_GP | i; \
+ regty.x##i = REGF_64; \
+ regfmt.x##i = REGF_HEX; \
+ regsrc.w##i = REGSRC_GP | i; \
+ regty.w##i = REGF_32; \
+ regfmt.w##i = REGF_HEX;
+DO32(REGDEF_GP)
+
+ regsrc.sp = REGSRC_GP | 31
+ regty.sp = REGF_64
+ regfmt.sp = REGF_HEX
+
+ regsrc.pc = REGSRC_GP | REGIX_PC
+ regty.pc = REGF_64
+ regfmt.pc = REGF_HEX
+
+ regsrc.nzcv = REGSRC_GP | REGIX_NZCV
+ regty.nzcv = REGF_32
+ regfmt.nzcv = 0
+
+#define REGDEF_FP(i) \
+ regsrc.b##i = REGSRC_FP | i; \
+ regty.b##i = REGF_8; \
+ regfmt.b##i = REGF_HEX; \
+ regsrc.h##i = REGSRC_FP | i; \
+ regty.h##i = REGF_16; \
+ regfmt.h##i = REGF_HEX; \
+ regsrc.s##i = REGSRC_FP | i; \
+ regty.s##i = REGF_32; \
+ regfmt.s##i = REGF_FLT; \
+ regsrc.d##i = REGSRC_FP | i; \
+ regty.d##i = REGF_64; \
+ regfmt.d##i = REGF_FLT; \
+ regsrc.v##i = (7 << REGF_WDSHIFT) | REGSRC_FP | i; \
+ regty.v##i = REGF_32; \
+ regfmt.v##i = REGF_HEX;
+DO32(REGDEF_FP)
+
+ regsrc.fpflags = REGSRC_FP | REGIX_FPFLAGS
+ regty.fpflags = REGF_32
+ regfmt.fpflags = 0
+
+ REGDUMP_GPSIZE = 8*REGIX_GPLIM
+ REGDUMP_FPSIZE = 16 + 16 + 32*16
+
+// _saveregs [BASE], [OFF]
+//
+// Save the machine state before calling into the C dump functions.  If
+// either argument is given, an address is computed -- `adr' of OFF when
+// BASE is omitted, otherwise BASE + OFF -- and stored in the `addr' slot
+// of the general-purpose save area.
+.macro _saveregs base=nil, off=#0
+ // Save the registers, leaving x20 pointing to the register map.
+ // NOTE(review): `_regbase' passes x21, not x20, to the C functions,
+ // while `_rstrregs' reloads sp from x20 -- confirm which register
+ // holds what against `regdump_gpsave' and `regdump_xtsave'.
+
+ // Stash x30. This is bletcherous: hope we don't get a signal in
+ // the next few instructions.
+ str x30, [sp, #-REGDUMP_GPSIZE + 30*8]
+
+ .ifnes "\base,\off", "nil,#0"
+ // Collect the effective address for the following dump, leaving it
+ // in the `addr' slot of the dump.
+ .ifeqs "\base", "nil"
+ adr x30, \off
+ .else
+ add x30, \base, \off
+ .endif
+ str x30, [sp, #-REGDUMP_GPSIZE + 8*REGIX_ADDR]
+ .endif
+
+ // Make space for the register save area.
+ sub sp, sp, #REGDUMP_GPSIZE
+
+ // Save flags and general-purpose registers.
+ stp x16, x17, [sp, #8*16]
+ bl regdump_gpsave
+
+ // Make space for the extended registers.
+ // NOTE(review): the size is taken from x0, presumably left there by
+ // `regdump_gpsave' -- confirm against its definition.
+ sub sp, sp, x0
+ bl regdump_xtsave
+.endm
+
+// _rstrregs
+//
+// Undo `_saveregs': restore the extended and general-purpose registers,
+// reload x30 from its slot in the save area, and release the save area.
+.macro _rstrregs
+ // Restore registers.
+
+ // We assume x21 still points to the register map.
+ // NOTE(review): the code below reloads sp from x20, not x21, and
+ // `_saveregs' names x20 in its own comment -- confirm which register
+ // is meant here.
+ bl regdump_xtrstr
+ mov sp, x20
+ bl regdump_gprstr
+ ldr x30, [sp, #30*8]
+ add sp, sp, #REGDUMP_GPSIZE
+.endm
+
+// _regbase
+//
+// Set the first argument (x0) for the following C call from x21 --
+// presumably the register-map pointer set up by the save routines; confirm
+// against `regdump_gpsave'/`regdump_xtsave'.
+.macro _regbase
+ mov x0, x21
+.endm
+
+// _membase
+//
+// Set the first argument (x0) for the following C call from x22 --
+// presumably the address collected by `_saveregs'; confirm against
+// `regdump_gpsave'/`regdump_xtsave'.
+.macro _membase
+ mov x0, x22
+.endm
+
+// _reglbl MSG
+//
+// Set the second argument (x1) for the following C call to the address of
+// a NUL-terminated copy of MSG, emitted between `_LIT' and `_ENDLIT'
+// (defined elsewhere) and followed by padding to a 4-byte boundary.
+// NOTE(review): unlike the x86 version, an empty MSG yields a pointer to
+// an empty string rather than a null pointer.
+.macro _reglbl msg
+ adr x1, .L$_reglbl$\@
+ _LIT
+.L$_reglbl$\@:
+ .asciz "\msg"
+ .balign 4
+ _ENDLIT
+.endm
+
+// _regfmt ARG
+//
+// Set the third argument (w2) for the following C call to the 32-bit
+// format word ARG, loading the low halfword with `movz' and the high
+// halfword with `movk'.
+.macro _regfmt arg
+ movz w2, #\arg&0xffff
+ movk w2, #(\arg >> 16)&0xffff, lsl #16
+.endm
+
+#endif
+
+#endif
+
+/*----- Functions provided ------------------------------------------------*/
+
+/* --- @regdump_init@ --- *
+ *
+ * Arguments: ---
+ *
+ * Returns: ---
+ *
+ * Use: Performs one-time initialization for register dumping. In
+ * particular, this performs CPU feature detection on platforms
+ * where that is a difficult task: without it, registers
+ * corresponding to optional architectural features can be
+ * neither printed nor preserved by the register-dump machinery.
+ */
+
+#if !__ASSEMBLER__
+extern void regdump_init(void);
+#endif
+
+/* --- @regdump@ --- *
+ *
+ * Arguments: @const void *base@ = pointer to base structure, corresponding
+ * to the @REGF_SRCMASK@ part of @f@
+ * @const char *lbl@ = label to print
+ * @uint32 f@ = format control word; see @REGF_...@
+ *
+ * Returns: ---
+ *
+ * Use: Dump a register value, or chunk of memory.
+ *
+ * This function is not usually called directly; instead, use
+ * the `reg' or `mem' assembler macros.
+ */
+
+#if !__ASSEMBLER__
+extern void regdump(const void *base, const char *lbl, uint32 f);
+#else
+ .extern regdump
+#endif
+
+/* --- @regdump_gp@, @regdump_fp@, @regdump_simd@ --- *
+ *
+ * Arguments: @const struct regmap *map@ = pointer to register map
+ *
+ * Returns: ---
+ *
+ * Use: Dump the general-purpose/floating-point/SIMD registers.
+ *
+ * These functions are not usually called directly; instead, use
+ * the `regdump' assembler macro.
+ */
+
+#if !__ASSEMBLER__
+extern void regdump_gp(const struct regmap */*map*/);
+extern void regdump_fp(const struct regmap */*map*/);
+extern void regdump_simd(const struct regmap */*map*/);
+#else
+ .extern regdump_gp
+ .extern regdump_fp
+ .extern regdump_simd
+#endif
+
+/* --- @regdump_freshline@ --- *
+ *
+ * Arguments: ---
+ *
+ * Returns: ---
+ *
+ * Use: Begin a fresh line of output.
+ */
+
+#if !__ASSEMBLER__
+extern void regdump_freshline(void);
+#else
+ .extern regdump_freshline
+#endif
+
+/*----- Main user interface macros ----------------------------------------*/
+
+#if __ASSEMBLER__
+
+.macro terpri
+ // Begin a fresh line of dump output by calling `regdump_freshline'.
+ // The call is bracketed by the register save/restore macros.
+ _saveregs
+ callext regdump_freshline
+ _rstrregs
+.endm
+
+.macro reg lbl, rn, fmt=0
+ // Dump the register RN, labelled with the string LBL.
+ //
+ // FMT is an optional set of REGF_... format flags. Any of the type
+ // or format fields which FMT leaves zero is filled in from the
+ // per-register defaults `regty.RN' and `regfmt.RN'; the source
+ // selector always comes from `regsrc.RN'.
+ //
+ // The call to `regdump' is bracketed by the register save/restore
+ // macros.
+ _saveregs
+ _regbase
+ _reglbl "\lbl"
+ // In assembler expressions, a true comparison yields -1 (all bits
+ // set) and a false one yields 0, so `(cond) & value' selects the
+ // default value exactly when the corresponding field of FMT is empty.
+ .L$reg.fmt$\@ = regsrc.\rn | \fmt | \
+ (((\fmt&REGF_TYMASK) == 0)&regty.\rn) | \
+ (((\fmt&REGF_FMTMASK) == 0)&regfmt.\rn)
+ _regfmt .L$reg.fmt$\@
+ callext regdump
+ _rstrregs
+.endm
+
+.macro mem lbl, addr, fmt=0
+ // Dump a chunk of memory at ADDR, labelled with the string LBL.
+ //
+ // FMT is an optional set of REGF_... format flags. If FMT leaves the
+ // type field zero then REGF_32 is used; if it leaves the format field
+ // zero then REGF_HEX is used. The source selector is always
+ // REGSRC_ABS.
+ //
+ // ADDR is handed to the register-saving macro; the call to `regdump'
+ // is bracketed by the register save/restore macros.
+ _saveregs \addr
+ _membase
+ _reglbl "\lbl"
+ // In assembler expressions, a true comparison yields -1 (all bits
+ // set) and a false one yields 0, so `(cond) & value' selects the
+ // default value exactly when the corresponding field of FMT is empty.
+ .L$mem.fmt$\@ = REGSRC_ABS | \fmt | \
+ (((\fmt&REGF_TYMASK) == 0)&REGF_32) | \
+ (((\fmt&REGF_FMTMASK) == 0)&REGF_HEX)
+ _regfmt .L$mem.fmt$\@
+ callext regdump
+ _rstrregs
+.endm
+
+.macro regdump gp=nil, fp=nil, simd=nil
+ // Dump whole classes of registers.
+ //
+ // Each of GP, FP and SIMD defaults to `nil', meaning that the class
+ // is skipped; passing any other value (e.g., `gp=t') dumps that
+ // class by calling `regdump_gp', `regdump_fp' or `regdump_simd'
+ // respectively, in that order.
+ //
+ // The register state is captured once, up front, and restored once
+ // at the end. The register-map argument is set up afresh before
+ // each call.
+ _saveregs
+ .ifnes "\gp", "nil"
+ _regbase
+ callext regdump_gp
+ .endif
+ .ifnes "\fp", "nil"
+ _regbase
+ callext regdump_fp
+ .endif
+ .ifnes "\simd", "nil"
+ _regbase
+ callext regdump_simd
+ .endif
+ _rstrregs
+.endm
+
+#endif
+
+/*----- That's all, folks -------------------------------------------------*/
+
+#ifdef __cplusplus
+ }
+#endif
+
+#endif
--- /dev/null
+#include "config.h"
+#include "asm-common.h"
+#include "regdump.h"
+
+ .text
+
+ .p2align 5
+vec:
+ .long 1, 2, 3, 4, 5, 6, 7, 8
+
+FUNC(main)
+
+ // Test driver for the register-dump machinery on AArch64: initialize
+ // the machinery, dump one SIMD register, and return zero.
+
+ pushreg x29, x30
+ setfp
+ endprologue
+
+ // One-time setup (CPU feature detection) for the dump machinery.
+ bl regdump_init
+
+ // Comparing a register with itself leaves the flags in a known
+ // (`equal') state before the dump.
+ cmp x0, x0
+ reg "simd", v1
+
+ // Return zero.
+ mov w0, #0
+ dropfp
+ popreg x29, x30
+ ret
+
+ENDFUNC
--- /dev/null
+#include "config.h"
+#include "asm-common.h"
+#include "regdump.h"
+
+ .text
+ .arch armv7-a
+
+ .p2align 5
+vec:
+ .long 1, 2, 3, 4, 5, 6, 7, 8
+
+FUNC(main)
+
+ // Test driver for the register-dump machinery on 32-bit ARM:
+ // initialize the machinery, dump the general-purpose register file
+ // and a selection of individual registers, and return zero.
+
+ pushreg r11, r14
+ setfp
+ endprologue
+
+ // One-time setup (CPU feature detection) for the dump machinery.
+ bl regdump_init
+
+ // Comparing a register with itself leaves the flags in a known
+ // (`equal') state before the dumps.
+ cmp r0, r0
+ // Any value other than `nil' enables the corresponding register
+ // class.
+ regdump gp=t
+ // Registers dumped with their default type and format, except where
+ // an explicit set of REGF_... flags is supplied.
+ reg "general purpose", r1
+ reg " flags", cpsr
+ reg " simd", q0, fmt=REGF_HEX | REGF_32 | REGF_16
+ reg " float", d0, fmt=REGF_FLT | REGF_64
+ reg " float", s0
+ reg " float status", fpscr
+
+ // Return zero.
+ // NOTE(review): `popreg r11, pc' presumably pops the saved r14
+ // straight into pc, which performs the return -- confirm against the
+ // `popreg' definition.
+ mov r0, #0
+ dropfp
+ popreg r11, pc
+
+ENDFUNC
--- /dev/null
+#include "config.h"
+#include "asm-common.h"
+#include "regdump.h"
+
+ .text
+
+ .p2align 5
+vec:
+ .long 1, 2, 3, 4, 5, 6, 7, 8
+
+FUNC(main)
+
+ // Test driver for the register-dump machinery on x86 and AMD64: load
+ // some recognizable values into the x87 stack and an SSE register,
+ // dump the SSE register, and return zero.
+
+ pushreg R_bp(r)
+ setfp
+ // Align the stack; the frame pointer remembers the original value,
+ // which is put back on the way out.
+ and R_sp(r), ~15
+ endprologue
+
+ // Push seven well-known constants onto the x87 register stack.
+ fldz
+ fld1
+ fldpi
+ fldl2t
+ fldl2e
+ fldlg2
+ fldln2
+ //fld1
+
+ // Load the test vector; `vec' is 32-byte aligned, so an aligned load
+ // is safe.
+ ldgot
+ movdqa xmm2, [INTADDR(vec)]
+ //vmovdqa ymm2, [INTADDR(vec)]
+
+ reg "my fp", xmm2, REGF_FLT | REGF_64 | REGF_32
+
+ // Pop the seven values pushed above: the calling conventions require
+ // the x87 register stack to be empty again when a function returns
+ // (other than for a floating-point return value, and we have none).
+ .rept 7
+ fstp st(0)
+ .endr
+
+ // Return zero.
+ xor eax, eax
+ dropfp
+ popreg R_bp(r)
+ ret
+
+ENDFUNC
catacomb_DEFINE_CPU_OR_ABI([ABI], [3], [abi],
[ABI_], [Define if target ABI is \`$][3\'.])
+dnl Optional assembler-level debugging support (`--enable-asm-debug').
+dnl This is forced off when the CPU family is not recognized.
+AC_ARG_ENABLE([asm-debug],
+ AS_HELP_STRING([--enable-asm-debug],
+ [enable assembler debugging features]),
+ [mdw_asm_debug=$enableval], [mdw_asm_debug=no])
+case $CPUFAM in nil) mdw_asm_debug=no ;; esac
+dnl Dispatch on the VALUE of the variable: without the dollar sign the
+dnl literal word never matches `no' and the feature is always enabled.
+case $mdw_asm_debug in
+ no) ;;
+ *) AC_DEFINE([ENABLE_ASM_DEBUG], [1],
+ [Define to enable assembler-level debugging.]) ;;
+esac
+
dnl Establish Automake conditions for things.
catacomb_CPU_FAMILIES([catacomb_CLEAR_FLAGS])
m4_define([catacomb_COND_CPU],
m4_define([catacomb_seen_abi/$3], [t])])])
catacomb_CPU_FAMILIES([catacomb_COND_CPU])
AM_CONDITIONAL([KNOWN_CPUFAM], [test x$CPUFAM != xnil])
+AM_CONDITIONAL([ASM_DEBUG], [test x$mdw_asm_debug != xno])
dnl Report on what we found.
case $CPUFAM in