From: Mark Wooding Date: Sun, 18 Aug 2019 01:08:07 +0000 (+0100) Subject: base/regdump.[ch], etc.: Fancy register dumping infrastructure. X-Git-Tag: 2.5.0~14^2~9 X-Git-Url: https://git.distorted.org.uk/~mdw/catacomb/commitdiff_plain/4bc8424a9fde81305a5bcd04708492f3a067b438 base/regdump.[ch], etc.: Fancy register dumping infrastructure. --- diff --git a/base/Makefile.am b/base/Makefile.am index 8de4d69c..145f9c35 100644 --- a/base/Makefile.am +++ b/base/Makefile.am @@ -66,4 +66,20 @@ EXTRA_DIST += t/rsvr ## Base definitions for assembler source. EXTRA_DIST += asm-common.h +if ASM_DEBUG +libbase_la_SOURCES += regdump.c regdump.h +if CPUFAM_X86 +libbase_la_SOURCES += regdump-x86ish.S +endif +if CPUFAM_AMD64 +libbase_la_SOURCES += regdump-x86ish.S +endif +if CPUFAM_ARMEL +libbase_la_SOURCES += regdump-arm.S +endif +if CPUFAM_ARM64 +libbase_la_SOURCES += regdump-arm64.S +endif +endif + ###----- That's all, folks -------------------------------------------------- diff --git a/base/asm-common.h b/base/asm-common.h index 6ec238ff..e8f6445c 100644 --- a/base/asm-common.h +++ b/base/asm-common.h @@ -549,117 +549,6 @@ name: #endif -#if CPUFAM_X86 - -.macro _reg.0 - // Stash GP registers and establish temporary stack frame. - pushfd - push eax - push ecx - push edx - push ebp - mov ebp, esp - and esp, ~15 - sub esp, 512 - fxsave [esp] -.endm - -.macro _reg.1 -.endm - -.macro _reg.2 -.endm - -.macro _reg.3 fmt - // Print FMT and the other established arguments. 
- lea eax, .L$_reg$msg.\@ - push eax - call printf - jmp .L$_reg$cont.\@ -.L$_reg$msg.\@: - .ascii ";; \fmt\n\0" -.L$_reg$cont.\@: - mov eax, ebp - and eax, ~15 - sub eax, 512 - fxrstor [eax] - mov esp, ebp - pop ebp - pop edx - pop ecx - pop eax - popfd -.endm - -.macro msg msg - _reg.0 - _reg.1 - _reg.2 - _reg.3 "\msg" -.endm - -.macro reg r, msg - _reg.0 - .ifeqs "\r", "esp" - lea eax, [ebp + 20] - push eax - .else - .ifeqs "\r", "ebp" - push [ebp] - .else - push \r - .endif - .endif - _reg.1 - _reg.2 - _reg.3 "\msg: \r = %08x" -.endm - -.macro xmmreg r, msg - _reg.0 - _reg.1 - _reg.2 - movdqu xmm0, \r - pshufd xmm0, xmm0, 0x1b - sub esp, 16 - movdqa [esp], xmm0 - _reg.3 "\msg: \r = %08x %08x %08x %08x" -.endm - -.macro mmreg r, msg - _reg.0 - _reg.1 - _reg.2 - pshufw \r, \r, 0x4e - sub esp, 8 - movq [esp], \r - _reg.3 "\msg: \r = %08x %08x" -.endm - -.macro freg i, msg - _reg.0 - _reg.1 - _reg.2 - finit - fldt [esp + 32 + 16*\i] - sub esp, 12 - fstpt [esp] - _reg.3 "\msg: st(\i) = %.20Lg" -.endm - -.macro fxreg i, msg - _reg.0 - _reg.1 - _reg.2 - finit - fldt [esp + 32 + 16*\i] - sub esp, 12 - fstpt [esp] - _reg.3 "\msg: st(\i) = %La" -.endm - -#endif - ///-------------------------------------------------------------------------- /// ARM-specific hacking. diff --git a/base/regdump-arm.S b/base/regdump-arm.S new file mode 100644 index 00000000..963a60ee --- /dev/null +++ b/base/regdump-arm.S @@ -0,0 +1,184 @@ +/// -*- mode: asm; asm-comment-char: ?/ -*- +/// +/// Register dump and debugging for 32-bit ARM +/// +/// (c) 2019 Straylight/Edgeware +/// + +///----- Licensing notice --------------------------------------------------- +/// +/// This file is part of Catacomb. +/// +/// Catacomb is free software: you can redistribute it and/or modify it +/// under the terms of the GNU Library General Public License as published +/// by the Free Software Foundation; either version 2 of the License, or +/// (at your option) any later version. 
+/// +/// Catacomb is distributed in the hope that it will be useful, but +/// WITHOUT ANY WARRANTY; without even the implied warranty of +/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +/// Library General Public License for more details. +/// +/// You should have received a copy of the GNU Library General Public +/// License along with Catacomb. If not, write to the Free Software +/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +/// USA. + +///-------------------------------------------------------------------------- +/// Preliminaries. + +#include "config.h" +#include "asm-common.h" +#include "regdump.h" + + .arch armv7-a + .fpu neon + + .text + +///-------------------------------------------------------------------------- +/// Main code. + +FUNC(regdump_gpsave) + endprologue + // On entry, r13 should point to `REGDUMP_GPSIZE' bytes of + // word-aligned storage to be the general-purpose save area, with r12 + // and r14 already saved. On exit, the initial registers are saved + // in this space, and modified: r4 points to the general-purpose save + // area, r6 holds the focus address (possibly already saved), r0 + // contains the number of bytes required in the extended save area, + // and other general-purpose registers are clobbered or used to + // communicate with `regdump_xtsave' below. Doing anything other + // than lowering the stack pointer and calling `regdump_xtsave' is + // not recommended. + + // Save the easy registers. + stmia r13, {r0-r11} + mov r4, r13 + + // Determine the previous stack pointer and save it. + add r0, r4, #REGDUMP_GPSIZE + str r0, [r4, #13*4] + + // Capture the status flags and return address. If the return + // address has its low bit set, then the caller was in Thumb state: + // clear the bit from the reconstructed PC, and set the corresponding + // CPSR bit. 
+ mrs r0, cpsr + tst r14, #1 + bic r1, r14, #1 + orrne r0, r0, #0x00000020 + str r0, [r13, #4*REGIX_CPSR] + str r1, [r13, #15*4] + + // Load the focus address and save it as r6. + ldr r6, [r4, #4*REGIX_ADDR] + + // Determine the extended save area size. + ldgot + mov r0, #8 + 8 + leaext r12, regdump__flags + ldr r12, [r12] + tst r12, #REGF_VFP + addne r0, r0, #REGDUMP_FPSIZE_D16 + tstne r12, #REGF_D32 + addne r0, r0, #REGDUMP_FPSIZE_D32 - REGDUMP_FPSIZE_D16 + + // Done. + bx r14 + +ENDFUNC + +FUNC(regdump_gprstr) + endprologue + // On entry, r4 points to a general-purpose save area, established by + // `regdump_gpsave'. On exit, the general-purpose registers (other + // than r13 and r14) are restored to their original values. + + // Restore the processor flags. + ldr r0, [r4, #4*REGIX_CPSR] + msr cpsr_fs, r0 + + // Load the easy registers. + ldmia r4, {r0-r12} + + // Done. + bx r14 + +ENDFUNC + +FUNC(regdump_xtsave) + endprologue + // On entry, r13 points to an extended save area, of size determined + // by `regdump_gpsave' above. On exit, the save area is filled in + // and a handy map placed at its base. + + // Set up the map/extended save area pointer. + add r5, r13, #7 + bic r5, r5, #7 + + // Start by filling in the easy part of the map. + str r4, [r5, #regmap_gp] + + // Fetch the flags explaining what to do. + ldgot + leaext r12, regdump__flags + ldr r12, [r12] + + // Figure out whether there are VFP/NEON registers. + tst r12, #REGF_VFP + moveq r3, #0 + addne r3, r5, #regmap_size + str r3, [r5, #regmap_fp] + beq 9f + + // Get the FP status register. + vmrs r0, fpscr + str r0, [r3], #8 + + // At least the first 16. + vstmia r3!, {d0-d15} + + // Maybe the other 16 too. + tst r12, #REGF_D32 + vstmiane r3!, {d16-d31} + + // Done. +9: bx r14 + +ENDFUNC + +FUNC(regdump_xtrstr) + endprologue + // On entry, r5 points to a register-save map. 
On exit, the extended + // registers are restored from the save area, r4 (pointing to the + // general-purpose save area) is preserved, and the other general + // registers are clobbered. + + // Fetch the flags explaining what to do. + ldgot + leaext r12, regdump__flags + ldr r12, [r12] + + // Figure out if there are VFP/NEON registers. + tst r12, #REGF_VFP + beq 9f + ldr r3, [r5, #regmap_fp] + + // Load the FP status register. + ldr r0, [r3], #8 + vmsr fpscr, r0 + + // Load the first 16 registers. + vldmia r3!, {d0-d15} + + // And maybe the other 16. + tst r12, #REGF_D32 + vldmiane r3!, {d16-d31} + + // Done. +9: bx r14 + +ENDFUNC + +///----- That's all, folks -------------------------------------------------- diff --git a/base/regdump-arm64.S b/base/regdump-arm64.S new file mode 100644 index 00000000..81c9f8e7 --- /dev/null +++ b/base/regdump-arm64.S @@ -0,0 +1,204 @@ +/// -*- mode: asm; asm-comment-char: ?/ -*- +/// +/// Register dump and debugging for 64-bit ARM +/// +/// (c) 2019 Straylight/Edgeware +/// + +///----- Licensing notice --------------------------------------------------- +/// +/// This file is part of Catacomb. +/// +/// Catacomb is free software: you can redistribute it and/or modify it +/// under the terms of the GNU Library General Public License as published +/// by the Free Software Foundation; either version 2 of the License, or +/// (at your option) any later version. +/// +/// Catacomb is distributed in the hope that it will be useful, but +/// WITHOUT ANY WARRANTY; without even the implied warranty of +/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +/// Library General Public License for more details. +/// +/// You should have received a copy of the GNU Library General Public +/// License along with Catacomb. If not, write to the Free Software +/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +/// USA. 
+ +///-------------------------------------------------------------------------- +/// Preliminaries. + +#include "config.h" +#include "asm-common.h" +#include "regdump.h" + + .arch armv8-a + + .text + +///-------------------------------------------------------------------------- +/// Main code. + +FUNC(regdump_gpsave) + endprologue + // On entry, sp should point to `REGDUMP_GPSIZE' bytes of + // doubleword-aligned storage to be the general-purpose save area, + // with x16, x17, and x30 already saved. On exit, the initial + // registers are saved in this space, and modified: x20 points to the + // general-purpose save area, x22 holds the focus address (possibly + // already saved), x0 contains the number of bytes required in the + // extended save area, and other general-purpose registers are + // clobbered or used to communicate with `regdump_xtsave' below. + // Doing anything other than lowering the stack pointer and calling + // `regdump_xtsave' is not recommended. + + // Save the easy registers. + stp x0, x1, [sp, #0] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + stp x8, x9, [sp, #64] + stp x10, x11, [sp, #80] + stp x12, x13, [sp, #96] + stp x14, x15, [sp, #112] + stp x18, x19, [sp, #144] + stp x20, x21, [sp, #160] + stp x22, x23, [sp, #176] + stp x24, x25, [sp, #192] + stp x26, x27, [sp, #208] + stp x28, x29, [sp, #224] + + mov x20, sp + + // Determine the previous stack pointer and save it. + add x0, x20, #REGDUMP_GPSIZE + str x0, [x20, #31*8] + + // Capture the status flags. + mrs x0, nzcv + str x0, [x20, #8*REGIX_NZCV] + + // Set the return address as our PC. + str x30, [x20, #8*REGIX_PC] + + // Load the focus address and save it as x22. + ldr x22, [x20, #8*REGIX_ADDR] + + // Determine the extended save area size. + mov x0, #REGDUMP_FPSIZE + + // Done. + ret + +ENDFUNC + +FUNC(regdump_gprstr) + endprologue + // On entry, x20 points to a general-purpose save area, established + // by `regdump_gpsave'. 
On exit, the general-purpose registers + // (other than x30 and sp) are restored to their original values. + + // Restore the processor flags. + ldr w0, [x20, #8*REGIX_NZCV] + msr nzcv, x0 + + // Load the easy registers. + ldp x0, x1, [sp, #0] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + ldp x8, x9, [sp, #64] + ldp x10, x11, [sp, #80] + ldp x12, x13, [sp, #96] + ldp x14, x15, [sp, #112] + ldp x16, x17, [sp, #128] + ldp x18, x19, [sp, #144] + ldp x20, x21, [sp, #160] + ldp x22, x23, [sp, #176] + ldp x24, x25, [sp, #192] + ldp x26, x27, [sp, #208] + ldp x28, x29, [sp, #224] + + // Done. + ret + +ENDFUNC + +FUNC(regdump_xtsave) + endprologue + // On entry, sp points to an extended save area, of size determined + // by `regdump_gpsave' above. On exit, the save area is filled in + // and a handy map placed at its base. + + // Set up the map/extended save area pointer. + mov x21, sp + + // Start by filling in the easy part of the map. + add x0, x21, #regmap_size + stp x20, x0, [x21] + + // Get the FP status register. + mrs x1, fpsr + mrs x2, fpcr + stp w1, w2, [x0], #8 + + // Store the SIMD registers. + stp q0, q1, [x0, #0] + stp q2, q3, [x0, #32] + stp q4, q5, [x0, #64] + stp q6, q7, [x0, #96] + stp q8, q9, [x0, #128] + stp q10, q11, [x0, #160] + stp q12, q13, [x0, #192] + stp q14, q15, [x0, #224] + stp q16, q17, [x0, #256] + stp q18, q19, [x0, #288] + stp q20, q21, [x0, #320] + stp q22, q23, [x0, #352] + stp q24, q25, [x0, #384] + stp q26, q27, [x0, #416] + stp q28, q29, [x0, #448] + stp q30, q31, [x0, #480] + + // Done. + ret + +ENDFUNC + +FUNC(regdump_xtrstr) + endprologue + // On entry, x21 points to a register-save map. On exit, the + // extended registers are restored from the save area, x20 (pointing + // to the general-purpose save area) is preserved, and the other + // general registers are clobbered. + + ldr x0, [x21, #regmap_fp] + + // Load the FP status and control registers. 
+ ldp w1, w2, [x0], #8 + msr fpsr, x1 + msr fpcr, x2 + + // Load the SIMD registers. + ldp q0, q1, [x0, #0] + ldp q2, q3, [x0, #32] + ldp q4, q5, [x0, #64] + ldp q6, q7, [x0, #96] + ldp q8, q9, [x0, #128] + ldp q10, q11, [x0, #160] + ldp q12, q13, [x0, #192] + ldp q14, q15, [x0, #224] + ldp q16, q17, [x0, #256] + ldp q18, q19, [x0, #288] + ldp q20, q21, [x0, #320] + ldp q22, q23, [x0, #352] + ldp q24, q25, [x0, #384] + ldp q26, q27, [x0, #416] + ldp q28, q29, [x0, #448] + ldp q30, q31, [x0, #480] + + // Done. + ret + +ENDFUNC + +///----- That's all, folks -------------------------------------------------- diff --git a/base/regdump-x86ish.S b/base/regdump-x86ish.S new file mode 100644 index 00000000..e4dd8e80 --- /dev/null +++ b/base/regdump-x86ish.S @@ -0,0 +1,277 @@ +/// -*- mode: asm; asm-comment-char: ?/ -*- +/// +/// Register dump and debugging for x86 +/// +/// (c) 2019 Straylight/Edgeware +/// + +///----- Licensing notice --------------------------------------------------- +/// +/// This file is part of Catacomb. +/// +/// Catacomb is free software: you can redistribute it and/or modify it +/// under the terms of the GNU Library General Public License as published +/// by the Free Software Foundation; either version 2 of the License, or +/// (at your option) any later version. +/// +/// Catacomb is distributed in the hope that it will be useful, but +/// WITHOUT ANY WARRANTY; without even the implied warranty of +/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +/// Library General Public License for more details. +/// +/// You should have received a copy of the GNU Library General Public +/// License along with Catacomb. If not, write to the Free Software +/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +/// USA. + +///-------------------------------------------------------------------------- +/// Preliminaries. 
+ +#include "config.h" +#include "asm-common.h" +#include "regdump.h" + + .text + .arch pentium4 + .arch .xsave + +///-------------------------------------------------------------------------- +/// Main code. + +FUNC(regdump_gpsave) + endprologue + // On entry, r/esp should point to a return address and + // `REGDUMP_GPSIZE' bytes of word-aligned storage to be the + // general-purpose save area, with flags saved in the bottom word, + // r/eax saved in the fourth, and (on 32-bit x86) ebx in the fifth. + // On exit, the initial registers are saved in this space, and + // modified: r/ebp points to the general-purpose save area, ecx + // contains the number of bytes required in the extended save area, + // ebx is preserved on 32-bit x86, and other general-purpose + // registers are clobbered or used to communicate with + // `regdump_xtsave' below. Doing anything other than lowering the + // stack pointer and calling `regdump_xtsave' is not recommended. + + // Other code will insist that df is clear. + cld + + // Save r/ebp and establish it pointing to the save area. + mov [R_sp(r) + WORDSZ + REGIX_BP*WORDSZ], R_bp(r) + lea R_bp(r), [R_sp(r) + WORDSZ] + + // Save the other easy general-purpose registers. +#if !CPUFAM_X86 + mov [R_bp(r) + REGIX_BX*WORDSZ], R_b(r) +#endif + mov [R_bp(r) + REGIX_CX*WORDSZ], R_c(r) + mov [R_bp(r) + REGIX_DX*WORDSZ], R_d(r) + mov [R_bp(r) + REGIX_SI*WORDSZ], R_si(r) + mov [R_bp(r) + REGIX_DI*WORDSZ], R_di(r) +#if CPUFAM_AMD64 + mov [R_bp(r) + REGIX_R8*WORDSZ], R_r8(r) + mov [R_bp(r) + REGIX_R9*WORDSZ], R_r9(r) + mov [R_bp(r) + REGIX_R10*WORDSZ], R_r10(r) + mov [R_bp(r) + REGIX_R11*WORDSZ], R_r11(r) + mov [R_bp(r) + REGIX_R12*WORDSZ], R_r12(r) + mov [R_bp(r) + REGIX_R13*WORDSZ], R_r13(r) + mov [R_bp(r) + REGIX_R14*WORDSZ], R_r14(r) + mov [R_bp(r) + REGIX_R15*WORDSZ], R_r15(r) +#endif + + // Determine the previous stack pointer and save it. 
+#if CPUFAM_AMD64 && ABI_SYSV + lea R_a(r), [R_bp(r) + 128 + REGDUMP_GPSIZE] +#else + lea R_a(r), [R_bp(r) + REGDUMP_GPSIZE] +#endif + mov [R_bp(r) + REGIX_SP*WORDSZ], R_a(r) + + // Collect the return address and save it as r/eip. + mov R_a(r), [R_sp(r)] + mov [R_bp(r) + REGIX_IP*WORDSZ], R_a(r) + + // Save the segment registers. + lea R_a(r), [R_bp(r) + REGIX_GPLIM*WORDSZ] + mov [R_a(r) + 2*REGIX_CS], cs + mov [R_a(r) + 2*REGIX_DS], ds + mov [R_a(r) + 2*REGIX_SS], ss + mov [R_a(r) + 2*REGIX_ES], es + mov [R_a(r) + 2*REGIX_FS], fs + mov [R_a(r) + 2*REGIX_GS], gs + + // Determine the extended save area size. Preserve ebx on 32-bit x86 + // here, because the caller needs it for PLT-indirect calls. +#if CPUFAM_X86 + push ebx +#endif + mov eax, 0x01 + cpuid + test ecx, 1 << 26 + je 1f + + mov eax, 0x0d + mov ecx, 0x00 + cpuid + add ecx, regmap_size + 64 // map + align + jmp 8f + +1: mov ecx, 512 + regmap_size + 16 // fxsave + map + align + + // Done. +8: +#if CPUFAM_X86 + pop ebx +#endif + ret + +ENDFUNC + +FUNC(regdump_gprstr) + endprologue + // On entry, r/ebp points to a general-purpose save area, established + // by `regdump_gpsave'. On exit, the general-purpose registers + // (other than the stack pointer) are restored to their original + // values. + + // We assume nobody actually fiddled with the segment registers. So + // just the actual integer registers to do. 
+ mov R_a(r), [R_bp(r) + REGIX_AX*WORDSZ] + mov R_b(r), [R_bp(r) + REGIX_BX*WORDSZ] + mov R_c(r), [R_bp(r) + REGIX_CX*WORDSZ] + mov R_d(r), [R_bp(r) + REGIX_DX*WORDSZ] + mov R_si(r), [R_bp(r) + REGIX_SI*WORDSZ] + mov R_di(r), [R_bp(r) + REGIX_DI*WORDSZ] +#if CPUFAM_AMD64 + mov R_r8(r), [R_bp(r) + REGIX_R8*WORDSZ] + mov R_r9(r), [R_bp(r) + REGIX_R9*WORDSZ] + mov R_r10(r), [R_bp(r) + REGIX_R10*WORDSZ] + mov R_r11(r), [R_bp(r) + REGIX_R11*WORDSZ] + mov R_r12(r), [R_bp(r) + REGIX_R12*WORDSZ] + mov R_r13(r), [R_bp(r) + REGIX_R13*WORDSZ] + mov R_r14(r), [R_bp(r) + REGIX_R14*WORDSZ] + mov R_r15(r), [R_bp(r) + REGIX_R15*WORDSZ] +#endif + mov R_bp(r), [R_bp(r) + REGIX_BP*WORDSZ] + + // Done. + ret + +ENDFUNC + +#ifdef CPUFAM_AMD64 +# define fxsave fxsave64 +# define fxrstor fxrstor64 +# define xsave xsave64 +# define xrstor xrstor64 +#endif + +FUNC(regdump_xtsave) + endprologue + // On entry, r/esp points to a return address and extended save area, + // of size determined by `regdump_gpsave' above. On exit, the save + // area is filled in and a handy map placed at its base, the x87 + // floating-point state is reset, r/ebp is left pointing to the + // register map, ebx is preserved on 32-bit x86, and the other + // general registers are clobbered. + + // Start by filling in the easy parts of the map. + mov [R_sp(r) + WORDSZ + regmap_gp], R_bp(r) + lea R_bp(r), [R_sp(r) + WORDSZ] + + xor eax, eax // clears rax too on amd64 + mov [R_bp(r) + regmap_avx], R_a(r) + + // Find out whether we use `xsave'. (Preserve ebx.) +#if CPUFAM_X86 + push ebx +#endif + mov eax, 0x01 + cpuid + test ecx, 1 << 26 + je 5f + + // We have the `xsave' machinery. Select the base address. + lea R_si(r), [R_sp(r) + WORDSZ + regmap_size + 63] + and R_si(r), ~63 + mov [R_bp(r) + regmap_fx], R_si(r) + + // Clear out the header area. + xor eax, eax + lea R_di(r), [R_si(r) + 512] + mov ecx, 16 + rep stosd + + // Save the registers. 
+ mov eax, 0x00000007 + xor edx, edx + xsave [R_si(r)] + + // Establish the AVX pointer, if available. + test dword ptr [R_si(r) + 512], 4 // = xstate_bv + je 8f + + mov eax, 13 + mov ecx, 2 + cpuid + add R_b(r), R_si(r) + mov [R_bp(r) + regmap_avx], R_b(r) + + jmp 8f + + // We have only `fxsave'. Set the base address. +5: lea R_si(r), [R_sp(r) + WORDSZ + regmap_size + 15] + and R_si(r), ~15 + mov [R_bp(r) + regmap_fx], R_si(r) + + // Save the registers. + fxsave [R_si(r)] + + // Clear the x87 state; otherwise it can cause trouble later. +8: fninit + + // Done. +#if CPUFAM_X86 + pop ebx +#endif + ret + +ENDFUNC + +FUNC(regdump_xtrstr) + endprologue + // On entry, r/ebp points to a register-save map. On exit, the + // extended registers are restored from the save area; r/ebp is left + // pointing to the general-purpose save area, ebx is preserved on + // 32-bit x86, and the other general registers are clobbered. + + // Find the extended register dump. + mov R_si(r), [R_bp(r) + regmap_fx] + + // Probe to find out whether we have `xsave'. +#if CPUFAM_X86 + push ebx +#endif + mov eax, 0x01 + cpuid + test ecx, 1 << 26 + je 1f + + // We have the `xsave' machinery. + mov eax, 0x00000007 + xor edx, edx + xrstor [R_si(r)] + jmp 8f + + // We must fake it up. +1: fxrstor [R_si(r)] + + // Done. +8: mov R_bp(r), [R_bp(r) + regmap_gp] +#if CPUFAM_X86 + pop ebx +#endif + ret + +ENDFUNC + +///----- That's all, folks -------------------------------------------------- diff --git a/base/regdump.c b/base/regdump.c new file mode 100644 index 00000000..d4f5fdec --- /dev/null +++ b/base/regdump.c @@ -0,0 +1,945 @@ +/* -*-c-*- + * + * Register dumping and other diagnostic tools for assembler code + * + * (c) 2016 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. 
+ * + * Catacomb is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, + * MA 02111-1307, USA. + */ + +/*----- Header files ------------------------------------------------------*/ + +#include "config.h" + +#include <assert.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> + +#include <mLib/bits.h> +#include <mLib/macros.h> + +#include "dispatch.h" +#include "regdump.h" + +/*----- Low-level printing ------------------------------------------------*/ + +/* Currently these are good for all of our targets. 
*/ +#define STEP_8 1 +#define TY_HEX_8 uint8 +#define P_HEX_8 "0x%02x" +#define TY_UNSGN_8 uint8 +#define P_UNSGN_8 "%3u" +#define PV_CHR_8 " `%c'" +#define PV_HEX_8 " %02x" +#define PV_UNSGN_8 "%4u" + +#define STEP_16 2 +#define TY_HEX_16 uint16 +#define P_HEX_16 "0x%04x" +#define TY_UNSGN_16 uint16 +#define P_UNSGN_16 "%5u" +#define TY_SGN_16 int16 +#define P_SGN_16 "%6d" +#define PV_HEX_16 " 0x%04x" +#define PV_UNSGN_16 "%9u" +#define PV_SGN_16 "%9d" + +#define STEP_32 4 +#define TY_HEX_32 uint32 +#define P_HEX_32 "0x%08x" +#define TY_UNSGN_32 uint32 +#define P_UNSGN_32 "%10u" +#define TY_SGN_32 int32 +#define P_SGN_32 "%11d" +#define TY_FLT_32 float +#define P_FLT_32 "%15.9g" +#define PV_HEX_32 " 0x%08x" +#define PV_UNSGN_32 "%19u" +#define PV_SGN_32 "%19d" +#define PV_FLT_32 "%19.9g" + +#if ULONG_MAX >> 31 > 0xffffffff +# define PL64 "l" +#else +# define PL64 "ll" +#endif +#define STEP_64 8 +#define TY_HEX_64 uint64 +#define P_HEX_64 "0x%016"PL64"x" +#define TY_UNSGN_64 uint64 +#define P_UNSGN_64 "%20"PL64"u" +#define TY_SGN_64 int64 +#define P_SGN_64 "%20"PL64"d" +#define TY_FLT_64 double +#define P_FLT_64 "%24.17g" +#define PV_HEX_64 " 0x%016"PL64"x" +#define PV_UNSGN_64 "%39"PL64"u" +#define PV_SGN_64 "%39"PL64"d" +#define PV_FLT_64 "%39.17g" + +#if CPUFAM_X86 +# define STEP_80 12 +#endif +#if CPUFAM_AMD64 +# define STEP_80 16 +#endif +#define TY_FLT_80 long double +#define P_FLT_80 "%29.21Lg" +#define PV_FLT_80 P_FLT_80 + +#if CPUFAM_X86 || CPUFAM_AMD64 +# define ARCH_FORMATS(_) \ + _(80, FLT) +#endif +#ifndef ARCH_FORMATS +# define ARCH_FORMATS(_) +#endif + +#define FORMATS(_) \ + ARCH_FORMATS(_) \ + _(64, HEX) _(64, FLT) _(64, UNSGN) _(64, SGN) \ + _(32, HEX) _(32, FLT) _(32, UNSGN) _(32, SGN) \ + _(16, HEX) _(16, UNSGN) _(16, SGN) \ + _(8, HEX) _(8, CHR) _(8, UNSGN) + +struct fmtinfo { + const unsigned char *p; + unsigned wd, f; +#define FMTF_VECTOR 1u +}; + +#define FMTFUNC_STD(w, fmt) \ + static void dump_##fmt##_##w(struct fmtinfo *fmt) \ + { \ + 
TY_##fmt##_##w x = *(const TY_##fmt##_##w *)fmt->p; \ + \ + if (fmt->f&FMTF_VECTOR) printf(PV_##fmt##_##w, x); \ + else printf(P_##fmt##_##w, x); \ + fmt->p += STEP_##w; fmt->wd += 8*STEP_##w; \ + } + +#define FMTFUNC_HEX(w) FMTFUNC_STD(w, HEX) +#define FMTFUNC_UNSGN(w) FMTFUNC_STD(w, UNSGN) +#define FMTFUNC_SGN(w) FMTFUNC_STD(w, SGN) +#define FMTFUNC_FLT(w) FMTFUNC_STD(w, FLT) +#define FMTFUNC_CHR(w) + +static void dump_CHR_8(struct fmtinfo *fmt) +{ + unsigned char x = *(const unsigned char *)fmt->p; + + if (x < 32 || x > 126) printf("\\x%02x", x); + else printf(" `%c'", x); + fmt->p += 1; fmt->wd += 8; +} + +#define FMTFUNC(w, fmt) FMTFUNC_##fmt(w) +FORMATS(FMTFUNC) +#undef FMTFUNC + +static const struct fmttab { + uint32 mask; + void (*fmt)(struct fmtinfo *); +} fmttab[] = { +#define FMTTAB(wd, fmt) { REGF_##fmt | REGF_##wd, dump_##fmt##_##wd }, + FORMATS(FMTTAB) +#undef FMTTAB + { 0, 0 } +}; + +/*----- Common subroutines ------------------------------------------------*/ + +/* --- @regwd@ --- * + * + * Arguments: @uint32 f@ = format control word; see @REGF_...@ + * + * Returns: The actual width of the operand, in bits. + * + * Use: If the operand is a vector (the @REGF_WDMASK@ field is + * nonzero) then return the width it denotes; otherwise, return + * the largest width implied by the @REGF_TYMASK@ field. + */ + +static unsigned regwd(uint32 f) +{ + unsigned wd = 1 << ((f®F_WDMASK) >> REGF_WDSHIFT); + + if (wd > 1) return (wd); + else if (f®F_80) return (80); + else if (f®F_64) return (64); + else if (f®F_32) return (32); + else if (f®F_16) return (16); + else if (f®F_8) return (8); + else { assert(0); return (1); } +} + +/* --- @regname@ --- * + * + * Arguments: @char *buf = pointer to output buffer@ + * @uint32 f@ = format control word; see @REGF_...@ + * + * Returns: Pointer to name string. + * + * Use: Return a pointer to the name of the register implied by @f@, + * or null if there is no register. 
Systematic register names + * can be built in the provided buffer. + */ + +static const char *regname(char *buf, uint32 f) +{ + unsigned wd = regwd(f); + unsigned src = f®F_SRCMASK; + unsigned ix = (f®F_IXMASK) >> REGF_IXSHIFT; + char *p = buf; + + switch (src) { + + case REGSRC_ABS: + return (0); + +#if CPUFAM_X86 || CPUFAM_AMD64 + case REGSRC_GP: + if (ix == REGIX_FLAGS) { + if (wd == 64) *p++ = 'r'; + else if (wd == 32) *p++ = 'e'; + else if (wd != 16) assert(0); + p += sprintf(p, "flags"); +#if CPUFAM_AMD64 + } else if (REGIX_R8 <= ix && ix <= REGIX_R15) { + p += sprintf(p, "r%u", ix - REGIX_R8 + 8); + switch (wd) { + case 64: break; + case 32: *p++ = 'd'; break; + case 16: *p++ = 'w'; break; + case 8: *p++ = 'l'; break; + default: assert(0); + } +# endif + } else { + if (wd == 64) *p++ = 'r'; + else if (wd == 32) *p++ = 'e'; + switch (ix) { + case REGIX_IP: *p++ = 'i'; *p++ = 'p'; goto longreg; + case REGIX_AX: *p++ = 'a'; goto shortreg; + case REGIX_BX: *p++ = 'b'; goto shortreg; + case REGIX_CX: *p++ = 'c'; goto shortreg; + case REGIX_DX: *p++ = 'd'; goto shortreg; + case REGIX_SI: *p++ = 's'; *p++ = 'i'; goto longreg; + case REGIX_DI: *p++ = 'd'; *p++ = 'i'; goto longreg; + case REGIX_BP: *p++ = 'b'; *p++ = 'p'; goto longreg; + case REGIX_SP: *p++ = 's'; *p++ = 'p'; goto longreg; + default: assert(0); + } + if (0) { + shortreg: + switch (wd) { + case 64: + case 32: + case 16: *p++ = 'x'; break; + case 8: *p++ = 'l'; break; + default: assert(0); + } + } else { + longreg: + switch (wd) { + case 64: + case 32: + case 16: break; + case 8: *p++ = 'l'; break; + default: assert(0); + } + } + } + *p++ = 0; + return (buf); + + case REGSRC_SEG: + assert(wd == 16); + switch (ix) { + case REGIX_CS: sprintf(buf, "cs"); break; + case REGIX_DS: sprintf(buf, "ds"); break; + case REGIX_SS: sprintf(buf, "ss"); break; + case REGIX_ES: sprintf(buf, "es"); break; + case REGIX_FS: sprintf(buf, "fs"); break; + case REGIX_GS: sprintf(buf, "gs"); break; + default: assert(0); + } + 
return (buf); + + case REGSRC_STMMX: + if (ix == REGIX_FPFLAGS) return (0); + if (f®F_80) sprintf(buf, "st(%u)", ix); + else sprintf(buf, "mm%u", ix); + return (buf); + + case REGSRC_SIMD: + if (ix == REGIX_FPFLAGS) return (0); + switch (wd) { + case 32: case 64: case 128: sprintf(buf, "xmm%u", ix); break; + case 256: sprintf(buf, "ymm%u", ix); break; + default: assert(0); + } + return (buf); +#endif + +#if CPUFAM_ARMEL + case REGSRC_GP: + if (ix == REGIX_CPSR) sprintf(buf, "cpsr"); + else if (ix == 15) sprintf(buf, "pc"); + else sprintf(buf, "r%u", ix); + return (buf); + case REGSRC_FP: + if (ix == REGIX_FPSCR) sprintf(buf, "fpscr"); + else { + switch (wd) { + case 32: *p++ = 's'; break; + case 64: *p++ = 'd'; break; + case 128: *p++ = 'q'; break; + default: assert(0); + } + p += sprintf(p, "%u", ix); + *p++ = 0; + } + return (buf); +#endif + +#if CPUFAM_ARM64 + case REGSRC_GP: + if (ix == REGIX_PC) sprintf(buf, "pc"); + else if (ix == REGIX_NZCV) sprintf(buf, "nzcv"); + else if (ix == 31 && wd == 64) sprintf(buf, "sp"); + else { + switch (wd) { + case 32: *p++ = 'w'; break; + case 64: *p++ = 'x'; break; + default: assert(0); + } + p += sprintf(p, "%u", ix); + *p++ = 0; + } + return (buf); + case REGSRC_FP: + if (ix == REGIX_FPFLAGS) sprintf(buf, "fpflags"); + else { + if (f®F_WDMASK) + *p++ = 'v'; + else switch (wd) { + case 8: *p++ = 'b'; break; + case 16: *p++ = 'h'; break; + case 32: *p++ = 's'; break; + case 64: *p++ = 'd'; break; + default: assert(0); + } + p += sprintf(p, "%u", ix); + *p++ = 0; + } + return (buf); +#endif + + default: + assert(0); + return ("???"); + } +} + +/*----- x86 and AMD64 -----------------------------------------------------*/ + +#if CPUFAM_X86 || CPUFAM_AMD64 + +#if CPUFAM_X86 +# define P_HEX_GP "0x%08x" +# define GP(gp) (gp).u32 +#endif +#if CPUFAM_AMD64 +# define P_HEX_GP "0x%016"PL64"x" +# define GP(gp) (gp).u64 +#endif + +void regdump_init(void) { ; } + +static void dump_flags(const char *lbl, const char *reg, gpreg f) +{ + 
printf(";; "); + if (lbl) printf("%s: ", lbl); + if (reg) printf("%s = ", reg); + printf(""P_HEX_GP"\n", GP(f)); + printf(";;\t\tstatus: %ccf %cpf %caf %czf %csf %cdf %cof\n", + (GP(f) >> 0)&1u ? '+' : '-', + (GP(f) >> 2)&1u ? '+' : '-', + (GP(f) >> 4)&1u ? '+' : '-', + (GP(f) >> 6)&1u ? '+' : '-', + (GP(f) >> 7)&1u ? '+' : '-', + (GP(f) >> 10)&1u ? '+' : '-', + (GP(f) >> 11)&1u ? '+' : '-'); + printf(";;\t\tsystem: %ctf %cif iopl=%d %cnt " + "%crf %cvm %cac %cvif %cvip %cid\n", + (GP(f) >> 8)&1u ? '+' : '-', + (GP(f) >> 9)&1u ? '+' : '-', + /* iopl is a two-bit field (flag bits 12 and 13), so mask with 3. */ + (int)((GP(f) >> 12)&3u), + (GP(f) >> 14)&1u ? '+' : '-', + (GP(f) >> 16)&1u ? '+' : '-', + (GP(f) >> 17)&1u ? '+' : '-', + (GP(f) >> 18)&1u ? '+' : '-', + (GP(f) >> 19)&1u ? '+' : '-', + (GP(f) >> 20)&1u ? '+' : '-', + (GP(f) >> 21)&1u ? '+' : '-'); +} + +static const char + *pcmap[] = { "sgl", "???", "dbl", "ext" }, + *rcmap[] = { "nr", "-∞", "+∞", "0" }; + +static void dump_fpflags(const char *lbl, const struct fxsave *fx) +{ + unsigned top = (fx->fsw >> 11)&7u; + unsigned tag = fx->ftw; + int skip = lbl ? strlen(lbl) + 2 : 0; + + printf(";; "); + if (lbl) printf("%s: ", lbl); + + printf(" fcw = 0x%04x: " + "%cim %cdm %czm %com %cum %cpm pc=%s rc=%s %cx\n", + fx->fcw, + (fx->fcw >> 0)&1u ? '+' : '-', + (fx->fcw >> 1)&1u ? '+' : '-', + (fx->fcw >> 2)&1u ? '+' : '-', + (fx->fcw >> 3)&1u ? '+' : '-', + (fx->fcw >> 4)&1u ? '+' : '-', + (fx->fcw >> 5)&1u ? '+' : '-', + pcmap[(fx->fcw >> 8)&3u], + rcmap[(fx->fcw >> 10)&3u], + (fx->fcw >> 12)&1u ? '+' : '-'); + printf(";; %*s fsw = 0x%04x: " + "%cie %cde %cze %coe %cue %cpe %csf %ces %cc0 %cc1 %cc2 %cc3 " + "top=%d %cb\n", + skip, "", + fx->fsw, + (fx->fsw >> 0)&1u ? '+' : '-', + (fx->fsw >> 1)&1u ? '+' : '-', + (fx->fsw >> 2)&1u ? '+' : '-', + (fx->fsw >> 3)&1u ? '+' : '-', + (fx->fsw >> 4)&1u ? '+' : '-', + (fx->fsw >> 5)&1u ? '+' : '-', + (fx->fsw >> 6)&1u ? '+' : '-', + (fx->fsw >> 7)&1u ? '+' : '-', + (fx->fsw >> 8)&1u ? '+' : '-', + (fx->fsw >> 9)&1u ? 
'+' : '-', + (fx->fsw >> 10)&1u ? '+' : '-', + (fx->fsw >> 14)&1u ? '+' : '-', + top, + (fx->fsw >> 15)&1u ? '+' : '-'); + printf(";; %*s ftw = 0x%02x\n", skip, "", tag); +} + +static void dump_mxflags(const char *lbl, const struct fxsave *fx) +{ + printf(";; "); + if (lbl) printf("%s: ", lbl); + + printf(" mxcsr = 0x%08x\n" + ";;\t\tmask = %cim %cdm %czm %com %cum %cpm\n" + ";;\t\t exc = %cie %cde %cze %coe %cue %cpe\n" + ";;\t\tmisc = %cdaz %cftz rc=%s\n", + fx->mxcsr, + (fx->mxcsr >> 7)&1u ? '+' : '-', + (fx->mxcsr >> 8)&1u ? '+' : '-', + (fx->mxcsr >> 9)&1u ? '+' : '-', + (fx->mxcsr >> 10)&1u ? '+' : '-', + (fx->mxcsr >> 11)&1u ? '+' : '-', + (fx->mxcsr >> 12)&1u ? '+' : '-', + (fx->mxcsr >> 0)&1u ? '+' : '-', + (fx->mxcsr >> 1)&1u ? '+' : '-', + (fx->mxcsr >> 2)&1u ? '+' : '-', + (fx->mxcsr >> 3)&1u ? '+' : '-', + (fx->mxcsr >> 4)&1u ? '+' : '-', + (fx->mxcsr >> 5)&1u ? '+' : '-', + (fx->mxcsr >> 6)&1u ? '+' : '-', + (fx->mxcsr >> 15)&1u ? '+' : '-', + rcmap[(fx->mxcsr >> 13)&3u]); +} + +#if CPUFAM_X86 +# define REGF_GPWD REGF_32 +#endif +#if CPUFAM_AMD64 +# define REGF_GPWD REGF_64 +#endif + +void regdump_gp(const struct regmap *map) +{ + unsigned i; + + printf(";; General-purpose registers:\n"); + for (i = REGIX_AX; i < REGIX_GPLIM; i++) + regdump(map, 0, + REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_GPWD | REGSRC_GP | i); + regdump(map, 0, REGF_HEX | REGF_GPWD | REGSRC_GP | REGIX_IP); + + printf(";; Segment registers:\n"); + for (i = 0; i < REGIX_SEGLIM; i++) + regdump(map, 0, REGF_HEX | REGF_16 | REGSRC_SEG | i); + + printf(";; Flags:\n"); + regdump(map, 0, REGSRC_GP | REGF_GPWD | REGIX_FLAGS); +} + +void regdump_fp(const struct regmap *map) +{ + unsigned top = (map->fx->fsw >> 11)&7u; + unsigned tag = map->fx->ftw; + unsigned i; + + printf(";; Floating-point/MMX registers:\n"); + if (!top && tag == 0xff) + for (i = 0; i < 8; i++) + regdump(map, 0, + REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_CHR | + REGF_32 | REGF_16 | REGF_8 | + REGSRC_STMMX | i | (6 << 
REGF_WDSHIFT)); + if (tag) + for (i = 0; i < 8; i++) + regdump(map, 0, REGF_FLT | REGF_80 | REGSRC_STMMX | i); + + printf(";; Floating-point state:\n"); + dump_fpflags(0, map->fx); +} + +void regdump_simd(const struct regmap *map) +{ + unsigned f = REGF_HEX | REGF_FLT | REGF_UNSGN | REGF_SGN | REGF_CHR | + REGF_64 | REGF_32 | REGF_16 | REGF_8 | + REGSRC_SIMD; + unsigned i; + + if (map->avx) f |= 8 << REGF_WDSHIFT; + else f |= 7 << REGF_WDSHIFT; + + printf(";; SSE/AVX registers:\n"); + for (i = 0; i < N(map->fx->xmm); i++) + regdump(map, 0, f | i); + + printf(";; SSE/AVX floating-point state:\n"); + dump_mxflags(0, map->fx); +} + +#endif + +/*----- ARM32 -------------------------------------------------------------*/ + +#if CPUFAM_ARMEL + +unsigned regdump__flags = 0; + +void regdump_init(void) +{ + if (cpu_feature_p(CPUFEAT_ARM_VFP)) regdump__flags |= REGF_VFP; + if (cpu_feature_p(CPUFEAT_ARM_D32)) regdump__flags |= REGF_D32; +} + +static void dump_flags(const char *lbl, unsigned f) +{ + static const char + *modetab[] = { "?00", "?01", "?02", "?03", "?04", "?05", "?06", "?07", + "?08", "?09", "?10", "?11", "?12", "?13", "?14", "?15", + "usr", "fiq", "irq", "svc", "?20", "?21", "mon", "abt", + "?24", "?25", "hyp", "und", "?28", "?29", "?30", "sys" }, + *condtab[] = { "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" }; + + printf(";; "); + if (lbl) printf("%s: ", lbl); + printf(" cpsr = 0x%08x\n", f); + printf(";;\t\tuser: %cn %cz %cc %cv %cq ge=%c%c%c%c\n", + (f >> 31)&1u ? '+' : '-', + (f >> 30)&1u ? '+' : '-', + (f >> 29)&1u ? '+' : '-', + (f >> 28)&1u ? '+' : '-', + (f >> 27)&1u ? '+' : '-', + (f >> 19)&1u ? '1' : '0', + (f >> 18)&1u ? '1' : '0', + (f >> 17)&1u ? '1' : '0', + (f >> 16)&1u ? '1' : '0'); + printf(";;\t\tsystem: %cj it=%s:%c%c%c%c %ce %ca %ci %cf %ct m=%s\n", + (f >> 24)&1u ? '+' : '-', + condtab[(f >> 12)&15u], + (f >> 11)&1u ? '1' : '0', + (f >> 10)&1u ? '1' : '0', + (f >> 26)&1u ? 
'1' : '0', + (f >> 25)&1u ? '1' : '0', + (f >> 9)&1u ? '+' : '-', + (f >> 8)&1u ? '+' : '-', + (f >> 7)&1u ? '+' : '-', + (f >> 6)&1u ? '+' : '-', + (f >> 5)&1u ? '+' : '-', + modetab[(f >> 0)&31u]); +} + +static void dump_fpflags(const char *lbl, unsigned f) +{ + static const char *rcmap[] = { "nr", "+∞", "-∞", "0" }; + + printf(";; "); + if (lbl) printf("%s: ", lbl); + printf(" fpscr = 0x%08x\n", f); + printf(";;\t\tcond: %cn %cz %cc %cv %cqc\n", + (f >> 31)&1u ? '+' : '-', + (f >> 30)&1u ? '+' : '-', + (f >> 29)&1u ? '+' : '-', + (f >> 28)&1u ? '+' : '-', + (f >> 27)&1u ? '+' : '-'); + printf(";;\t\ttrap: %cide %cixe %cufe %cofe %cdze %cioe\n", + (f >> 15)&1u ? '+' : '-', + (f >> 12)&1u ? '+' : '-', + (f >> 11)&1u ? '+' : '-', + (f >> 10)&1u ? '+' : '-', + (f >> 9)&1u ? '+' : '-', + (f >> 8)&1u ? '+' : '-'); + printf(";;\t\terror: %cide %cixe %cufe %cofe %cdze %cioe\n", + (f >> 7)&1u ? '+' : '-', + (f >> 4)&1u ? '+' : '-', + (f >> 3)&1u ? '+' : '-', + (f >> 2)&1u ? '+' : '-', + (f >> 1)&1u ? '+' : '-', + (f >> 0)&1u ? '+' : '-'); + printf(";;\t\tcontrol: %cahp %cdn %cfz rm=%s str=%d len=%d\n", + (f >> 26)&1u ? '+' : '-', + (f >> 25)&1u ? '+' : '-', + (f >> 24)&1u ? 
'+' : '-', + rcmap[(f >> 22)&3u], + (f >> 20)&3u, + (f >> 16)&7u); +} + +void regdump_gp(const struct regmap *map) +{ + unsigned i; + + printf(";; General-purpose registers:\n"); + for (i = 0; i < 16; i++) + regdump(map, 0, + REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_32 | REGSRC_GP | i); + + printf(";; Flags:\n"); + regdump(map, 0, REGSRC_GP | REGF_32 | REGIX_CPSR); +} + +void regdump_fp(const struct regmap *map) +{ + unsigned i, n; + + if (!(regdump__flags&REGF_VFP)) { + printf(";; Floating-point and SIMD not available\n"); + return; + } + + printf(";; Floating-point/SIMD registers:\n"); + if (regdump__flags&REGF_D32) n = 32; + else n = 16; + for (i = 0; i < n; i++) + regdump(map, 0, + REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_FLT | REGF_CHR | + REGF_64 | REGF_32 | REGF_16 | REGF_8 | + REGSRC_SIMD | i | (6 << REGF_WDSHIFT)); + + printf(";; Floating-point state:\n"); + dump_fpflags(0, map->fp->fpscr); +} + +void regdump_simd(const struct regmap *map) { ; } + +#endif + +/*----- ARM64 -------------------------------------------------------------*/ + +#if CPUFAM_ARM64 + +void regdump_init(void) { ; } + +static void dump_flags(const char *lbl, unsigned f) +{ + printf(";; "); + if (lbl) printf("%s: ", lbl); + printf(" nzcv = 0x%08x\n", f); + printf(";;\t\tuser: %cn %cz %cc %cv\n", + (f >> 31)&1u ? '+' : '-', + (f >> 30)&1u ? '+' : '-', + (f >> 29)&1u ? '+' : '-', + (f >> 28)&1u ? '+' : '-'); +} + +static void dump_fpflags(const char *lbl, const struct fpsave *fp) +{ + static const char *rcmap[] = { "nr", "+∞", "-∞", "0" }; + int skip = lbl ? strlen(lbl) + 2 : 0; + + printf(";; "); + if (lbl) printf("%s: ", lbl); + printf(" fpsr = 0x%08x\n", fp->fpsr); + printf(";;\t\tcond: %cn %cz %cc %cv %cqc\n", + (fp->fpsr >> 31)&1u ? '+' : '-', + (fp->fpsr >> 30)&1u ? '+' : '-', + (fp->fpsr >> 29)&1u ? '+' : '-', + (fp->fpsr >> 28)&1u ? '+' : '-', + (fp->fpsr >> 27)&1u ? '+' : '-'); + printf(";;\t\terror: %cidc %cixc %cufc %cofc %cdzc %cioc\n", + (fp->fpsr >> 7)&1u ?
'+' : '-', + (fp->fpsr >> 4)&1u ? '+' : '-', + (fp->fpsr >> 3)&1u ? '+' : '-', + (fp->fpsr >> 2)&1u ? '+' : '-', + (fp->fpsr >> 1)&1u ? '+' : '-', + (fp->fpsr >> 0)&1u ? '+' : '-'); + printf(";; %*s fpcr = 0x%08x\n", skip, "", fp->fpcr); + printf(";;\t\ttrap: %cide %cixe %cufe %cofe %cdze %cioe\n", + (fp->fpcr >> 15)&1u ? '+' : '-', + (fp->fpcr >> 12)&1u ? '+' : '-', + (fp->fpcr >> 11)&1u ? '+' : '-', + (fp->fpcr >> 10)&1u ? '+' : '-', + (fp->fpcr >> 9)&1u ? '+' : '-', + (fp->fpcr >> 8)&1u ? '+' : '-'); + printf(";;\t\tcontrol: %cahp %cdn %cfz rm=%s str=%d len=%d\n", + (fp->fpcr >> 26)&1u ? '+' : '-', + (fp->fpcr >> 25)&1u ? '+' : '-', + (fp->fpcr >> 24)&1u ? '+' : '-', + rcmap[(fp->fpcr >> 22)&3u], + (fp->fpcr >> 20)&3u, + (fp->fpcr >> 16)&7u); +} + +void regdump_gp(const struct regmap *map) +{ + unsigned i; + + printf(";; General-purpose registers:\n"); + for (i = 0; i < 32; i++) + regdump(map, 0, + REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_64 | REGSRC_GP | i); + regdump(map, 0, REGF_HEX | REGF_64 | REGSRC_GP | REGIX_PC); + + printf(";; Flags:\n"); + regdump(map, 0, REGSRC_GP | REGF_32 | REGIX_NZCV); +} + +void regdump_fp(const struct regmap *map) +{ + unsigned i; + + printf(";; Floating-point/SIMD registers:\n"); + for (i = 0; i < 32; i++) + regdump(map, 0, + REGF_HEX | REGF_UNSGN | REGF_SGN | REGF_FLT | REGF_CHR | + REGF_64 | REGF_32 | REGF_16 | REGF_8 | + REGSRC_SIMD | i | (7 << REGF_WDSHIFT)); + + printf(";; Floating-point state:\n"); + dump_fpflags(0, map->fp); +} + +void regdump_simd(const struct regmap *map) { ; } + +#endif + +/*----- The main entry point ----------------------------------------------*/ + +/* --- @regdump@ --- * + * + * Arguments: @const void *base@ = pointer to base structure, corresponding + * to the @REGF_SRCMASK@ part of @f@ + * @const char *lbl@ = label to print + * @uint32 f@ = format control word; see @REGF_...@ + * + * Returns: --- + * + * Use: Dump a register value, or chunk of memory. 
+ * + * This function is not usually called directly; instead, use + * the `reg' or `mem' assembler macros. + */ + +void regdump(const void *base, const char *lbl, uint32 f) +{ + unsigned ix = (f&REGF_IXMASK) >> REGF_IXSHIFT; + unsigned wd = 1 << ((f&REGF_WDMASK) >> REGF_WDSHIFT); + unsigned fmt, ty; + uint32 fmtbit, tybit; + const void *p; + char regbuf[8]; const char *reg = regname(regbuf, f); + const struct regmap *map; + const struct fmttab *tab; + struct fmtinfo fi; + int firstp = 1; + int skip; + size_t n; + +#if CPUFAM_X86 || CPUFAM_AMD64 + union vreg vr; +#endif + + if (reg) { + n = strlen(reg); + if (n < 7) { + memmove(regbuf + 7 - n, reg, n + 1); + memset(regbuf, ' ', 7 - n); + } + } + + switch (f&REGF_SRCMASK) { + case REGSRC_ABS: + p = base; + break; + +#if CPUFAM_X86 || CPUFAM_AMD64 + case REGSRC_GP: + map = (const struct regmap *)base; + if (ix == REGIX_FLAGS && !(f&REGF_FMTMASK)) + { dump_flags(lbl, reg, map->gp->gp[REGIX_FLAGS]); return; } + p = &map->gp->gp[ix]; + break; + case REGSRC_SEG: + map = (const struct regmap *)base; + assert(wd == 1); assert((f&REGF_TYMASK) == REGF_16); + p = &map->gp->seg[ix]; + break; + case REGSRC_STMMX: + map = (const struct regmap *)base; + if (ix == REGIX_FPFLAGS) + { assert(!(f&REGF_FMTMASK)); dump_fpflags(lbl, map->fx); return; } + if (!((map->fx->ftw << ix)&128u)) { + printf(";; "); + if (lbl) printf("%s: ", lbl); + if (reg) printf("%s = ", reg); + printf(" dead\n"); + return; + } + p = &map->fx->stmmx[ix]; + break; + case REGSRC_SIMD: + map = (const struct regmap *)base; + if (ix == REGIX_FPFLAGS) + { assert(!(f&REGF_FMTMASK)); dump_mxflags(lbl, map->fx); return; } + if (wd <= 128) + p = &map->fx->xmm[ix]; + else { + vr.v128[0] = map->fx->xmm[ix]; + vr.v128[1] = map->avx->ymmh[ix]; + assert(wd == 256); + p = &vr; + } + break; +#endif + +#if CPUFAM_ARMEL + case REGSRC_GP: + map = (const struct regmap *)base; + if (ix == REGIX_CPSR && !(f&REGF_FMTMASK)) + { dump_flags(lbl, map->gp->r[REGIX_CPSR].u32); return; } + p = &map->gp->r[ix]; +
break; + case REGSRC_FP: + case REGSRC_SIMD: + map = (const struct regmap *)base; + if (ix == REGIX_FPSCR) { + assert(!(f&REGF_FMTMASK)); + dump_fpflags(lbl, map->fp->fpscr); + return; + } + switch (regwd(f)) { + case 32: p = &map->fp->u.s[ix]; break; + case 64: p = &map->fp->u.d[ix]; break; + case 128: p = &map->fp->u.q[ix]; break; + default: assert(0); + } + break; +#endif + +#if CPUFAM_ARM64 + case REGSRC_GP: + map = (const struct regmap *)base; + if (ix == REGIX_NZCV && !(f&REGF_FMTMASK)) + { dump_flags(lbl, map->gp->r[REGIX_NZCV].u64); return; } + p = &map->gp->r[ix]; + break; + case REGSRC_FP: + case REGSRC_SIMD: + map = (const struct regmap *)base; + if (ix == REGIX_FPFLAGS) + { assert(!(f&REGF_FMTMASK)); dump_fpflags(lbl, map->fp); return; } + p = &map->fp->v[ix]; + break; +#endif + + default: + assert(0); + } + + skip = (lbl ? strlen(lbl) + 2 : 0) + (reg ? strlen(reg) : 0); + fi.f = 0; if (wd > 1) fi.f |= FMTF_VECTOR; + + for (ty = (f&REGF_TYMASK) >> REGF_TYSHIFT, + tybit = 1 << REGF_TYSHIFT; + ty; + ty >>= 1, tybit <<= 1) { + if (!(ty&1u)) continue; + + for (fmt = (f&REGF_FMTMASK) >> REGF_FMTSHIFT, + fmtbit = 1 << REGF_FMTSHIFT; + fmt; + fmt >>= 1, fmtbit <<= 1) { + + if (!(fmt&1u)) continue; + + for (tab = fmttab; tab->mask; tab++) + if (tab->mask == (fmtbit | tybit)) goto found; + continue; + found: + + if (firstp) { + printf(";;"); + if (lbl) printf(" %s:", lbl); + if (reg) printf(" %s =", reg); + firstp = 0; + } else if (wd > 1) + printf("\n;; %*s =", skip, ""); + else + fputs(" =", stdout); + + fi.p = p; fi.wd = 0; + while (fi.wd < wd) { putchar(' '); tab->fmt(&fi); } + } + } + putchar('\n'); +} + +/*----- Other random utilities --------------------------------------------*/ + +/* --- @regdump_freshline@ --- * + * + * Arguments: --- + * + * Returns: --- + * + * Use: Begin a fresh line of output.
+ */ + +void regdump_freshline(void) { putchar('\n'); } + +/*----- That's all, folks -------------------------------------------------*/ diff --git a/base/regdump.h b/base/regdump.h new file mode 100644 index 00000000..66933fcf --- /dev/null +++ b/base/regdump.h @@ -0,0 +1,941 @@ +/* -*-c-*- + * + * Register dump and debugging support + * + * (c) 2019 Straylight/Edgeware + */ + +/*----- Licensing notice --------------------------------------------------* + * + * This file is part of Catacomb. + * + * Catacomb is free software: you can redistribute it and/or modify it + * under the terms of the GNU Library General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Catacomb is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with Catacomb. If not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + * USA. + */ + +#ifndef CATACOMB_REGDUMP_H +#define CATACOMB_REGDUMP_H + +#ifdef __cplusplus + extern "C" { +#endif + +/*----- Header files ------------------------------------------------------*/ + +#include "config.h" + +#ifndef ENABLE_ASM_DEBUG +# error "Assembler-level debug disabled by `configure' script." 
+#endif + +#if __ASSEMBLER__ +# include "asm-common.h" +#else +# include +# include +#endif + +/*----- Random utilities --------------------------------------------------*/ + +#define DO8(_) \ + _(0) _(1) _(2) _(3) _(4) _(5) _(6) _(7) +#define DOHI8(_) \ + _(8) _(9) _(10) _(11) _(12) _(13) _(14) _(15) + +#define DO16(_) DO8(_) DOHI8(_) + +#define DO32(_) \ + DO16(_) \ + _(16) _(17) _(18) _(19) _(20) _(21) _(22) _(23) \ + _(24) _(25) _(26) _(27) _(28) _(29) _(30) _(31) + +/*----- Common data structures --------------------------------------------*/ + +#if !__ASSEMBLER__ + +/* The following are good on our assembler targets. */ +typedef signed char int8; +typedef short int16; +typedef int int32; +#if LONG_MAX >> 31 > 0x7fffffff + typedef long int64; +#else + typedef long long int64; +#endif +typedef float float32; +typedef double float64; +typedef long double float80; + +#if CPUFAM_X86 || CPUFAM_ARMEL +# define PTR32 void *p; +# define PTR64 +#endif +#if CPUFAM_AMD64 || CPUFAM_ARM64 +# define PTR32 +# define PTR64 void *p; +#endif + +#define SIMD_COMMON(wd) \ + uint8 u8[wd/8]; \ + int8 i8[wd/8]; \ + uint16 u16[wd/16]; \ + int16 i16[wd/16]; \ + uint32 u32[wd/32]; \ + int32 i32[wd/32]; \ + uint64 u64[wd/64]; \ + int64 i64[wd/64]; \ + float32 f32[wd/32]; \ + float64 f64[wd/64] + +union gp32 { uint32 u32; int32 i32; PTR32 }; +union gp64 { uint64 u64; int64 i64; PTR64 }; + +#endif + +/*----- Format word layout ------------------------------------------------*/ + +#define REGF_IXMASK 0x000000ff +#define REGF_IXSHIFT 0 +/* The index into the vector indicated by `REGF_SRCMASK', if applicable. */ + +#define REGF_FMTMASK 0x0000ff00 +#define REGF_FMTSHIFT 8 +#define REGF_HEX 0x00000100 +#define REGF_CHR 0x00000200 +#define REGF_FLT 0x00000400 +#define REGF_UNSGN 0x00000800 +#define REGF_SGN 0x00001000 +/* How to format the value(s) found. 
*/ + +#define REGF_TYMASK 0x00ff0000 +#define REGF_TYSHIFT 16 +#define REGF_80 0x00010000 +#define REGF_64 0x00020000 +#define REGF_32 0x00040000 +#define REGF_16 0x00080000 +#define REGF_8 0x00100000 +/* Size of the value(s) to dump. */ + +#define REGF_SRCMASK 0x0f000000 +#define REGSRC_ABS 0x01000000 /* absolute address */ +#define REGSRC_GP 0x02000000 /* general-purpose register */ +#define REGSRC_FP 0x03000000 /* floating-point register */ +#define REGSRC_SIMD 0x04000000 /* SIMD vector register */ +#define REGSRC_STMMX 0x05000000 /* x86-specific: x87/MMX register */ +#define REGSRC_SEG 0x06000000 /* x86-specific: segment register */ +/* Where to find the values. */ + +#define REGF_WDMASK 0xf0000000 +#define REGF_WDSHIFT 28 +/* If we're to print a scalar, this is zero; otherwise, log_2 of the vector + * register width, in bits. + */ + +/*----- x86 and AMD64 -----------------------------------------------------*/ + +#if CPUFAM_X86 || CPUFAM_AMD64 + +#define REGIX_FLAGS 0 +#define REGIX_IP 1 +#define REGIX_ADDR 2 +#define REGIX_AX 3 +#define REGIX_BX 4 +#define REGIX_CX 5 +#define REGIX_DX 6 +#define REGIX_SI 7 +#define REGIX_DI 8 +#define REGIX_BP 9 +#define REGIX_SP 10 +#if CPUFAM_X86 +# define REGIX_GPLIM 11 +#endif +#if CPUFAM_AMD64 +# define REGIX_R8 11 +# define REGIX_R9 12 +# define REGIX_R10 13 +# define REGIX_R11 14 +# define REGIX_R12 15 +# define REGIX_R13 16 +# define REGIX_R14 17 +# define REGIX_R15 18 +# define REGIX_GPLIM 19 +#endif + +#define REGIX_CS 0 +#define REGIX_DS 1 +#define REGIX_SS 2 +#define REGIX_ES 3 +#define REGIX_FS 4 +#define REGIX_GS 5 +#define REGIX_SEGLIM 6 + +#define REGIX_FPFLAGS 255 + +#if !__ASSEMBLER__ + +#if CPUFAM_X86 +typedef union gp32 gpreg; +#endif +#if CPUFAM_AMD64 +typedef union gp64 gpreg; +#endif + +struct gpsave { + gpreg gp[REGIX_GPLIM]; + uint16 seg[REGIX_SEGLIM]; +}; + +union stmmx { + SIMD_COMMON(64); +#if FLT_RADIX == 2 && LDBL_MANT_DIG == 64 + long double f80; +#endif +unsigned char _pad[16]; +}; + +union xmm 
{ SIMD_COMMON(128); }; +union ymm { SIMD_COMMON(256); }; +union vreg { union xmm v128[2]; union ymm v256; }; + +struct fxsave { + unsigned short fcw; + unsigned short fsw; + unsigned char ftw; + unsigned char _res0; + unsigned short fop; +#if CPUFAM_X86 + unsigned int fpu_ip; + unsigned short fpu_cs; + unsigned short _res1; + unsigned int fpu_dp; + unsigned short fpu_ds; + unsigned short _res2; +#endif +#if CPUFAM_AMD64 + unsigned long long fpu_ip; + unsigned long long fpu_dp; +#endif + unsigned int mxcsr; + unsigned int mxcsr_mask; + + union stmmx stmmx[8]; + +#if CPUFAM_X86 + union xmm xmm[8]; + unsigned char _pad0[8*16]; +#endif +#if CPUFAM_AMD64 + union xmm xmm[16]; +#endif + + unsigned char _pad1[96]; +}; + +struct xsave_avx { +#if CPUFAM_X86 + union xmm ymmh[8]; + unsigned char _pad0[8*16]; +#endif +#if CPUFAM_AMD64 + union xmm ymmh[16]; +#endif +}; + +struct regmap { + struct gpsave *gp; + struct fxsave *fx; + struct xsave_avx *avx; +}; + +#else + + .extern regdump_gpsave + .extern regdump_xtsave + .extern regdump_xtrstr + .extern regdump_gprstr + + regmap_gp = 0*WORDSZ + regmap_fx = 1*WORDSZ + regmap_avx = 2*WORDSZ + regmap_size = 3*WORDSZ + +#define REGDEF_GPX86_COMMON(rn, RN) \ + regsrc.e##rn = REGSRC_GP | REGIX_##RN; \ + regty.e##rn = REGF_32; \ + regfmt.e##rn = REGF_HEX; \ + regsrc.r##rn = REGSRC_GP | REGIX_##RN; \ + regty.r##rn = REGF_64; \ + regfmt.r##rn = REGF_HEX + +#define REGDEF_GPX86_ABCD(rn, RN) \ + regsrc.rn##hl = (4 << REGF_WDSHIFT) | REGSRC_GP | REGIX_##RN##X; \ + regty.rn##hl = REGF_8; \ + regfmt.rn##hl = REGF_HEX; \ + regsrc.rn##l = REGSRC_GP | REGIX_##RN##X; \ + regty.rn##l = REGF_8; \ + regfmt.rn##l = REGF_HEX; \ + regsrc.rn##x = REGSRC_GP | REGIX_##RN##X; \ + regty.rn##x = REGF_16; \ + regfmt.rn##x = REGF_HEX; \ + REGDEF_GPX86_COMMON(rn##x, RN##X) +REGDEF_GPX86_ABCD(a, A) +REGDEF_GPX86_ABCD(b, B) +REGDEF_GPX86_ABCD(c, C) +REGDEF_GPX86_ABCD(d, D) + + regsrc.eflags = REGSRC_GP | REGIX_FLAGS + regty.eflags = REGF_32 + regfmt.eflags = 0 /* no default format bits for the flags register */ +
+#if CPUFAM_AMD64 + regsrc.rflags = REGSRC_GP | REGIX_FLAGS + regty.rflags = REGF_64 + regfmt.rflags = 0 /* no default format bits for the flags register */ +#endif + +#define REGDEF_GPX86_XP(rn, RN) \ + regsrc.rn##l = REGSRC_GP | REGIX_##RN; \ + regty.rn##l = REGF_8; \ + regfmt.rn##l = REGF_HEX; \ + regsrc.rn = REGSRC_GP | REGIX_##RN; \ + regty.rn = REGF_16; \ + regfmt.rn = REGF_HEX; \ + REGDEF_GPX86_COMMON(rn, RN) +REGDEF_GPX86_XP(ip, IP) +REGDEF_GPX86_XP(si, SI) +REGDEF_GPX86_XP(di, DI) +REGDEF_GPX86_XP(bp, BP) +REGDEF_GPX86_XP(sp, SP) + +#if CPUFAM_AMD64 +# define REGDEF_GPAMD64(i) \ + regsrc.r##i##b = REGSRC_GP | REGIX_R##i; \ + regty.r##i##b = REGF_8; \ + regfmt.r##i##b = REGF_HEX; \ + regsrc.r##i##w = REGSRC_GP | REGIX_R##i; \ + regty.r##i##w = REGF_16; \ + regfmt.r##i##w = REGF_HEX; \ + regsrc.r##i##d = REGSRC_GP | REGIX_R##i; \ + regty.r##i##d = REGF_32; \ + regfmt.r##i##d = REGF_HEX; \ + regsrc.r##i = REGSRC_GP | REGIX_R##i; \ + regty.r##i = REGF_64; \ + regfmt.r##i = REGF_HEX; + DOHI8(REGDEF_GPAMD64) +#endif + +#define REGDEF_SEG(rn, RN) \ + regsrc.rn = REGSRC_SEG | REGIX_##RN; \ + regty.rn = REGF_16; \ + regfmt.rn = REGF_HEX +REGDEF_SEG(ss, SS) +REGDEF_SEG(cs, CS) +REGDEF_SEG(ds, DS) +REGDEF_SEG(es, ES) +REGDEF_SEG(fs, FS) +REGDEF_SEG(gs, GS) + +#define REGDEF_STMMX(i) \ + regsrc.st##i = REGSRC_STMMX | i; \ + regty.st##i = REGF_80; \ + regfmt.st##i = REGF_FLT; \ + regsrc.mm##i = (6 << REGF_WDSHIFT) | REGSRC_STMMX | i; \ + regty.mm##i = REGF_16; \ + regfmt.mm##i = REGF_HEX; +DO8(REGDEF_STMMX) + +#define REGDEF_SIMD(i) \ + regsrc.xmm##i = (7 << REGF_WDSHIFT) | REGSRC_SIMD | i; \ + regty.xmm##i = REGF_32; \ + regfmt.xmm##i = REGF_HEX; \ + regsrc.ymm##i = (8 << REGF_WDSHIFT) | REGSRC_SIMD | i; \ + regty.ymm##i = REGF_32; \ + regfmt.ymm##i = REGF_HEX; +DO8(REGDEF_SIMD) +#if CPUFAM_AMD64 + DOHI8(REGDEF_SIMD) +#endif + + REGDUMP_GPSIZE = REGIX_GPLIM*WORDSZ + REGIX_SEGLIM*2 + +# if CPUFAM_AMD64 && ABI_SYSV + REGDUMP_SPADJ = REGDUMP_GPSIZE + WORDSZ + 128 +# else + REGDUMP_SPADJ = REGDUMP_GPSIZE + WORDSZ +# endif +
+.macro _saveregs addr=nil + // Save the registers, leaving r/ebp pointing to the register map. + + // Stash r/eax. This is bletcherous: hope we don't get a signal in + // the next few instructions. + mov [R_sp(r) - REGDUMP_SPADJ + (REGIX_AX - 1)*WORDSZ], R_a(r) + + .ifnes "\addr", "nil" + // Collect the effective address for the following dump, leaving it + // in the `addr' slot of the dump. + lea R_a(r), \addr + mov [R_sp(r) - REGDUMP_SPADJ + (REGIX_ADDR - 1)*WORDSZ], R_a(r) + .endif + + // Make space for the register save area. On AMD64 with System/V + // ABI, also skip the red zone. Use `lea' here to preserve the + // flags. + lea R_sp(r), [R_sp(r) - REGDUMP_SPADJ] + + // Save flags and general-purpose registers. On 32-bit x86, we save + // ebx here and establish a GOT pointer here for the benefit of the + // PLT-indirect calls made later on. + pushf +# if CPUFAM_X86 + mov [esp + 4*REGIX_BX], ebx + ldgot +# endif + callext regdump_gpsave + + // Make space for the extended registers. + sub R_sp(r), R_c(r) + callext regdump_xtsave + + // Prepare for calling back into C. On 32-bit x86, leave space for + // the arguments and set up the GOT pointer; on AMD64 Windows, leave + // the `shadow space' for the called-function's arguments. Also, + // forcibly align the stack pointer to a 16-byte boundary. +# if CPUFAM_X86 + sub esp, 16 +# elif ABI_WIN + sub rsp, 32 +# endif + and R_sp(r), ~15 +.endm + +.macro _rstrregs + // Restore registers. + + // We assume r/ebp still points to the register map. 
+ callext regdump_xtrstr + mov R_sp(r), R_bp(r) + callext regdump_gprstr + popf + lea R_sp(r), [R_sp(r) + REGDUMP_SPADJ] +.endm + +.macro _regbase +# if CPUFAM_X86 + mov [esp + 0], ebp +# elif ABI_SYSV + mov rdi, rbp +# elif ABI_WIN + mov rcx, rbp +# endif +.endm + +.macro _membase + mov R_a(r), [R_bp(r) + regmap_gp] +# if CPUFAM_X86 + mov eax, [eax + REGIX_ADDR*WORDSZ] + mov [esp + 0], eax +# elif ABI_SYSV + mov rdi, [rax + REGIX_ADDR*WORDSZ] +# elif ABI_WIN + mov rcx, [rax + REGIX_ADDR*WORDSZ] +# endif +.endm + +.macro _reglbl msg + .ifeqs "\msg", "" +# if CPUFAM_X86 + mov dword ptr [esp + 4], 0 +# elif ABI_SYSV + xor esi, esi +# elif ABI_WIN + xor edx, edx +# endif + .else +# if CPUFAM_X86 + lea eax, [INTADDR(.L$_reglbl$\@)] + mov [esp + 4], eax +# elif ABI_SYSV + lea rsi, [INTADDR(.L$_reglbl$\@)] +# elif ABI_WIN + lea rdx, [INTADDR(.L$_reglbl$\@)] +# endif + _LIT +.L$_reglbl$\@: + .asciz "\msg" + _ENDLIT + .endif +.endm + +.macro _regfmt arg +# if CPUFAM_X86 + mov dword ptr [esp + 8], \arg +# elif ABI_SYSV + mov edx, \arg +# elif ABI_WIN + mov r8d, \arg +# endif +.endm + +#endif + +#endif + +/*----- ARM32 -------------------------------------------------------------*/ + +#if CPUFAM_ARMEL + +#if !__ASSEMBLER__ +extern unsigned regdump__flags; +#endif +#define REGF_VFP 1u +#define REGF_D32 2u + +#define REGIX_CPSR 16 +#define REGIX_ADDR 17 +#define REGIX_GPLIM 18 + +#define REGIX_FPSCR 255 + +#if !__ASSEMBLER__ + +union neon64 { SIMD_COMMON(64); }; +union neon128 { SIMD_COMMON(128); }; + +struct gpsave { union gp32 r[REGIX_GPLIM]; }; + +struct fpsave { + unsigned fpscr; + unsigned _pad0; + union { + float32 s[32]; + union neon64 d[32]; + union neon128 q[16]; + } u; +}; + +struct regmap { + struct gpsave *gp; + struct fpsave *fp; +}; + +#else + + .extern regdump_gpsave + .extern regdump_xtsave + .extern regdump_xtrstr + .extern regdump_gprstr + + regmap_gp = 0 + regmap_fp = 4 + regmap_size = 8 + +#define REGDEF_GP(i) \ + regsrc.r##i = REGSRC_GP | i; \ + regty.r##i 
= REGF_32; \ + regfmt.r##i = REGF_HEX; +DO16(REGDEF_GP) + + regsrc.cpsr = REGSRC_GP | REGIX_CPSR + regty.cpsr = REGF_32 + regfmt.cpsr = 0 + +#define REGDEF_NEONS(i) \ + regsrc.s##i = REGSRC_FP | i; \ + regty.s##i = REGF_32; \ + regfmt.s##i = REGF_FLT; +DO32(REGDEF_NEONS) + +#define REGDEF_NEOND(i) \ + regsrc.d##i = (6 << REGF_WDSHIFT) | REGSRC_FP | i; \ + regty.d##i = REGF_32; \ + regfmt.d##i = REGF_HEX; +DO32(REGDEF_NEOND) + +#define REGDEF_NEONQ(i) \ + regsrc.q##i = (7 << REGF_WDSHIFT) | REGSRC_FP | i; \ + regty.q##i = REGF_32; \ + regfmt.q##i = REGF_HEX; +DO16(REGDEF_NEONQ) + + regsrc.fpscr = REGSRC_FP | REGIX_FPSCR + regty.fpscr = REGF_32 + regfmt.fpscr = 0 + + REGDUMP_GPSIZE = 4*REGIX_GPLIM + REGDUMP_FPSIZE_D16 = 8 + 16*8 + REGDUMP_FPSIZE_D32 = 8 + 32*8 + +.macro _saveregs base=nil, off=#0 + // Save the registers, leaving r4 pointing to the register map. + + // Stash r14. This is bletcherous: hope we don't get a signal in + // the next few instructions. + str r14, [r13, #-REGDUMP_GPSIZE + 14*4] + + .ifnes "\base,\off", "nil,#0" + // Collect the effective address for the following dump, leaving it + // in the `addr' slot of the dump. + .ifeqs "\base", "nil" + adrl r14, \off + .else + add r14, \base, \off + .endif + str r14, [r13, #-REGDUMP_GPSIZE + 4*REGIX_ADDR] + .endif + + // Make space for the register save area. + sub r13, r13, #REGDUMP_GPSIZE + + // Save flags and general-purpose registers. + str r12, [r13, #4*12] + bl regdump_gpsave + + // Make space for the extended registers. + sub r13, r13, r0 + bl regdump_xtsave + + // Prepare for calling back into C. + ldgot + mov r0, r13 + bic r0, r0, #15 + mov r13, r0 +.endm + +.macro _rstrregs + // Restore registers. + + // We assume r4 still points to the register map. 
+ bl regdump_xtrstr + mov r13, r4 + bl regdump_gprstr + ldr r14, [r13, #14*4] + add r13, r13, #REGDUMP_GPSIZE +.endm + +.macro _regbase + mov r0, r5 +.endm + +.macro _membase + mov r0, r6 +.endm + +.macro _reglbl msg + adrl r1, .L$_reglbl$\@ + _LIT +.L$_reglbl$\@: + .asciz "\msg" + .balign 4 + _ENDLIT +.endm + +.macro _regfmt arg + movw r2, #\arg&0xffff + movt r2, #(\arg >> 16)&0xffff +.endm + +#endif + +#endif + +/*----- ARM64 -------------------------------------------------------------*/ + +#if CPUFAM_ARM64 + +#define REGIX_NZCV 32 +#define REGIX_PC 33 +#define REGIX_ADDR 34 +#define REGIX_GPLIM 36 + +#define REGIX_FPFLAGS 255 + +#if !__ASSEMBLER__ + +union v128 { SIMD_COMMON(128); }; + +struct gpsave { union gp64 r[REGIX_GPLIM]; }; + +struct fpsave { + unsigned fpsr, fpcr; + union v128 v[32]; +}; + +struct regmap { + struct gpsave *gp; + struct fpsave *fp; +}; + +#else + + .extern regdump_gpsave + .extern regdump_xtsave + .extern regdump_xtrstr + .extern regdump_gprstr + + regmap_gp = 0 + regmap_fp = 8 + regmap_size = 16 + +#define REGDEF_GP(i) \ + regsrc.x##i = REGSRC_GP | i; \ + regty.x##i = REGF_64; \ + regfmt.x##i = REGF_HEX; \ + regsrc.w##i = REGSRC_GP | i; \ + regty.w##i = REGF_32; \ + regfmt.w##i = REGF_HEX; +DO32(REGDEF_GP) + + regsrc.sp = REGSRC_GP | 31 + regty.sp = REGF_64 + regfmt.sp = REGF_HEX + + regsrc.pc = REGSRC_GP | REGIX_PC + regty.pc = REGF_64 + regfmt.pc = REGF_HEX + + regsrc.nzcv = REGSRC_GP | REGIX_NZCV + regty.nzcv = REGF_32 + regfmt.nzcv = 0 + +#define REGDEF_FP(i) \ + regsrc.b##i = REGSRC_FP | i; \ + regty.b##i = REGF_8; \ + regfmt.b##i = REGF_HEX; \ + regsrc.h##i = REGSRC_FP | i; \ + regty.h##i = REGF_16; \ + regfmt.h##i = REGF_HEX; \ + regsrc.s##i = REGSRC_FP | i; \ + regty.s##i = REGF_32; \ + regfmt.s##i = REGF_FLT; \ + regsrc.d##i = REGSRC_FP | i; \ + regty.d##i = REGF_64; \ + regfmt.d##i = REGF_FLT; \ + regsrc.v##i = (7 << REGF_WDSHIFT) | REGSRC_FP | i; \ + regty.v##i = REGF_32; \ + regfmt.v##i = REGF_HEX; +DO32(REGDEF_FP) + + 
regsrc.fpflags = REGSRC_FP | REGIX_FPFLAGS + regty.fpflags = REGF_32 + regfmt.fpflags = 0 + + REGDUMP_GPSIZE = 8*REGIX_GPLIM + REGDUMP_FPSIZE = 16 + 16 + 32*16 + +.macro _saveregs base=nil, off=#0 + // Save the registers, leaving x20 pointing to the register map. + + // Stash x30. This is bletcherous: hope we don't get a signal in + // the next few instructions. + str x30, [sp, #-REGDUMP_GPSIZE + 30*8] + + .ifnes "\base,\off", "nil,#0" + // Collect the effective address for the following dump, leaving it + // in the `addr' slot of the dump. + .ifeqs "\base", "nil" + adr x30, \off + .else + add x30, \base, \off + .endif + str x30, [sp, #-REGDUMP_GPSIZE + 8*REGIX_ADDR] + .endif + + // Make space for the register save area. + sub sp, sp, #REGDUMP_GPSIZE + + // Save flags and general-purpose registers. + stp x16, x17, [sp, #8*16] + bl regdump_gpsave + + // Make space for the extended registers. + sub sp, sp, x0 + bl regdump_xtsave +.endm + +.macro _rstrregs + // Restore registers. + + // We assume x21 still points to the register map. + bl regdump_xtrstr + mov sp, x20 + bl regdump_gprstr + ldr x30, [sp, #30*8] + add sp, sp, #REGDUMP_GPSIZE +.endm + +.macro _regbase + mov x0, x21 +.endm + +.macro _membase + mov x0, x22 +.endm + +.macro _reglbl msg + adr x1, .L$_reglbl$\@ + _LIT +.L$_reglbl$\@: + .asciz "\msg" + .balign 4 + _ENDLIT +.endm + +.macro _regfmt arg + movz w2, #\arg&0xffff + movk w2, #(\arg >> 16)&0xffff, lsl #16 +.endm + +#endif + +#endif + +/*----- Functions provided ------------------------------------------------*/ + +/* --- @regdump_init@ --- * + * + * Arguments: --- + * + * Returns: --- + * + * Use: Performs one-time initialization for register dumping. In + * particular, this performs CPU feature detection on platforms + * where that is a difficult task: without it, registers + * corresponding to optional architectural features can be + * neither printed nor preserved by the register-dump machinery. 
+ */ + +#if !__ASSEMBLER__ +extern void regdump_init(void); +#endif + +/* --- @regdump@ --- * + * + * Arguments: @const void *base@ = pointer to base structure, corresponding + * to the @REGF_SRCMASK@ part of @f@ + * @const char *lbl@ = label to print + * @uint32 f@ = format control word; see @REGF_...@ + * + * Returns: --- + * + * Use: Dump a register value, or chunk of memory. + * + * This function is not usually called directly; instead, use + * the `reg' or `mem' assembler macros. + */ + +#if !__ASSEMBLER__ +extern void regdump(const void *base, const char *lbl, uint32 f); +#else + .extern regdump +#endif + +/* --- @regdump_gp@, @regdump_fp@, @regdump_simd@ --- * + * + * Arguments: @const struct regmap *map@ = pointer to register map + * + * Returns: --- + * + * Use: Dump the general-purpose/floating-point/SIMD registers. + * + * This function is not usually called directly; instead, use + * the `regdump' assembler macro. + */ + +#if !__ASSEMBLER__ +extern void regdump_gp(const struct regmap */*map*/); +extern void regdump_fp(const struct regmap */*map*/); +extern void regdump_simd(const struct regmap */*map*/); +#else + .extern regdump_gp + .extern regdump_fp + .extern regdump_simd +#endif + +/* --- @regdump_freshline@ --- * + * + * Arguments: --- + * + * Returns: --- + * + * Use: Begin a fresh line of output. 
+ */ + +#if !__ASSEMBLER__ +extern void regdump_freshline(void); +#else + .extern regdump_freshline +#endif + +/*----- Main user interface macros ----------------------------------------*/ + +#if __ASSEMBLER__ + +.macro terpri + _saveregs + callext regdump_freshline + _rstrregs +.endm + +.macro reg lbl, rn, fmt=0 + _saveregs + _regbase + _reglbl "\lbl" + .L$reg.fmt$\@ = regsrc.\rn | \fmt | \ + (((\fmt & REGF_TYMASK) == 0) & regty.\rn) | \ + (((\fmt & REGF_FMTMASK) == 0) & regfmt.\rn) + _regfmt .L$reg.fmt$\@ + callext regdump + _rstrregs +.endm + +.macro mem lbl, addr, fmt=0 + _saveregs \addr + _membase + _reglbl "\lbl" + .L$mem.fmt$\@ = REGSRC_ABS | \fmt | \ + (((\fmt & REGF_TYMASK) == 0) & REGF_32) | \ + (((\fmt & REGF_FMTMASK) == 0) & REGF_HEX) + _regfmt .L$mem.fmt$\@ + callext regdump + _rstrregs +.endm + +.macro regdump gp=nil, fp=nil, simd=nil + _saveregs + .ifnes "\gp", "nil" + _regbase + callext regdump_gp + .endif + .ifnes "\fp", "nil" + _regbase + callext regdump_fp + .endif + .ifnes "\simd", "nil" + _regbase + callext regdump_simd + .endif + _rstrregs +.endm + +#endif + +/*----- That's all, folks -------------------------------------------------*/ + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/base/test-regdump-a64.S b/base/test-regdump-a64.S new file mode 100644 index 00000000..f91331e4 --- /dev/null +++ b/base/test-regdump-a64.S @@ -0,0 +1,27 @@ +#include "config.h" +#include "asm-common.h" +#include "regdump.h" + + .text + + .p2align 5 +vec: + .long 1, 2, 3, 4, 5, 6, 7, 8 + +FUNC(main) + + pushreg x29, x30 + setfp + endprologue + + bl regdump_init + + cmp x0, x0 + reg "simd", v1 + + mov w0, #0 + dropfp + popreg x29, x30 + ret + +ENDFUNC diff --git a/base/test-regdump-arm.S b/base/test-regdump-arm.S new file mode 100644 index 00000000..a49b101c --- /dev/null +++ b/base/test-regdump-arm.S @@ -0,0 +1,33 @@ +#include "config.h" +#include "asm-common.h" +#include "regdump.h" + + .text + .arch armv7-a + + .p2align 5 +vec: + .long 1, 2, 3, 4, 5, 6, 7, 8 + +FUNC(main) + + pushreg r11, r14
+ setfp + endprologue + + bl regdump_init + + cmp r0, r0 + regdump gp=t + reg "general purpose", r1 + reg " flags", cpsr + reg " simd", q0, fmt=REGF_HEX | REGF_32 | REGF_16 + reg " float", d0, fmt=REGF_FLT | REGF_64 + reg " float", s0 + reg " float status", fpscr + + mov r0, #0 + dropfp + popreg r11, pc + +ENDFUNC diff --git a/base/test-regdump-x86ish.S b/base/test-regdump-x86ish.S new file mode 100644 index 00000000..a8c8d435 --- /dev/null +++ b/base/test-regdump-x86ish.S @@ -0,0 +1,38 @@ +#include "config.h" +#include "asm-common.h" +#include "regdump.h" + + .text + + .p2align 5 +vec: + .long 1, 2, 3, 4, 5, 6, 7, 8 + +FUNC(main) + + pushreg R_bp(r) + setfp + and R_sp(r), ~15 + endprologue + + fldz + fld1 + fldpi + fldl2t + fldl2e + fldlg2 + fldln2 + //fld1 + + ldgot + movdqa xmm2, [INTADDR(vec)] + //vmovdqa ymm2, [INTADDR(vec)] + + reg "my fp", xmm2, REGF_FLT | REGF_64 | REGF_32 + + xor eax, eax + dropfp + popreg R_bp(r) + ret + +ENDFUNC diff --git a/configure.ac b/configure.ac index db098dc1..042c7c32 100644 --- a/configure.ac +++ b/configure.ac @@ -158,6 +158,17 @@ catacomb_DEFINE_CPU_OR_ABI([CPUFAM], [2], [cpu], catacomb_DEFINE_CPU_OR_ABI([ABI], [3], [abi], [ABI_], [Define if target ABI is \`$][3\'.]) +AC_ARG_ENABLE([asm-debug], + AS_HELP_STRING([--enable-asm-debug], + [enable assembler debugging features]), + [mdw_asm_debug=$enableval], [mdw_asm_debug=no]) +case $CPUFAM in nil) mdw_asm_debug=no ;; esac +case $mdw_asm_debug in + no) ;; + *) AC_DEFINE([ENABLE_ASM_DEBUG], [1], + [Define to enable assembler-level debugging.]) ;; +esac + dnl Establish Automake conditions for things. catacomb_CPU_FAMILIES([catacomb_CLEAR_FLAGS]) m4_define([catacomb_COND_CPU], @@ -172,6 +183,7 @@ m4_ifdef([catacomb_seen_abi/$3], [], m4_define([catacomb_seen_abi/$3], [t])])]) catacomb_CPU_FAMILIES([catacomb_COND_CPU]) AM_CONDITIONAL([KNOWN_CPUFAM], [test x$CPUFAM != xnil]) +AM_CONDITIONAL([ASM_DEBUG], [test x$mdw_asm_debug != xno]) dnl Report on what we found. case $CPUFAM in