--- /dev/null
+/// -*- mode: asm; asm-comment-char: ?/ -*-
+///
+/// Register dump and debugging for x86
+///
+/// (c) 2019 Straylight/Edgeware
+///
+
+///----- Licensing notice ---------------------------------------------------
+///
+/// This file is part of Catacomb.
+///
+/// Catacomb is free software: you can redistribute it and/or modify it
+/// under the terms of the GNU Library General Public License as published
+/// by the Free Software Foundation; either version 2 of the License, or
+/// (at your option) any later version.
+///
+/// Catacomb is distributed in the hope that it will be useful, but
+/// WITHOUT ANY WARRANTY; without even the implied warranty of
+/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+/// Library General Public License for more details.
+///
+/// You should have received a copy of the GNU Library General Public
+/// License along with Catacomb. If not, write to the Free Software
+/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+/// USA.
+
+///--------------------------------------------------------------------------
+/// Preliminaries.
+
+#include "config.h"
+#include "asm-common.h"
+#include "regdump.h"
+
+ .text
+ .arch pentium4
+ .arch .xsave
+
+///--------------------------------------------------------------------------
+/// Main code.
+
+FUNC(regdump_gpsave)
+	endprologue
+	// On entry, r/esp should point to a return address and
+	// `REGDUMP_GPSIZE' bytes of word-aligned storage to be the
+	// general-purpose save area, with flags saved in the bottom word,
+	// r/eax saved in the fourth, and (on 32-bit x86) ebx in the fifth.
+	// On exit, the initial registers are saved in this space, and
+	// modified: r/ebp points to the general-purpose save area, ecx
+	// contains the number of bytes required in the extended save area,
+	// ebx is preserved on 32-bit x86, and other general-purpose
+	// registers are clobbered or used to communicate with
+	// `regdump_xtsave' below.  Doing anything other than lowering the
+	// stack pointer and calling `regdump_xtsave' is not recommended.
+
+	// Other code will insist that df is clear (e.g., the `rep stosd'
+	// in `regdump_xtsave' below).
+	cld
+
+	// Save r/ebp and establish it pointing to the save area, which
+	// starts one word above the return address on the stack.
+	mov [R_sp(r) + WORDSZ + REGIX_BP*WORDSZ], R_bp(r)
+	lea R_bp(r), [R_sp(r) + WORDSZ]
+
+	// Save the other easy general-purpose registers.  (On 32-bit x86,
+	// ebx was already stashed by the caller -- see above.)
+#if !CPUFAM_X86
+	mov [R_bp(r) + REGIX_BX*WORDSZ], R_b(r)
+#endif
+	mov [R_bp(r) + REGIX_CX*WORDSZ], R_c(r)
+	mov [R_bp(r) + REGIX_DX*WORDSZ], R_d(r)
+	mov [R_bp(r) + REGIX_SI*WORDSZ], R_si(r)
+	mov [R_bp(r) + REGIX_DI*WORDSZ], R_di(r)
+#if CPUFAM_AMD64
+	mov [R_bp(r) + REGIX_R8*WORDSZ], R_r8(r)
+	mov [R_bp(r) + REGIX_R9*WORDSZ], R_r9(r)
+	mov [R_bp(r) + REGIX_R10*WORDSZ], R_r10(r)
+	mov [R_bp(r) + REGIX_R11*WORDSZ], R_r11(r)
+	mov [R_bp(r) + REGIX_R12*WORDSZ], R_r12(r)
+	mov [R_bp(r) + REGIX_R13*WORDSZ], R_r13(r)
+	mov [R_bp(r) + REGIX_R14*WORDSZ], R_r14(r)
+	mov [R_bp(r) + REGIX_R15*WORDSZ], R_r15(r)
+#endif
+
+	// Determine the previous stack pointer and save it.  On SysV AMD64,
+	// also step over the 128-byte red zone which the System V ABI lets
+	// the caller use below its own stack pointer.
+#if CPUFAM_AMD64 && ABI_SYSV
+	lea R_a(r), [R_bp(r) + 128 + REGDUMP_GPSIZE]
+#else
+	lea R_a(r), [R_bp(r) + REGDUMP_GPSIZE]
+#endif
+	mov [R_bp(r) + REGIX_SP*WORDSZ], R_a(r)
+
+	// Collect the return address and save it as r/eip.
+	mov R_a(r), [R_sp(r)]
+	mov [R_bp(r) + REGIX_IP*WORDSZ], R_a(r)
+
+	// Save the segment registers: 16 bits each, packed above the
+	// word-sized registers (hence the `2*' scaling).
+	lea R_a(r), [R_bp(r) + REGIX_GPLIM*WORDSZ]
+	mov [R_a(r) + 2*REGIX_CS], cs
+	mov [R_a(r) + 2*REGIX_DS], ds
+	mov [R_a(r) + 2*REGIX_SS], ss
+	mov [R_a(r) + 2*REGIX_ES], es
+	mov [R_a(r) + 2*REGIX_FS], fs
+	mov [R_a(r) + 2*REGIX_GS], gs
+
+	// Determine the extended save area size.  Preserve ebx on 32-bit x86
+	// here, because the caller needs it for PLT-indirect calls; note
+	// that `cpuid' clobbers ebx.
+#if CPUFAM_X86
+	push ebx
+#endif
+	mov eax, 0x01			// CPUID leaf 1: ECX bit 26 reports
+	cpuid				//   XSAVE support
+	test ecx, 1 << 26
+	je 1f				// no XSAVE: size for FXSAVE instead
+
+	mov eax, 0x0d			// CPUID leaf 0x0d, subleaf 0:
+	mov ecx, 0x00			//   ECX = maximum XSAVE area size
+	cpuid
+	add ecx, regmap_size + 64	// map + align
+	jmp 8f
+
+1:	mov ecx, 512 + regmap_size + 16	// fxsave + map + align
+
+	// Done.
+8:
+#if CPUFAM_X86
+	pop ebx
+#endif
+	ret
+
+ENDFUNC
+
+FUNC(regdump_gprstr)
+	endprologue
+	// On entry, r/ebp points to a general-purpose save area, established
+	// by `regdump_gpsave'.  On exit, the general-purpose registers
+	// (other than the stack pointer) are restored to their original
+	// values.
+
+	// We assume nobody actually fiddled with the segment registers.  So
+	// just the actual integer registers to do.  (r/eax and, on 32-bit
+	// x86, ebx were stored by `regdump_gpsave''s caller, per its entry
+	// conditions; they're restored here along with the rest.)
+	mov R_a(r), [R_bp(r) + REGIX_AX*WORDSZ]
+	mov R_b(r), [R_bp(r) + REGIX_BX*WORDSZ]
+	mov R_c(r), [R_bp(r) + REGIX_CX*WORDSZ]
+	mov R_d(r), [R_bp(r) + REGIX_DX*WORDSZ]
+	mov R_si(r), [R_bp(r) + REGIX_SI*WORDSZ]
+	mov R_di(r), [R_bp(r) + REGIX_DI*WORDSZ]
+#if CPUFAM_AMD64
+	mov R_r8(r), [R_bp(r) + REGIX_R8*WORDSZ]
+	mov R_r9(r), [R_bp(r) + REGIX_R9*WORDSZ]
+	mov R_r10(r), [R_bp(r) + REGIX_R10*WORDSZ]
+	mov R_r11(r), [R_bp(r) + REGIX_R11*WORDSZ]
+	mov R_r12(r), [R_bp(r) + REGIX_R12*WORDSZ]
+	mov R_r13(r), [R_bp(r) + REGIX_R13*WORDSZ]
+	mov R_r14(r), [R_bp(r) + REGIX_R14*WORDSZ]
+	mov R_r15(r), [R_bp(r) + REGIX_R15*WORDSZ]
+#endif
+	// Restore r/ebp last of all: it's the base pointer for all of the
+	// loads above.
+	mov R_bp(r), [R_bp(r) + REGIX_BP*WORDSZ]
+
+	// Done.
+	ret
+
+ENDFUNC
+
+#if CPUFAM_AMD64
+// On AMD64, use the REX.W-encoded 64-bit forms of the extended-state
+// instructions, so that full 64-bit pointers are saved and restored.
+// (Use `#if', matching every other `CPUFAM_...' test in this file,
+// rather than `#ifdef': this remains correct even if the configuration
+// defines the macro as zero on other CPU families.)
+# define fxsave fxsave64
+# define fxrstor fxrstor64
+# define xsave xsave64
+# define xrstor xrstor64
+#endif
+
+FUNC(regdump_xtsave)
+	endprologue
+	// On entry, r/esp points to a return address and extended save area,
+	// of size determined by `regdump_gpsave' above.  On exit, the save
+	// area is filled in and a handy map placed at its base, the x87
+	// floating-point state is reset, r/ebp is left pointing to the
+	// register map, ebx is preserved on 32-bit x86, and the other
+	// general registers are clobbered.
+	// NOTE(review): on AMD64 the clobbered registers include rbx, which
+	// is callee-saved in both major ABIs; presumably callers recover it
+	// from the save area via `regdump_gprstr' -- confirm.
+
+	// Start by filling in the easy parts of the map: stash the incoming
+	// r/ebp (the general-purpose save area pointer, as established by
+	// `regdump_gpsave') in the map's `gp' slot, and leave r/ebp
+	// pointing at the map itself.
+	mov [R_sp(r) + WORDSZ + regmap_gp], R_bp(r)
+	lea R_bp(r), [R_sp(r) + WORDSZ]
+
+	// Start with a null AVX pointer; it's filled in below only if AVX
+	// state turns out to have been saved.
+	xor eax, eax // clears rax too on amd64
+	mov [R_bp(r) + regmap_avx], R_a(r)
+
+	// Find out whether we use `xsave'.  (Preserve ebx, which `cpuid'
+	// clobbers.)
+#if CPUFAM_X86
+	push ebx
+#endif
+	mov eax, 0x01			// CPUID leaf 1: ECX bit 26 reports
+	cpuid				//   XSAVE support
+	test ecx, 1 << 26
+	je 5f
+
+	// We have the `xsave' machinery.  Select the base address: `xsave'
+	// requires its area to be 64-byte aligned, so round up.
+	lea R_si(r), [R_sp(r) + WORDSZ + regmap_size + 63]
+	and R_si(r), ~63
+	mov [R_bp(r) + regmap_fx], R_si(r)
+
+	// Clear out the header area: the 64-byte XSAVE header at offset 512
+	// (`xstate_bv' and friends) must be initialized before `xsave'.
+	// (df is clear -- see `regdump_gpsave'.)
+	xor eax, eax
+	lea R_di(r), [R_si(r) + 512]
+	mov ecx, 16			// 16 dwords = 64 bytes
+	rep stosd
+
+	// Save the registers.  RFBM (edx:eax) = 7 = x87 | SSE | AVX.
+	mov eax, 0x00000007
+	xor edx, edx
+	xsave [R_si(r)]
+
+	// Establish the AVX pointer, if available: bit 2 of `xstate_bv'
+	// is set iff AVX state was actually written.
+	test dword ptr [R_si(r) + 512], 4 // = xstate_bv
+	je 8f
+
+	mov eax, 13			// CPUID leaf 0x0d, subleaf 2:
+	mov ecx, 2			//   EBX = byte offset of the AVX
+	cpuid				//   state within the XSAVE area
+	add R_b(r), R_si(r)		// convert offset to pointer
+	mov [R_bp(r) + regmap_avx], R_b(r)
+
+	jmp 8f
+
+	// We have only `fxsave', which needs just 16-byte alignment.  Set
+	// the base address.
+5:	lea R_si(r), [R_sp(r) + WORDSZ + regmap_size + 15]
+	and R_si(r), ~15
+	mov [R_bp(r) + regmap_fx], R_si(r)
+
+	// Save the registers.
+	fxsave [R_si(r)]
+
+	// Clear the x87 state; otherwise it can cause trouble later.
+8:	fninit
+
+	// Done.
+#if CPUFAM_X86
+	pop ebx
+#endif
+	ret
+
+ENDFUNC
+
+FUNC(regdump_xtrstr)
+	endprologue
+	// On entry, r/ebp points to a register-save map, established by
+	// `regdump_xtsave'.  On exit, the extended registers are restored
+	// from the save area; r/ebp is left pointing to the general-purpose
+	// save area (ready for `regdump_gprstr'), ebx is preserved on
+	// 32-bit x86, and the other general registers are clobbered.
+
+	// Find the extended register dump.
+	mov R_si(r), [R_bp(r) + regmap_fx]
+
+	// Probe to find out whether we have `xsave'.  (This must reach the
+	// same decision as `regdump_xtsave' above.  Preserve ebx, which
+	// `cpuid' clobbers.)
+#if CPUFAM_X86
+	push ebx
+#endif
+	mov eax, 0x01			// CPUID leaf 1: ECX bit 26 reports
+	cpuid				//   XSAVE support
+	test ecx, 1 << 26
+	je 1f
+
+	// We have the `xsave' machinery.  RFBM (edx:eax) = 7 =
+	// x87 | SSE | AVX, matching the mask used when saving.
+	mov eax, 0x00000007
+	xor edx, edx
+	xrstor [R_si(r)]
+	jmp 8f
+
+	// We must fake it up: no `xsave', so the dump was made with plain
+	// `fxsave'.
+1:	fxrstor [R_si(r)]
+
+	// Done.  Swap r/ebp over to the general-purpose save area.
+8:	mov R_bp(r), [R_bp(r) + regmap_gp]
+#if CPUFAM_X86
+	pop ebx
+#endif
+	ret
+
+ENDFUNC
+
+///----- That's all, folks --------------------------------------------------