| 1 | /// -*- mode: asm; asm-comment-char: ?/ -*- |
| 2 | /// |
| 3 | /// Register dump and debugging for x86 |
| 4 | /// |
| 5 | /// (c) 2019 Straylight/Edgeware |
| 6 | /// |
| 7 | |
| 8 | ///----- Licensing notice --------------------------------------------------- |
| 9 | /// |
| 10 | /// This file is part of Catacomb. |
| 11 | /// |
| 12 | /// Catacomb is free software: you can redistribute it and/or modify it |
| 13 | /// under the terms of the GNU Library General Public License as published |
| 14 | /// by the Free Software Foundation; either version 2 of the License, or |
| 15 | /// (at your option) any later version. |
| 16 | /// |
| 17 | /// Catacomb is distributed in the hope that it will be useful, but |
| 18 | /// WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 20 | /// Library General Public License for more details. |
| 21 | /// |
| 22 | /// You should have received a copy of the GNU Library General Public |
| 23 | /// License along with Catacomb. If not, write to the Free Software |
| 24 | /// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
| 25 | /// USA. |
| 26 | |
| 27 | ///-------------------------------------------------------------------------- |
| 28 | /// Preliminaries. |
| 29 | |
| 30 | #include "config.h" |
| 31 | #include "asm-common.h" |
| 32 | #include "regdump.h" |
| 33 | |
| 34 | .text |
| 35 | .arch pentium4 |
| 36 | .arch .xsave |
| 37 | |
| 38 | ///-------------------------------------------------------------------------- |
| 39 | /// Main code. |
| 40 | |
FUNC(regdump_gpsave)
  endprologue
	// Save the general-purpose and segment registers, and work out how
	// much room the extended register dump will need.
	//
	// On entry, r/esp should point to a return address and
	// `REGDUMP_GPSIZE' bytes of word-aligned storage to be the
	// general-purpose save area, with flags saved in the bottom word,
	// r/eax saved in the fourth, and (on 32-bit x86) ebx in the fifth.
	// On exit, the initial registers are saved in this space, and
	// modified: r/ebp points to the general-purpose save area, ecx
	// contains the number of bytes required in the extended save area,
	// ebx is preserved on 32-bit x86, and other general-purpose
	// registers are clobbered or used to communicate with
	// `regdump_xtsave' below.  Doing anything other than lowering the
	// stack pointer and calling `regdump_xtsave' is not recommended.

	// Other code will insist that df is clear.
	cld

	// Save r/ebp and establish it pointing to the save area, which
	// starts just above our return address.
	mov	[R_sp(r) + WORDSZ + REGIX_BP*WORDSZ], R_bp(r)
	lea	R_bp(r), [R_sp(r) + WORDSZ]

	// Save the other easy general-purpose registers.  On 32-bit x86,
	// the caller has already stored ebx (see above), so leave its slot
	// alone.
#if !CPUFAM_X86
	mov	[R_bp(r) + REGIX_BX*WORDSZ], R_b(r)
#endif
	mov	[R_bp(r) + REGIX_CX*WORDSZ], R_c(r)
	mov	[R_bp(r) + REGIX_DX*WORDSZ], R_d(r)
	mov	[R_bp(r) + REGIX_SI*WORDSZ], R_si(r)
	mov	[R_bp(r) + REGIX_DI*WORDSZ], R_di(r)
#if CPUFAM_AMD64
	mov	[R_bp(r) + REGIX_R8*WORDSZ], R_r8(r)
	mov	[R_bp(r) + REGIX_R9*WORDSZ], R_r9(r)
	mov	[R_bp(r) + REGIX_R10*WORDSZ], R_r10(r)
	mov	[R_bp(r) + REGIX_R11*WORDSZ], R_r11(r)
	mov	[R_bp(r) + REGIX_R12*WORDSZ], R_r12(r)
	mov	[R_bp(r) + REGIX_R13*WORDSZ], R_r13(r)
	mov	[R_bp(r) + REGIX_R14*WORDSZ], R_r14(r)
	mov	[R_bp(r) + REGIX_R15*WORDSZ], R_r15(r)
#endif

	// Determine the previous stack pointer and save it.  On SysV amd64
	// an extra 128 bytes are added in (presumably the ABI red zone
	// which the caller stepped over -- confirm against the caller).
#if CPUFAM_AMD64 && ABI_SYSV
	lea	R_a(r), [R_bp(r) + 128 + REGDUMP_GPSIZE]
#else
	lea	R_a(r), [R_bp(r) + REGDUMP_GPSIZE]
#endif
	mov	[R_bp(r) + REGIX_SP*WORDSZ], R_a(r)

	// Collect the return address and save it as r/eip.
	mov	R_a(r), [R_sp(r)]
	mov	[R_bp(r) + REGIX_IP*WORDSZ], R_a(r)

	// Save the segment registers, as 16-bit items following the
	// word-sized general-purpose slots.
	lea	R_a(r), [R_bp(r) + REGIX_GPLIM*WORDSZ]
	mov	[R_a(r) + 2*REGIX_CS], cs
	mov	[R_a(r) + 2*REGIX_DS], ds
	mov	[R_a(r) + 2*REGIX_SS], ss
	mov	[R_a(r) + 2*REGIX_ES], es
	mov	[R_a(r) + 2*REGIX_FS], fs
	mov	[R_a(r) + 2*REGIX_GS], gs

	// Determine the extended save area size.  Preserve ebx on 32-bit x86
	// here, because the caller needs it for PLT-indirect calls.
#if CPUFAM_X86
	push	ebx
#endif

	// Probe for usable `xsave'.  Test the OSXSAVE flag (bit 27) rather
	// than the XSAVE flag (bit 26): the latter only says that the
	// processor implements the feature, whereas `xsave' faults unless
	// the operating system has also switched it on, which is what
	// OSXSAVE reports.
	// NOTE(review): the sibling regdump routines make their own probe,
	// and need to reach the same verdict as this one.
	mov	eax, 0x01
	cpuid
	test	ecx, 1 << 27
	je	1f

	// Ask for the `xsave' area size (leaf 0x0d, sub-leaf 0), returned in
	// ecx, and add room for the register map and for aligning the area
	// to a 64-byte boundary.
	mov	eax, 0x0d
	xor	ecx, ecx
	cpuid
	add	ecx, regmap_size + 64	// map + align
	jmp	8f

	// No `xsave': the fixed-size `fxsave' area will have to do.
1:	mov	ecx, 512 + regmap_size + 16 // fxsave + map + align

	// Done.
8:
#if CPUFAM_X86
	pop	ebx
#endif
	ret

ENDFUNC
| 128 | |
FUNC(regdump_gprstr)
  endprologue
	// Reload the general-purpose registers from the save area.
	//
	// On entry, r/ebp points to a general-purpose save area, established
	// by `regdump_gpsave'.  On exit, the general-purpose registers
	// (other than the stack pointer) are restored to their original
	// values.
	//
	// The segment registers are left alone, on the assumption that
	// nobody has been fiddling with them.  Every load below goes
	// through r/ebp, so the order is immaterial except that r/ebp
	// itself must be reloaded last.

#if CPUFAM_AMD64
	// The amd64-only registers.
	mov	R_r15(r), [R_bp(r) + REGIX_R15*WORDSZ]
	mov	R_r14(r), [R_bp(r) + REGIX_R14*WORDSZ]
	mov	R_r13(r), [R_bp(r) + REGIX_R13*WORDSZ]
	mov	R_r12(r), [R_bp(r) + REGIX_R12*WORDSZ]
	mov	R_r11(r), [R_bp(r) + REGIX_R11*WORDSZ]
	mov	R_r10(r), [R_bp(r) + REGIX_R10*WORDSZ]
	mov	R_r9(r), [R_bp(r) + REGIX_R9*WORDSZ]
	mov	R_r8(r), [R_bp(r) + REGIX_R8*WORDSZ]
#endif

	// The index registers.
	mov	R_di(r), [R_bp(r) + REGIX_DI*WORDSZ]
	mov	R_si(r), [R_bp(r) + REGIX_SI*WORDSZ]

	// The classic accumulator-style registers.
	mov	R_d(r), [R_bp(r) + REGIX_DX*WORDSZ]
	mov	R_c(r), [R_bp(r) + REGIX_CX*WORDSZ]
	mov	R_b(r), [R_bp(r) + REGIX_BX*WORDSZ]
	mov	R_a(r), [R_bp(r) + REGIX_AX*WORDSZ]

	// Finally, r/ebp: after this we can no longer find the save area.
	mov	R_bp(r), [R_bp(r) + REGIX_BP*WORDSZ]

	// Done.
	ret

ENDFUNC
| 160 | |
// On amd64, quietly substitute the 64-bit forms of the extended-state
// save/restore instructions, so that the code below can be written once for
// both processor families.  This is tested with `#if', like every other
// `CPUFAM_...' conditional in this file, so that a definition as zero
// counts as `not this family'.
#if CPUFAM_AMD64
#  define fxsave fxsave64
#  define fxrstor fxrstor64
#  define xsave xsave64
#  define xrstor xrstor64
#endif
| 167 | |
FUNC(regdump_xtsave)
  endprologue
	// Save the extended (x87, SSE and, if present, AVX) register state
	// and build the register map describing where everything went.
	//
	// On entry, r/esp points to a return address and extended save area,
	// of size determined by `regdump_gpsave' above, and r/ebp points to
	// the general-purpose save area.  On exit, the save area is filled
	// in and a handy map placed at its base, the x87 floating-point
	// state is reset, r/ebp is left pointing to the register map, ebx is
	// preserved on 32-bit x86, and the other general registers are
	// clobbered.

	// Start by filling in the easy parts of the map: remember where the
	// general-purpose save area is, and then point r/ebp at the map,
	// which sits just above our return address.
	mov	[R_sp(r) + WORDSZ + regmap_gp], R_bp(r)
	lea	R_bp(r), [R_sp(r) + WORDSZ]

	// No AVX state until proven otherwise.
	xor	eax, eax		// clears rax too on amd64
	mov	[R_bp(r) + regmap_avx], R_a(r)

	// Find out whether we use `xsave'.  (Preserve ebx.)
	//
	// Test the OSXSAVE flag (bit 27) rather than the XSAVE flag (bit
	// 26): `xsave' faults unless the operating system has enabled it,
	// and only OSXSAVE reports that.
	// NOTE(review): the sibling regdump routines make their own probe,
	// and need to reach the same verdict as this one.
#if CPUFAM_X86
	push	ebx
#endif
	mov	eax, 0x01
	cpuid
	test	ecx, 1 << 27
	je	5f

	// We have the `xsave' machinery.  Select the base address: the
	// first 64-byte boundary at or after the end of the map.  This is
	// reckoned from r/ebp rather than r/esp, because on 32-bit x86 the
	// `push' above has moved r/esp, and the save area could otherwise
	// begin inside the last word of the map.
	lea	R_si(r), [R_bp(r) + regmap_size + 63]
	and	R_si(r), ~63
	mov	[R_bp(r) + regmap_fx], R_si(r)

	// Clear out the header area: 16 dwords = 64 bytes, following the
	// 512-byte legacy region.  (Relies on df being clear.)
	xor	eax, eax
	lea	R_di(r), [R_si(r) + 512]
	mov	ecx, 16
	rep stosd

	// Save the registers.  The component mask in edx:eax asks for x87,
	// SSE and AVX state (bits 0, 1 and 2).
	mov	eax, 0x00000007
	xor	edx, edx
	xsave	[R_si(r)]

	// Establish the AVX pointer, if available: the first header word
	// says which components were actually written.
	test	dword ptr [R_si(r) + 512], 4 // = xstate_bv
	je	8f

	// Leaf 0x0d, sub-leaf 2 reports, in ebx, the offset of the AVX
	// component from the base of the save area.
	mov	eax, 13
	mov	ecx, 2
	cpuid
	add	R_b(r), R_si(r)
	mov	[R_bp(r) + regmap_avx], R_b(r)

	jmp	8f

	// We have only `fxsave'.  Set the base address: the first 16-byte
	// boundary at or after the end of the map, again reckoned from
	// r/ebp.
5:	lea	R_si(r), [R_bp(r) + regmap_size + 15]
	and	R_si(r), ~15
	mov	[R_bp(r) + regmap_fx], R_si(r)

	// Save the registers.
	fxsave	[R_si(r)]

	// Clear the x87 state; otherwise it can cause trouble later.
8:	fninit

	// Done.
#if CPUFAM_X86
	pop	ebx
#endif
	ret

ENDFUNC
| 239 | |
FUNC(regdump_xtrstr)
  endprologue
	// Reload the extended register state saved by `regdump_xtsave'.
	//
	// On entry, r/ebp points to a register-save map.  On exit, the
	// extended registers are restored from the save area; r/ebp is left
	// pointing to the general-purpose save area, ebx is preserved on
	// 32-bit x86, and the other general registers are clobbered.

	// Find the extended register dump.
	mov	R_si(r), [R_bp(r) + regmap_fx]

	// Probe to find out whether we have `xsave'.  Test the OSXSAVE flag
	// (bit 27) rather than the XSAVE flag (bit 26): `xrstor' faults
	// unless the operating system has enabled the feature, and only
	// OSXSAVE reports that.
	// NOTE(review): this must reach the same verdict as the probe made
	// when the state was saved, or the save area will be in the wrong
	// format.
#if CPUFAM_X86
	push	ebx
#endif
	mov	eax, 0x01
	cpuid
	test	ecx, 1 << 27
	je	1f

	// We have the `xsave' machinery.  Ask for the same components as
	// were requested when saving: x87, SSE and AVX (bits 0, 1 and 2 of
	// the mask in edx:eax).
	mov	eax, 0x00000007
	xor	edx, edx
	xrstor	[R_si(r)]
	jmp	8f

	// We must fake it up.
1:	fxrstor	[R_si(r)]

	// Done.  Recover the general-purpose save area pointer from the
	// map.
8:	mov	R_bp(r), [R_bp(r) + regmap_gp]
#if CPUFAM_X86
	pop	ebx
#endif
	ret

ENDFUNC
| 276 | |
| 277 | ///----- That's all, folks -------------------------------------------------- |