Ian Rogers | 706a10e | 2012-03-23 17:00:55 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2012 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "disassembler_x86.h" |
| 18 | |
| 19 | #include "stringprintf.h" |
| 20 | |
| 21 | #include <stdint.h> |
| 22 | #include <iostream> |
| 23 | |
| 24 | namespace art { |
| 25 | namespace x86 { |
| 26 | |
| 27 | DisassemblerX86::DisassemblerX86() { |
| 28 | } |
| 29 | |
| 30 | void DisassemblerX86::Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) { |
| 31 | size_t length = 0; |
| 32 | for (const uint8_t* cur = begin; cur < end; cur += length) { |
| 33 | length = DumpInstruction(os, cur); |
| 34 | } |
| 35 | } |
| 36 | |
// Register names indexed by the 3-bit register number found in x86 instruction encodings
// (low opcode bits, ModRM reg/rm fields and SIB base/index fields).
// Note the hardware ordering of the last two entries: 6 is si/esi and 7 is di/edi.
static const char* gReg8Names[]  = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
static const char* gReg16Names[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
static const char* gReg32Names[] = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" };
| 40 | |
| 41 | static void DumpReg0(std::ostream& os, uint8_t /*rex*/, size_t reg, |
| 42 | bool byte_operand, uint8_t size_override) { |
| 43 | DCHECK_LT(reg, 8u); |
| 44 | // TODO: combine rex into size |
| 45 | size_t size = byte_operand ? 1 : (size_override == 0x66 ? 2 : 4); |
| 46 | switch (size) { |
| 47 | case 1: os << gReg8Names[reg]; break; |
| 48 | case 2: os << gReg16Names[reg]; break; |
| 49 | case 4: os << gReg32Names[reg]; break; |
| 50 | default: LOG(FATAL) << "unexpected size " << size; |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | static void DumpReg(std::ostream& os, uint8_t rex, uint8_t reg, |
| 55 | bool byte_operand, uint8_t size_override) { |
| 56 | size_t reg_num = reg; // TODO: combine with REX.R on 64bit |
| 57 | DumpReg0(os, rex, reg_num, byte_operand, size_override); |
| 58 | } |
| 59 | |
| 60 | static void DumpBaseReg(std::ostream& os, uint8_t rex, uint8_t reg, |
| 61 | bool byte_operand, uint8_t size_override) { |
| 62 | size_t reg_num = reg; // TODO: combine with REX.B on 64bit |
| 63 | DumpReg0(os, rex, reg_num, byte_operand, size_override); |
| 64 | } |
| 65 | |
| 66 | static void DumpIndexReg(std::ostream& os, uint8_t rex, uint8_t reg, |
| 67 | bool byte_operand, uint8_t size_override) { |
| 68 | int reg_num = reg; // TODO: combine with REX.X on 64bit |
| 69 | DumpReg0(os, rex, reg_num, byte_operand, size_override); |
| 70 | } |
| 71 | |
// Writes the textual form of a segment-override prefix byte ("cs:", "ss:", "ds:", "es:",
// "fs:" or "gs:") to |os|. Any other value, including 0 for "no prefix", prints nothing.
static void DumpSegmentOverride(std::ostream& os, uint8_t segment_prefix) {
  const char* segment = NULL;
  switch (segment_prefix) {
    case 0x26: segment = "es:"; break;
    case 0x2E: segment = "cs:"; break;
    case 0x36: segment = "ss:"; break;
    case 0x3E: segment = "ds:"; break;
    case 0x64: segment = "fs:"; break;
    case 0x65: segment = "gs:"; break;
    default: break;
  }
  if (segment != NULL) {
    os << segment;
  }
}
| 83 | |
| 84 | size_t DisassemblerX86::DumpInstruction(std::ostream& os, const uint8_t* instr) { |
| 85 | const uint8_t* begin_instr = instr; |
| 86 | bool have_prefixes = true; |
| 87 | uint8_t prefix[4] = {0, 0, 0, 0}; |
| 88 | const char** modrm_opcodes = NULL; |
| 89 | do { |
| 90 | switch (*instr) { |
| 91 | // Group 1 - lock and repeat prefixes: |
| 92 | case 0xF0: |
| 93 | case 0xF2: |
| 94 | case 0xF3: |
| 95 | prefix[0] = *instr; |
| 96 | break; |
| 97 | // Group 2 - segment override prefixes: |
| 98 | case 0x2E: |
| 99 | case 0x36: |
| 100 | case 0x3E: |
| 101 | case 0x26: |
| 102 | case 0x64: |
| 103 | case 0x65: |
| 104 | prefix[1] = *instr; |
| 105 | break; |
| 106 | // Group 3 - operand size override: |
| 107 | case 0x66: |
| 108 | prefix[2] = *instr; |
| 109 | break; |
| 110 | // Group 4 - address size override: |
| 111 | case 0x67: |
| 112 | prefix[3] = *instr; |
| 113 | break; |
| 114 | default: |
| 115 | have_prefixes = false; |
| 116 | break; |
| 117 | } |
| 118 | if (have_prefixes) { |
| 119 | instr++; |
| 120 | } |
| 121 | } while (have_prefixes); |
| 122 | uint8_t rex = (*instr >= 0x40 && *instr <= 0x4F) ? *instr : 0; |
| 123 | bool has_modrm = false; |
| 124 | bool reg_is_opcode = false; |
| 125 | size_t immediate_bytes = 0; |
| 126 | size_t branch_bytes = 0; |
| 127 | std::ostringstream opcode; |
| 128 | bool store = false; // stores to memory (ie rm is on the left) |
| 129 | bool load = false; // loads from memory (ie rm is on the right) |
| 130 | bool byte_operand = false; |
| 131 | bool ax = false; // implicit use of ax |
| 132 | bool reg_in_opcode = false; // low 3-bits of opcode encode register parameter |
| 133 | switch (*instr) { |
| 134 | #define DISASSEMBLER_ENTRY(opname, \ |
| 135 | rm8_r8, rm32_r32, \ |
| 136 | r8_rm8, r32_rm32, \ |
| 137 | ax8_i8, ax32_i32) \ |
| 138 | case rm8_r8: opcode << #opname; store = true; has_modrm = true; byte_operand = true; break; \ |
| 139 | case rm32_r32: opcode << #opname; store = true; has_modrm = true; break; \ |
| 140 | case r8_rm8: opcode << #opname; load = true; has_modrm = true; byte_operand = true; break; \ |
| 141 | case r32_rm32: opcode << #opname; load = true; has_modrm = true; break; \ |
| 142 | case ax8_i8: opcode << #opname; ax = true; immediate_bytes = 1; byte_operand = true; break; \ |
| 143 | case ax32_i32: opcode << #opname; ax = true; immediate_bytes = 4; break; |
| 144 | |
| 145 | DISASSEMBLER_ENTRY(add, |
| 146 | 0x00 /* RegMem8/Reg8 */, 0x01 /* RegMem32/Reg32 */, |
| 147 | 0x02 /* Reg8/RegMem8 */, 0x03 /* Reg32/RegMem32 */, |
| 148 | 0x04 /* Rax8/imm8 opcode */, 0x05 /* Rax32/imm32 */) |
| 149 | DISASSEMBLER_ENTRY(or, |
| 150 | 0x08 /* RegMem8/Reg8 */, 0x09 /* RegMem32/Reg32 */, |
| 151 | 0x0A /* Reg8/RegMem8 */, 0x0B /* Reg32/RegMem32 */, |
| 152 | 0x0C /* Rax8/imm8 opcode */, 0x0D /* Rax32/imm32 */) |
| 153 | DISASSEMBLER_ENTRY(adc, |
| 154 | 0x10 /* RegMem8/Reg8 */, 0x11 /* RegMem32/Reg32 */, |
| 155 | 0x12 /* Reg8/RegMem8 */, 0x13 /* Reg32/RegMem32 */, |
| 156 | 0x14 /* Rax8/imm8 opcode */, 0x15 /* Rax32/imm32 */) |
| 157 | DISASSEMBLER_ENTRY(sbb, |
| 158 | 0x18 /* RegMem8/Reg8 */, 0x19 /* RegMem32/Reg32 */, |
| 159 | 0x1A /* Reg8/RegMem8 */, 0x1B /* Reg32/RegMem32 */, |
| 160 | 0x1C /* Rax8/imm8 opcode */, 0x1D /* Rax32/imm32 */) |
| 161 | DISASSEMBLER_ENTRY(and, |
| 162 | 0x20 /* RegMem8/Reg8 */, 0x21 /* RegMem32/Reg32 */, |
| 163 | 0x22 /* Reg8/RegMem8 */, 0x23 /* Reg32/RegMem32 */, |
| 164 | 0x24 /* Rax8/imm8 opcode */, 0x25 /* Rax32/imm32 */) |
| 165 | DISASSEMBLER_ENTRY(sub, |
| 166 | 0x28 /* RegMem8/Reg8 */, 0x29 /* RegMem32/Reg32 */, |
| 167 | 0x2A /* Reg8/RegMem8 */, 0x2B /* Reg32/RegMem32 */, |
| 168 | 0x2C /* Rax8/imm8 opcode */, 0x2D /* Rax32/imm32 */) |
| 169 | DISASSEMBLER_ENTRY(xor, |
| 170 | 0x30 /* RegMem8/Reg8 */, 0x31 /* RegMem32/Reg32 */, |
| 171 | 0x32 /* Reg8/RegMem8 */, 0x33 /* Reg32/RegMem32 */, |
| 172 | 0x34 /* Rax8/imm8 opcode */, 0x35 /* Rax32/imm32 */) |
| 173 | DISASSEMBLER_ENTRY(cmp, |
| 174 | 0x38 /* RegMem8/Reg8 */, 0x39 /* RegMem32/Reg32 */, |
| 175 | 0x3A /* Reg8/RegMem8 */, 0x3B /* Reg32/RegMem32 */, |
| 176 | 0x3C /* Rax8/imm8 opcode */, 0x3D /* Rax32/imm32 */) |
| 177 | |
| 178 | #undef DISASSEMBLER_ENTRY |
| 179 | case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: |
| 180 | opcode << "push"; |
| 181 | reg_in_opcode = true; |
| 182 | break; |
| 183 | case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5D: case 0x5E: case 0x5F: |
| 184 | opcode << "pop"; |
| 185 | reg_in_opcode = true; |
| 186 | break; |
| 187 | case 0x68: opcode << "push"; immediate_bytes = 4; break; |
| 188 | case 0x6A: opcode << "push"; immediate_bytes = 1; break; |
| 189 | case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: |
| 190 | case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F: |
| 191 | static const char* condition_codes[] = |
Elliott Hughes | b25c3f6 | 2012-03-26 16:35:06 -0700 | [diff] [blame^] | 192 | {"o", "no", "b/nae/c", "nb/ae/nc", "z/eq", "nz/ne", "be/na", "nbe/a", |
| 193 | "s", "ns", "p/pe", "np/po", "l/nge", "nl/ge", "le/ng", "nle/g" |
Ian Rogers | 706a10e | 2012-03-23 17:00:55 -0700 | [diff] [blame] | 194 | }; |
| 195 | opcode << "j" << condition_codes[*instr & 0xF]; |
| 196 | branch_bytes = 1; |
| 197 | break; |
| 198 | case 0x88: opcode << "mov"; store = true; has_modrm = true; byte_operand = true; break; |
| 199 | case 0x89: opcode << "mov"; store = true; has_modrm = true; break; |
| 200 | case 0x8A: opcode << "mov"; load = true; has_modrm = true; byte_operand = true; break; |
| 201 | case 0x8B: opcode << "mov"; load = true; has_modrm = true; break; |
| 202 | |
| 203 | case 0x0F: // 2 byte extended opcode |
| 204 | instr++; |
| 205 | switch (*instr) { |
| 206 | case 0x38: // 3 byte extended opcode |
| 207 | opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr); |
| 208 | break; |
| 209 | case 0x3A: // 3 byte extended opcode |
| 210 | opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr); |
| 211 | break; |
| 212 | case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87: |
| 213 | case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: case 0x8F: |
| 214 | opcode << "j" << condition_codes[*instr & 0xF]; |
| 215 | branch_bytes = 4; |
| 216 | break; |
| 217 | default: |
| 218 | opcode << StringPrintf("unknown opcode '0F %02X'", *instr); |
| 219 | break; |
| 220 | } |
| 221 | break; |
| 222 | case 0x80: case 0x81: case 0x82: case 0x83: |
| 223 | static const char* x80_opcodes[] = {"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"}; |
| 224 | modrm_opcodes = x80_opcodes; |
| 225 | has_modrm = true; |
| 226 | reg_is_opcode = true; |
| 227 | store = true; |
| 228 | byte_operand = (*instr & 1) == 0; |
| 229 | immediate_bytes = *instr == 0x81 ? 4 : 1; |
| 230 | break; |
| 231 | case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: |
| 232 | opcode << "mov"; |
| 233 | immediate_bytes = 1; |
| 234 | reg_in_opcode = true; |
| 235 | break; |
| 236 | case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: |
| 237 | opcode << "mov"; |
| 238 | immediate_bytes = 4; |
| 239 | reg_in_opcode = true; |
| 240 | break; |
| 241 | case 0xC3: opcode << "ret"; break; |
| 242 | case 0xE9: opcode << "jmp"; branch_bytes = 4; break; |
| 243 | case 0xEB: opcode << "jmp"; branch_bytes = 1; break; |
| 244 | case 0xFF: |
| 245 | static const char* ff_opcodes[] = {"inc", "dec", "call", "call", "jmp", "jmp", "push", "unknown-ff"}; |
| 246 | modrm_opcodes = ff_opcodes; |
| 247 | has_modrm = true; |
| 248 | reg_is_opcode = true; |
| 249 | load = true; |
| 250 | break; |
| 251 | default: |
| 252 | opcode << StringPrintf("unknown opcode '%02X'", *instr); |
| 253 | break; |
| 254 | } |
| 255 | std::ostringstream args; |
| 256 | if (reg_in_opcode) { |
| 257 | DCHECK(!has_modrm); |
| 258 | DumpReg(args, rex, *instr & 0x7, false, prefix[2]); |
| 259 | } |
| 260 | instr++; |
| 261 | if (has_modrm) { |
| 262 | uint8_t modrm = *instr; |
| 263 | instr++; |
| 264 | uint8_t mod = modrm >> 6; |
| 265 | uint8_t reg_or_opcode = (modrm >> 3) & 7; |
| 266 | uint8_t rm = modrm & 7; |
| 267 | std::ostringstream address; |
| 268 | if (mod == 0 && rm == 5) { // fixed address |
| 269 | address << StringPrintf("[0x%X]", *reinterpret_cast<const uint32_t*>(instr)); |
| 270 | instr += 4; |
| 271 | } else if (rm == 4 && mod != 3) { // SIB |
| 272 | uint8_t sib = *instr; |
| 273 | instr++; |
| 274 | uint8_t ss = (sib >> 6) & 3; |
| 275 | uint8_t index = (sib >> 3) & 7; |
| 276 | uint8_t base = sib & 7; |
| 277 | address << "["; |
| 278 | if (base != 5 || mod != 0) { |
| 279 | DumpBaseReg(address, rex, base, byte_operand, prefix[2]); |
| 280 | if (index != 4) { |
| 281 | address << " + "; |
| 282 | } |
| 283 | } |
| 284 | if (index != 4) { |
| 285 | DumpIndexReg(address, rex, index, byte_operand, prefix[2]); |
| 286 | if (ss != 0) { |
| 287 | address << StringPrintf(" * %d", 1 << ss); |
| 288 | } |
| 289 | } |
| 290 | if (mod == 1) { |
| 291 | address << StringPrintf(" + %d", *reinterpret_cast<const int8_t*>(instr)); |
| 292 | instr++; |
| 293 | } else if (mod == 2) { |
| 294 | address << StringPrintf(" + %d", *reinterpret_cast<const int32_t*>(instr)); |
| 295 | instr += 4; |
| 296 | } |
| 297 | address << "]"; |
| 298 | } else { |
| 299 | if (mod != 3) { |
| 300 | address << "["; |
| 301 | } |
| 302 | DumpBaseReg(address, rex, rm, byte_operand, prefix[2]); |
| 303 | if (mod == 1) { |
| 304 | address << StringPrintf(" + %d", *reinterpret_cast<const int8_t*>(instr)); |
| 305 | instr++; |
| 306 | } else if (mod == 2) { |
| 307 | address << StringPrintf(" + %d", *reinterpret_cast<const int32_t*>(instr)); |
| 308 | instr += 4; |
| 309 | } |
| 310 | if (mod != 3) { |
| 311 | address << "]"; |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | if (reg_is_opcode) { |
| 316 | opcode << modrm_opcodes[reg_or_opcode]; |
| 317 | } |
| 318 | if (load) { |
| 319 | if (!reg_is_opcode) { |
| 320 | DumpReg(args, rex, reg_or_opcode, byte_operand, prefix[2]); |
| 321 | args << ", "; |
| 322 | } |
| 323 | DumpSegmentOverride(args, prefix[1]); |
| 324 | args << address.str(); |
| 325 | } else { |
| 326 | DCHECK(store); |
| 327 | DumpSegmentOverride(args, prefix[1]); |
| 328 | args << address.str(); |
| 329 | if (!reg_is_opcode) { |
| 330 | args << ", "; |
| 331 | DumpReg(args, rex, reg_or_opcode, byte_operand, prefix[2]); |
| 332 | } |
| 333 | } |
| 334 | } |
| 335 | if (ax) { |
| 336 | DumpReg(args, rex, 0 /* EAX */, byte_operand, prefix[2]); |
| 337 | } |
| 338 | if (immediate_bytes > 0) { |
| 339 | if (has_modrm || reg_in_opcode || ax) { |
| 340 | args << ", "; |
| 341 | } |
| 342 | if (immediate_bytes == 1) { |
| 343 | args << StringPrintf("%d", *reinterpret_cast<const int8_t*>(instr)); |
| 344 | instr++; |
| 345 | } else { |
| 346 | CHECK_EQ(immediate_bytes, 4u); |
| 347 | args << StringPrintf("%d", *reinterpret_cast<const int32_t*>(instr)); |
| 348 | instr += 4; |
| 349 | } |
| 350 | } else if (branch_bytes > 0) { |
| 351 | DCHECK(!has_modrm); |
| 352 | int32_t displacement; |
| 353 | if (branch_bytes == 1) { |
| 354 | displacement = *reinterpret_cast<const int8_t*>(instr); |
| 355 | instr++; |
| 356 | } else { |
| 357 | CHECK_EQ(branch_bytes, 4u); |
| 358 | displacement = *reinterpret_cast<const int32_t*>(instr); |
| 359 | instr += 4; |
| 360 | } |
| 361 | args << StringPrintf("%d (%p)", displacement, instr + displacement); |
| 362 | } |
| 363 | os << StringPrintf("\t\t\t%p: ", begin_instr); |
| 364 | for (size_t i = 0; begin_instr + i < instr; ++i) { |
| 365 | os << StringPrintf("%02X", begin_instr[i]); |
| 366 | } |
| 367 | os << StringPrintf("\t%-7s ", opcode.str().c_str()) << args.str() << std::endl; |
| 368 | return instr - begin_instr; |
| 369 | } |
| 370 | |
| 371 | } // namespace x86 |
| 372 | } // namespace art |