Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 1 | // Copyright 2011 Google Inc. All Rights Reserved. |
| 2 | |
Brian Carlstrom | 578bbdc | 2011-07-21 14:07:47 -0700 | [diff] [blame] | 3 | #include "dex_instruction.h" |
Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 4 | |
Ian Rogers | d81871c | 2011-10-03 13:57:23 -0700 | [diff] [blame^] | 5 | #include "dex_file.h" |
| 6 | #include <iomanip> |
| 7 | |
Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 8 | namespace art { |
| 9 | |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 10 | const char* const Instruction::kInstructionNames[] = { |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 11 | #define INSTRUCTION_NAME(o, c, pname, f, r, i, a, v) pname, |
Brian Carlstrom | 578bbdc | 2011-07-21 14:07:47 -0700 | [diff] [blame] | 12 | #include "dex_instruction_list.h" |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 13 | DEX_INSTRUCTION_LIST(INSTRUCTION_NAME) |
| 14 | #undef DEX_INSTRUCTION_LIST |
| 15 | #undef INSTRUCTION_NAME |
| 16 | }; |
| 17 | |
| 18 | Instruction::InstructionFormat const Instruction::kInstructionFormats[] = { |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 19 | #define INSTRUCTION_FORMAT(o, c, p, format, r, i, a, v) format, |
Brian Carlstrom | 578bbdc | 2011-07-21 14:07:47 -0700 | [diff] [blame] | 20 | #include "dex_instruction_list.h" |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 21 | DEX_INSTRUCTION_LIST(INSTRUCTION_FORMAT) |
| 22 | #undef DEX_INSTRUCTION_LIST |
| 23 | #undef INSTRUCTION_FORMAT |
| 24 | }; |
| 25 | |
| 26 | int const Instruction::kInstructionFlags[] = { |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 27 | #define INSTRUCTION_FLAGS(o, c, p, f, r, i, flags, v) flags, |
Brian Carlstrom | 578bbdc | 2011-07-21 14:07:47 -0700 | [diff] [blame] | 28 | #include "dex_instruction_list.h" |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 29 | DEX_INSTRUCTION_LIST(INSTRUCTION_FLAGS) |
| 30 | #undef DEX_INSTRUCTION_LIST |
| 31 | #undef INSTRUCTION_FLAGS |
| 32 | }; |
| 33 | |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 34 | int const Instruction::kInstructionVerifyFlags[] = { |
| 35 | #define INSTRUCTION_VERIFY_FLAGS(o, c, p, f, r, i, a, vflags) vflags, |
| 36 | #include "dex_instruction_list.h" |
| 37 | DEX_INSTRUCTION_LIST(INSTRUCTION_VERIFY_FLAGS) |
| 38 | #undef DEX_INSTRUCTION_LIST |
| 39 | #undef INSTRUCTION_VERIFY_FLAGS |
| 40 | }; |
| 41 | |
/*
 * Field-extraction helpers used while decoding an instruction.
 *
 * FETCH and FETCH_u4 read from a local named `insns` (a pointer to 16-bit
 * code units) that must be in scope at the point of use. The INST_* macros
 * pick bit fields out of the first code unit:
 *   INST_A  - bits 8..11
 *   INST_B  - bits 12..15
 *   INST_AA - everything above the low 8 bits
 */
#define FETCH(off) (insns[(off)])
#define FETCH_u4(off) (fetch_u4_impl((off), insns))
#define INST_A(unit) ((static_cast<uint16_t>(unit) >> 8) & 0x0f)
#define INST_B(unit) (static_cast<uint16_t>(unit) >> 12)
#define INST_AA(unit) ((unit) >> 8)
| 50 | |
/*
 * Backs the FETCH_u4 macro: combines two consecutive 16-bit code units into
 * one 32-bit value. The unit at `offset` supplies the low half and the unit
 * at `offset + 1` supplies the high half.
 */
static inline uint32_t fetch_u4_impl(uint32_t offset, const uint16_t* insns) {
  const uint32_t low_half = insns[offset];
  const uint32_t high_half = insns[offset + 1];
  return (high_half << 16) | low_half;
}
| 55 | |
| 56 | void Instruction::Decode(uint32_t &vA, uint32_t &vB, uint64_t &vB_wide, uint32_t &vC, uint32_t arg[]) const { |
| 57 | const uint16_t* insns = reinterpret_cast<const uint16_t*>(this); |
| 58 | uint16_t insn = *insns; |
| 59 | int opcode = insn & 0xFF; |
| 60 | |
| 61 | switch (Format()) { |
| 62 | case k10x: // op |
| 63 | /* nothing to do; copy the AA bits out for the verifier */ |
| 64 | vA = INST_AA(insn); |
| 65 | break; |
| 66 | case k12x: // op vA, vB |
| 67 | vA = INST_A(insn); |
| 68 | vB = INST_B(insn); |
| 69 | break; |
| 70 | case k11n: // op vA, #+B |
| 71 | vA = INST_A(insn); |
| 72 | vB = (int32_t) (INST_B(insn) << 28) >> 28; // sign extend 4-bit value |
| 73 | break; |
| 74 | case k11x: // op vAA |
| 75 | vA = INST_AA(insn); |
| 76 | break; |
| 77 | case k10t: // op +AA |
| 78 | vA = (int8_t) INST_AA(insn); // sign-extend 8-bit value |
| 79 | break; |
jeffhao | e0cfb6f | 2011-09-22 16:42:56 -0700 | [diff] [blame] | 80 | case k20bc: // op AA, kind@BBBB |
| 81 | break; |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 82 | case k20t: // op +AAAA |
| 83 | vA = (int16_t) FETCH(1); // sign-extend 16-bit value |
| 84 | break; |
| 85 | case k21c: // op vAA, thing@BBBB |
| 86 | case k22x: // op vAA, vBBBB |
| 87 | vA = INST_AA(insn); |
| 88 | vB = FETCH(1); |
| 89 | break; |
| 90 | case k21s: // op vAA, #+BBBB |
| 91 | case k21t: // op vAA, +BBBB |
| 92 | vA = INST_AA(insn); |
| 93 | vB = (int16_t) FETCH(1); // sign-extend 16-bit value |
| 94 | break; |
| 95 | case k21h: // op vAA, #+BBBB0000[00000000] |
| 96 | vA = INST_AA(insn); |
| 97 | /* |
| 98 | * The value should be treated as right-zero-extended, but we don't |
| 99 | * actually do that here. Among other things, we don't know if it's |
| 100 | * the top bits of a 32- or 64-bit value. |
| 101 | */ |
| 102 | vB = FETCH(1); |
| 103 | break; |
| 104 | case k23x: // op vAA, vBB, vCC |
| 105 | vA = INST_AA(insn); |
| 106 | vB = FETCH(1) & 0xff; |
| 107 | vC = FETCH(1) >> 8; |
| 108 | break; |
| 109 | case k22b: // op vAA, vBB, #+CC |
| 110 | vA = INST_AA(insn); |
| 111 | vB = FETCH(1) & 0xff; |
| 112 | vC = (int8_t) (FETCH(1) >> 8); // sign-extend 8-bit value |
| 113 | break; |
| 114 | case k22s: // op vA, vB, #+CCCC |
| 115 | case k22t: // op vA, vB, +CCCC |
| 116 | vA = INST_A(insn); |
| 117 | vB = INST_B(insn); |
| 118 | vC = (int16_t) FETCH(1); // sign-extend 16-bit value |
| 119 | break; |
| 120 | case k22c: // op vA, vB, thing@CCCC |
| 121 | vA = INST_A(insn); |
| 122 | vB = INST_B(insn); |
| 123 | vC = FETCH(1); |
| 124 | break; |
| 125 | case k30t: // op +AAAAAAAA |
| 126 | vA = FETCH_u4(1); // signed 32-bit value |
| 127 | break; |
| 128 | case k31t: // op vAA, +BBBBBBBB |
| 129 | case k31c: // op vAA, string@BBBBBBBB |
| 130 | vA = INST_AA(insn); |
| 131 | vB = FETCH_u4(1); // 32-bit value |
| 132 | break; |
| 133 | case k32x: // op vAAAA, vBBBB |
| 134 | vA = FETCH(1); |
| 135 | vB = FETCH(2); |
| 136 | break; |
| 137 | case k31i: // op vAA, #+BBBBBBBB |
| 138 | vA = INST_AA(insn); |
| 139 | vB = FETCH_u4(1); // signed 32-bit value |
| 140 | break; |
| 141 | case k35c: // op {vC, vD, vE, vF, vG}, thing@BBBB |
| 142 | { |
| 143 | /* |
| 144 | * Note that the fields mentioned in the spec don't appear in |
| 145 | * their "usual" positions here compared to most formats. This |
| 146 | * was done so that the field names for the argument count and |
| 147 | * reference index match between this format and the corresponding |
| 148 | * range formats (3rc and friends). |
| 149 | * |
| 150 | * Bottom line: The argument count is always in vA, and the |
| 151 | * method constant (or equivalent) is always in vB. |
| 152 | */ |
| 153 | uint16_t regList; |
| 154 | int count; |
| 155 | |
| 156 | vA = INST_B(insn); // This is labeled A in the spec. |
| 157 | vB = FETCH(1); |
| 158 | regList = FETCH(2); |
| 159 | |
| 160 | count = vA; |
| 161 | |
| 162 | /* |
| 163 | * Copy the argument registers into the arg[] array, and |
| 164 | * also copy the first argument (if any) into vC. (The |
| 165 | * DecodedInstruction structure doesn't have separate |
| 166 | * fields for {vD, vE, vF, vG}, so there's no need to make |
| 167 | * copies of those.) Note that cases 5..2 fall through. |
| 168 | */ |
| 169 | switch (count) { |
| 170 | case 5: arg[4] = INST_A(insn); |
| 171 | case 4: arg[3] = (regList >> 12) & 0x0f; |
| 172 | case 3: arg[2] = (regList >> 8) & 0x0f; |
| 173 | case 2: arg[1] = (regList >> 4) & 0x0f; |
| 174 | case 1: vC = arg[0] = regList & 0x0f; break; |
| 175 | case 0: break; // Valid, but no need to do anything. |
| 176 | default: |
| 177 | LOG(ERROR) << "Invalid arg count in 35c (" << count << ")"; |
| 178 | return; |
| 179 | } |
| 180 | } |
| 181 | break; |
| 182 | case k3rc: // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB |
| 183 | vA = INST_AA(insn); |
| 184 | vB = FETCH(1); |
| 185 | vC = FETCH(2); |
| 186 | break; |
| 187 | case k51l: // op vAA, #+BBBBBBBBBBBBBBBB |
| 188 | vA = INST_AA(insn); |
| 189 | vB_wide = FETCH_u4(1) | ((uint64_t) FETCH_u4(3) << 32); |
| 190 | break; |
| 191 | default: |
| 192 | LOG(ERROR) << "Can't decode unexpected format " << (int) Format() << " (op=" << opcode << ")"; |
| 193 | return; |
| 194 | } |
| 195 | } |
| 196 | |
Ian Rogers | d81871c | 2011-10-03 13:57:23 -0700 | [diff] [blame^] | 197 | size_t Instruction::SizeInCodeUnits() const { |
Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 198 | const uint16_t* insns = reinterpret_cast<const uint16_t*>(this); |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 199 | if (*insns == kPackedSwitchSignature) { |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 200 | return (4 + insns[1] * 2); |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 201 | } else if (*insns == kSparseSwitchSignature) { |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 202 | return (2 + insns[1] * 4); |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 203 | } else if (*insns == kArrayDataSignature) { |
| 204 | uint16_t element_size = insns[1]; |
| 205 | uint32_t length = insns[2] | (((uint32_t)insns[3]) << 16); |
| 206 | // The plus 1 is to round up for odd size and width. |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 207 | return (4 + (element_size * length + 1) / 2); |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 208 | } else { |
| 209 | switch (Format()) { |
| 210 | case k10x: |
| 211 | case k12x: |
| 212 | case k11n: |
| 213 | case k11x: |
| 214 | case k10t: |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 215 | return 1; |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 216 | case k20t: |
| 217 | case k22x: |
| 218 | case k21t: |
| 219 | case k21s: |
| 220 | case k21h: |
| 221 | case k21c: |
| 222 | case k23x: |
| 223 | case k22b: |
| 224 | case k22t: |
| 225 | case k22s: |
| 226 | case k22c: |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 227 | return 2; |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 228 | case k32x: |
| 229 | case k30t: |
| 230 | case k31t: |
| 231 | case k31i: |
| 232 | case k31c: |
| 233 | case k35c: |
| 234 | case k3rc: |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 235 | return 3; |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 236 | case k51l: |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 237 | return 5; |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 238 | default: |
| 239 | LOG(FATAL) << "Unreachable"; |
| 240 | } |
| 241 | } |
jeffhao | ba5ebb9 | 2011-08-25 17:24:37 -0700 | [diff] [blame] | 242 | return 0; |
Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 243 | } |
| 244 | |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 245 | Instruction::Code Instruction::Opcode() const { |
Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 246 | const uint16_t* insns = reinterpret_cast<const uint16_t*>(this); |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 247 | int opcode = *insns & 0xFF; |
| 248 | return static_cast<Code>(opcode); |
Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 249 | } |
| 250 | |
Carl Shapiro | e4c1ce4 | 2011-07-09 02:31:57 -0700 | [diff] [blame] | 251 | const Instruction* Instruction::Next() const { |
Ian Rogers | d81871c | 2011-10-03 13:57:23 -0700 | [diff] [blame^] | 252 | size_t current_size_in_bytes = SizeInCodeUnits() * sizeof(uint16_t); |
Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 253 | const uint8_t* ptr = reinterpret_cast<const uint8_t*>(this); |
Ian Rogers | d81871c | 2011-10-03 13:57:23 -0700 | [diff] [blame^] | 254 | return reinterpret_cast<const Instruction*>(ptr + current_size_in_bytes); |
| 255 | } |
| 256 | |
| 257 | void Instruction::DumpHex(std::ostream& os, size_t code_units) const { |
| 258 | size_t inst_length = SizeInCodeUnits(); |
| 259 | if (inst_length > code_units) { |
| 260 | inst_length = code_units; |
| 261 | } |
| 262 | const uint16_t* insn = reinterpret_cast<const uint16_t*>(this); |
| 263 | for (size_t i = 0; i < inst_length; i++) { |
| 264 | os << "0x" << StringPrintf("0x%04X", insn[i]) << " "; |
| 265 | } |
| 266 | for (size_t i = inst_length; i < code_units; i++) { |
| 267 | os << " "; |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | void Instruction::Dump(std::ostream& os, const DexFile* file) const { |
| 272 | DecodedInstruction insn(this); |
| 273 | const char* opcode = kInstructionNames[insn.opcode_]; |
| 274 | switch (Format()) { |
| 275 | case k10x: os << opcode; break; |
| 276 | case k12x: os << opcode << " v" << insn.vA_ << ", v" << insn.vB_; break; |
| 277 | case k11n: os << opcode << " v" << insn.vA_ << ", #+" << insn.vB_; break; |
| 278 | case k11x: os << opcode << " v" << insn.vA_; break; |
| 279 | case k10t: os << opcode << " +" << (int)insn.vA_; break; |
| 280 | case k20bc: os << opcode << " " << insn.vA_ << ", kind@" << insn.vB_; break; |
| 281 | case k20t: os << opcode << " +" << (int)insn.vA_; break; |
| 282 | case k22x: os << opcode << " v" << insn.vA_ << ", v" << insn.vB_; break; |
| 283 | case k21t: os << opcode << " v" << insn.vA_ << ", +" << insn.vB_; break; |
| 284 | case k21s: os << opcode << " v" << insn.vA_ << ", #+" << insn.vB_; break; |
| 285 | case k21h: os << opcode << " v" << insn.vA_ << ", #+" << insn.vB_ << "00000[00000000]"; break; |
| 286 | case k21c: os << opcode << " " << insn.vA_ << ", thing@" << insn.vB_; break; |
| 287 | case k23x: os << opcode << " v" << insn.vA_ << ", v" << insn.vB_ << ", v" << insn.vC_; break; |
| 288 | case k22b: os << opcode << " v" << insn.vA_ << ", v" << insn.vB_ << ", #+" << insn.vC_; break; |
| 289 | case k22t: os << opcode << " v" << insn.vA_ << ", v" << insn.vB_ << ", +" << insn.vC_; break; |
| 290 | case k22s: os << opcode << " v" << insn.vA_ << ", v" << insn.vB_ << ", #+" << insn.vC_; break; |
| 291 | case k22c: os << opcode << " v" << insn.vA_ << ", v" << insn.vB_ << ", thing@" << insn.vC_; break; |
| 292 | case k32x: os << opcode << " v" << insn.vA_ << ", v" << insn.vB_; break; |
| 293 | case k30t: os << opcode << " +" << (int)insn.vA_; break; |
| 294 | case k31t: os << opcode << " v" << insn.vA_ << ", +" << insn.vB_; break; |
| 295 | case k31i: os << opcode << " v" << insn.vA_ << ", #+" << insn.vB_; break; |
| 296 | case k31c: os << opcode << " v" << insn.vA_ << ", thing@" << insn.vB_; break; |
| 297 | case k35c: { |
| 298 | switch (insn.opcode_) { |
| 299 | case INVOKE_VIRTUAL: |
| 300 | case INVOKE_SUPER: |
| 301 | case INVOKE_DIRECT: |
| 302 | case INVOKE_STATIC: |
| 303 | case INVOKE_INTERFACE: |
| 304 | if (file != NULL) { |
| 305 | const DexFile::MethodId& meth_id = file->GetMethodId(insn.vB_); |
| 306 | os << opcode << " {v" << insn.arg_[0] << ", v" << insn.arg_[1] << ", v" << insn.arg_[2] |
| 307 | << ", v" << insn.arg_[3] << ", v" << insn.arg_[4] << "}, " |
| 308 | << file->GetMethodName(meth_id) << file->GetMethodSignature(meth_id) |
| 309 | << " // method@" << insn.vB_; |
| 310 | break; |
| 311 | } // else fall-through |
| 312 | default: |
| 313 | os << opcode << " {v" << insn.arg_[0] << ", v" << insn.arg_[1] << ", v" << insn.arg_[2] |
| 314 | << ", v" << insn.arg_[3] << ", v" << insn.arg_[4] << "}, thing@" << insn.vB_; |
| 315 | break; |
| 316 | } |
| 317 | break; |
| 318 | } |
| 319 | case k3rc: os << opcode << " {v" << insn.vC_ << " .. v" << (insn.vC_+ insn.vA_ - 1) << "}, method@" << insn.vB_; break; |
| 320 | case k51l: os << opcode << " v" << insn.vA_ << ", #+" << insn.vB_; break; |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | std::ostream& operator<<(std::ostream& os, const Instruction& rhs) { |
| 325 | rhs.Dump(os, NULL); |
| 326 | return os; |
Carl Shapiro | 12eb78e | 2011-06-24 14:51:06 -0700 | [diff] [blame] | 327 | } |
| 328 | |
| 329 | } // namespace art |