blob: e677b428efadd185187f5942da809814beb16fb1 [file] [log] [blame]
Carl Shapiro1fb86202011-06-27 17:43:13 -07001// Copyright 2011 Google Inc. All Rights Reserved.
2
3#ifndef ART_SRC_DEX_FILE_H_
4#define ART_SRC_DEX_FILE_H_
5
Brian Carlstrom7e49dca2011-07-22 18:07:34 -07006#include <map>
7
Brian Carlstrom578bbdc2011-07-21 14:07:47 -07008#include "globals.h"
Brian Carlstrom7e49dca2011-07-22 18:07:34 -07009#include "leb128.h"
10#include "logging.h"
11#include "scoped_ptr.h"
12#include "stringpiece.h"
13#include "strutil.h"
Carl Shapiro1fb86202011-06-27 17:43:13 -070014
15namespace art {
16
Carl Shapiro5fafe2b2011-07-09 15:34:41 -070017union JValue;
Carl Shapiro0e5d75d2011-07-06 18:28:37 -070018
Brian Carlstrom7e49dca2011-07-22 18:07:34 -070019// TODO: move all of the macro functionality into the DexCache class.
20class RawDexFile {
Carl Shapiro1fb86202011-06-27 17:43:13 -070021 public:
// Magic and version byte strings; only declared here, the definitions live
// outside this header. NOTE(review): presumably compared by CheckMagic() /
// IsMagicValid() -- confirm in the .cc file.
static const byte kDexMagic[];
static const byte kDexMagicVersion[];
// Length in bytes of Header::signature_.
static const size_t kSha1DigestSize = 20;

// NOTE(review): presumably the low five bits of an encoded value's leading
// byte select the ValueType and the bits above kEncodedValueArgShift carry the
// value argument -- confirm against ReadEncodedValue().
static const byte kEncodedValueTypeMask = 0x1f;  // 0b11111
static const byte kEncodedValueArgShift = 5;

// The value of an invalid index.
static const uint32_t kDexNoIndex = 0xFFFFFFFF;
31
// Type tag of an encoded value; this is the return type of ReadEncodedValue().
// The numeric values are not contiguous, so do not use them as array indices.
enum ValueType {
  kByte = 0x00,
  kShort = 0x02,
  kChar = 0x03,
  kInt = 0x04,
  kLong = 0x06,
  kFloat = 0x10,
  kDouble = 0x11,
  kString = 0x17,
  kType = 0x18,
  kField = 0x19,
  kMethod = 0x1a,
  kEnum = 0x1b,
  kArray = 0x1c,
  kAnnotation = 0x1d,
  kNull = 0x1e,
  kBoolean = 0x1f
};
Carl Shapiro1fb86202011-06-27 17:43:13 -070050
// Raw header_item.
// GetHeader() hands out a reference to this struct, and the Num*Ids() /
// NumClassDefs() accessors read the matching *_size_ fields. Each *_size_ /
// *_off_ pair describes one section of the file.
struct Header {
  uint8_t magic_[8];
  uint32_t checksum_;
  uint8_t signature_[kSha1DigestSize];  // kSha1DigestSize (20) bytes
  uint32_t file_size_;  // length of entire file
  uint32_t header_size_;  // offset to start of next section
  uint32_t endian_tag_;
  uint32_t link_size_;
  uint32_t link_off_;
  uint32_t map_off_;
  uint32_t string_ids_size_;  // returned by NumStringIds()
  uint32_t string_ids_off_;
  uint32_t type_ids_size_;  // returned by NumTypeIds()
  uint32_t type_ids_off_;
  uint32_t proto_ids_size_;  // returned by NumProtoIds()
  uint32_t proto_ids_off_;
  uint32_t field_ids_size_;  // returned by NumFieldIds()
  uint32_t field_ids_off_;
  uint32_t method_ids_size_;  // returned by NumMethodIds()
  uint32_t method_ids_off_;
  uint32_t class_defs_size_;  // returned by NumClassDefs()
  uint32_t class_defs_off_;
  uint32_t data_size_;
  uint32_t data_off_;
};
Carl Shapiro1fb86202011-06-27 17:43:13 -070077
// Raw string_id_item.
// GetStringData() and GetStringLength() add string_data_off_ to base_ to
// locate the uleb128 length that precedes the string bytes.
struct StringId {
  uint32_t string_data_off_;  // offset in bytes from the base address
};
82
// Raw type_id_item.
// dexStringByTypeIdx() resolves descriptor_idx_ through dexStringById().
struct TypeId {
  uint32_t descriptor_idx_;  // index into string_ids
};
87
// Raw field_id_item. Entries are fetched by index with GetFieldId().
struct FieldId {
  uint16_t class_idx_;  // index into typeIds list for defining class
  uint16_t type_idx_;  // index into typeIds for field type
  uint32_t name_idx_;  // index into stringIds for field name
};
94
// Raw method_id_item. Entries are fetched by index with GetMethodId().
struct MethodId {
  uint16_t class_idx_;  // index into typeIds list for defining class
  uint16_t proto_idx_;  // index into protoIds for method prototype
  uint32_t name_idx_;  // index into stringIds for method name
};
101
// Raw proto_id_item. Entries are fetched by index with GetProtoId().
struct ProtoId {
  uint32_t shorty_idx_;  // index into string_ids for shorty descriptor
  uint32_t return_type_idx_;  // index into type_ids list for return type
  // File offset to type_list for parameter types; GetProtoParameters() returns
  // NULL when this is 0.
  uint32_t parameters_off_;
};
108
// Raw class_def_item. Entries are fetched by index with GetClassDef().
// For the *_off_ fields read by GetInterfacesList(), GetClassData() and
// GetEncodedArray(), an offset of 0 makes the accessor return a null pointer.
struct ClassDef {
  uint32_t class_idx_;  // index into typeIds for this class
  uint32_t access_flags_;
  uint32_t superclass_idx_;  // index into typeIds for superclass
  uint32_t interfaces_off_;  // file offset to TypeList
  // Index into stringIds for source file name; dexGetSourceFile() returns NULL
  // when this is 0xffffffff.
  uint32_t source_file_idx_;
  uint32_t annotations_off_;  // file offset to annotations_directory_item
  uint32_t class_data_off_;  // file offset to class_data_item
  uint32_t static_values_off_;  // file offset to EncodedArray
};
120
// Raw type_item: one element of a TypeList.
struct TypeItem {
  uint16_t type_idx_;  // index into type_ids section
};
125
126 // Raw type_list.
127 class TypeList {
128 public:
129 uint32_t Size() const {
130 return size_;
131 }
132
133 const TypeItem& GetTypeItem(uint32_t idx) const {
134 CHECK_LT(idx, this->size_);
135 return this->list_[idx];
136 }
137
138 private:
139 uint32_t size_; // size of the list, in entries
140 TypeItem list_[1]; // elements of the list
141 };
142
143 class ParameterIterator { // TODO: stream
144 public:
145 ParameterIterator(const RawDexFile& raw, const ProtoId& proto_id)
146 : raw_(raw), size_(0), pos_(0) {
147 type_list_ = raw_.GetProtoParameters(proto_id);
148 if (type_list_ != NULL) {
149 size_ = type_list_->Size();
150 }
151 }
152 bool HasNext() const { return pos_ != size_; }
153 void Next() { ++pos_; }
154 const char* GetDescriptor() {
155 uint32_t type_idx = type_list_->GetTypeItem(pos_).type_idx_;
156 return raw_.dexStringByTypeIdx(type_idx);
157 }
158 private:
159 const RawDexFile& raw_;
160 const TypeList* type_list_;
161 uint32_t size_;
162 uint32_t pos_;
163 DISALLOW_IMPLICIT_CONSTRUCTORS(ParameterIterator);
164 };
165
166 ParameterIterator* GetParameterIterator(const ProtoId& proto_id) const {
167 return new ParameterIterator(*this, proto_id);
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700168 }
169
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700170 const char* GetReturnTypeDescriptor(const ProtoId& proto_id) const {
171 return dexStringByTypeIdx(proto_id.return_type_idx_);
Carl Shapiro1fb86202011-06-27 17:43:13 -0700172 }
173
// Raw code_item. GetCodeItem() returns a pointer to one of these inside the
// file for a Method whose code_off_ is nonzero.
struct CodeItem {
  uint16_t registers_size_;
  uint16_t ins_size_;
  uint16_t outs_size_;
  uint16_t tries_size_;
  uint32_t debug_info_off_;  // file offset to debug info stream
  uint32_t insns_size_;  // size of the insns array, in 2 byte code units
  // Declared with one element. NOTE(review): presumably the first of
  // insns_size_ code units that follow in the file -- confirm.
  uint16_t insns_[1];
};
184
// Partially decoded form of class_data_item.
// Produced by ReadClassDataHeader(), which decodes the four counts in this
// order as unsigned LEB128 values.
struct ClassDataHeader {
  uint32_t static_fields_size_;  // the number of static fields
  uint32_t instance_fields_size_;  // the number of instance fields
  uint32_t direct_methods_size_;  // the number of direct methods
  uint32_t virtual_methods_size_;  // the number of virtual methods
};
192
// Decoded form of encoded_field. Filled in by dexReadClassDataField().
struct Field {
  uint32_t field_idx_;  // index into the field_ids list for the identity of this field
  uint32_t access_flags_;  // access flags for the field
};
198
// Decoded form of encoded_method. Filled in by dexReadClassDataMethod().
struct Method {
  uint32_t method_idx_;
  uint32_t access_flags_;
  // Offset of the method's code_item from the start of the file;
  // GetCodeItem() returns NULL when this is 0 (native or abstract method).
  uint32_t code_off_;
};
205
// Opens a .dex file from the file system.
static RawDexFile* OpenFile(const char* filename);

// Opens a .dex file from a newly allocated pointer.
// NOTE(review): presumably takes ownership of ptr through a PtrCloser --
// confirm in the .cc file.
static RawDexFile* OpenPtr(byte* ptr, size_t length);

// Closes a .dex file.
virtual ~RawDexFile();
214
215 const Header& GetHeader() {
216 CHECK(header_ != NULL);
217 return *header_;
Carl Shapiro1fb86202011-06-27 17:43:13 -0700218 }
219
// Looks up a class definition by its class descriptor.
// NOTE(review): presumably served from index_ and NULL when the descriptor is
// unknown -- confirm in the .cc file.
const ClassDef* FindClassDef(const StringPiece& descriptor) const;
222
223 // Returns the number of string identifiers in the .dex file.
224 size_t NumStringIds() const {
225 CHECK(header_ != NULL);
226 return header_->string_ids_size_;
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700227 }
228
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700229 // Returns the number of type identifiers in the .dex file.
230 size_t NumTypeIds() const {
231 CHECK(header_ != NULL);
232 return header_->type_ids_size_;
Carl Shapiro5fafe2b2011-07-09 15:34:41 -0700233 }
234
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700235 // Returns the number of prototype identifiers in the .dex file.
236 size_t NumProtoIds() const {
237 CHECK(header_ != NULL);
238 return header_->proto_ids_size_;
Carl Shapiro5fafe2b2011-07-09 15:34:41 -0700239 }
240
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700241 // Returns the number of field identifiers in the .dex file.
242 size_t NumFieldIds() const {
243 CHECK(header_ != NULL);
244 return header_->field_ids_size_;
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700245 }
246
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700247 // Returns the number of method identifiers in the .dex file.
248 size_t NumMethodIds() const {
249 CHECK(header_ != NULL);
250 return header_->method_ids_size_;
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700251 }
252
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700253 // Returns the number of class definitions in the .dex file.
254 size_t NumClassDefs() const {
255 CHECK(header_ != NULL);
256 return header_->class_defs_size_;
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700257 }
258
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700259 // Returns a pointer to the memory mapped class data.
260 // TODO: return a stream
261 const byte* GetClassData(const ClassDef& class_def) const {
262 if (class_def.class_data_off_ == 0) {
263 return NULL;
264 } else {
265 return base_ + class_def.class_data_off_;
266 }
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700267 }
268
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700269 // Decodes the header section from the raw class data bytes.
270 ClassDataHeader ReadClassDataHeader(const byte** class_data) const {
271 CHECK(class_data != NULL);
272 ClassDataHeader header;
273 memset(&header, 0, sizeof(ClassDataHeader));
274 if (*class_data != NULL) {
275 header.static_fields_size_ = DecodeUnsignedLeb128(class_data);
276 header.instance_fields_size_ = DecodeUnsignedLeb128(class_data);
277 header.direct_methods_size_ = DecodeUnsignedLeb128(class_data);
278 header.virtual_methods_size_ = DecodeUnsignedLeb128(class_data);
279 }
280 return header;
Brian Carlstrom578bbdc2011-07-21 14:07:47 -0700281 }
282
Brian Carlstrom7e49dca2011-07-22 18:07:34 -0700283 // Returns the class descriptor string of a class definition.
284 const char* GetClassDescriptor(const ClassDef& class_def) const {
285 return dexStringByTypeIdx(class_def.class_idx_);
286 }
287
288 // Returns the StringId at the specified index.
289 const StringId& GetStringId(uint32_t idx) const {
290 CHECK_LT(idx, NumStringIds());
291 return string_ids_[idx];
292 }
293
294 // Returns the TypeId at the specified index.
295 const TypeId& GetTypeId(uint32_t idx) const {
296 CHECK_LT(idx, NumTypeIds());
297 return type_ids_[idx];
298 }
299
300 // Returns the FieldId at the specified index.
301 const FieldId& GetFieldId(uint32_t idx) const {
302 CHECK_LT(idx, NumFieldIds());
303 return field_ids_[idx];
304 }
305
306 // Returns the MethodId at the specified index.
307 const MethodId& GetMethodId(uint32_t idx) const {
308 CHECK_LT(idx, NumMethodIds());
309 return method_ids_[idx];
310 }
311
312 // Returns the ProtoId at the specified index.
313 const ProtoId& GetProtoId(uint32_t idx) const {
314 CHECK_LT(idx, NumProtoIds());
315 return proto_ids_[idx];
316 }
317
318 // Returns the ClassDef at the specified index.
319 const ClassDef& GetClassDef(uint32_t idx) const {
320 CHECK_LT(idx, NumClassDefs());
321 return class_defs_[idx];
322 }
323
324 const TypeList* GetInterfacesList(const ClassDef& class_def) const {
325 if (class_def.interfaces_off_ == 0) {
326 return NULL;
327 } else {
328 const byte* addr = base_ + class_def.interfaces_off_;
329 return reinterpret_cast<const TypeList*>(addr);
330 }
331 }
332
333 const CodeItem* GetCodeItem(const Method& method) const {
334 if (method.code_off_ == 0) {
335 return NULL; // native or abstract method
336 } else {
337 const byte* addr = base_ + method.code_off_;
338 return reinterpret_cast<const CodeItem*>(addr);
339 }
340 }
341
342 // Returns the short form method descriptor for the given prototype.
343 const char* GetShorty(uint32_t proto_idx) const {
344 const ProtoId& proto_id = GetProtoId(proto_idx);
345 return dexStringById(proto_id.shorty_idx_);
346 }
347
348 const TypeList* GetProtoParameters(const ProtoId& proto_id) const {
349 if (proto_id.parameters_off_ == 0) {
350 return NULL;
351 } else {
352 const byte* addr = base_ + proto_id.parameters_off_;
353 return reinterpret_cast<const TypeList*>(addr);
354 }
355 }
356
357 const byte* GetEncodedArray(const ClassDef& class_def) const {
358 if (class_def.static_values_off_ == 0) {
359 return 0;
360 } else {
361 return base_ + class_def.static_values_off_;
362 }
363 }
364
365 int32_t GetStringLength(const StringId& string_id) const {
366 const byte* ptr = base_ + string_id.string_data_off_;
367 return DecodeUnsignedLeb128(&ptr);
368 }
369
// Declared here, defined elsewhere. NOTE(review): presumably decodes one
// encoded value at *encoded_value into *value, advances *encoded_value past
// it, and returns the value's type -- confirm in the .cc file.
ValueType ReadEncodedValue(const byte** encoded_value, JValue* value) const;
371
372 // From libdex...
373
374 // Returns a pointer to the UTF-8 string data referred to by the
375 // given string_id.
376 const char* GetStringData(const StringId& string_id) const {
377 const byte* ptr = base_ + string_id.string_data_off_;
378 // Skip the uleb128 length.
379 while (*(ptr++) > 0x7f) /* empty */ ;
380 return (const char*) ptr;
381 }
382
383 // return the UTF-8 encoded string with the specified string_id index
384 const char* dexStringById(uint32_t idx) const {
385 const StringId& string_id = GetStringId(idx);
386 return GetStringData(string_id);
387 }
388
389 // Get the descriptor string associated with a given type index.
390 const char* dexStringByTypeIdx(uint32_t idx) const {
391 const TypeId& type_id = GetTypeId(idx);
392 return dexStringById(type_id.descriptor_idx_);
393 }
394
395 // TODO: encoded_field is actually a stream of bytes
396 void dexReadClassDataField(const byte** encoded_field,
397 RawDexFile::Field* field,
398 uint32_t* last_idx) const {
399 uint32_t idx = *last_idx + DecodeUnsignedLeb128(encoded_field);
400 field->access_flags_ = DecodeUnsignedLeb128(encoded_field);
401 field->field_idx_ = idx;
402 *last_idx = idx;
403 }
404
405 // TODO: encoded_method is actually a stream of bytes
406 void dexReadClassDataMethod(const byte** encoded_method,
407 RawDexFile::Method* method,
408 uint32_t* last_idx) const {
409 uint32_t idx = *last_idx + DecodeUnsignedLeb128(encoded_method);
410 method->access_flags_ = DecodeUnsignedLeb128(encoded_method);
411 method->code_off_ = DecodeUnsignedLeb128(encoded_method);
412 method->method_idx_ = idx;
413 *last_idx = idx;
414 }
415
416
417 // TODO: const reference
418 uint32_t dexGetIndexForClassDef(const ClassDef* class_def) const {
419 CHECK_GE(class_def, class_defs_);
420 CHECK_LT(class_def, class_defs_ + header_->class_defs_size_);
421 return class_def - class_defs_;
422 }
423
424 const char* dexGetSourceFile(const ClassDef& class_def) const {
425 if (class_def.source_file_idx_ == 0xffffffff) {
426 return NULL;
427 } else {
428 return dexStringById(class_def.source_file_idx_);
429 }
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700430 }
431
Carl Shapiro1fb86202011-06-27 17:43:13 -0700432 private:
// Helper class to deallocate underlying storage.
// RawDexFile holds one through scoped_ptr<Closer> closer_. The destructor is
// virtual so that deleting through a Closer* runs the subclass destructor.
class Closer {
 public:
  virtual ~Closer();
};
438
// Helper class to deallocate mmap-backed .dex files.
// NOTE(review): presumably the destructor unmaps [addr_, addr_ + length_) --
// confirm in the .cc file.
class MmapCloser : public Closer {
 public:
  MmapCloser(void* addr, size_t length);
  virtual ~MmapCloser();
 private:
  void* addr_;  // start of the mapping
  size_t length_;  // length passed to the constructor
};
448
449 // Helper class for deallocating new/delete-backed .dex files.
450 class PtrCloser : public Closer {
451 public:
452 PtrCloser(byte* addr);
453 virtual ~PtrCloser();
454 private:
455 byte* addr_;
456 };
457
// Opens a .dex file at the given address.
// NOTE(review): presumably the shared back end of OpenFile() and OpenPtr(),
// with closer releasing the storage when the RawDexFile is destroyed --
// confirm in the .cc file.
static RawDexFile* Open(const byte* dex_file, size_t length, Closer* closer);
460
461 RawDexFile(const byte* addr, size_t length, Closer* closer)
462 : base_(addr),
463 length_(length),
464 closer_(closer),
465 header_(0),
466 string_ids_(0),
467 type_ids_(0),
468 field_ids_(0),
469 method_ids_(0),
470 proto_ids_(0),
471 class_defs_(0) {}
472
// Top-level initializer that calls other Init methods.
// NOTE(review): presumably returns false when the file is rejected -- confirm
// in the .cc file.
bool Init();

// Caches pointers into the various file sections.
void InitMembers();

// Builds the index of descriptors to class definitions.
void InitIndex();

// Returns true if the byte string equals the magic value.
bool CheckMagic(const byte* magic);

// Returns true if the header magic is of the expected value.
bool IsMagicValid();
487
// The index of descriptors to class definitions.
typedef std::map<const StringPiece, const RawDexFile::ClassDef*> Index;
Index index_;

// The base address of the memory mapping. Every file offset used by the
// accessors in this class is added to this pointer.
const byte* base_;

// The size of the underlying memory allocation in bytes.
size_t length_;

// Helper object to free the underlying allocation.
scoped_ptr<Closer> closer_;

// Points to the header section. The constructor leaves it null, and
// GetHeader() and the Num*() accessors CHECK that it has been set.
const Header* header_;

// Points to the base of the string identifier list.
const StringId* string_ids_;

// Points to the base of the type identifier list.
const TypeId* type_ids_;

// Points to the base of the field identifier list.
const FieldId* field_ids_;

// Points to the base of the method identifier list.
const MethodId* method_ids_;

// Points to the base of the prototype identifier list.
const ProtoId* proto_ids_;

// Points to the base of the class definition list.
const ClassDef* class_defs_;
Carl Shapiro1fb86202011-06-27 17:43:13 -0700521};
522
523} // namespace art
524
525#endif // ART_SRC_DEX_FILE_H_