Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2018 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "dexanalyze_experiments.h" |
Mathieu Chartier | 35ddc6f | 2018-05-09 11:34:07 -0700 | [diff] [blame] | 18 | |
| 19 | #include <stdint.h> |
| 20 | #include <inttypes.h> |
| 21 | #include <iostream> |
| 22 | #include <map> |
| 23 | #include <vector> |
| 24 | |
| 25 | #include "android-base/stringprintf.h" |
Mathieu Chartier | c2b4db6 | 2018-05-18 13:58:12 -0700 | [diff] [blame] | 26 | #include "dex/class_accessor-inl.h" |
Mathieu Chartier | 05dc23e | 2018-05-22 11:56:14 -0700 | [diff] [blame^] | 27 | #include "dex/class_iterator.h" |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 28 | #include "dex/code_item_accessors-inl.h" |
| 29 | #include "dex/dex_instruction-inl.h" |
| 30 | #include "dex/standard_dex_file.h" |
Mathieu Chartier | f275979 | 2018-05-18 13:16:54 -0700 | [diff] [blame] | 31 | #include "dex/utf-inl.h" |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 32 | |
| 33 | namespace art { |
| 34 | |
Mathieu Chartier | 35ddc6f | 2018-05-09 11:34:07 -0700 | [diff] [blame] | 35 | std::string Percent(uint64_t value, uint64_t max) { |
| 36 | if (max == 0) { |
| 37 | ++max; |
| 38 | } |
| 39 | return android::base::StringPrintf("%" PRId64 "(%.2f%%)", |
| 40 | value, |
| 41 | static_cast<double>(value * 100) / static_cast<double>(max)); |
| 42 | } |
| 43 | |
| 44 | static size_t PrefixLen(const std::string& a, const std::string& b) { |
| 45 | size_t len = 0; |
| 46 | for (; len < a.length() && len < b.length() && a[len] == b[len]; ++len) {} |
| 47 | return len; |
| 48 | } |
| 49 | |
| 50 | void AnalyzeStrings::ProcessDexFile(const DexFile& dex_file) { |
| 51 | std::vector<std::string> strings; |
| 52 | for (size_t i = 0; i < dex_file.NumStringIds(); ++i) { |
| 53 | uint32_t length = 0; |
Mathieu Chartier | f275979 | 2018-05-18 13:16:54 -0700 | [diff] [blame] | 54 | const char* data = dex_file.StringDataAndUtf16LengthByIdx(dex::StringIndex(i), &length); |
| 55 | // Analyze if the string has any UTF16 chars. |
| 56 | bool have_wide_char = false; |
| 57 | const char* ptr = data; |
| 58 | for (size_t j = 0; j < length; ++j) { |
| 59 | have_wide_char = have_wide_char || GetUtf16FromUtf8(&ptr) >= 0x100; |
| 60 | } |
| 61 | if (have_wide_char) { |
| 62 | wide_string_bytes_ += 2 * length; |
| 63 | } else { |
| 64 | ascii_string_bytes_ += length; |
| 65 | } |
| 66 | string_data_bytes_ += ptr - data; |
| 67 | |
Mathieu Chartier | 35ddc6f | 2018-05-09 11:34:07 -0700 | [diff] [blame] | 68 | strings.push_back(data); |
| 69 | } |
| 70 | // Note that the strings are probably already sorted. |
| 71 | std::sort(strings.begin(), strings.end()); |
| 72 | |
| 73 | // Tunable parameters. |
| 74 | static const size_t kMinPrefixLen = 3; |
| 75 | static const size_t kPrefixConstantCost = 5; |
| 76 | static const size_t kPrefixIndexCost = 2; |
| 77 | |
| 78 | // Calculate total shared prefix. |
| 79 | std::vector<size_t> shared_len; |
| 80 | std::set<std::string> prefixes; |
| 81 | for (size_t i = 0; i < strings.size(); ++i) { |
| 82 | size_t best_len = 0; |
| 83 | if (i > 0) { |
| 84 | best_len = std::max(best_len, PrefixLen(strings[i], strings[i - 1])); |
| 85 | } |
| 86 | if (i < strings.size() - 1) { |
| 87 | best_len = std::max(best_len, PrefixLen(strings[i], strings[i + 1])); |
| 88 | } |
| 89 | std::string prefix; |
| 90 | if (best_len >= kMinPrefixLen) { |
| 91 | prefix = strings[i].substr(0, best_len); |
| 92 | prefixes.insert(prefix); |
| 93 | total_prefix_savings_ += prefix.length(); |
| 94 | } |
| 95 | total_prefix_index_cost_ += kPrefixIndexCost; |
| 96 | } |
| 97 | total_num_prefixes_ += prefixes.size(); |
| 98 | for (const std::string& s : prefixes) { |
| 99 | // 4 bytes for an offset, one for length. |
| 100 | total_prefix_dict_ += s.length(); |
| 101 | total_prefix_table_ += kPrefixConstantCost; |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | void AnalyzeStrings::Dump(std::ostream& os, uint64_t total_size) const { |
Mathieu Chartier | f275979 | 2018-05-18 13:16:54 -0700 | [diff] [blame] | 106 | os << "Total string data bytes " << Percent(string_data_bytes_, total_size) << "\n"; |
| 107 | os << "UTF-16 string data bytes " << Percent(wide_string_bytes_, total_size) << "\n"; |
| 108 | os << "ASCII string data bytes " << Percent(ascii_string_bytes_, total_size) << "\n"; |
| 109 | |
| 110 | // Prefix based strings. |
Mathieu Chartier | 35ddc6f | 2018-05-09 11:34:07 -0700 | [diff] [blame] | 111 | os << "Total shared prefix bytes " << Percent(total_prefix_savings_, total_size) << "\n"; |
| 112 | os << "Prefix dictionary cost " << Percent(total_prefix_dict_, total_size) << "\n"; |
| 113 | os << "Prefix table cost " << Percent(total_prefix_table_, total_size) << "\n"; |
| 114 | os << "Prefix index cost " << Percent(total_prefix_index_cost_, total_size) << "\n"; |
| 115 | int64_t net_savings = total_prefix_savings_; |
| 116 | net_savings -= total_prefix_dict_; |
| 117 | net_savings -= total_prefix_table_; |
| 118 | net_savings -= total_prefix_index_cost_; |
| 119 | os << "Prefix net savings " << Percent(net_savings, total_size) << "\n"; |
| 120 | os << "Prefix dictionary elements " << total_num_prefixes_ << "\n"; |
| 121 | } |
| 122 | |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 123 | void CountDexIndices::ProcessDexFile(const DexFile& dex_file) { |
| 124 | num_string_ids_ += dex_file.NumStringIds(); |
| 125 | num_method_ids_ += dex_file.NumMethodIds(); |
| 126 | num_field_ids_ += dex_file.NumFieldIds(); |
| 127 | num_type_ids_ += dex_file.NumTypeIds(); |
| 128 | num_class_defs_ += dex_file.NumClassDefs(); |
Mathieu Chartier | 05dc23e | 2018-05-22 11:56:14 -0700 | [diff] [blame^] | 129 | for (ClassAccessor accessor : dex_file.GetClasses()) { |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 130 | std::set<size_t> unique_method_ids; |
| 131 | std::set<size_t> unique_string_ids; |
Mathieu Chartier | c2b4db6 | 2018-05-18 13:58:12 -0700 | [diff] [blame] | 132 | accessor.VisitMethods([&](const ClassAccessor::Method& method) { |
Mathieu Chartier | 05dc23e | 2018-05-22 11:56:14 -0700 | [diff] [blame^] | 133 | dex_code_bytes_ += method.GetInstructions().InsnsSizeInBytes(); |
| 134 | for (const DexInstructionPcPair& inst : method.GetInstructions()) { |
Mathieu Chartier | c2b4db6 | 2018-05-18 13:58:12 -0700 | [diff] [blame] | 135 | switch (inst->Opcode()) { |
| 136 | case Instruction::CONST_STRING: { |
| 137 | const dex::StringIndex string_index(inst->VRegB_21c()); |
| 138 | unique_string_ids.insert(string_index.index_); |
| 139 | ++num_string_ids_from_code_; |
| 140 | break; |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 141 | } |
Mathieu Chartier | c2b4db6 | 2018-05-18 13:58:12 -0700 | [diff] [blame] | 142 | case Instruction::CONST_STRING_JUMBO: { |
| 143 | const dex::StringIndex string_index(inst->VRegB_31c()); |
| 144 | unique_string_ids.insert(string_index.index_); |
| 145 | ++num_string_ids_from_code_; |
| 146 | break; |
| 147 | } |
| 148 | // Invoke cases. |
| 149 | case Instruction::INVOKE_VIRTUAL: |
| 150 | case Instruction::INVOKE_VIRTUAL_RANGE: { |
| 151 | bool is_range = (inst->Opcode() == Instruction::INVOKE_VIRTUAL_RANGE); |
| 152 | uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); |
Mathieu Chartier | 05dc23e | 2018-05-22 11:56:14 -0700 | [diff] [blame^] | 153 | if (dex_file.GetMethodId(method_idx).class_idx_ == accessor.GetDescriptorIndex()) { |
Mathieu Chartier | c2b4db6 | 2018-05-18 13:58:12 -0700 | [diff] [blame] | 154 | ++same_class_virtual_; |
| 155 | } else { |
| 156 | ++other_class_virtual_; |
| 157 | unique_method_ids.insert(method_idx); |
| 158 | } |
| 159 | break; |
| 160 | } |
| 161 | case Instruction::INVOKE_DIRECT: |
| 162 | case Instruction::INVOKE_DIRECT_RANGE: { |
| 163 | bool is_range = (inst->Opcode() == Instruction::INVOKE_DIRECT_RANGE); |
| 164 | uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c(); |
Mathieu Chartier | 05dc23e | 2018-05-22 11:56:14 -0700 | [diff] [blame^] | 165 | if (dex_file.GetMethodId(method_idx).class_idx_ == accessor.GetDescriptorIndex()) { |
Mathieu Chartier | c2b4db6 | 2018-05-18 13:58:12 -0700 | [diff] [blame] | 166 | ++same_class_direct_; |
| 167 | } else { |
| 168 | ++other_class_direct_; |
| 169 | unique_method_ids.insert(method_idx); |
| 170 | } |
| 171 | break; |
| 172 | } |
| 173 | case Instruction::INVOKE_STATIC: |
| 174 | case Instruction::INVOKE_STATIC_RANGE: { |
| 175 | bool is_range = (inst->Opcode() == Instruction::INVOKE_STATIC_RANGE); |
| 176 | uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c(); |
Mathieu Chartier | 05dc23e | 2018-05-22 11:56:14 -0700 | [diff] [blame^] | 177 | if (dex_file.GetMethodId(method_idx).class_idx_ == accessor.GetDescriptorIndex()) { |
Mathieu Chartier | c2b4db6 | 2018-05-18 13:58:12 -0700 | [diff] [blame] | 178 | ++same_class_static_; |
| 179 | } else { |
| 180 | ++other_class_static_; |
| 181 | unique_method_ids.insert(method_idx); |
| 182 | } |
| 183 | break; |
| 184 | } |
| 185 | default: |
| 186 | break; |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 187 | } |
| 188 | } |
Mathieu Chartier | c2b4db6 | 2018-05-18 13:58:12 -0700 | [diff] [blame] | 189 | }); |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 190 | total_unique_method_idx_ += unique_method_ids.size(); |
| 191 | total_unique_string_ids_ += unique_string_ids.size(); |
| 192 | } |
| 193 | } |
| 194 | |
Mathieu Chartier | 35ddc6f | 2018-05-09 11:34:07 -0700 | [diff] [blame] | 195 | void CountDexIndices::Dump(std::ostream& os, uint64_t total_size) const { |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 196 | os << "Num string ids: " << num_string_ids_ << "\n"; |
| 197 | os << "Num method ids: " << num_method_ids_ << "\n"; |
| 198 | os << "Num field ids: " << num_field_ids_ << "\n"; |
| 199 | os << "Num type ids: " << num_type_ids_ << "\n"; |
| 200 | os << "Num class defs: " << num_class_defs_ << "\n"; |
| 201 | os << "Same class direct: " << same_class_direct_ << "\n"; |
| 202 | os << "Other class direct: " << other_class_direct_ << "\n"; |
| 203 | os << "Same class virtual: " << same_class_virtual_ << "\n"; |
| 204 | os << "Other class virtual: " << other_class_virtual_ << "\n"; |
| 205 | os << "Same class static: " << same_class_static_ << "\n"; |
| 206 | os << "Other class static: " << other_class_static_ << "\n"; |
| 207 | os << "Num strings accessed from code: " << num_string_ids_from_code_ << "\n"; |
| 208 | os << "Unique(per class) method ids accessed from code: " << total_unique_method_idx_ << "\n"; |
| 209 | os << "Unique(per class) string ids accessed from code: " << total_unique_string_ids_ << "\n"; |
| 210 | size_t same_class_total = same_class_direct_ + same_class_virtual_ + same_class_static_; |
| 211 | size_t other_class_total = other_class_direct_ + other_class_virtual_ + other_class_static_; |
| 212 | os << "Same class invoke: " << same_class_total << "\n"; |
| 213 | os << "Other class invoke: " << other_class_total << "\n"; |
| 214 | os << "Invokes from code: " << (same_class_total + other_class_total) << "\n"; |
Mathieu Chartier | 35ddc6f | 2018-05-09 11:34:07 -0700 | [diff] [blame] | 215 | os << "Total dex size: " << total_size << "\n"; |
Mathieu Chartier | ef2fa26 | 2018-04-12 15:14:07 -0700 | [diff] [blame] | 216 | } |
| 217 | |
| 218 | } // namespace art |
| 219 | |