Add experiment for analyzing debug info
Analyze the entropy of debug info to estimate a close upper bound
on the savings from Huffman-encoding debug info.
Also measure how many debug info bytes are dedupable if you exclude
the line number and parameter names.
Sample output:
Debug info bytes 96101012(5.70%)
DBG_END_SEQUENCE: 6401069(0.38%)
DBG_ADVANCE_PC: 3709064(0.22%)
DBG_ADVANCE_LINE: 8620724(0.51%)
DBG_START_LOCAL: 5244232(0.31%)
DBG_START_LOCAL_EXTENDED: 1763845(0.10%)
DBG_END_LOCAL: 1216044(0.07%)
DBG_RESTART_LOCAL: 565412(0.03%)
DBG_SET_PROLOGUE bytes 5768714(0.34%)
DBG_SET_FILE bytes 0(0.00%)
special: 36310220(2.15%)
Debug info entropy 69199724(4.10%)
Debug info opcode bytes 55613021(3.30%)
Debug info opcode entropy 34792401(2.06%)
Debug info non header bytes 69599324(4.13%)
Debug info deduped non header bytes 52475493(3.11%)
Bug: 77721545
Test: test-art-host
Change-Id: I031322e3b79a1572fcbb9e513ded9708e3b48354
diff --git a/tools/dexanalyze/dexanalyze_experiments.cc b/tools/dexanalyze/dexanalyze_experiments.cc
index 7006370..1a3b89c 100644
--- a/tools/dexanalyze/dexanalyze_experiments.cc
+++ b/tools/dexanalyze/dexanalyze_experiments.cc
@@ -75,6 +75,128 @@
return len;
}
+// Walks the debug info stream of every method in |dex_file| (each distinct stream once) and adds
+// per-opcode byte counts, dedupable non-header bytes and byte-entropy estimates to the totals.
+void AnalyzeDebugInfo::ProcessDexFile(const DexFile& dex_file) {
+  std::set<const uint8_t*> seen;  // Start pointers of streams already processed.
+  std::vector<size_t> counts(256, 0u);  // Histogram of all debug info bytes in this dex file.
+  std::vector<size_t> opcode_counts(256, 0u);  // Histogram of the opcode bytes only.
+  std::set<std::vector<uint8_t>> unique_non_header;  // Distinct byte sequences after the header.
+  for (ClassAccessor accessor : dex_file.GetClasses()) {
+    for (const ClassAccessor::Method& method : accessor.GetMethods()) {
+      CodeItemDebugInfoAccessor code_item(dex_file, method.GetCodeItem(), method.GetIndex());
+      const uint8_t* debug_info = dex_file.GetDebugInfoStream(code_item.DebugInfoOffset());
+      if (debug_info != nullptr && seen.insert(debug_info).second) {
+        const uint8_t* stream = debug_info;
+        DecodeUnsignedLeb128(&stream);  // line_start
+        uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+        for (uint32_t i = 0; i < parameters_size; ++i) {
+          DecodeUnsignedLeb128P1(&stream);  // Parameter name.
+        }
+        bool done = false;
+        const uint8_t* after_header_start = stream;  // First opcode; the header ends here.
+        while (!done) {
+          const uint8_t* const op_start = stream;
+          uint8_t opcode = *stream++;
+          ++opcode_counts[opcode];
+          ++total_opcode_bytes_;
+          switch (opcode) {
+            case DexFile::DBG_END_SEQUENCE:
+              ++total_end_seq_bytes_;
+              done = true;
+              break;
+            case DexFile::DBG_ADVANCE_PC:
+              DecodeUnsignedLeb128(&stream);  // addr_diff
+              total_advance_pc_bytes_ += stream - op_start;
+              break;
+            case DexFile::DBG_ADVANCE_LINE:
+              DecodeSignedLeb128(&stream);  // line_diff
+              total_advance_line_bytes_ += stream - op_start;
+              break;
+            case DexFile::DBG_START_LOCAL:
+              DecodeUnsignedLeb128(&stream);  // register_num
+              DecodeUnsignedLeb128P1(&stream);  // name_idx
+              DecodeUnsignedLeb128P1(&stream);  // type_idx
+              total_start_local_bytes_ += stream - op_start;
+              break;
+            case DexFile::DBG_START_LOCAL_EXTENDED:
+              DecodeUnsignedLeb128(&stream);  // register_num
+              DecodeUnsignedLeb128P1(&stream);  // name_idx
+              DecodeUnsignedLeb128P1(&stream);  // type_idx
+              DecodeUnsignedLeb128P1(&stream);  // sig_idx
+              total_start_local_extended_bytes_ += stream - op_start;
+              break;
+            case DexFile::DBG_END_LOCAL:
+              DecodeUnsignedLeb128(&stream);  // register_num
+              total_end_local_bytes_ += stream - op_start;
+              break;
+            case DexFile::DBG_RESTART_LOCAL:
+              DecodeUnsignedLeb128(&stream);  // register_num
+              total_restart_local_bytes_ += stream - op_start;
+              break;
+            case DexFile::DBG_SET_PROLOGUE_END:
+            case DexFile::DBG_SET_EPILOGUE_BEGIN:
+              total_epilogue_bytes_ += stream - op_start;  // Both opcodes share one counter.
+              break;
+            case DexFile::DBG_SET_FILE:
+              DecodeUnsignedLeb128P1(&stream);  // name_idx
+              total_set_file_bytes_ += stream - op_start;
+              break;
+            default:  // Any other opcode; Dump() reports these as "special".
+              total_other_bytes_ += stream - op_start;
+              break;
+          }
+        }
+        const size_t bytes = stream - debug_info;
+        total_bytes_ += bytes;
+        total_non_header_bytes_ += stream - after_header_start;
+        if (unique_non_header.insert(std::vector<uint8_t>(after_header_start, stream)).second) {
+          total_unique_non_header_bytes_ += stream - after_header_start;
+        }
+        for (size_t i = 0; i < bytes; ++i) {
+          ++counts[debug_info[i]];
+        }
+      }
+    }
+  }
+  auto calc_entropy = [](const std::vector<size_t>& data) {  // Entropy of a histogram, in bytes.
+    const size_t total = std::accumulate(data.begin(), data.end(), size_t{0});  // Sum as size_t.
+    double avg_entropy = 0.0;
+    for (size_t c : data) {
+      if (c > 0) {  // Empty buckets contribute nothing and would make log() undefined.
+        double ratio = static_cast<double>(c) / static_cast<double>(total);
+        avg_entropy -= ratio * log(ratio) / log(256.0);  // log base 256, so the unit is bytes.
+      }
+    }
+    return avg_entropy * total;
+  };
+  total_entropy_ += calc_entropy(counts);
+  total_opcode_entropy_ += calc_entropy(opcode_counts);
+}
+
+void AnalyzeDebugInfo::Dump(std::ostream& os, uint64_t total_size) const {
+  // One chained insertion; every counter is rendered by Percent() against |total_size|.
+  os << "Debug info bytes " << Percent(total_bytes_, total_size) << "\n"
+     << " DBG_END_SEQUENCE: " << Percent(total_end_seq_bytes_, total_size) << "\n"
+     << " DBG_ADVANCE_PC: " << Percent(total_advance_pc_bytes_, total_size) << "\n"
+     << " DBG_ADVANCE_LINE: " << Percent(total_advance_line_bytes_, total_size) << "\n"
+     << " DBG_START_LOCAL: " << Percent(total_start_local_bytes_, total_size) << "\n"
+     << " DBG_START_LOCAL_EXTENDED: "
+     << Percent(total_start_local_extended_bytes_, total_size) << "\n"
+     << " DBG_END_LOCAL: " << Percent(total_end_local_bytes_, total_size) << "\n"
+     << " DBG_RESTART_LOCAL: " << Percent(total_restart_local_bytes_, total_size) << "\n"
+     << " DBG_SET_PROLOGUE bytes " << Percent(total_epilogue_bytes_, total_size) << "\n"
+     << " DBG_SET_FILE bytes " << Percent(total_set_file_bytes_, total_size) << "\n"
+     << " special: " << Percent(total_other_bytes_, total_size) << "\n"
+     // Whole-stream figures follow the per-opcode breakdown above.
+     << "Debug info entropy " << Percent(total_entropy_, total_size) << "\n"
+     << "Debug info opcode bytes " << Percent(total_opcode_bytes_, total_size) << "\n"
+     << "Debug info opcode entropy " << Percent(total_opcode_entropy_, total_size) << "\n"
+     << "Debug info non header bytes " << Percent(total_non_header_bytes_, total_size) << "\n"
+     << "Debug info deduped non header bytes "
+     << Percent(total_unique_non_header_bytes_, total_size) << "\n";
+}
+
void AnalyzeStrings::ProcessDexFile(const DexFile& dex_file) {
std::vector<std::string> strings;
for (size_t i = 0; i < dex_file.NumStringIds(); ++i) {