Add ASCII vs UTF-16 string data analysis
Measure how many strings are fully ASCII characters or a UTF-16 mix.
Bug: 77721545
Test: test-art-host-gtest-dexanalyze_test
Change-Id: Ic59613596c1542c693bcf49cee379310414c44cd
diff --git a/tools/dexanalyze/dexanalyze_experiments.cc b/tools/dexanalyze/dexanalyze_experiments.cc
index bfeb4b9..adc5154 100644
--- a/tools/dexanalyze/dexanalyze_experiments.cc
+++ b/tools/dexanalyze/dexanalyze_experiments.cc
@@ -26,6 +26,7 @@
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_instruction-inl.h"
#include "dex/standard_dex_file.h"
+#include "dex/utf-inl.h"
namespace art {
@@ -48,8 +49,20 @@
std::vector<std::string> strings;
for (size_t i = 0; i < dex_file.NumStringIds(); ++i) {
uint32_t length = 0;
- const char* data =
- dex_file.GetStringDataAndUtf16Length(dex_file.GetStringId(dex::StringIndex(i)), &length);
+ const char* data = dex_file.StringDataAndUtf16LengthByIdx(dex::StringIndex(i), &length);
+ // Analyze if the string has any UTF16 chars.
+ bool have_wide_char = false;
+ const char* ptr = data;
+ for (size_t j = 0; j < length; ++j) {
+ have_wide_char = have_wide_char || GetUtf16FromUtf8(&ptr) >= 0x100;
+ }
+ if (have_wide_char) {
+ wide_string_bytes_ += 2 * length;
+ } else {
+ ascii_string_bytes_ += length;
+ }
+ string_data_bytes_ += ptr - data;
+
strings.push_back(data);
}
// Note that the strings are probably already sorted.
@@ -88,6 +101,11 @@
}
void AnalyzeStrings::Dump(std::ostream& os, uint64_t total_size) const {
+ os << "Total string data bytes " << Percent(string_data_bytes_, total_size) << "\n";
+ os << "UTF-16 string data bytes " << Percent(wide_string_bytes_, total_size) << "\n";
+ os << "ASCII string data bytes " << Percent(ascii_string_bytes_, total_size) << "\n";
+
+ // Prefix based strings.
os << "Total shared prefix bytes " << Percent(total_prefix_savings_, total_size) << "\n";
os << "Prefix dictionary cost " << Percent(total_prefix_dict_, total_size) << "\n";
os << "Prefix table cost " << Percent(total_prefix_table_, total_size) << "\n";