Blame - tools/dexanalyze/dexanalyze_strings.cc - SHIFTPHONES/android_art

blob: dcadb59e35bb7b14df4c72e2179905f2889eb15d [file] [log] [blame]

Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	1	/*
				2	* Copyright (C) 2018 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#include "dexanalyze_strings.h"
				18
				19	#include <algorithm>
				20	#include <iomanip>
				21	#include <iostream>
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	22	#include <queue>
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	23
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	24	#include "base/time_utils.h"
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	25	#include "dex/class_accessor-inl.h"
				26	#include "dex/code_item_accessors-inl.h"
				27	#include "dex/dex_instruction-inl.h"
				28
				29	namespace art {
				30	namespace dexanalyze {
				31
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	32	// Tunable parameters.
				33	static const size_t kMinPrefixLen = 1;
				34	static const size_t kMaxPrefixLen = 255;
				35	static const size_t kPrefixConstantCost = 4;
				36	static const size_t kPrefixIndexCost = 2;
				37
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	38	class PrefixDictionary {
				39	public:
				40	// Add prefix data and return the offset to the start of the added data.
				41	size_t AddPrefixData(const uint8_t* data, size_t len) {
				42	const size_t offset = prefix_data_.size();
				43	prefix_data_.insert(prefix_data_.end(), data, data + len);
				44	return offset;
				45	}
				46
				47	static constexpr size_t kLengthBits = 8;
				48	static constexpr size_t kLengthMask = (1u << kLengthBits) - 1;
				49
				50	// Return the prefix offset and length.
				51	ALWAYS_INLINE void GetOffset(uint32_t prefix_index, uint32_t* offset, uint32_t* length) const {
				52	CHECK_LT(prefix_index, offsets_.size());
				53	const uint32_t data = offsets_[prefix_index];
				54	*length = data & kLengthMask;
				55	*offset = data >> kLengthBits;
				56	}
				57
				58	uint32_t AddOffset(uint32_t offset, uint32_t length) {
				59	CHECK_LE(length, kLengthMask);
				60	offsets_.push_back((offset << kLengthBits) \| length);
				61	return offsets_.size() - 1;
				62	}
				63
				64	public:
				65	std::vector<uint32_t> offsets_;
				66	std::vector<uint8_t> prefix_data_;
				67	};
				68
				69	class PrefixStrings {
				70	public:
				71	class Builder {
				72	public:
				73	explicit Builder(PrefixStrings* output) : output_(output) {}
				74	void Build(const std::vector<std::string>& strings);
				75
				76	private:
				77	PrefixStrings* const output_;
				78	};
				79
				80	// Return the string index that was added.
				81	size_t AddString(uint16_t prefix, const std::string& str) {
				82	const size_t string_offset = chars_.size();
				83	chars_.push_back(static_cast<uint8_t>(prefix >> 8));
				84	chars_.push_back(static_cast<uint8_t>(prefix >> 0));
				85	EncodeUnsignedLeb128(&chars_, str.length());
				86	const uint8_t* ptr = reinterpret_cast<const uint8_t*>(&str[0]);
				87	chars_.insert(chars_.end(), ptr, ptr + str.length());
				88	string_offsets_.push_back(string_offset);
				89	return string_offsets_.size() - 1;
				90	}
				91
				92	std::string GetString(uint32_t string_idx) const {
				93	const size_t offset = string_offsets_[string_idx];
				94	const uint8_t* suffix_data = &chars_[offset];
				95	uint16_t prefix_idx = (static_cast<uint16_t>(suffix_data[0]) << 8) +
				96	suffix_data[1];
				97	suffix_data += 2;
				98	uint32_t prefix_offset;
				99	uint32_t prefix_len;
				100	dictionary_.GetOffset(prefix_idx, &prefix_offset, &prefix_len);
				101	const uint8_t* prefix_data = &dictionary_.prefix_data_[prefix_offset];
				102	std::string ret(prefix_data, prefix_data + prefix_len);
				103	uint32_t suffix_len = DecodeUnsignedLeb128(&suffix_data);
				104	ret.insert(ret.end(), suffix_data, suffix_data + suffix_len);
				105	return ret;
				106	}
				107
				108	ALWAYS_INLINE bool Equal(uint32_t string_idx, const uint8_t* data, size_t len) const {
				109	const size_t offset = string_offsets_[string_idx];
				110	const uint8_t* suffix_data = &chars_[offset];
				111	uint16_t prefix_idx = (static_cast<uint16_t>(suffix_data[0]) << 8) +
				112	suffix_data[1];
				113	suffix_data += 2;
				114	uint32_t prefix_offset;
				115	uint32_t prefix_len;
				116	dictionary_.GetOffset(prefix_idx, &prefix_offset, &prefix_len);
				117	uint32_t suffix_len = DecodeUnsignedLeb128(&suffix_data);
				118	if (prefix_len + suffix_len != len) {
				119	return false;
				120	}
				121	const uint8_t* prefix_data = &dictionary_.prefix_data_[prefix_offset];
Mathieu Chartier	e5afbf3	2018-09-12 17:51:54 -0700	[diff] [blame]	122	if ((true)) {
				123	return memcmp(prefix_data, data, prefix_len) == 0u &&
				124	memcmp(suffix_data, data + prefix_len, len - prefix_len) == 0u;
				125	} else {
				126	len -= prefix_len;
				127	while (prefix_len != 0u) {
				128	if (prefix_data++ != data++) {
				129	return false;
				130	}
				131	--prefix_len;
				132	}
				133	while (len != 0u) {
				134	if (suffix_data++ != data++) {
				135	return false;
				136	}
				137	--len;
				138	}
				139	return true;
				140	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	141	}
				142
				143	public:
				144	PrefixDictionary dictionary_;
				145	std::vector<uint8_t> chars_;
				146	std::vector<uint32_t> string_offsets_;
				147	};
				148
				149	// Normal non prefix strings.
				150	class NormalStrings {
				151	public:
				152	// Return the string index that was added.
				153	size_t AddString(const std::string& str) {
				154	const size_t string_offset = chars_.size();
				155	EncodeUnsignedLeb128(&chars_, str.length());
				156	const uint8_t* ptr = reinterpret_cast<const uint8_t*>(&str[0]);
				157	chars_.insert(chars_.end(), ptr, ptr + str.length());
				158	string_offsets_.push_back(string_offset);
				159	return string_offsets_.size() - 1;
				160	}
				161
				162	std::string GetString(uint32_t string_idx) const {
				163	const size_t offset = string_offsets_[string_idx];
				164	const uint8_t* data = &chars_[offset];
				165	uint32_t len = DecodeUnsignedLeb128(&data);
				166	return std::string(data, data + len);
				167	}
				168
				169	ALWAYS_INLINE bool Equal(uint32_t string_idx, const uint8_t* data, size_t len) const {
				170	const size_t offset = string_offsets_[string_idx];
				171	const uint8_t* str_data = &chars_[offset];
				172	uint32_t str_len = DecodeUnsignedLeb128(&str_data);
				173	if (str_len != len) {
				174	return false;
				175	}
				176	return memcmp(data, str_data, len) == 0u;
				177	}
				178
				179	public:
				180	std::vector<uint8_t> chars_;
				181	std::vector<uint32_t> string_offsets_;
				182	};
				183
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	184	// Node value = (distance from root) * (occurrences - 1).
				185	class MatchTrie {
				186	public:
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	187	MatchTrie* Add(const std::string& str) {
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	188	MatchTrie* node = this;
				189	size_t depth = 0u;
				190	for (uint8_t c : str) {
				191	++depth;
				192	if (node->nodes_[c] == nullptr) {
				193	MatchTrie* new_node = new MatchTrie();
				194	node->nodes_[c].reset(new_node);
				195	new_node->parent_ = node;
				196	new_node->depth_ = depth;
				197	new_node->incoming_ = c;
				198	node = new_node;
				199	} else {
				200	node = node->nodes_[c].get();
				201	}
				202	++node->count_;
				203	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	204	return node;
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	205	}
				206
				207	// Returns the length of the longest prefix and if it's a leaf node.
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	208	MatchTrie* LongestPrefix(const std::string& str) {
				209	MatchTrie* node = this;
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	210	for (uint8_t c : str) {
				211	if (node->nodes_[c] == nullptr) {
				212	break;
				213	}
				214	node = node->nodes_[c].get();
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	215	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	216	return node;
				217	}
				218
				219	bool IsLeaf() const {
				220	for (const std::unique_ptr<MatchTrie>& cur_node : nodes_) {
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	221	if (cur_node != nullptr) {
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	222	return false;
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	223	}
				224	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	225	return true;
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	226	}
				227
				228	int32_t Savings() const {
				229	int32_t cost = kPrefixConstantCost;
				230	int32_t first_used = 0u;
				231	if (chosen_suffix_count_ == 0u) {
				232	cost += depth_;
				233	}
				234	uint32_t extra_savings = 0u;
				235	for (MatchTrie* cur = parent_; cur != nullptr; cur = cur->parent_) {
				236	if (cur->chosen_) {
				237	first_used = cur->depth_;
				238	if (cur->chosen_suffix_count_ == 0u) {
				239	// First suffix for the chosen parent, remove the cost of the dictionary entry.
				240	extra_savings += first_used;
				241	}
				242	break;
				243	}
				244	}
				245	return count_ * (depth_ - first_used) - cost + extra_savings;
				246	}
				247
				248	template <typename T, typename... Args, template <typename...> class Queue>
				249	T PopRealTop(Queue<T, Args...>& queue) {
				250	auto pair = queue.top();
				251	queue.pop();
				252	// Keep updating values until one sticks.
				253	while (pair.second->Savings() != pair.first) {
				254	pair.first = pair.second->Savings();
				255	queue.push(pair);
				256	pair = queue.top();
				257	queue.pop();
				258	}
				259	return pair;
				260	}
				261
				262	std::vector<std::string> ExtractPrefixes(size_t max) {
				263	std::vector<std::string> ret;
				264	// Make priority queue and adaptively update it. Each node value is the savings from picking
				265	// it. Insert all of the interesting nodes in the queue (children != 1).
				266	std::priority_queue<std::pair<int32_t, MatchTrie*>> queue;
				267	// Add all of the nodes to the queue.
				268	std::vector<MatchTrie*> work(1, this);
				269	while (!work.empty()) {
				270	MatchTrie* elem = work.back();
				271	work.pop_back();
				272	size_t num_childs = 0u;
				273	for (const std::unique_ptr<MatchTrie>& child : elem->nodes_) {
				274	if (child != nullptr) {
				275	work.push_back(child.get());
				276	++num_childs;
				277	}
				278	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	279	if (num_childs > 1u \|\| elem->value_ != 0u) {
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	280	queue.emplace(elem->Savings(), elem);
				281	}
				282	}
				283	std::priority_queue<std::pair<int32_t, MatchTrie*>> prefixes;
				284	// The savings can only ever go down for a given node, never up.
				285	while (max != 0u && !queue.empty()) {
				286	std::pair<int32_t, MatchTrie*> pair = PopRealTop(queue);
				287	if (pair.second != this && pair.first > 0) {
				288	// Pick this node.
				289	uint32_t count = pair.second->count_;
				290	pair.second->chosen_ = true;
				291	for (MatchTrie* cur = pair.second->parent_; cur != this; cur = cur->parent_) {
				292	if (cur->chosen_) {
				293	break;
				294	}
				295	cur->count_ -= count;
				296	}
				297	for (MatchTrie* cur = pair.second->parent_; cur != this; cur = cur->parent_) {
				298	++cur->chosen_suffix_count_;
				299	}
				300	prefixes.emplace(pair.first, pair.second);
				301	--max;
				302	} else {
				303	// Negative or no EV, just delete the node.
				304	}
				305	}
				306	while (!prefixes.empty()) {
				307	std::pair<int32_t, MatchTrie*> pair = PopRealTop(prefixes);
				308	if (pair.first <= 0) {
				309	continue;
				310	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	311	ret.push_back(pair.second->GetString());
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	312	}
				313	return ret;
				314	}
				315
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	316	std::string GetString() const {
				317	std::vector<uint8_t> chars;
				318	for (const MatchTrie* cur = this; cur->parent_ != nullptr; cur = cur->parent_) {
				319	chars.push_back(cur->incoming_);
				320	}
				321	return std::string(chars.rbegin(), chars.rend());
				322	}
				323
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	324	std::unique_ptr<MatchTrie> nodes_[256];
				325	MatchTrie* parent_ = nullptr;
				326	uint32_t count_ = 0u;
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	327	uint32_t depth_ = 0u;
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	328	int32_t savings_ = 0u;
				329	uint8_t incoming_ = 0u;
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	330	// Value of the current node, non zero if the node is chosen.
				331	uint32_t value_ = 0u;
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	332	// If the current node is chosen to be a used prefix.
				333	bool chosen_ = false;
				334	// If the current node is a prefix of a longer chosen prefix.
				335	uint32_t chosen_suffix_count_ = 0u;
				336	};
				337
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	338	void PrefixStrings::Builder::Build(const std::vector<std::string>& strings) {
				339	std::unique_ptr<MatchTrie> prefixe_trie(new MatchTrie());
				340	for (size_t i = 0; i < strings.size(); ++i) {
				341	size_t len = 0u;
				342	if (i > 0u) {
				343	CHECK_GT(strings[i], strings[i - 1]);
				344	len = std::max(len, PrefixLen(strings[i], strings[i - 1]));
				345	}
				346	if (i < strings.size() - 1) {
				347	len = std::max(len, PrefixLen(strings[i], strings[i + 1]));
				348	}
				349	len = std::min(len, kMaxPrefixLen);
				350	if (len >= kMinPrefixLen) {
				351	prefixe_trie->Add(strings[i].substr(0, len))->value_ = 1u;
				352	}
				353	}
				354
				355	// Build prefixes.
				356	{
				357	static constexpr size_t kPrefixBits = 15;
				358	std::vector<std::string> prefixes(prefixe_trie->ExtractPrefixes(1 << kPrefixBits));
				359	// Add longest prefixes first so that subprefixes can share data.
				360	std::sort(prefixes.begin(), prefixes.end(), [](const std::string& a, const std::string& b) {
				361	return a.length() > b.length();
				362	});
				363	prefixe_trie.reset();
				364	prefixe_trie.reset(new MatchTrie());
				365	uint32_t prefix_idx = 0u;
				366	CHECK_EQ(output_->dictionary_.AddOffset(0u, 0u), prefix_idx++);
				367	for (const std::string& str : prefixes) {
				368	uint32_t prefix_offset = 0u;
				369	MatchTrie* node = prefixe_trie->LongestPrefix(str);
				370	if (node != nullptr && node->depth_ == str.length() && node->value_ != 0u) {
				371	CHECK_EQ(node->GetString(), str);
				372	uint32_t existing_len = 0u;
				373	output_->dictionary_.GetOffset(node->value_, &prefix_offset, &existing_len);
				374	// Make sure to register the current node.
				375	prefixe_trie->Add(str)->value_ = prefix_idx;
				376	} else {
				377	auto add_str = [&](const std::string& s) {
				378	node = prefixe_trie->Add(s);
				379	node->value_ = prefix_idx;
				380	while (node != nullptr) {
				381	node->value_ = prefix_idx;
				382	node = node->parent_;
				383	}
				384	};
				385	static constexpr size_t kNumSubstrings = 1u;
				386	// Increasing kNumSubstrings provides savings since it enables common substrings and not
				387	// only prefixes to share data. The problem is that it's slow.
				388	for (size_t i = 0; i < std::min(str.length(), kNumSubstrings); ++i) {
				389	add_str(str.substr(i));
				390	}
				391	prefix_offset = output_->dictionary_.AddPrefixData(
				392	reinterpret_cast<const uint8_t*>(&str[0]),
				393	str.length());
				394	}
				395	// TODO: Validiate the prefix offset.
				396	CHECK_EQ(output_->dictionary_.AddOffset(prefix_offset, str.length()), prefix_idx);
				397	++prefix_idx;
				398	}
				399	}
				400
				401	// Add strings to the dictionary.
				402	for (const std::string& str : strings) {
				403	MatchTrie* node = prefixe_trie->LongestPrefix(str);
				404	uint32_t prefix_idx = 0u;
				405	uint32_t best_length = 0u;
				406	while (node != nullptr) {
				407	uint32_t offset = 0u;
				408	uint32_t length = 0u;
				409	output_->dictionary_.GetOffset(node->value_, &offset, &length);
				410	if (node->depth_ == length) {
				411	prefix_idx = node->value_;
				412	best_length = node->depth_;
				413	break;
				414	// Actually the prefix we want.
				415	}
				416	node = node->parent_;
				417	}
				418	output_->AddString(prefix_idx, str.substr(best_length));
				419	}
				420	}
				421
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	422	void AnalyzeStrings::ProcessDexFiles(const std::vector<std::unique_ptr<const DexFile>>& dex_files) {
				423	std::set<std::string> unique_strings;
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	424	// Accumulate the strings.
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	425	for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
				426	for (size_t i = 0; i < dex_file->NumStringIds(); ++i) {
				427	uint32_t length = 0;
				428	const char* data = dex_file->StringDataAndUtf16LengthByIdx(dex::StringIndex(i), &length);
				429	// Analyze if the string has any UTF16 chars.
				430	bool have_wide_char = false;
				431	const char* ptr = data;
				432	for (size_t j = 0; j < length; ++j) {
				433	have_wide_char = have_wide_char \|\| GetUtf16FromUtf8(&ptr) >= 0x100;
				434	}
				435	if (have_wide_char) {
				436	wide_string_bytes_ += 2 * length;
				437	} else {
				438	ascii_string_bytes_ += length;
				439	}
				440	string_data_bytes_ += ptr - data;
				441	unique_strings.insert(data);
				442	}
				443	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	444	// Unique strings only since we want to exclude savings from multi-dex duplication.
				445	ProcessStrings(std::vector<std::string>(unique_strings.begin(), unique_strings.end()));
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	446	}
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	447
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	448	void AnalyzeStrings::ProcessStrings(const std::vector<std::string>& strings) {
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	449	// Calculate total shared prefix.
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	450	size_t prefix_index_cost_ = 0u;
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	451	for (size_t i = 0; i < strings.size(); ++i) {
				452	size_t best_len = 0;
				453	if (i > 0) {
				454	best_len = std::max(best_len, PrefixLen(strings[i], strings[i - 1]));
				455	}
				456	if (i < strings.size() - 1) {
				457	best_len = std::max(best_len, PrefixLen(strings[i], strings[i + 1]));
				458	}
				459	best_len = std::min(best_len, kMaxPrefixLen);
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	460	if (best_len >= kMinPrefixLen) {
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	461	total_shared_prefix_bytes_ += best_len;
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	462	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	463	prefix_index_cost_ += kPrefixIndexCost;
				464	if (strings[i].length() < 64) {
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	465	++short_strings_;
				466	} else {
				467	++long_strings_;
				468	}
				469	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	470	total_prefix_index_cost_ += prefix_index_cost_;
				471
				472	PrefixStrings prefix_strings;
				473	{
				474	PrefixStrings::Builder prefix_builder(&prefix_strings);
				475	prefix_builder.Build(strings);
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	476	}
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	477	Benchmark(prefix_strings, strings, &prefix_timings_);
				478	const size_t num_prefixes = prefix_strings.dictionary_.offsets_.size();
				479	total_num_prefixes_ += num_prefixes;
				480	total_prefix_table_ += num_prefixes * sizeof(prefix_strings.dictionary_.offsets_[0]);
				481	total_prefix_dict_ += prefix_strings.dictionary_.prefix_data_.size();
				482
				483	{
				484	NormalStrings normal_strings;
				485	for (const std::string& s : strings) {
				486	normal_strings.AddString(s);
				487	}
				488	const uint64_t unique_string_data_bytes = normal_strings.chars_.size();
				489	total_unique_string_data_bytes_ += unique_string_data_bytes;
				490	total_prefix_savings_ += unique_string_data_bytes - prefix_strings.chars_.size() +
				491	prefix_index_cost_;
				492	Benchmark(normal_strings, strings, &normal_timings_);
				493	}
				494	}
				495
				496	template <typename Strings>
				497	void AnalyzeStrings::Benchmark(const Strings& strings,
				498	const std::vector<std::string>& reference,
				499	StringTimings* timings) {
				500	const size_t kIterations = 100;
				501	timings->num_comparisons_ += reference.size() * kIterations;
				502
				503	uint64_t start = NanoTime();
				504	for (size_t j = 0; j < kIterations; ++j) {
				505	for (size_t i = 0; i < reference.size(); ++i) {
				506	CHECK(strings.Equal(
				507	i,
				508	reinterpret_cast<const uint8_t*>(&reference[i][0]),
				509	reference[i].length()))
				510	<< i << ": " << strings.GetString(i) << " vs " << reference[i];
				511	}
				512	}
				513	timings->time_equal_comparisons_ += NanoTime() - start;
				514
				515	start = NanoTime();
				516	for (size_t j = 0; j < kIterations; ++j) {
				517	size_t count = 0u;
				518	for (size_t i = 0; i < reference.size(); ++i) {
				519	count += strings.Equal(
				520	reference.size() - 1 - i,
				521	reinterpret_cast<const uint8_t*>(&reference[i][0]),
				522	reference[i].length());
				523	}
				524	CHECK_LT(count, 2u);
				525	}
				526	timings->time_non_equal_comparisons_ += NanoTime() - start;
				527	}
				528
				529	template void AnalyzeStrings::Benchmark(const PrefixStrings&,
				530	const std::vector<std::string>&,
				531	StringTimings* timings);
				532	template void AnalyzeStrings::Benchmark(const NormalStrings&,
				533	const std::vector<std::string>&,
				534	StringTimings* timings);
				535
				536	void StringTimings::Dump(std::ostream& os) const {
				537	const double comparisons = static_cast<double>(num_comparisons_);
				538	os << "Compare equal " << static_cast<double>(time_equal_comparisons_) / comparisons << "\n";
				539	os << "Compare not equal " << static_cast<double>(time_non_equal_comparisons_) / comparisons << "\n";
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	540	}
				541
				542	void AnalyzeStrings::Dump(std::ostream& os, uint64_t total_size) const {
				543	os << "Total string data bytes " << Percent(string_data_bytes_, total_size) << "\n";
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	544	os << "Total unique string data bytes "
				545	<< Percent(total_unique_string_data_bytes_, total_size) << "\n";
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	546	os << "UTF-16 string data bytes " << Percent(wide_string_bytes_, total_size) << "\n";
				547	os << "ASCII string data bytes " << Percent(ascii_string_bytes_, total_size) << "\n";
				548
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	549	os << "Prefix string timings\n";
				550	prefix_timings_.Dump(os);
				551	os << "Normal string timings\n";
				552	normal_timings_.Dump(os);
				553
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	554	// Prefix based strings.
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	555	os << "Total shared prefix bytes " << Percent(total_shared_prefix_bytes_, total_size) << "\n";
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	556	os << "Prefix dictionary cost " << Percent(total_prefix_dict_, total_size) << "\n";
				557	os << "Prefix table cost " << Percent(total_prefix_table_, total_size) << "\n";
				558	os << "Prefix index cost " << Percent(total_prefix_index_cost_, total_size) << "\n";
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	559	int64_t net_savings = total_prefix_savings_;
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	560	net_savings -= total_prefix_dict_;
				561	net_savings -= total_prefix_table_;
				562	net_savings -= total_prefix_index_cost_;
				563	os << "Prefix dictionary elements " << total_num_prefixes_ << "\n";
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	564	os << "Prefix base savings " << Percent(total_prefix_savings_, total_size) << "\n";
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	565	os << "Prefix net savings " << Percent(net_savings, total_size) << "\n";
Mathieu Chartier	49ca869	2018-08-24 09:34:49 -0700	[diff] [blame]	566	os << "Strings using prefix "
				567	<< Percent(strings_used_prefixed_, total_prefix_index_cost_ / kPrefixIndexCost) << "\n";
				568	os << "Short strings " << Percent(short_strings_, short_strings_ + long_strings_) << "\n";
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	569	if (verbose_level_ >= VerboseLevel::kEverything) {
Mathieu Chartier	5eae4c5	2018-08-30 15:52:15 -0700	[diff] [blame]	570	std::vector<std::pair<std::string, size_t>> pairs; // (prefixes_.begin(), prefixes_.end());
Mathieu Chartier	0ddf626	2018-08-23 17:37:46 -0700	[diff] [blame]	571	// Sort lexicographically.
				572	std::sort(pairs.begin(), pairs.end());
				573	for (const auto& pair : pairs) {
				574	os << pair.first << " : " << pair.second << "\n";
				575	}
				576	}
				577	}
				578
				579	} // namespace dexanalyze
				580	} // namespace art