/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "dexanalyze_strings.h"

#include <algorithm>
#include <iomanip>
#include <iostream>
#include <set>

#include "dex/class_accessor-inl.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_instruction-inl.h"

namespace art {
namespace dexanalyze {

void AnalyzeStrings::ProcessDexFiles(const std::vector<std::unique_ptr<const DexFile>>& dex_files) {
  std::set<std::string> unique_strings;
  for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
    for (size_t i = 0; i < dex_file->NumStringIds(); ++i) {
      uint32_t length = 0;
      const char* data = dex_file->StringDataAndUtf16LengthByIdx(dex::StringIndex(i), &length);
      // Analyze if the string has any UTF16 chars.
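      // Note: |length| is the string's UTF-16 code unit count, which is why wide strings are
      // charged two bytes per unit below while ASCII strings are charged one.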
      bool have_wide_char = false;
      const char* ptr = data;
      for (size_t j = 0; j < length; ++j) {
        have_wide_char = have_wide_char || GetUtf16FromUtf8(&ptr) >= 0x100;
      }
      if (have_wide_char) {
        wide_string_bytes_ += 2 * length;
      } else {
        ascii_string_bytes_ += length;
      }
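      // The decode loop advanced |ptr| one code unit at a time, so ptr - data is the size in
      // bytes of this string's MUTF-8 payload (length header and trailing NUL excluded).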
      string_data_bytes_ += ptr - data;
      unique_strings.insert(data);
    }
  }
  // Unique strings only since we want to exclude savings from multidex duplication.
  std::vector<std::string> strings(unique_strings.begin(), unique_strings.end());
  unique_strings.clear();

  // Tunable parameters.
  static const size_t kMinPrefixLen = 1;
  static const size_t kMaxPrefixLen = 255;
  static const size_t kPrefixConstantCost = 4;
  static const size_t kPrefixIndexCost = 2;
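  // Interpretation of the costs above (matching how they are used below): kPrefixIndexCost is
  // charged once per string for referencing its prefix, kPrefixConstantCost once per prefix
  // dictionary entry. kMaxPrefixLen presumably keeps a prefix length within a single byte.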

  // Calculate total shared prefix.
  std::vector<size_t> shared_len;
  prefixes_.clear();
  for (size_t i = 0; i < strings.size(); ++i) {
    size_t best_len = 0;
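    // |strings| is sorted (it came from a std::set), so the longest prefix this string shares
    // with any other unique string is shared with one of its immediate neighbors.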
    if (i > 0) {
      best_len = std::max(best_len, PrefixLen(strings[i], strings[i - 1]));
    }
    if (i < strings.size() - 1) {
      best_len = std::max(best_len, PrefixLen(strings[i], strings[i + 1]));
    }
    best_len = std::min(best_len, kMaxPrefixLen);
    std::string prefix;
    if (best_len >= kMinPrefixLen) {
      prefix = strings[i].substr(0, best_len);
      ++prefixes_[prefix];
    }
    total_prefix_index_cost_ += kPrefixIndexCost;
  }
  // Optimize the result by moving long prefixes to shorter ones if it causes savings.
  while (true) {
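    // Keep making passes over the prefixes until a full pass produces no further savings.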
    bool have_savings = false;
    auto it = prefixes_.begin();
    std::vector<std::string> longest;
    for (const auto& pair : prefixes_) {
      longest.push_back(pair.first);
    }
    std::sort(longest.begin(), longest.end(), [](const std::string& a, const std::string& b) {
      return a.length() > b.length();
    });
    // Do longest first since this provides the best results.
    for (const std::string& s : longest) {
      it = prefixes_.find(s);
      CHECK(it != prefixes_.end());
      const std::string& prefix = it->first;
      int64_t best_savings = 0u;
      int64_t best_len = -1;
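      // Consider folding this prefix into each shorter candidate that is either already in the
      // dictionary or empty (len == 0, i.e. dropping the prefix entirely).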
      for (int64_t len = prefix.length() - 1; len >= 0; --len) {
        auto found = prefixes_.find(prefix.substr(0, len));
        if (len != 0 && found == prefixes_.end()) {
          continue;
        }
        // Calculate savings from downgrading the prefix.
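        // Dropping this entry saves its table slot (kPrefixConstantCost) and its dictionary
        // characters (prefix.length()), but each of the it->second strings that used it now
        // shares only |len| characters, losing (prefix.length() - len) bytes apiece.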
        int64_t savings = kPrefixConstantCost + prefix.length() -
            (prefix.length() - len) * it->second;
        if (savings > best_savings) {
          best_savings = savings;
          best_len = len;
          break;
        }
      }
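      // If a cheaper shorter prefix was found, fold this entry's count into it, drop the longer
      // prefix, and record the savings.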
      if (best_len != -1) {
        prefixes_[prefix.substr(0, best_len)] += it->second;
        it = prefixes_.erase(it);
        optimization_savings_ += best_savings;
        have_savings = true;
      } else {
        ++it;
      }
    }
    if (!have_savings) {
      break;
    }
  }
  total_num_prefixes_ += prefixes_.size();
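  // Account for the final dictionary: total_prefix_dict_ is the character data stored once per
  // prefix, total_prefix_table_ the per-entry table overhead, and total_prefix_savings_ the
  // prefix bytes that would no longer need to be stored with each string.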
  for (const auto& pair : prefixes_) {
    // 4 bytes for an offset, one for length.
    total_prefix_dict_ += pair.first.length();
    total_prefix_table_ += kPrefixConstantCost;
    total_prefix_savings_ += pair.first.length() * pair.second;
  }
}

void AnalyzeStrings::Dump(std::ostream& os, uint64_t total_size) const {
  os << "Total string data bytes " << Percent(string_data_bytes_, total_size) << "\n";
  os << "UTF-16 string data bytes " << Percent(wide_string_bytes_, total_size) << "\n";
  os << "ASCII string data bytes " << Percent(ascii_string_bytes_, total_size) << "\n";

  // Prefix based strings.
  os << "Total shared prefix bytes " << Percent(total_prefix_savings_, total_size) << "\n";
  os << "Prefix dictionary cost " << Percent(total_prefix_dict_, total_size) << "\n";
  os << "Prefix table cost " << Percent(total_prefix_table_, total_size) << "\n";
  os << "Prefix index cost " << Percent(total_prefix_index_cost_, total_size) << "\n";
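  // Net savings = prefix bytes removed from the strings, minus the dictionary, table, and
  // per-string index overheads introduced above.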
  int64_t net_savings = total_prefix_savings_;
  net_savings -= total_prefix_dict_;
  net_savings -= total_prefix_table_;
  net_savings -= total_prefix_index_cost_;
  os << "Prefix dictionary elements " << total_num_prefixes_ << "\n";
  os << "Optimization savings " << Percent(optimization_savings_, total_size) << "\n";
  os << "Prefix net savings " << Percent(net_savings, total_size) << "\n";
  if (verbose_level_ >= VerboseLevel::kEverything) {
    std::vector<std::pair<std::string, size_t>> pairs(prefixes_.begin(), prefixes_.end());
    // Sort lexicographically.
    std::sort(pairs.begin(), pairs.end());
    for (const auto& pair : pairs) {
      os << pair.first << " : " << pair.second << "\n";
    }
  }
}

}  // namespace dexanalyze
}  // namespace art