tools/runtime_memusage/symbol_trace_info.py - SHIFTPHONES/android_art - Gitiles

 #!/usr/bin/env python
 #
 # Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """Outputs quantitative information about Address Sanitizer traces."""

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 from collections import Counter
 from datetime import datetime
 import argparse
 import bisect
 import os
 import sys


 def find_match(list_substrings, big_string):
     """Returns the index of the first substring found inside big_string.

     The substrings are tried in list order. When none of them occurs in
     big_string, the index of the "Uncategorized" entry of list_substrings
     is returned instead (list.index raises ValueError if it is missing).
     """
     hits = (position
             for position, needle in enumerate(list_substrings)
             if needle in big_string)
     try:
         return next(hits)
     except StopIteration:
         # No keyword occurs in the trace: fall back to the catch-all entry.
         return list_substrings.index("Uncategorized")


 def absolute_to_relative(plot_list, dex_start_list, cat_list):
     """Rewrites plot_list in place with relative times and Dex File offsets.

     Every element of plot_list starts out as [time_string, address_string]
     and ends up as [elapsed_ms, dex_offset, category, address_string].

     Args:
         plot_list: list of two-item lists; item 0 is a time formatted as
             "%H:%M:%S.%f" and item 1 is a hexadecimal address string.
         dex_start_list: integer starting addresses of the Dex Files, in
             any order.
         cat_list: category of each data point, indexed like plot_list.

     Returns:
         None. An empty plot_list is left untouched.
     """
     if not plot_list:
         # No accesses were recorded; plot_list[0] would raise IndexError.
         return
     time_format_str = "%H:%M:%S.%f"
     first_access_time = datetime.strptime(plot_list[0][0], time_format_str)
     # bisect requires ascending order, which callers do not guarantee.
     # Sorting a copy leaves the caller's list untouched.
     sorted_dex_starts = sorted(dex_start_list)
     for ind, elem in enumerate(plot_list):
         elem_date_time = datetime.strptime(elem[0], time_format_str)
         # Shift time values so that first access is at time 0 milliseconds
         elapsed = elem_date_time - first_access_time
         elem[0] = int(elapsed.total_seconds() * 1000)
         address_access = int(elem[1], 16)
         # Highest Dex File starting address that is <= address_access.
         # NOTE: when the address lies below every start the index is -1,
         # so the highest start is picked and the offset is negative.
         start_index = bisect.bisect(sorted_dex_starts, address_access) - 1
         dex_file_start = sorted_dex_starts[start_index]
         elem.insert(1, address_access - dex_file_start)
         # Category that a data point belongs to
         elem.insert(2, cat_list[ind])


 def print_category_info(cat_split, outname, out_dir_name, title):
     """Prints a summary of the given traces and stores them in a file.

     The summary printed to stdout holds the title, the number of distinct
     traces, the total number of traces and a histogram telling how many
     distinct traces share each occurrence count. Every distinct trace is
     written, most frequent first, to the file outname inside out_dir_name
     together with the number of times it appeared.
     """
     ranked_traces = Counter(cat_split).most_common()
     occurrence_counts = [count for _, count in ranked_traces]

     print("-" * 53)
     print(title)
     print("\tNumber of distinct traces: %d" % len(ranked_traces))
     print("\tSum of trace counts: %d" % sum(occurrence_counts))
     print("\n\tCount: How many traces appeared with count\n\t")
     print(Counter(occurrence_counts))

     out_path = os.path.join(out_dir_name, outname)
     with open(out_path, "w") as output_file:
         for trace_text, count in ranked_traces:
             output_file.write("\n\nNumber of times appeared: %d\n" % count)
             output_file.write(trace_text.strip())


 def print_categories(categories, symbol_file_split, out_dir_name):
     """Prints statistics per category keyword and for all traces together.

     For every keyword in categories[1:] the traces containing it go to
     "<keyword lowercased>cat_output" and the remaining traces go to
     "non<keyword lowercased>cat_output". All traces are reported in
     "allcat_output". When categories has more than one entry, the traces
     containing none of its entries are reported in "noncat_output". All
     files are created inside out_dir_name.
     """
     # One "containing" / "not containing" report pair per keyword.
     for position, keyword in enumerate(categories[1:], 1):
         print("\nCategory #%d" % position)
         with_keyword = []
         without_keyword = []
         for trace in symbol_file_split:
             if keyword in trace:
                 with_keyword.append(trace)
             else:
                 without_keyword.append(trace)
         base_name = keyword.lower() + "cat_output"
         print_category_info(with_keyword, base_name, out_dir_name,
                             "Traces containing: " + keyword)
         print_category_info(without_keyword, "non" + base_name,
                             out_dir_name,
                             "Traces not containing: " + keyword)

     # Every trace, whether it matched a keyword or not.
     print_category_info(symbol_file_split, "allcat_output", out_dir_name,
                         "All traces together:")

     # Traces matching no entry at all; only meaningful when keywords exist.
     if len(categories) > 1:
         unmatched = [trace for trace in symbol_file_split
                      if not any(keyword in trace for keyword in categories)]
         print_category_info(unmatched, "noncat_output", out_dir_name,
                             "Uncategorized calls")


 def is_directory(path_name):
     """Returns path_name unchanged when it names an existing directory.

     Raises argparse.ArgumentTypeError otherwise, which lets the function
     serve as the type= callback of an argparse argument.
     """
     if os.path.isdir(path_name):
         return path_name
     raise argparse.ArgumentTypeError("%s is not a directory" % path_name)


 def parse_args(argv):
     """Parses the command line arguments.

     Args:
         argv: list of arguments without the program name.

     Returns:
         The argparse namespace with out_dir_name (from -d, the current
         directory when omitted), the files sanitizer_trace, symbol_trace
         and dex_starts opened for reading, and the list categories.
     """
     parser = argparse.ArgumentParser()
     # argparse passes string defaults through type=, so the former default
     # "" was rejected by is_directory and made -d mandatory in practice.
     # os.curdir validates and resolves to the same location as "" does in
     # os.path.join.
     parser.add_argument('-d', action='store',
                         default=os.curdir, dest="out_dir_name",
                         type=is_directory,
                         help='Output Directory')
     parser.add_argument('sanitizer_trace', action='store',
                         type=argparse.FileType('r'),
                         help='File containing sanitizer traces filtered by '
                              'prune_sanitizer_output.py')
     parser.add_argument('symbol_trace', action='store',
                         type=argparse.FileType('r'),
                         help='File containing symbolized traces that match '
                              'sanitizer_trace')
     parser.add_argument('dex_starts', action='store',
                         type=argparse.FileType('r'),
                         help='File containing starting addresses of Dex Files')
     parser.add_argument('categories', action='store', nargs='*',
                         help='Keywords expected to show in large amounts of'
                              ' symbolized traces')

     return parser.parse_args(argv)


 def read_data(parsed_argv):
     """Reads the input files of parsed_argv and parses them into lists.

     Args:
         parsed_argv: namespace with the open files sanitizer_trace,
             symbol_trace and dex_starts and the list categories.

     Returns:
         A tuple (data_lists, categories, symbol_file_split) where
         data_lists is a dict with the keys
           "plot_list": per line containing "use-after-poison", the
               whitespace-separated tokens at positions 1 and 11,
           "cat_list": the category name matched by each symbolized trace,
           "dex_start_list": ascending integer addresses parsed from the
               text after the first "@" of lines containing
               "RegisterDexFile";
         categories is "Uncategorized" followed by the requested keywords;
         symbol_file_split is the symbol file text split on "Stack Trace"
         without the part before the first occurrence.
     """
     # "Uncategorized" comes first so that each trace maps to some category.
     # A new list is built so that parsed_argv.categories is not modified.
     categories = ["Uncategorized"] + list(parsed_argv.categories)

     # The with statement closes all three files even if a read fails.
     with parsed_argv.sanitizer_trace as sanitizer_file, \
             parsed_argv.symbol_trace as symbol_file, \
             parsed_argv.dex_starts as dex_starts_file:
         logcat_file_data = sanitizer_file.readlines()
         symbol_file_split = symbol_file.read().split("Stack Trace")[1:]
         dex_start_file_data = dex_starts_file.readlines()

     # Using a dictionary to establish relation between lists added
     data_lists = {}
     # Each element is a list of time and address accessed
     data_lists["plot_list"] = [
         [token for position, token in enumerate(line.split())
          if position in (1, 11)]
         for line in logcat_file_data
         if "use-after-poison" in line
     ]
     # Contains a mapping between traces and the category they belong to
     # based on arguments
     data_lists["cat_list"] = [categories[find_match(categories, trace)]
                               for trace in symbol_file_split]

     # Starting addresses of all dex files, used to calculate dex offsets.
     # Sorted because they are looked up with bisect, while the input lines
     # are not guaranteed to be in ascending address order.
     data_lists["dex_start_list"] = sorted(
         int(line.split("@")[1], 16)
         for line in dex_start_file_data
         if "RegisterDexFile" in line
     )
     return data_lists, categories, symbol_file_split


 def main(argv=None):
     """Reads the trace files named on the command line and reports on them.

     argv defaults to sys.argv; its first item is skipped as the program
     name. For every category a file "time_output_<index>.dat" is written to
     the output directory with one "time dex_offset #address" line per data
     point of that category, then the per-category statistics are printed.
     """
     if argv is None:
         argv = sys.argv
     parsed_argv = parse_args(argv[1:])

     data_lists, categories, symbol_file_split = read_data(parsed_argv)
     plot_list = data_lists["plot_list"]
     # Turns every plot_list element into a complete data point.
     absolute_to_relative(plot_list, data_lists["dex_start_list"],
                          data_lists["cat_list"])

     for file_ext, cat_name in enumerate(categories):
         out_file_name = os.path.join(parsed_argv.out_dir_name,
                                      "time_output_%d.dat" % file_ext)
         with open(out_file_name, "w") as output_file:
             output_file.write("# Category: %s\n" % cat_name)
             output_file.write("# Time, Dex File Offset, Address \n")
             for access_ms, dex_offset, category, address in plot_list:
                 if category != cat_name:
                     continue
                 output_file.write(
                     "%s %s #%s\n" % (access_ms, dex_offset, address))

     print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)


 # Run the analysis only when executed as a script, not when imported.
 if "__main__" == __name__:
     main()
	#!/usr/bin/env python
	#
	# Copyright (C) 2017 The Android Open Source Project
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Outputs quantitative information about Address Sanitizer traces."""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	from collections import Counter
	from datetime import datetime
	import argparse
	import bisect
	import os
	import sys


	def find_match(list_substrings, big_string):
	    """Returns the index of the first substring found inside big_string.

	    The substrings are tried in list order. When none of them occurs in
	    big_string, the index of the "Uncategorized" entry of list_substrings
	    is returned instead (list.index raises ValueError if it is missing).
	    """
	    for ind, substr in enumerate(list_substrings):
	        if substr in big_string:
	            return ind
	    # No keyword occurs in the trace: fall back to the catch-all entry.
	    return list_substrings.index("Uncategorized")


	def absolute_to_relative(plot_list, dex_start_list, cat_list):
	    """Rewrites plot_list in place with relative times and Dex File offsets.

	    Every element of plot_list starts out as [time_string, address_string]
	    and ends up as [elapsed_ms, dex_offset, category, address_string].

	    Args:
	        plot_list: list of two-item lists; item 0 is a time formatted as
	            "%H:%M:%S.%f" and item 1 is a hexadecimal address string.
	        dex_start_list: integer starting addresses of the Dex Files, in
	            any order.
	        cat_list: category of each data point, indexed like plot_list.

	    Returns:
	        None. An empty plot_list is left untouched.
	    """
	    if not plot_list:
	        # No accesses were recorded; plot_list[0] would raise IndexError.
	        return
	    time_format_str = "%H:%M:%S.%f"
	    first_access_time = datetime.strptime(plot_list[0][0], time_format_str)
	    # bisect requires ascending order, which callers do not guarantee.
	    # Sorting a copy leaves the caller's list untouched.
	    sorted_dex_starts = sorted(dex_start_list)
	    for ind, elem in enumerate(plot_list):
	        elem_date_time = datetime.strptime(elem[0], time_format_str)
	        # Shift time values so that first access is at time 0 milliseconds
	        elapsed = elem_date_time - first_access_time
	        elem[0] = int(elapsed.total_seconds() * 1000)
	        address_access = int(elem[1], 16)
	        # Highest Dex File starting address that is <= address_access.
	        # NOTE: when the address lies below every start the index is -1,
	        # so the highest start is picked and the offset is negative.
	        start_index = bisect.bisect(sorted_dex_starts, address_access) - 1
	        dex_file_start = sorted_dex_starts[start_index]
	        elem.insert(1, address_access - dex_file_start)
	        # Category that a data point belongs to
	        elem.insert(2, cat_list[ind])


	def print_category_info(cat_split, outname, out_dir_name, title):
	    """Prints a summary of the given traces and stores them in a file.

	    The summary printed to stdout holds the title, the number of distinct
	    traces, the total number of traces and a histogram telling how many
	    distinct traces share each occurrence count. Every distinct trace is
	    written, most frequent first, to the file outname inside out_dir_name
	    together with the number of times it appeared.
	    """
	    trace_counts_list_ordered = Counter(cat_split).most_common()
	    counts = [count for _, count in trace_counts_list_ordered]
	    print(53 * "-")
	    print(title)
	    print("\tNumber of distinct traces: " +
	          str(len(trace_counts_list_ordered)))
	    print("\tSum of trace counts: " + str(sum(counts)))
	    print("\n\tCount: How many traces appeared with count\n\t")
	    print(Counter(counts))
	    with open(os.path.join(out_dir_name, outname), "w") as output_file:
	        for trace, count in trace_counts_list_ordered:
	            output_file.write("\n\nNumber of times appeared: " +
	                              str(count) +
	                              "\n")
	            output_file.write(trace.strip())


	def print_categories(categories, symbol_file_split, out_dir_name):
	    """Prints statistics per category keyword and for all traces together.

	    For every keyword in categories[1:] the traces containing it go to
	    "<keyword lowercased>cat_output" and the remaining traces go to
	    "non<keyword lowercased>cat_output". All traces are reported in
	    "allcat_output". When categories has more than one entry, the traces
	    containing none of its entries are reported in "noncat_output". All
	    files are created inside out_dir_name.
	    """
	    # Info of traces containing a call to current category
	    for cat_num, cat_name in enumerate(categories[1:]):
	        print("\nCategory #%d" % (cat_num + 1))
	        cat_split = [trace for trace in symbol_file_split
	                     if cat_name in trace]
	        cat_file_name = cat_name.lower() + "cat_output"
	        print_category_info(cat_split, cat_file_name, out_dir_name,
	                            "Traces containing: " + cat_name)
	        noncat_split = [trace for trace in symbol_file_split
	                        if cat_name not in trace]
	        print_category_info(noncat_split, "non" + cat_file_name,
	                            out_dir_name,
	                            "Traces not containing: " + cat_name)

	    # All traces (including uncategorized) together
	    print_category_info(symbol_file_split, "allcat_output",
	                        out_dir_name,
	                        "All traces together:")
	    # Traces containing none of keywords
	    # Only used if categories are passed in
	    if len(categories) > 1:
	        noncat_split = [trace for trace in symbol_file_split
	                        if all(cat_name not in trace
	                               for cat_name in categories)]
	        print_category_info(noncat_split, "noncat_output",
	                            out_dir_name,
	                            "Uncategorized calls")


	def is_directory(path_name):
	    """Returns path_name unchanged when it names an existing directory.

	    Raises argparse.ArgumentTypeError otherwise, which lets the function
	    serve as the type= callback of an argparse argument.
	    """
	    if not os.path.isdir(path_name):
	        dir_error = "%s is not a directory" % (path_name)
	        raise argparse.ArgumentTypeError(dir_error)
	    return path_name


	def parse_args(argv):
	    """Parses the command line arguments.

	    Args:
	        argv: list of arguments without the program name.

	    Returns:
	        The argparse namespace with out_dir_name (from -d, the current
	        directory when omitted), the files sanitizer_trace, symbol_trace
	        and dex_starts opened for reading, and the list categories.
	    """
	    parser = argparse.ArgumentParser()
	    # argparse passes string defaults through type=, so the former default
	    # "" was rejected by is_directory and made -d mandatory in practice.
	    # os.curdir validates and resolves to the same location as "" does in
	    # os.path.join.
	    parser.add_argument('-d', action='store',
	                        default=os.curdir, dest="out_dir_name",
	                        type=is_directory,
	                        help='Output Directory')
	    parser.add_argument('sanitizer_trace', action='store',
	                        type=argparse.FileType('r'),
	                        help='File containing sanitizer traces filtered by '
	                             'prune_sanitizer_output.py')
	    parser.add_argument('symbol_trace', action='store',
	                        type=argparse.FileType('r'),
	                        help='File containing symbolized traces that match '
	                             'sanitizer_trace')
	    parser.add_argument('dex_starts', action='store',
	                        type=argparse.FileType('r'),
	                        help='File containing starting addresses of Dex Files')
	    parser.add_argument('categories', action='store', nargs='*',
	                        help='Keywords expected to show in large amounts of'
	                             ' symbolized traces')

	    return parser.parse_args(argv)


	def read_data(parsed_argv):
	    """Reads the input files of parsed_argv and parses them into lists.

	    Args:
	        parsed_argv: namespace with the open files sanitizer_trace,
	            symbol_trace and dex_starts and the list categories.

	    Returns:
	        A tuple (data_lists, categories, symbol_file_split) where
	        data_lists is a dict with the keys
	          "plot_list": per line containing "use-after-poison", the
	              whitespace-separated tokens at positions 1 and 11,
	          "cat_list": the category name matched by each symbolized trace,
	          "dex_start_list": ascending integer addresses parsed from the
	              text after the first "@" of lines containing
	              "RegisterDexFile";
	        categories is "Uncategorized" followed by the requested keywords;
	        symbol_file_split is the symbol file text split on "Stack Trace"
	        without the part before the first occurrence.
	    """
	    # "Uncategorized" comes first so that each trace maps to some category.
	    # A new list is built so that parsed_argv.categories is not modified.
	    categories = ["Uncategorized"] + list(parsed_argv.categories)

	    # The with statement closes all three files even if a read fails.
	    with parsed_argv.sanitizer_trace as sanitizer_file, \
	            parsed_argv.symbol_trace as symbol_file, \
	            parsed_argv.dex_starts as dex_starts_file:
	        logcat_file_data = sanitizer_file.readlines()
	        symbol_file_split = symbol_file.read().split("Stack Trace")[1:]
	        dex_start_file_data = dex_starts_file.readlines()

	    # Using a dictionary to establish relation between lists added
	    data_lists = {}
	    # Each element is a list of time and address accessed
	    data_lists["plot_list"] = [
	        [token for position, token in enumerate(line.split())
	         if position in (1, 11)]
	        for line in logcat_file_data
	        if "use-after-poison" in line
	    ]
	    # Contains a mapping between traces and the category they belong to
	    # based on arguments
	    data_lists["cat_list"] = [categories[find_match(categories, trace)]
	                              for trace in symbol_file_split]

	    # Starting addresses of all dex files, used to calculate dex offsets.
	    # Sorted because they are looked up with bisect, while the input lines
	    # are not guaranteed to be in ascending address order.
	    data_lists["dex_start_list"] = sorted(
	        int(line.split("@")[1], 16)
	        for line in dex_start_file_data
	        if "RegisterDexFile" in line
	    )
	    return data_lists, categories, symbol_file_split


	def main(argv=None):
	    """Reads the trace files named on the command line and reports on them.

	    argv defaults to sys.argv; its first item is skipped as the program
	    name. For every category a file "time_output_<index>.dat" is written to
	    the output directory with one "time dex_offset #address" line per data
	    point of that category, then the per-category statistics are printed.
	    """
	    if argv is None:
	        argv = sys.argv
	    parsed_argv = parse_args(argv[1:])

	    data_lists, categories, symbol_file_split = read_data(parsed_argv)
	    # Formats plot_list such that each element is a data point
	    absolute_to_relative(data_lists["plot_list"],
	                         data_lists["dex_start_list"],
	                         data_lists["cat_list"])
	    for file_ext, cat_name in enumerate(categories):
	        out_file_name = os.path.join(parsed_argv.out_dir_name,
	                                     "time_output_" +
	                                     str(file_ext) +
	                                     ".dat")
	        with open(out_file_name, "w") as output_file:
	            output_file.write("# Category: " + cat_name + "\n")
	            output_file.write("# Time, Dex File Offset, Address \n")
	            for access_ms, dex_offset, category, address in \
	                    data_lists["plot_list"]:
	                if category == cat_name:
	                    output_file.write(
	                        str(access_ms) +
	                        " " +
	                        str(dex_offset) +
	                        " #" +
	                        str(address) +
	                        "\n")

	    print_categories(categories, symbol_file_split, parsed_argv.out_dir_name)


	# Run the analysis only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()