#!/usr/bin/env python
#
# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
17from sys import exit
18from typing import List
19from glob import glob
20from pathlib import Path
21from collections import defaultdict
22from difflib import Differ
23from re import split
24from tqdm import tqdm
25import argparse
26
27
# Number of leading characters of every difflib.Differ output line that hold
# the line code (see DifferCodes below).
DIFFER_CODE_LEN = 2


class DifferCodes:
    """Line codes that difflib.Differ.compare() prepends to each output line.

    Every code is exactly DIFFER_CODE_LEN characters long, because callers
    compare these values against ``line[:DIFFER_CODE_LEN]``.
    """
    # Line common to both sequences (two space characters).
    COMMON = '  '
    # Line unique to the first sequence.
    UNIQUE_FIRST = '- '
    # Line unique to the second sequence.
    UNIQUE_SECOND = '+ '
    # Hint line marking intraline differences; not present in either input.
    DIFF_IDENT = '? '
35
36class FilesDiffAnalyzer:
37 def __init__(self, args) -> None:
38 self.out_dir = args.out_dir
39 self.show_diff = args.show_diff
40 self.skip_words = args.skip_words
41 self.first_dir = args.first_dir
42 self.second_dir = args.second_dir
43 self.include_common = args.include_common
44
45 self.first_dir_files = self.get_files(self.first_dir)
46 self.second_dir_files = self.get_files(self.second_dir)
47 self.common_file_map = defaultdict(set)
48
49 self.map_common_files(self.first_dir_files, self.first_dir)
50 self.map_common_files(self.second_dir_files, self.second_dir)
51
52 def get_files(self, dir: str) -> List[str]:
53 """Get all files directory in the input directory including the files in the subdirectories
54
55 Recursively finds all files in the input directory.
56 Returns a list of file directory strings, which do not include directories but only files.
57 List is sorted in alphabetical order of the file directories.
58
59 Args:
60 dir: Directory to get the files. String.
61
62 Returns:
63 A list of file directory strings within the input directory.
64 Sorted in Alphabetical order.
65
66 Raises:
67 FileNotFoundError: An error occurred accessing the non-existing directory
68 """
69
70 if not dir_exists(dir):
71 raise FileNotFoundError("Directory does not exist")
72
73 if dir[:-2] != "**":
74 if dir[:-1] != "/":
75 dir += "/"
76 dir += "**"
77
78 return [file for file in sorted(glob(dir, recursive=True)) if Path(file).is_file()]
79
80 def map_common_files(self, files: List[str], dir: str) -> None:
81 for file in files:
82 file_name = file.split(dir, 1)[-1]
83 self.common_file_map[file_name].add(dir)
84 return
85
86 def compare_file_contents(self, first_file: str, second_file: str) -> List[str]:
87 """Compare the contents of the files and return different lines
88
89 Given two file directory strings, compare the contents of the two files
90 and return the list of file contents string prepended with unique identifier codes.
91 The identifier codes include:
92 - ' '(two empty space characters): Line common to two files
93 - '- '(minus followed by a space) : Line unique to first file
94 - '+ '(plus followed by a space) : Line unique to second file
95
96 Args:
97 first_file: First file directory string to compare the content
98 second_file: Second file directory string to compare the content
99
100 Returns:
101 A list of the file content strings. For example:
102
103 [
104 " Foo",
105 "- Bar",
106 "+ Baz"
107 ]
108 """
109
110 d = Differ()
111 first_file_contents = sort_methods(get_file_contents(first_file))
112 second_file_contents = sort_methods(get_file_contents(second_file))
113 diff = list(d.compare(first_file_contents, second_file_contents))
114 ret = [f"diff {first_file} {second_file}"]
115
116 idx = 0
117 while idx < len(diff):
118 line = diff[idx]
119 line_code = line[:DIFFER_CODE_LEN]
120
121 match line_code:
122 case DifferCodes.COMMON:
123 if self.include_common:
124 ret.append(line)
125
126 case DifferCodes.UNIQUE_FIRST:
127 # Should compare line
128 if (idx < len(diff) - 1 and
129 (next_line_code := diff[idx + 1][:DIFFER_CODE_LEN])
130 not in (DifferCodes.UNIQUE_FIRST, DifferCodes.COMMON)):
131 delta = 1 if next_line_code == DifferCodes.UNIQUE_SECOND else 2
132 line_to_compare = diff[idx + delta]
133 if self.lines_differ(line, line_to_compare):
134 ret.extend([line, line_to_compare])
135 else:
136 if self.include_common:
137 ret.append(DifferCodes.COMMON +
138 line[DIFFER_CODE_LEN:])
139 idx += delta
140 else:
141 ret.append(line)
142
143 case DifferCodes.UNIQUE_SECOND:
144 ret.append(line)
145
146 case DifferCodes.DIFF_IDENT:
147 pass
148 idx += 1
149 return ret
150
151 def lines_differ(self, line1: str, line2: str) -> bool:
152 """Check if the input lines are different or not
153
154 Compare the two lines word by word and check if the two lines are different or not.
155 If the different words in the comparing lines are included in skip_words,
156 the lines are not considered different.
157
158 Args:
159 line1: first line to compare
160 line2: second line to compare
161
162 Returns:
163 Boolean value indicating if the two lines are different or not
164
165 """
166 # Split by '.' or ' '(whitespace)
167 def split_words(line: str) -> List[str]:
168 return split('\\s|\\.', line[DIFFER_CODE_LEN:])
169
170 line1_words, line2_words = split_words(line1), split_words(line2)
171 if len(line1_words) != len(line2_words):
172 return True
173
174 for word1, word2 in zip(line1_words, line2_words):
175 if word1 != word2:
176 # not check if words are equal to skip word, but
177 # check if words contain skip word as substring
178 if all(sw not in word1 and sw not in word2 for sw in self.skip_words):
179 return True
180
181 return False
182
183 def analyze(self) -> None:
184 """Analyze file contents in both directories and write to output or console.
185 """
186 for file in tqdm(sorted(self.common_file_map.keys())):
187 val = self.common_file_map[file]
188
189 # When file exists in both directories
190 lines = list()
191 if val == set([self.first_dir, self.second_dir]):
192 lines = self.compare_file_contents(
193 self.first_dir + file, self.second_dir + file)
194 else:
195 existing_dir, not_existing_dir = (
196 (self.first_dir, self.second_dir) if self.first_dir in val
197 else (self.second_dir, self.first_dir))
198
199 lines = [f"{not_existing_dir}{file} does not exist."]
200
201 if self.show_diff:
202 lines.append(f"Content of {existing_dir}{file}: \n")
203 lines.extend(get_file_contents(existing_dir + file))
204
205 self.write(lines)
206
207 def write(self, lines: List[str]) -> None:
208 if self.out_dir == "":
209 pprint(lines)
210 else:
211 write_lines(self.out_dir, lines)
212
###
# Helper functions
###
216
def sort_methods(lines: List[str]) -> List[str]:
    """Sort the lines of each class body in alphabetical order.

    Given lines of Java file contents, return the lines with every class body
    sorted alphabetically, so that the order of members does not show up as a
    difference. Blank lines and whitespace-only lines are omitted.

    A class starts at the first line containing the text "class" and ends at
    the next line starting with "}". All non-blank lines in between are
    sorted as plain strings. If the input ends before the closing "}" is
    seen, the lines collected so far are still emitted (sorted).

    For example:
        l = [
            'package android.test;',
            '',
            'public static final int ORANGE = 1;',
            '',
            'public class TestClass {',
            'public TestClass() { throw new RuntimeException("Stub!"); }',
            'public void foo() { throw new RuntimeException("Stub!"); }',
            'public void bar() { throw new RuntimeException("Stub!"); }',
            '}'
        ]
    sort_methods(l) returns
        [
            'package android.test;',
            'public static final int ORANGE = 1;',
            'public class TestClass {',
            'public TestClass() { throw new RuntimeException("Stub!"); }',
            'public void bar() { throw new RuntimeException("Stub!"); }',
            'public void foo() { throw new RuntimeException("Stub!"); }',
            '}'
        ]

    Args:
        lines: List of strings consisting of Java file contents.

    Returns:
        A new list of strings with sorted class bodies.
    """
    def is_not_blank(l: str) -> bool:
        return bool(l) and not l.isspace()

    ret = []
    in_class = False
    buffer = []
    for line in lines:
        if not in_class:
            if "class" in line:
                in_class = True
                ret.append(line)
            elif is_not_blank(line):
                # Static variables, package info, etc. are kept in order.
                ret.append(line)
        elif line.startswith("}"):
            # End of class: emit the collected body in sorted order.
            in_class = False
            ret.extend(sorted(buffer))
            buffer = []
            ret.append(line)
        elif is_not_blank(line):
            buffer.append(line)

    # A class that is never closed must not lose its body lines.
    ret.extend(sorted(buffer))

    return ret
281
def get_file_contents(file_path: str) -> List[str]:
    """Read a text file and return its lines without trailing newlines.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list with one string per line of the file, in file order.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(file_path) as f:
        return [line.rstrip('\n') for line in f]
288
def pprint(l: List[str]) -> None:
    """Print every string of the list on its own line.

    Args:
        l: Lines to print to standard output. Nothing is printed for an
            empty list.
    """
    # Build the whole output once; each entry is terminated by a newline,
    # so no extra line ending is needed.
    print("".join(f"{entry}\n" for entry in l), end="")
292
def write_lines(out_dir: str, lines: List[str]) -> None:
    """Append the lines to a file, followed by one empty separator line.

    Args:
        out_dir: Path of the file to append to. Despite its name this is a
            file path, not a directory; the file is opened in append mode.
        lines: Lines to write. A newline is added after each of them.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(out_dir, "a") as f:
        f.writelines(line + '\n' for line in lines)
        f.write("\n")
298
def dir_exists(dir: str) -> bool:
    """Return True if ``dir`` is the path of an existing directory.

    A path that exists but refers to a regular file is rejected, because
    such a path cannot contain any files to compare.

    Args:
        dir: Path to check.

    Returns:
        True if the path exists and is a directory, False otherwise.
    """
    return Path(dir).is_dir()
301
if __name__ == '__main__':
    # Command line entry point: parse the two directories and the optional
    # flags, then run the analysis.
    parser = argparse.ArgumentParser()
    parser.add_argument('first_dir', action='store', type=str,
                        help="first path to compare file directory and contents")
    parser.add_argument('second_dir', action='store', type=str,
                        help="second path to compare file directory and contents")
    # The value is opened as a file in append mode, so describe it as a file.
    parser.add_argument('--out', dest='out_dir',
                        action='store', default="", type=str,
                        help="optional file path to append the log to. "
                             "If not set, will print to console")
    parser.add_argument('--show-diff-file', dest='show_diff',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the content of the file unique to each directories")
    # Implicit string concatenation keeps source indentation out of the text.
    parser.add_argument('--include-common', dest='include_common',
                        action=argparse.BooleanOptionalAction,
                        help="optional flag. If passed, will print out the contents common to both files as well, "
                             "instead of printing only diff lines.")
    parser.add_argument('--skip-words', nargs='+',
                        dest='skip_words', default=[], help="optional words to skip in comparison")

    args = parser.parse_args()

    # Both positionals are required by argparse, so this only triggers for
    # explicitly empty strings. That is a usage error: report it and exit
    # with a non-zero status instead of signalling success.
    if not args.first_dir or not args.second_dir:
        parser.error("first_dir and second_dir must be non-empty paths")

    analyzer = FilesDiffAnalyzer(args)
    analyzer.analyze()