| #!/usr/bin/env python3 |
| #-*- coding: utf-8 -*- |
| |
| # Copyright (C) 2018 The Android Open Source Project |
| # |
| # Licensed under the Apache License, Version 2.0 (the 'License'); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an 'AS IS' BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """ |
| Enforces common Android string best-practices. It ignores lint messages from |
| a previous strings file, if provided. |
| |
| Usage: stringslint.py strings.xml |
| Usage: stringslint.py strings.xml old_strings.xml |
| |
| In general: |
| * Errors signal issues that must be fixed before submitting, and are only |
| used when there are no false-positives. |
| * Warnings signal issues that might need to be fixed, but need manual |
| inspection due to risk of false-positives. |
| * Info signal issues that should be fixed to match best-practices, such |
| as providing comments to aid translation. |
| """ |
| |
| import re, sys, codecs |
| import lxml.etree as ET |
| |
# Colour indices 0-7; format() appends them to "3"/"4"/"10" to build ANSI codes.
(BLACK, RED, GREEN, YELLOW,
 BLUE, MAGENTA, CYAN, WHITE) = range(8)
| |
def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
    """Build an ANSI escape sequence for the requested text attributes.

    Codes manually derived from
    http://en.wikipedia.org/wiki/ANSI_escape_code#Codes

    fg, bg: colour index for foreground / background, or None to emit no
        colour code for that layer.
    bright: select the "10N" background code instead of "4N"; it has no
        effect on the foreground.
    bold, dim: intensity; bold wins when both are set, and "22" is emitted
        when neither is set.
    reset: emit only the reset code "0" and ignore every other argument.

    Returns the escape sequence as a string.
    """
    if reset:
        parts = ["0"]
    else:
        parts = []
        if fg is not None:
            parts.append("3%d" % fg)
        if bg is not None:
            parts.append(("10%d" if bright else "4%d") % bg)
        if bold:
            intensity = "1"
        elif dim:
            intensity = "2"
        else:
            intensity = "22"
        parts.append(intensity)
    return "\033[%sm" % ";".join(parts)
| |
# Messages recorded by the current lint() run, keyed by
# "<string name>:<message>". lint() replaces this with a fresh dict.
warnings = None


def warn(tag, msg, actual, expected, color=YELLOW):
    """Record a lint message for a string element in the global ``warnings``.

    tag: element whose ``name`` attribute and ``sourceline`` identify the
        string in the report.
    msg: description of the issue.
    actual: offending text, appended as an "Actual:" line unless None.
    expected: sample of the preferred form, appended as an "Example:" line
        unless None.
    color: colour index used for the message header.

    A later call with the same string name and message overwrites the
    earlier entry.
    """
    name = tag.attrib["name"]
    # Key on the message text itself, not hash(msg): str hashes are
    # randomised per process, which made the sorted report order change
    # from run to run and allowed distinct messages to collide.
    key = "%s:%s" % (name, msg)
    value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                        tag.sourceline,
                                        name,
                                        format(reset=True),
                                        msg)
    if actual is not None:
        value += "\n\tActual: %s%s%s" % (format(dim=True),
                                         actual,
                                         format(reset=True))
    if expected is not None:
        value += "\n\tExample: %s%s%s" % (format(dim=True),
                                          expected,
                                          format(reset=True))
    warnings[key] = value
| |
| |
def error(tag, msg, actual, expected):
    """Record a lint message via warn(), highlighted with the RED colour."""
    warn(tag, msg, actual, expected, color=RED)
| |
def info(tag, msg, actual, expected):
    """Record a lint message via warn(), highlighted with the CYAN colour."""
    warn(tag, msg, actual, expected, color=CYAN)
| |
# Escaping logic borrowed from https://stackoverflow.com/a/24519338
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U........      # 8-digit hex escapes
    | \\u....          # 4-digit hex escapes
    | \\x..            # 2-digit hex escapes
    | \\[0-7]{1,3}     # Octal escapes
    | \\N\{[^}]+\}     # Unicode characters by name
    | \\[\\'"abfnrtv]  # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(s):
    """Return *s* roughly as it would be displayed, for length estimation.

    Steps, in order:
      * a newline plus any following whitespace collapses to one space;
      * backslash escapes matched by ESCAPE_SEQUENCE_RE are decoded;
      * format substitutions (``%s``, ``%1$d``, ...) and ``^1``-style
        placeholders become ``____``;
      * ``<br>`` / ``<br/>`` becomes a newline and other simple lowercase
        tags are removed.

    Escape sequences that match the pattern but are not valid (for example
    ``\\uZZZZ`` or an unknown ``\\N{...}`` name) are left untouched instead of
    raising UnicodeDecodeError.
    """
    def decode_match(match):
        escape = match.group(0)
        try:
            return codecs.decode(escape, 'unicode-escape')
        except UnicodeDecodeError:
            # The pattern accepts any characters after \u, \U and \x, so a
            # match is not guaranteed to be decodable; keep it verbatim.
            return escape

    s = re.sub(r"\n\s*", " ", s)
    s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
    s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
    s = re.sub(r"\^\d+", "____", s)
    s = re.sub(r"<br/?>", "\n", s)
    s = re.sub(r"</?[a-z]+>", "", s)
    return s
| |
def sample_iter(tag):
    """Yield the text fragments that make up a sample rendering of *tag*.

    A non-comment node whose tag matches the xliff ``g`` pattern and that
    carries an ``example`` attribute contributes that example; any other node
    contributes its own text (if any) passed through decode_escapes(). In
    both cases every child is then visited recursively, followed by the
    child's decoded tail text when present.
    """
    is_comment = isinstance(tag, ET._Comment)
    if not is_comment and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib:
        yield tag.attrib["example"]
    elif tag.text:
        yield decode_escapes(tag.text)

    for sub in tag:
        yield from sample_iter(sub)
        trailing = sub.tail
        if trailing:
            yield decode_escapes(trailing)
| |
def lint(path):
    """Lint the string resources XML file at *path*.

    Replaces the module-level ``warnings`` dict with a fresh one, inspects
    every <string> element that is a direct child of the document root, and
    returns the dict of messages recorded through warn()/error()/info().

    A comment node immediately preceding a <string> is treated as its
    translator comment. Strings without a comment get an info message and no
    further checks; strings whose comment contains "do not translate" or that
    set translatable="false" are skipped.

    An empty or whitespace-only file yields an empty dict. XML parse errors
    and I/O errors propagate to the caller.
    """
    global warnings
    warnings = {}

    # Read raw bytes so that the XML parser, not the platform's default text
    # encoding, decides how the document is decoded.
    with open(path, "rb") as f:
        raw = f.read()
    if not raw.strip():
        return warnings
    root = ET.fromstring(raw)

    last_comment = None
    for child in root:
        # TODO: handle plurals
        if isinstance(child, ET._Comment):
            last_comment = child
            continue
        if child.tag != "string":
            continue

        # We always consume comment
        comment = last_comment
        last_comment = None

        # Prepare string for analysis
        text = "".join(child.itertext())
        sample = "".join(sample_iter(child)).strip().strip("'\"")

        # Validate comment
        if comment is None:
            info(child, "Missing string comment to aid translation",
                 None, None)
            continue
        if "do not translate" in comment.text.lower():
            continue
        if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false":
            continue

        misspelled_attributes = [
            ("translateable", "translatable"),
        ]
        for misspelling, expected in misspelled_attributes:
            if misspelling in child.attrib:
                error(child, "Misspelled <string> attribute.", misspelling, expected)

        limit_match = re.search(r"CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
        if limit_match is None:
            info(child, "Missing CHAR LIMIT to aid translation",
                 repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
        elif re.match(r"\d+", limit_match.group(1)):
            limit = int(limit_match.group(1))
            if len(sample) > limit:
                warn(child, "Expanded string length is larger than CHAR LIMIT",
                     sample, None)

        # Look for common mistakes/substitutions
        if "'" in text:
            error(child, "Turned quotation mark glyphs are more polished",
                  text, "This doesn\u2019t need to \u2018happen\u2019 today")
        # Straight double quotes are only acceptable when they wrap the whole
        # string; anything else (including quotes mid-sentence, as in the
        # example) should use turned glyphs.
        if '"' in text and not (text.startswith('"') and text.endswith('"')):
            error(child, "Turned quotation mark glyphs are more polished",
                  text, "This needs to \u201chappen\u201d today")
        if "..." in text:
            error(child, "Ellipsis glyph is more polished",
                  text, "Loading\u2026")
        if "wi-fi" in text.lower():
            error(child, "Non-breaking glyph is more polished",
                  text, "Wi\u2011Fi")
        if "wifi" in text.lower():
            error(child, "Using non-standard spelling",
                  text, "Wi\u2011Fi")
        if re.search(r"\d-\d", text):
            warn(child, "Ranges should use en dash glyph",
                 text, "You will find this material in chapters 8\u201312")
        if "--" in text:
            warn(child, "Phrases should use em dash glyph",
                 text, "Upon discovering errors\u2014all 124 of them\u2014they recalled.")
        # Two or more spaces after a period; a single space is the correct
        # form and must not be flagged.
        if re.search(r"\. {2,}", text):
            warn(child, "Only use single space between sentences",
                 text, "First idea. Second idea.")
        if re.match(r"^[A-Z\s]{5,}$", text):
            warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
                 text, "Refresh data")
        if " phone " in text and "product" not in child.attrib:
            warn(child, "Strings mentioning phones should have variants for tablets",
                 text, None)

        # When more than one substitution, require indexes
        if len(re.findall("%[^%]", text)) > 1 and re.search(r"%[^\d]", text):
            error(child, "Substitutions must be indexed",
                  text, "Add %1$s to %2$s")

        # Require xliff substitutions
        for gc in child.iter():
            # Comments and processing instructions nested in the string have
            # a non-string ``tag``; they cannot be xliff tags and their own
            # text is not displayed, but their tail text still is.
            is_element = isinstance(gc.tag, str)
            badsub = bool(gc.tail and re.search("%[^%]", gc.tail))
            if is_element and re.match("{.*xliff.*}g", gc.tag):
                if "id" not in gc.attrib:
                    error(child, "Substitutions must define id attribute",
                          None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
                if "example" not in gc.attrib:
                    error(child, "Substitutions must define example attribute",
                          None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")
            elif is_element and gc.text and re.search("%[^%]", gc.text):
                badsub = True
            if badsub:
                error(child, "Substitutions must be inside xliff tags",
                      text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>")

    return warnings
| |
def main(argv):
    """Command-line entry point.

    argv: argument list in sys.argv form: program name, the strings file to
        lint, and optionally a previous strings file whose messages are
        ignored.

    Prints every message for the strings file that was not also produced for
    the previous file, each followed by a blank line, in sorted key order.

    Returns the process exit status: 0 when nothing is reported, 1 when at
    least one message is printed, 2 when the strings file argument is missing.
    """
    if len(argv) < 2:
        sys.stderr.write("Usage: stringslint.py strings.xml [old_strings.xml]\n")
        return 2

    before = lint(argv[2]) if len(argv) > 2 else {}
    after = lint(argv[1])

    # Drop anything the previous file already triggered.
    remaining = {key: msg for key, msg in after.items() if key not in before}

    for key in sorted(remaining):
        print(remaining[key])
        print()
    return 1 if remaining else 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))