AAPT2: Fix pseudolocalization to respect <xliff:g>
The XLIFF 'g' tag specifies content that should NOT be translated.
AAPT2's pseudolocalization process should respect it.
Bug: 34064599
Test: make libandroidfw_tests
Change-Id: Ice437d7f0ff246730ee04896fd035e2d846148fb
diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp
index 1c750c6..47ca266 100644
--- a/tools/aapt2/ResourceParser.cpp
+++ b/tools/aapt2/ResourceParser.cpp
@@ -26,6 +26,7 @@
#include "ResourceValues.h"
#include "ValueVisitor.h"
#include "util/ImmutableMap.h"
+#include "util/Maybe.h"
#include "util/Util.h"
#include "xml/XmlPullParser.h"
@@ -150,82 +151,108 @@
/**
* Build a string from XML that converts nested elements into Span objects.
*/
-bool ResourceParser::FlattenXmlSubtree(xml::XmlPullParser* parser,
- std::string* out_raw_string,
- StyleString* out_style_string) {
+bool ResourceParser::FlattenXmlSubtree(
+ xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string,
+ std::vector<UntranslatableSection>* out_untranslatable_sections) {
+ // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply.
std::vector<Span> span_stack;
- bool error = false;
+ // Clear the output variables.
out_raw_string->clear();
out_style_string->spans.clear();
+ out_untranslatable_sections->clear();
+
+ // The StringBuilder will concatenate the various segments of text which are initially
+ // separated by tags. It also handles unicode escape codes and quotations.
util::StringBuilder builder;
+
+ // The depth of the currently open <xliff:g> tag, if any. Nested <xliff:g> tags are illegal.
+ Maybe<size_t> untranslatable_start_depth;
+
size_t depth = 1;
while (xml::XmlPullParser::IsGoodEvent(parser->Next())) {
const xml::XmlPullParser::Event event = parser->event();
- if (event == xml::XmlPullParser::Event::kEndElement) {
- if (!parser->element_namespace().empty()) {
- // We already warned and skipped the start element, so just skip here
- // too
- continue;
+
+ if (event == xml::XmlPullParser::Event::kStartElement) {
+ if (parser->element_namespace().empty()) {
+ // This is an HTML tag which we encode as a span. Add it to the span stack.
+ std::string span_name = parser->element_name();
+ const auto end_attr_iter = parser->end_attributes();
+ for (auto attr_iter = parser->begin_attributes(); attr_iter != end_attr_iter; ++attr_iter) {
+ span_name += ";";
+ span_name += attr_iter->name;
+ span_name += "=";
+ span_name += attr_iter->value;
+ }
+
+ // Make sure the string is representable in our binary format.
+ if (builder.Utf16Len() > std::numeric_limits<uint32_t>::max()) {
+ diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
+ << "style string '" << builder.ToString() << "' is too long");
+ return false;
+ }
+
+ span_stack.push_back(Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())});
+ } else if (parser->element_namespace() == sXliffNamespaceUri) {
+ if (parser->element_name() == "g") {
+ if (untranslatable_start_depth) {
+ // We've already encountered an <xliff:g> tag, and nested <xliff:g> tags are illegal.
+ diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
+ << "illegal nested XLIFF 'g' tag");
+ return false;
+ } else {
+ // Mark the start of an untranslatable section. Use UTF8 indices/lengths.
+ untranslatable_start_depth = depth;
+ const size_t current_idx = builder.ToString().size();
+ out_untranslatable_sections->push_back(UntranslatableSection{current_idx, current_idx});
+ }
+ }
+ // Ignore other xliff tags; they are handled by other tools.
+
+ } else {
+ // Besides XLIFF, any other namespaced tag is unsupported and ignored.
+ diag_->Warn(DiagMessage(source_.WithLine(parser->line_number()))
+ << "ignoring element '" << parser->element_name()
+ << "' with unknown namespace '" << parser->element_namespace() << "'");
}
+ // Enter one level inside the element.
+ depth++;
+ } else if (event == xml::XmlPullParser::Event::kText) {
+ // Record both the raw text and append to the builder to deal with escape sequences
+ // and quotations.
+ out_raw_string->append(parser->text());
+ builder.Append(parser->text());
+ } else if (event == xml::XmlPullParser::Event::kEndElement) {
+ // Return one level from within the element.
depth--;
if (depth == 0) {
break;
}
- span_stack.back().last_char = builder.Utf16Len() - 1;
- out_style_string->spans.push_back(span_stack.back());
- span_stack.pop_back();
-
- } else if (event == xml::XmlPullParser::Event::kText) {
- out_raw_string->append(parser->text());
- builder.Append(parser->text());
-
- } else if (event == xml::XmlPullParser::Event::kStartElement) {
- if (!parser->element_namespace().empty()) {
- if (parser->element_namespace() != sXliffNamespaceUri) {
- // Only warn if this isn't an xliff namespace.
- diag_->Warn(DiagMessage(source_.WithLine(parser->line_number()))
- << "skipping element '" << parser->element_name()
- << "' with unknown namespace '"
- << parser->element_namespace() << "'");
- }
- continue;
+ if (parser->element_namespace().empty()) {
+ // This is an HTML tag which we encode as a span. Update the span
+ // stack and pop the top entry.
+ Span& top_span = span_stack.back();
+ top_span.last_char = builder.Utf16Len() - 1;
+ out_style_string->spans.push_back(std::move(top_span));
+ span_stack.pop_back();
+ } else if (untranslatable_start_depth == make_value(depth)) {
+ // This is the end of an untranslatable section. Use UTF8 indices/lengths.
+ UntranslatableSection& untranslatable_section = out_untranslatable_sections->back();
+ untranslatable_section.end = builder.ToString().size();
+ untranslatable_start_depth = {};
}
- depth++;
-
- // Build a span object out of the nested element.
- std::string span_name = parser->element_name();
- const auto end_attr_iter = parser->end_attributes();
- for (auto attr_iter = parser->begin_attributes();
- attr_iter != end_attr_iter; ++attr_iter) {
- span_name += ";";
- span_name += attr_iter->name;
- span_name += "=";
- span_name += attr_iter->value;
- }
-
- if (builder.Utf16Len() > std::numeric_limits<uint32_t>::max()) {
- diag_->Error(DiagMessage(source_.WithLine(parser->line_number()))
- << "style string '" << builder.ToString()
- << "' is too long");
- error = true;
- } else {
- span_stack.push_back(
- Span{span_name, static_cast<uint32_t>(builder.Utf16Len())});
- }
-
} else if (event == xml::XmlPullParser::Event::kComment) {
- // Skip
+ // Ignore.
} else {
LOG(FATAL) << "unhandled XML event";
}
}
- CHECK(span_stack.empty()) << "spans haven't been fully processed";
+ CHECK(span_stack.empty()) << "spans haven't been fully processed";
out_style_string->str = builder.ToString();
- return !error;
+ return true;
}
bool ResourceParser::Parse(xml::XmlPullParser* parser) {
@@ -548,15 +575,18 @@
std::string raw_value;
StyleString style_string;
- if (!FlattenXmlSubtree(parser, &raw_value, &style_string)) {
+ std::vector<UntranslatableSection> untranslatable_sections;
+ if (!FlattenXmlSubtree(parser, &raw_value, &style_string, &untranslatable_sections)) {
return {};
}
if (!style_string.spans.empty()) {
// This can only be a StyledString.
- return util::make_unique<StyledString>(table_->string_pool.MakeRef(
- style_string,
- StringPool::Context(StringPool::Context::kStylePriority, config_)));
+ std::unique_ptr<StyledString> styled_string =
+ util::make_unique<StyledString>(table_->string_pool.MakeRef(
+ style_string, StringPool::Context(StringPool::Context::kStylePriority, config_)));
+ styled_string->untranslatable_sections = std::move(untranslatable_sections);
+ return std::move(styled_string);
}
auto on_create_reference = [&](const ResourceName& name) {
@@ -582,8 +612,10 @@
// Try making a regular string.
if (type_mask & android::ResTable_map::TYPE_STRING) {
// Use the trimmed, escaped string.
- return util::make_unique<String>(table_->string_pool.MakeRef(
- style_string.str, StringPool::Context(config_)));
+ std::unique_ptr<String> string = util::make_unique<String>(
+ table_->string_pool.MakeRef(style_string.str, StringPool::Context(config_)));
+ string->untranslatable_sections = std::move(untranslatable_sections);
+ return std::move(string);
}
if (allow_raw_value) {
@@ -609,17 +641,15 @@
formatted = maybe_formatted.value();
}
- bool translateable = options_.translatable;
- if (Maybe<StringPiece> translateable_attr =
- xml::FindAttribute(parser, "translatable")) {
- Maybe<bool> maybe_translateable =
- ResourceUtils::ParseBool(translateable_attr.value());
- if (!maybe_translateable) {
+ bool translatable = options_.translatable;
+ if (Maybe<StringPiece> translatable_attr = xml::FindAttribute(parser, "translatable")) {
+ Maybe<bool> maybe_translatable = ResourceUtils::ParseBool(translatable_attr.value());
+ if (!maybe_translatable) {
diag_->Error(DiagMessage(out_resource->source)
<< "invalid value for 'translatable'. Must be a boolean");
return false;
}
- translateable = maybe_translateable.value();
+ translatable = maybe_translatable.value();
}
out_resource->value =
@@ -630,9 +660,9 @@
}
if (String* string_value = ValueCast<String>(out_resource->value.get())) {
- string_value->SetTranslateable(translateable);
+ string_value->SetTranslatable(translatable);
- if (formatted && translateable) {
+ if (formatted && translatable) {
if (!util::VerifyJavaStringFormat(*string_value->value)) {
DiagMessage msg(out_resource->source);
msg << "multiple substitutions specified in non-positional format; "
@@ -646,9 +676,8 @@
}
}
- } else if (StyledString* string_value =
- ValueCast<StyledString>(out_resource->value.get())) {
- string_value->SetTranslateable(translateable);
+ } else if (StyledString* string_value = ValueCast<StyledString>(out_resource->value.get())) {
+ string_value->SetTranslatable(translatable);
}
return true;
}
@@ -1151,19 +1180,17 @@
std::unique_ptr<Array> array = util::make_unique<Array>();
- bool translateable = options_.translatable;
- if (Maybe<StringPiece> translateable_attr =
- xml::FindAttribute(parser, "translatable")) {
- Maybe<bool> maybe_translateable =
- ResourceUtils::ParseBool(translateable_attr.value());
- if (!maybe_translateable) {
+ bool translatable = options_.translatable;
+ if (Maybe<StringPiece> translatable_attr = xml::FindAttribute(parser, "translatable")) {
+ Maybe<bool> maybe_translatable = ResourceUtils::ParseBool(translatable_attr.value());
+ if (!maybe_translatable) {
diag_->Error(DiagMessage(out_resource->source)
<< "invalid value for 'translatable'. Must be a boolean");
return false;
}
- translateable = maybe_translateable.value();
+ translatable = maybe_translatable.value();
}
- array->SetTranslateable(translateable);
+ array->SetTranslatable(translatable);
bool error = false;
const size_t depth = parser->depth();