Fix IllegalArgumentException when formatting time in Burmese with DateTimeFormatter
The example locale is my-MM.
The fix only removes symbol 'b'/'B' when 24-hour 'H' exists. It prevents
causing more bugs when removing the symbol blindly, e.g. 'b'/'B' with
12-hour 'h'.
Add CTS to make sure that no known crash for all locales. Add an
exhaustive CTS in DateTimeFormatterTest for every available
combination of (locale, calendar type, date style, time style),
where locales are generated from the Locale.getAvailableLocales().
Bug: 175349773
Test: atest CtsLibcoreTestCases:libcore.java.time.format.DateTimeFormatterTest
Change-Id: I30d56069721f785c9f6682a743f83a58ce4127e8
diff --git a/luni/src/main/java/libcore/icu/ICU.java b/luni/src/main/java/libcore/icu/ICU.java
index 4996431..46d2b0c 100644
--- a/luni/src/main/java/libcore/icu/ICU.java
+++ b/luni/src/main/java/libcore/icu/ICU.java
@@ -373,6 +373,96 @@
}
/**
+ * {@link java.time.format.DateTimeFormatter} does not handle some date symbols, e.g. 'B' / 'b',
+ * and thus we use a heuristic algorithm to remove the symbol. See http://b/174804526.
+ * See {@link #transformIcuDateTimePattern(String)} for documentation about the implementation.
+ */
+ public static String transformIcuDateTimePattern_forJavaTime(String pattern) {
+ return transformIcuDateTimePattern(pattern);
+ }
+
+ /**
+ * Rewrite the date/time pattern coming ICU to be consumed by libcore classes.
+ * It's an ideal place to rewrite the pattern entirely when multiple symbols not digested
+ * by libcore need to be removed/processed. Rewriting in single place could be more efficient
+ * in a small or constant number of scans instead of scanning for every symbol.
+ *
+ * {@link LocaleData#initLocaleData(Locale)} also rewrites time format, but only a subset of
+ * patterns. In the future, that should migrate to this function in order to handle the symbols
+ * in one place, but now separate because java.text and java.time handles different sets of
+ * symbols.
+ */
+ private static String transformIcuDateTimePattern(String pattern) {
+ if (pattern == null) {
+ return null;
+ }
+
+ // For details about the different symbols, see
+ // http://cldr.unicode.org/translation/date-time-1/date-time-patterns#TOC-Day-period-patterns
+ // The symbols B means "Day periods with locale-specific ranges".
+ // English example: 2:00 at night, 10:00 in the morning, 12:00 in the afternoon.
+ boolean contains_B = pattern.indexOf('B') != -1;
+ // AM, PM, noon and midnight. English example: 10:00 AM, 12:00 noon, 7:00 PM
+ boolean contains_b = pattern.indexOf('b') != -1;
+
+ // Simply remove the symbol 'B' and 'b' if 24-hour 'H' exists because the 24-hour format
+ // provides enough information and the day periods are optional. See http://b/174804526.
+ // Don't handle symbol 'B'/'b' with 12-hour 'h' because it's much more complicated because
+ // we likely need to replace 'B'/'b' with 'a' inserted into a new right position or use other
+ // ways.
+ boolean remove_B_and_b = (contains_B || contains_b) && (pattern.indexOf('H') != -1);
+
+ if (remove_B_and_b) {
+ pattern = rewriteIcuDateTimePattern(pattern);
+ }
+ return pattern;
+ }
+
+ /**
+ * Rewrite pattern with heuristics. It's known to
+ * - Remove 'b' and 'B' from simple patterns, e.g. "B H:mm" and "dd-MM-yy B HH:mm:ss" only.
+ * - (Append the new heuristics)
+ */
+ private static String rewriteIcuDateTimePattern(String pattern) {
+ // The below implementation can likely be replaced by a regular expression via
+ // String.replaceAll(). However, it's known that libcore's regex implementation is more
+ // memory-intensive, and the below implementation is likely cheaper, but it's not yet measured.
+ StringBuilder sb = new StringBuilder(pattern.length());
+ char prev = ' '; // the initial value is not used.
+ for (int i = 0; i < pattern.length(); i++) {
+ char curr = pattern.charAt(i);
+ switch(curr) {
+ case 'B':
+ case 'b':
+ // Ignore 'B' and 'b'
+ break;
+ case ' ': // Ascii whitespace
+ // caveat: Ideally it's a case for all Unicode whitespaces by UCharacter.isUWhiteSpace(c)
+ // but checking ascii whitespace only is enough for the CLDR data when this is written.
+ if (i != 0 && (prev == 'B' || prev == 'b')) {
+ // Ignore the whitespace behind the symbol 'B'/'b' because it's likely a whitespace to
+ // separate the day period with the next text.
+ } else {
+ sb.append(curr);
+ }
+ break;
+ default:
+ sb.append(curr);
+ break;
+ }
+ prev = curr;
+ }
+
+ // Remove the trailing whitespace which is likely following the symbol 'B'/'b' in the original
+ // pattern, e.g. "hh:mm B" (12:00 in the afternoon).
+ int lastIndex = sb.length() - 1;
+ if (lastIndex >= 0 && sb.charAt(lastIndex) == ' ') {
+ sb.deleteCharAt(lastIndex);
+ }
+ return sb.toString();
+ }
+
+ /**
* Returns the version of the CLDR data in use, such as "22.1.1".
*
*/
diff --git a/luni/src/test/java/libcore/java/time/format/DateTimeFormatterTest.java b/luni/src/test/java/libcore/java/time/format/DateTimeFormatterTest.java
index 3e29594..7ab33f7 100644
--- a/luni/src/test/java/libcore/java/time/format/DateTimeFormatterTest.java
+++ b/luni/src/test/java/libcore/java/time/format/DateTimeFormatterTest.java
@@ -19,10 +19,15 @@
import java.time.Instant;
import java.time.ZoneId;
+import java.time.ZoneOffset;
import java.time.ZonedDateTime;
+import java.time.chrono.Chronology;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DecimalStyle;
+import java.time.format.FormatStyle;
+import java.time.temporal.ChronoField;
+import java.time.temporal.ChronoUnit;
import java.time.temporal.TemporalAccessor;
import java.util.Locale;
@@ -86,4 +91,86 @@
private static String formatWithPattern(Locale l, String pattern, TemporalAccessor datetime) {
return DateTimeFormatter.ofPattern(pattern, l).format(datetime);
}
+
+ // 1 January 2022 00:00:00 GMT+00:00
+ private static final Instant TEST_INSTANT = Instant.ofEpochSecond(1640995200L);
+
+ // Regression test for http://b/174804526 when DateTimeFormatter fetches symbol 'B' from ICU.
+ @Test
+ public void test_format_locale_my_MM() {
+ DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofLocalizedTime(FormatStyle.SHORT)
+ .withLocale(new Locale("my", "MM"))
+ .withZone(ZoneOffset.UTC);
+ assertEquals("0:00", dateTimeFormatter.format(TEST_INSTANT));
+ TemporalAccessor accessor = dateTimeFormatter.parse("23:59");
+ assertEquals(23, accessor.getLong(ChronoField.HOUR_OF_DAY));
+ assertEquals(59, accessor.getLong(ChronoField.MINUTE_OF_HOUR));
+ }
+
+
+ /**
+ * Test {@link DateTimeFormatter#format(TemporalAccessor)} does not crash on available locales.
+ */
+ @Test
+ public void test_format_allLocales() {
+ for (Locale locale : Locale.getAvailableLocales()) {
+ for (FormatStyle formatStyle : FormatStyle.values()) {
+ try {
+ DateTimeFormatter.ofLocalizedTime(formatStyle)
+ .withLocale(locale)
+ .withZone(ZoneOffset.UTC)
+ .format(TEST_INSTANT);
+
+ DateTimeFormatter.ofLocalizedDate(formatStyle)
+ .withLocale(locale)
+ .withZone(ZoneOffset.UTC)
+ .format(TEST_INSTANT);
+
+ DateTimeFormatter.ofLocalizedDateTime(formatStyle)
+ .withLocale(locale)
+ .withZone(ZoneOffset.UTC)
+ .format(TEST_INSTANT);
+ } catch (RuntimeException cause) {
+ throw new RuntimeException("locale:" + locale +
+ " formatStyle:" + formatStyle.name(), cause);
+ }
+ }
+ }
+ }
+
+ /**
+ * Test {@link DateTimeFormatter#format(TemporalAccessor)} does not crash on available locales
+ * with all possible Chronologies.
+ */
+ @Test
+ public void test_format_allLocales_allChronologies() {
+ for (Locale locale : Locale.getAvailableLocales()) {
+ for (Chronology chronology : Chronology.getAvailableChronologies()) {
+ for (FormatStyle formatStyle : FormatStyle.values()) {
+ try {
+ DateTimeFormatter.ofLocalizedTime(formatStyle)
+ .withLocale(locale)
+ .withChronology(chronology)
+ .withZone(ZoneOffset.UTC)
+ .format(TEST_INSTANT);
+
+ DateTimeFormatter.ofLocalizedDate(formatStyle)
+ .withLocale(locale)
+ .withChronology(chronology)
+ .withZone(ZoneOffset.UTC)
+ .format(TEST_INSTANT);
+
+ DateTimeFormatter.ofLocalizedDateTime(formatStyle)
+ .withLocale(locale)
+ .withChronology(chronology)
+ .withZone(ZoneOffset.UTC)
+ .format(TEST_INSTANT);
+ } catch (RuntimeException cause) {
+ throw new RuntimeException("locale:" + locale +
+ " formatStyle:" + formatStyle.name(), cause);
+ }
+ }
+ }
+ }
+ }
}
diff --git a/luni/src/test/java/libcore/libcore/icu/ICUTest.java b/luni/src/test/java/libcore/libcore/icu/ICUTest.java
index e98486d..a5577a5 100644
--- a/luni/src/test/java/libcore/libcore/icu/ICUTest.java
+++ b/luni/src/test/java/libcore/libcore/icu/ICUTest.java
@@ -237,6 +237,32 @@
assertTrue(c.compare("AF", "af") < 0);
}
+ public void testTransformIcuDateTimePattern_forJavaTime() {
+ // Example patterns coming from locale my-MM
+ assertTransformIcuDateTimePattern("B H:mm", "H:mm");
+ assertTransformIcuDateTimePattern("B HH:mm:ss", "HH:mm:ss");
+ assertTransformIcuDateTimePattern("dd-MM-yy B HH:mm:ss", "dd-MM-yy HH:mm:ss");
+ assertTransformIcuDateTimePattern("y၊ MMM d B HH:mm:ss", "y၊ MMM d HH:mm:ss");
+
+ // Other examples
+ assertTransformIcuDateTimePattern("H:mm B", "H:mm");
+ assertTransformIcuDateTimePattern("H:mm b", "H:mm");
+ assertTransformIcuDateTimePattern("b H:mm", "H:mm");
+ assertTransformIcuDateTimePattern("B H:mm:ss, E", "H:mm:ss, E");
+
+ // Examples with no effect
+ assertTransformIcuDateTimePattern("hh:mm b", "hh:mm b"); // No change for 12-hour format
+ assertTransformIcuDateTimePattern("hh:mm B", "hh:mm B"); // No change for 12-hour format
+ assertTransformIcuDateTimePattern("B h:mm:ss, E", "B h:mm:ss, E");
+ // No change when no hour is specified
+ assertTransformIcuDateTimePattern("dd-MM-yy B", "dd-MM-yy B");
+ }
+
+ private static void assertTransformIcuDateTimePattern(String input, String expectedOutput) {
+ String pattern = ICU.transformIcuDateTimePattern_forJavaTime(input);
+ assertEquals("input:" + input, expectedOutput, pattern);
+ }
+
public void testSetDefault() {
String current = ICU.getDefaultLocale();
diff --git a/ojluni/src/main/java/java/time/format/DateTimeFormatterBuilder.java b/ojluni/src/main/java/java/time/format/DateTimeFormatterBuilder.java
index f24f42a..1323c3b 100644
--- a/ojluni/src/main/java/java/time/format/DateTimeFormatterBuilder.java
+++ b/ojluni/src/main/java/java/time/format/DateTimeFormatterBuilder.java
@@ -77,6 +77,8 @@
import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
import static java.time.temporal.ChronoField.YEAR;
+import libcore.icu.ICU;
+
import java.lang.ref.SoftReference;
import java.math.BigDecimal;
import java.math.BigInteger;
@@ -209,7 +211,7 @@
throw new IllegalArgumentException("Either dateStyle or timeStyle must be non-null");
}
- // Android-changed: get format string from ICU.
+ // BEGIN Android-changed: get format string from ICU.
// LocaleResources lr = LocaleProviderAdapter.getResourceBundleBased()
// .getLocaleResources(locale);
// String pattern = lr.getJavaTimeDateTimePattern(
@@ -217,6 +219,11 @@
String pattern = Calendar.getDateTimeFormatString(
ULocale.forLocale(locale), chrono.getCalendarType(),
convertStyle(dateStyle), convertStyle(timeStyle));
+ // Transform the pattern coming from ICU because DateTimeFormatter does not handle some date
+ // symbols, e.g. 'B' / 'b', and thus we use a heuristic algorithm to remove the symbol.
+ // See http://b/174804526.
+ pattern = ICU.transformIcuDateTimePattern_forJavaTime(pattern);
+ // END Android-changed: get format string from ICU.
return pattern;
}