Fix string compression, add tests.
Fix String.compareTo() for interpreter; memcmp() does not
return the required result (only the right sign).
Fix x86-64 stub where the assembler silently accepted and
generated bad code for out-of-range JECXZ.
Add extensive tests for String.equals(), String.compareTo()
and String.indexOf().
Bug: 31040547
Test: Run ART test suite including interpreter tests on host and Nexus 9.
Test: Ditto with string compression enabled.
Change-Id: I21b7a74da8a577c8fbaf8d9225f048550236d414
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 54e52e5..c3321e1 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2117,7 +2117,7 @@
mov %r8d, %ecx
cmovg %r9d, %ecx
/* Going into loop to compare each character */
- jecxz .Lstring_compareto_keep_length // check loop counter (if 0 then stop)
+ jecxz .Lstring_compareto_keep_length1 // check loop counter (if 0 then stop)
.Lstring_compareto_loop_comparison_this_compressed:
movzbl (%edi), %r8d // move *(this_cur_char) byte to long
movzwl (%esi), %r9d // move *(that_cur_char) word to long
@@ -2126,6 +2126,7 @@
subl %r9d, %r8d
loope .Lstring_compareto_loop_comparison_this_compressed
cmovne %r8d, %eax // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_keep_length1:
ret
.Lstring_compareto_that_is_compressed:
andl LITERAL(0x7FFFFFFF), %r9d
@@ -2134,7 +2135,7 @@
mov %r8d, %ecx
cmovg %r9d, %ecx
/* Comparison this (8-bit) and that (16-bit) */
- jecxz .Lstring_compareto_keep_length // check loop counter (if 0, don't compare)
+ jecxz .Lstring_compareto_keep_length2 // check loop counter (if 0, don't compare)
.Lstring_compareto_loop_comparison_that_compressed:
movzwl (%edi), %r8d // move *(this_cur_char) word to long
movzbl (%esi), %r9d // move *(that_cur_chat) byte to long
@@ -2143,6 +2144,7 @@
subl %r9d, %r8d
loope .Lstring_compareto_loop_comparison_that_compressed
cmovne %r8d, %eax // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_keep_length2:
ret
.Lstring_compareto_both_compressed:
andl LITERAL(0x7FFFFFFF), %r9d
@@ -2151,9 +2153,9 @@
movl %r8d, %eax
subl %r9d, %eax
cmovg %r9d, %ecx
- jecxz .Lstring_compareto_keep_length
+ jecxz .Lstring_compareto_keep_length3
repe cmpsb
- je .Lstring_compareto_keep_length
+ je .Lstring_compareto_keep_length3
movzbl -1(%edi), %eax // get last compared char from this string (8-bit)
movzbl -1(%esi), %ecx // get last compared char from comp string (8-bit)
jmp .Lstring_compareto_count_difference
@@ -2171,14 +2173,14 @@
* esi: pointer to comp string data
* edi: pointer to this string data
*/
- jecxz .Lstring_compareto_keep_length
+ jecxz .Lstring_compareto_keep_length3
repe cmpsw // find nonmatching chars in [%esi] and [%edi], up to length %ecx
- je .Lstring_compareto_keep_length
+ je .Lstring_compareto_keep_length3
movzwl -2(%edi), %eax // get last compared char from this string (16-bit)
movzwl -2(%esi), %ecx // get last compared char from comp string (16-bit)
.Lstring_compareto_count_difference:
subl %ecx, %eax // return the difference
-.Lstring_compareto_keep_length:
+.Lstring_compareto_keep_length3:
ret
END_FUNCTION art_quick_string_compareto
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index ed1103f..d60274f 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -292,38 +292,41 @@
if (lhs == rhs) {
return 0;
}
- // TODO: is this still true?
- // The annoying part here is that 0x00e9 - 0xffff != 0x00ea,
- // because the interpreter converts the characters to 32-bit integers
- // *without* sign extension before it subtracts them (which makes some
- // sense since "char" is unsigned). So what we get is the result of
- // 0x000000e9 - 0x0000ffff, which is 0xffff00ea.
- int32_t lhsCount = lhs->GetLength();
- int32_t rhsCount = rhs->GetLength();
- int32_t countDiff = lhsCount - rhsCount;
- int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
+ int32_t lhs_count = lhs->GetLength();
+ int32_t rhs_count = rhs->GetLength();
+ int32_t count_diff = lhs_count - rhs_count;
+ int32_t min_count = (count_diff < 0) ? lhs_count : rhs_count;
if (lhs->IsCompressed() && rhs->IsCompressed()) {
- int32_t comparison = memcmp(lhs->GetValueCompressed(),
- rhs->GetValueCompressed(),
- minCount * sizeof(uint8_t));
- if (comparison != 0) {
- return comparison;
+ const uint8_t* lhs_chars = lhs->GetValueCompressed();
+ const uint8_t* rhs_chars = rhs->GetValueCompressed();
+ for (int32_t i = 0; i < min_count; ++i) {
+ int32_t char_diff = static_cast<int32_t>(lhs_chars[i]) - static_cast<int32_t>(rhs_chars[i]);
+ if (char_diff != 0) {
+ return char_diff;
+ }
}
} else if (lhs->IsCompressed() || rhs->IsCompressed()) {
- for (int32_t i = 0; i < minCount; ++i) {
- if (lhs->CharAt(i) != rhs->CharAt(i)) {
- return static_cast<int32_t>(lhs->CharAt(i)) - static_cast<int32_t>(rhs->CharAt(i));
+ const uint8_t* compressed_chars =
+ lhs->IsCompressed() ? lhs->GetValueCompressed() : rhs->GetValueCompressed();
+ const uint16_t* uncompressed_chars = lhs->IsCompressed() ? rhs->GetValue() : lhs->GetValue();
+ for (int32_t i = 0; i < min_count; ++i) {
+ int32_t char_diff =
+ static_cast<int32_t>(compressed_chars[i]) - static_cast<int32_t>(uncompressed_chars[i]);
+ if (char_diff != 0) {
+ return lhs->IsCompressed() ? char_diff : -char_diff;
}
}
} else {
- const uint16_t* lhsChars = lhs->GetValue();
- const uint16_t* rhsChars = rhs->GetValue();
- int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
- if (otherRes != 0) {
- return otherRes;
+ const uint16_t* lhs_chars = lhs->GetValue();
+ const uint16_t* rhs_chars = rhs->GetValue();
+ // FIXME: The MemCmp16() name is misleading. It returns the char difference on mismatch
+ // where memcmp() only guarantees that the returned value has the same sign.
+ int32_t char_diff = MemCmp16(lhs_chars, rhs_chars, min_count);
+ if (char_diff != 0) {
+ return char_diff;
}
}
- return countDiff;
+ return count_diff;
}
void String::VisitRoots(RootVisitor* visitor) {
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index d1ea0b1..a848fba 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -89,5 +89,424 @@
Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
System.out.println(result);
+
+ testCompareToAndEquals();
+ testIndexOf();
}
+
+ public static void testCompareToAndEquals() {
+ String[] strings = {
+ // Special: empty string.
+ "",
+ // Category 0, ASCII strings:
+ // "0123456789abcdef".substring(0, index + 1)
+ "0",
+ "01",
+ "012",
+ "0123",
+ "01234",
+ "012345",
+ "0123456",
+ "01234567",
+ "012345678",
+ "0123456789",
+ "0123456789a",
+ "0123456789ab",
+ "0123456789abc",
+ "0123456789abcd",
+ "0123456789abcde",
+ "0123456789abcdef",
+ // Category 1, ASCII strings:
+ // "0123456789abcdef".substring(0, index) + "x"
+ "x",
+ "0x",
+ "01x",
+ "012x",
+ "0123x",
+ "01234x",
+ "012345x",
+ "0123456x",
+ "01234567x",
+ "012345678x",
+ "0123456789x",
+ "0123456789ax",
+ "0123456789abx",
+ "0123456789abcx",
+ "0123456789abcdx",
+ "0123456789abcdex",
+ // Category 2, ASCII strings,
+ // "0123456789abcdef".substring(0, index) + "x" +
+ // "0123456789abcdef".substring(index + 1)
+ "x123456789abcdef",
+ "0x23456789abcdef",
+ "01x3456789abcdef",
+ "012x456789abcdef",
+ "0123x56789abcdef",
+ "01234x6789abcdef",
+ "012345x789abcdef",
+ "0123456x89abcdef",
+ "01234567x9abcdef",
+ "012345678xabcdef",
+ "0123456789xbcdef",
+ "0123456789axcdef",
+ "0123456789abxdef",
+ "0123456789abcxef",
+ "0123456789abcdxf",
+ "0123456789abcdex",
+ // Category 3, ASCII strings:
+ // "z" + "0123456789abcdef".substring(1, index + 1)
+ "z",
+ "z1",
+ "z12",
+ "z123",
+ "z1234",
+ "z12345",
+ "z123456",
+ "z1234567",
+ "z12345678",
+ "z123456789",
+ "z123456789a",
+ "z123456789ab",
+ "z123456789abc",
+ "z123456789abcd",
+ "z123456789abcde",
+ "z123456789abcdef",
+ // Category 4, non-ASCII strings:
+ // "0123456789abcdef".substring(0, index) + "\u0440"
+ "\u0440",
+ "0\u0440",
+ "01\u0440",
+ "012\u0440",
+ "0123\u0440",
+ "01234\u0440",
+ "012345\u0440",
+ "0123456\u0440",
+ "01234567\u0440",
+ "012345678\u0440",
+ "0123456789\u0440",
+ "0123456789a\u0440",
+ "0123456789ab\u0440",
+ "0123456789abc\u0440",
+ "0123456789abcd\u0440",
+ "0123456789abcde\u0440",
+ // Category 5, non-ASCII strings:
+ // "0123456789abcdef".substring(0, index) + "\u0440" +
+ // "0123456789abcdef".substring(index + 1)
+ "\u0440123456789abcdef",
+ "0\u044023456789abcdef",
+ "01\u04403456789abcdef",
+ "012\u0440456789abcdef",
+ "0123\u044056789abcdef",
+ "01234\u04406789abcdef",
+ "012345\u0440789abcdef",
+ "0123456\u044089abcdef",
+ "01234567\u04409abcdef",
+ "012345678\u0440abcdef",
+ "0123456789\u0440bcdef",
+ "0123456789a\u0440cdef",
+ "0123456789ab\u0440def",
+ "0123456789abc\u0440ef",
+ "0123456789abcd\u0440f",
+ "0123456789abcde\u0440",
+ // Category 6, ASCII strings:
+ // "\u0443" + "0123456789abcdef".substring(1, index + 1)
+ "\u0443",
+ "\u04431",
+ "\u044312",
+ "\u0443123",
+ "\u04431234",
+ "\u044312345",
+ "\u0443123456",
+ "\u04431234567",
+ "\u044312345678",
+ "\u0443123456789",
+ "\u0443123456789a",
+ "\u0443123456789ab",
+ "\u0443123456789abc",
+ "\u0443123456789abcd",
+ "\u0443123456789abcde",
+ "\u0443123456789abcdef",
+ // Category 7, non-ASCII strings:
+ // "0123456789abcdef".substring(0, index) + "\u0482"
+ "\u0482",
+ "0\u0482",
+ "01\u0482",
+ "012\u0482",
+ "0123\u0482",
+ "01234\u0482",
+ "012345\u0482",
+ "0123456\u0482",
+ "01234567\u0482",
+ "012345678\u0482",
+ "0123456789\u0482",
+ "0123456789a\u0482",
+ "0123456789ab\u0482",
+ "0123456789abc\u0482",
+ "0123456789abcd\u0482",
+ "0123456789abcde\u0482",
+ // Category 8, non-ASCII strings:
+ // "0123456789abcdef".substring(0, index) + "\u0482" +
+ // "0123456789abcdef".substring(index + 1)
+ "\u0482123456789abcdef",
+ "0\u048223456789abcdef",
+ "01\u04823456789abcdef",
+ "012\u0482456789abcdef",
+ "0123\u048256789abcdef",
+ "01234\u04826789abcdef",
+ "012345\u0482789abcdef",
+ "0123456\u048289abcdef",
+ "01234567\u04829abcdef",
+ "012345678\u0482abcdef",
+ "0123456789\u0482bcdef",
+ "0123456789a\u0482cdef",
+ "0123456789ab\u0482def",
+ "0123456789abc\u0482ef",
+ "0123456789abcd\u0482f",
+ "0123456789abcde\u0482",
+ // Category 9, ASCII strings:
+ // "\u0489" + "0123456789abcdef".substring(1, index + 1)
+ "\u0489",
+ "\u04891",
+ "\u048912",
+ "\u0489123",
+ "\u04891234",
+ "\u048912345",
+ "\u0489123456",
+ "\u04891234567",
+ "\u048912345678",
+ "\u0489123456789",
+ "\u0489123456789a",
+ "\u0489123456789ab",
+ "\u0489123456789abc",
+ "\u0489123456789abcd",
+ "\u0489123456789abcde",
+ "\u0489123456789abcdef",
+ };
+ int length = strings.length;
+ Assert.assertEquals(1 + 16 * 10, length);
+ for (int i = 0; i != length; ++i) {
+ String lhs = strings[i];
+ for (int j = 0; j != length; ++j) {
+ String rhs = strings[j];
+ int result = $noinline$compareTo(lhs, rhs);
+ final int expected;
+ if (i == 0 || j == 0 || i == j) {
+ // One of the strings is empty or the strings are the same.
+ expected = lhs.length() - rhs.length();
+ } else {
+ int i_category = (i - 1) / 16;
+ int i_index = (i - 1) % 16;
+ int j_category = (j - 1) / 16;
+ int j_index = (j - 1) % 16;
+ int min_ij_index = (i_index < j_index) ? i_index : j_index;
+ if (i_category == j_category) {
+ switch (i_category) {
+ case 0: case 3: case 6: case 9:
+ // Differs in length.
+ expected = lhs.length() - rhs.length();
+ break;
+ case 1: case 2: case 4: case 5: case 7: case 8:
+ // Differs in charAt(min_ij_index).
+ expected = lhs.charAt(min_ij_index) - rhs.charAt(min_ij_index);
+ break;
+ default: throw new Error("Unexpected category.");
+ }
+ } else if (i_category == 3 || i_category == 6 || i_category == 9 ||
+ j_category == 3 || j_category == 6 || j_category == 9) {
+ // In these categories, charAt(0) differs from other categories' strings.
+ expected = lhs.charAt(0) - rhs.charAt(0);
+ } else if (// Category 0 string is a prefix to any longer string in
+ // remaining categories.
+ (i_category == 0 && i_index < j_index) ||
+ (j_category == 0 && j_index < i_index) ||
+ // Category 2 string is a prefix to category 3 string at the same
+ // index. Similar for categories 4 and 5 and also 7 and 8.
+ // This includes matching last strings of these pairs of categories.
+ (i_index == j_index &&
+ ((i_category == 1 && j_category == 2) ||
+ (i_category == 2 && j_category == 1) ||
+ (i_category == 4 && j_category == 5) ||
+ (i_category == 5 && j_category == 4) ||
+ (i_category == 7 && j_category == 8) ||
+ (i_category == 8 && j_category == 7)))) {
+ // Differs in length.
+ expected = lhs.length() - rhs.length();
+ } else {
+ // The remaining cases differ in charAt(min_ij_index), the characters
+ // before that are "0123456789abcdef".substring(0, min_ij_index).
+ for (int k = 0; k < min_ij_index; ++k) {
+ Assert.assertEquals("0123456789abcdef".charAt(k), lhs.charAt(k));
+ Assert.assertEquals("0123456789abcdef".charAt(k), rhs.charAt(k));
+ }
+ expected = lhs.charAt(min_ij_index) - rhs.charAt(min_ij_index);
+ Assert.assertFalse(expected == 0);
+ }
+ }
+ if (expected != result) {
+ throw new Error(
+ "Mismatch at i=" + i + ", j=" + j + ", expected=" + expected +
+ ", result=" + result);
+ }
+ boolean equalsExpected =
+ (i == j) ||
+ // Last string in categories 1 and 2.
+ (i == 32 && j == 48) || (i == 48 && j == 32) ||
+ // Last string in categories 4 and 5.
+ (i == 80 && j == 96) || (i == 96 && j == 80) ||
+ // Last string in categories 7 and 8.
+ (i == 128 && j == 144) || (i == 144 && j == 128);
+ Assert.assertEquals(equalsExpected, $noinline$equals(lhs, rhs));
+ }
+ }
+
+ try {
+ $noinline$compareTo("", null);
+ Assert.fail();
+ } catch (NullPointerException expected) {
+ }
+ try {
+ $noinline$compareTo(null, "");
+ Assert.fail();
+ } catch (NullPointerException expected) {
+ }
+
+ Assert.assertFalse($noinline$equals("", null));
+ try {
+ $noinline$equals(null, "");
+ Assert.fail();
+ } catch (NullPointerException expected) {
+ }
+ }
+
+ public static void testIndexOf() {
+ String[] prefixes = {
+ "",
+ "0",
+ "01",
+ "012",
+ "0123",
+ "01234",
+ "012345",
+ "0123456",
+ "01234567",
+ "012345678",
+ "0123456789",
+ "0123456789a",
+ "0123456789ab",
+ "0123456789abc",
+ "0123456789abcd",
+ "0123456789abcdef",
+ };
+ String[] cores = {
+ "",
+ "x",
+ "xx",
+ "xxx",
+ "xxxx",
+ "xxxxx",
+ "xxxxxx",
+ "xxxxxxx",
+ "xxxxxxxx",
+ "xzx",
+ "xxzx",
+ "xxxzx",
+ "xxxxzx",
+ "xxxxxzx",
+ "xxxxxxzx",
+ "xxxxxxxzx",
+ "xxxxxxxxzx",
+ "\u0440",
+ "\u0440\u0440",
+ "\u0440\u0440\u0440",
+ "\u0440\u0440\u0440\u0440",
+ "\u0440\u0440\u0440\u0440\u0440",
+ "\u0440\u0440\u0440\u0440\u0440\u0440",
+ "\u0440\u0440\u0440\u0440\u0440\u0440\u0440",
+ "\u0440\u0440\u0440\u0440\u0440\u0440\u0440\u0440",
+ "\u0440z\u0440",
+ "\u0440\u0440z\u0440",
+ "\u0440\u0440\u0440z\u0440",
+ "\u0440\u0440\u0440\u0440z\u0440",
+ "\u0440\u0440\u0440\u0440\u0440z\u0440",
+ "\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+ "\u0440\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+ "\u0440\u0440\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+ };
+ String[] suffixes = {
+ "",
+ "y",
+ "yy",
+ "yyy",
+ "yyyy",
+ "yyyyy",
+ "yyyyyy",
+ "yyyyyyy",
+ "yyyyyyyy",
+ "\u0441",
+ "y\u0441",
+ "yy\u0441",
+ "yyy\u0441",
+ "yyyy\u0441",
+ "yyyyy\u0441",
+ "yyyyyy\u0441",
+ "yyyyyyy\u0441",
+ "yyyyyyyy\u0441",
+ };
+ for (String p : prefixes) {
+ for (String c : cores) {
+ for (String s : suffixes) {
+ String full = p + c + s;
+ int expX = (c.isEmpty() || c.charAt(0) != 'x') ? -1 : p.length();
+ int exp0440 = (c.isEmpty() || c.charAt(0) != '\u0440') ? -1 : p.length();
+ Assert.assertEquals(expX, $noinline$indexOf(full, 'x'));
+ Assert.assertEquals(exp0440, $noinline$indexOf(full, '\u0440'));
+ Assert.assertEquals(expX, $noinline$indexOf(full, 'x', -1));
+ Assert.assertEquals(exp0440, $noinline$indexOf(full, '\u0440', -1));
+ Assert.assertEquals(-1, $noinline$indexOf(full, 'x', full.length() + 1));
+ Assert.assertEquals(-1, $noinline$indexOf(full, '\u0440', full.length() + 1));
+ for (int from = 0; from != full.length(); ++from) {
+ final int eX;
+ final int e0440;
+ if (from <= p.length()) {
+ eX = expX;
+ e0440 = exp0440;
+ } else if (from >= p.length() + c.length()) {
+ eX = -1;
+ e0440 = -1;
+ } else if (full.charAt(from) == 'z') {
+ eX = (full.charAt(from + 1) != 'x') ? -1 : from + 1;
+ e0440 = (full.charAt(from + 1) != '\u0440') ? -1 : from + 1;
+ } else {
+ eX = (full.charAt(from) != 'x') ? -1 : from;
+ e0440 = (full.charAt(from) != '\u0440') ? -1 : from;
+ }
+ Assert.assertEquals(eX, $noinline$indexOf(full, 'x', from));
+ Assert.assertEquals(e0440, $noinline$indexOf(full, '\u0440', from));
+ }
+ }
+ }
+ }
+ }
+
+ public static int $noinline$compareTo(String lhs, String rhs) {
+ if (doThrow) { throw new Error(); }
+ return lhs.compareTo(rhs);
+ }
+
+ public static boolean $noinline$equals(String lhs, String rhs) {
+ if (doThrow) { throw new Error(); }
+ return lhs.equals(rhs);
+ }
+
+ public static int $noinline$indexOf(String lhs, int ch) {
+ if (doThrow) { throw new Error(); }
+ return lhs.indexOf(ch);
+ }
+
+ public static int $noinline$indexOf(String lhs, int ch, int fromIndex) {
+ if (doThrow) { throw new Error(); }
+ return lhs.indexOf(ch, fromIndex);
+ }
+
+ public static boolean doThrow = false;
}