Fix string compression, add tests. Fix String.compareTo() for interpreter; memcmp() does not return the required result (only the right sign). Fix x86-64 stub where the assembler silently accepted and generated bad code for out-of-range JECXZ. Add extensive tests for String.equals(), String.compareTo() and String.indexOf(). Bug: 31040547 Test: Run ART test suite including interpreter tests on host and Nexus 9. Test: Ditto with string compression enabled. Change-Id: I21b7a74da8a577c8fbaf8d9225f048550236d414

commit: 9c9883bb03de05783d3e24dfc00dee4d03786fe3 [log] [tgz]
author: Vladimir Marko <vmarko@google.com> Mon Oct 17 14:45:29 2016 +0100
committer: Vladimir Marko <vmarko@google.com> Wed Oct 19 10:47:51 2016 +0100
tree: d765d341d46ad88c9bb8effdcfad9493dc9adb27
parent: fc7798ba8201ae44c5c11384232be7c51afd3727 [diff]
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 54e52e5..c3321e1 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S

@@ -2117,7 +2117,7 @@
     mov     %r8d, %ecx
     cmovg   %r9d, %ecx
     /* Going into loop to compare each character */
-    jecxz   .Lstring_compareto_keep_length      // check loop counter (if 0 then stop)
+    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
 .Lstring_compareto_loop_comparison_this_compressed:
     movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
     movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
@@ -2126,6 +2126,7 @@
     subl    %r9d, %r8d
     loope   .Lstring_compareto_loop_comparison_this_compressed
     cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_keep_length1:
     ret
 .Lstring_compareto_that_is_compressed:
     andl    LITERAL(0x7FFFFFFF), %r9d
@@ -2134,7 +2135,7 @@
     mov     %r8d, %ecx
     cmovg   %r9d, %ecx
     /* Comparison this (8-bit) and that (16-bit) */
-    jecxz   .Lstring_compareto_keep_length      // check loop counter (if 0, don't compare)
+    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
 .Lstring_compareto_loop_comparison_that_compressed:
     movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
     movzbl  (%esi), %r9d                        // move *(that_cur_chat) byte to long
@@ -2143,6 +2144,7 @@
     subl    %r9d, %r8d
     loope   .Lstring_compareto_loop_comparison_that_compressed
     cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_keep_length2:
     ret
 .Lstring_compareto_both_compressed:
     andl    LITERAL(0x7FFFFFFF), %r9d
@@ -2151,9 +2153,9 @@
     movl    %r8d, %eax
     subl    %r9d, %eax
     cmovg   %r9d, %ecx
-    jecxz   .Lstring_compareto_keep_length
+    jecxz   .Lstring_compareto_keep_length3
     repe    cmpsb
-    je      .Lstring_compareto_keep_length
+    je      .Lstring_compareto_keep_length3
     movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
     movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
     jmp     .Lstring_compareto_count_difference
@@ -2171,14 +2173,14 @@
      *   esi: pointer to comp string data
      *   edi: pointer to this string data
      */
-    jecxz .Lstring_compareto_keep_length
+    jecxz .Lstring_compareto_keep_length3
     repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
-    je    .Lstring_compareto_keep_length
+    je    .Lstring_compareto_keep_length3
     movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
     movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
 .Lstring_compareto_count_difference:
     subl  %ecx, %eax              // return the difference
-.Lstring_compareto_keep_length:
+.Lstring_compareto_keep_length3:
     ret
 END_FUNCTION art_quick_string_compareto
 

diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index ed1103f..d60274f 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc

@@ -292,38 +292,41 @@
   if (lhs == rhs) {
     return 0;
   }
-  // TODO: is this still true?
-  // The annoying part here is that 0x00e9 - 0xffff != 0x00ea,
-  // because the interpreter converts the characters to 32-bit integers
-  // *without* sign extension before it subtracts them (which makes some
-  // sense since "char" is unsigned).  So what we get is the result of
-  // 0x000000e9 - 0x0000ffff, which is 0xffff00ea.
-  int32_t lhsCount = lhs->GetLength();
-  int32_t rhsCount = rhs->GetLength();
-  int32_t countDiff = lhsCount - rhsCount;
-  int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
+  int32_t lhs_count = lhs->GetLength();
+  int32_t rhs_count = rhs->GetLength();
+  int32_t count_diff = lhs_count - rhs_count;
+  int32_t min_count = (count_diff < 0) ? lhs_count : rhs_count;
   if (lhs->IsCompressed() && rhs->IsCompressed()) {
-    int32_t comparison = memcmp(lhs->GetValueCompressed(),
-                                rhs->GetValueCompressed(),
-                                minCount * sizeof(uint8_t));
-    if (comparison != 0) {
-      return comparison;
+    const uint8_t* lhs_chars = lhs->GetValueCompressed();
+    const uint8_t* rhs_chars = rhs->GetValueCompressed();
+    for (int32_t i = 0; i < min_count; ++i) {
+      int32_t char_diff = static_cast<int32_t>(lhs_chars[i]) - static_cast<int32_t>(rhs_chars[i]);
+      if (char_diff != 0) {
+        return char_diff;
+      }
     }
   } else if (lhs->IsCompressed() || rhs->IsCompressed()) {
-    for (int32_t i = 0; i < minCount; ++i) {
-      if (lhs->CharAt(i) != rhs->CharAt(i)) {
-        return static_cast<int32_t>(lhs->CharAt(i)) - static_cast<int32_t>(rhs->CharAt(i));
+    const uint8_t* compressed_chars =
+        lhs->IsCompressed() ? lhs->GetValueCompressed() : rhs->GetValueCompressed();
+    const uint16_t* uncompressed_chars = lhs->IsCompressed() ? rhs->GetValue() : lhs->GetValue();
+    for (int32_t i = 0; i < min_count; ++i) {
+      int32_t char_diff =
+          static_cast<int32_t>(compressed_chars[i]) - static_cast<int32_t>(uncompressed_chars[i]);
+      if (char_diff != 0) {
+        return lhs->IsCompressed() ? char_diff : -char_diff;
       }
     }
   } else {
-    const uint16_t* lhsChars = lhs->GetValue();
-    const uint16_t* rhsChars = rhs->GetValue();
-    int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
-    if (otherRes != 0) {
-      return otherRes;
+    const uint16_t* lhs_chars = lhs->GetValue();
+    const uint16_t* rhs_chars = rhs->GetValue();
+    // FIXME: The MemCmp16() name is misleading. It returns the char difference on mismatch
+    // where memcmp() only guarantees that the returned value has the same sign.
+    int32_t char_diff = MemCmp16(lhs_chars, rhs_chars, min_count);
+    if (char_diff != 0) {
+      return char_diff;
     }
   }
-  return countDiff;
+  return count_diff;
 }
 
 void String::VisitRoots(RootVisitor* visitor) {

diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index d1ea0b1..a848fba 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java

@@ -89,5 +89,424 @@
         Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
         String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
         System.out.println(result);
+
+        testCompareToAndEquals();
+        testIndexOf();
     }
+
+    public static void testCompareToAndEquals() {
+        String[] strings = {
+                // Special: empty string.
+                "",
+                // Category 0, ASCII strings:
+                //     "0123456789abcdef".substring(0, index + 1)
+                "0",
+                "01",
+                "012",
+                "0123",
+                "01234",
+                "012345",
+                "0123456",
+                "01234567",
+                "012345678",
+                "0123456789",
+                "0123456789a",
+                "0123456789ab",
+                "0123456789abc",
+                "0123456789abcd",
+                "0123456789abcde",
+                "0123456789abcdef",
+                // Category 1, ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "x"
+                "x",
+                "0x",
+                "01x",
+                "012x",
+                "0123x",
+                "01234x",
+                "012345x",
+                "0123456x",
+                "01234567x",
+                "012345678x",
+                "0123456789x",
+                "0123456789ax",
+                "0123456789abx",
+                "0123456789abcx",
+                "0123456789abcdx",
+                "0123456789abcdex",
+                // Category 2, ASCII strings,
+                //     "0123456789abcdef".substring(0, index) + "x" +
+                //     "0123456789abcdef".substring(index + 1)
+                "x123456789abcdef",
+                "0x23456789abcdef",
+                "01x3456789abcdef",
+                "012x456789abcdef",
+                "0123x56789abcdef",
+                "01234x6789abcdef",
+                "012345x789abcdef",
+                "0123456x89abcdef",
+                "01234567x9abcdef",
+                "012345678xabcdef",
+                "0123456789xbcdef",
+                "0123456789axcdef",
+                "0123456789abxdef",
+                "0123456789abcxef",
+                "0123456789abcdxf",
+                "0123456789abcdex",
+                // Category 3, ASCII strings:
+                //     "z" + "0123456789abcdef".substring(1, index + 1)
+                "z",
+                "z1",
+                "z12",
+                "z123",
+                "z1234",
+                "z12345",
+                "z123456",
+                "z1234567",
+                "z12345678",
+                "z123456789",
+                "z123456789a",
+                "z123456789ab",
+                "z123456789abc",
+                "z123456789abcd",
+                "z123456789abcde",
+                "z123456789abcdef",
+                // Category 4, non-ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "\u0440"
+                "\u0440",
+                "0\u0440",
+                "01\u0440",
+                "012\u0440",
+                "0123\u0440",
+                "01234\u0440",
+                "012345\u0440",
+                "0123456\u0440",
+                "01234567\u0440",
+                "012345678\u0440",
+                "0123456789\u0440",
+                "0123456789a\u0440",
+                "0123456789ab\u0440",
+                "0123456789abc\u0440",
+                "0123456789abcd\u0440",
+                "0123456789abcde\u0440",
+                // Category 5, non-ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "\u0440" +
+                //     "0123456789abcdef".substring(index + 1)
+                "\u0440123456789abcdef",
+                "0\u044023456789abcdef",
+                "01\u04403456789abcdef",
+                "012\u0440456789abcdef",
+                "0123\u044056789abcdef",
+                "01234\u04406789abcdef",
+                "012345\u0440789abcdef",
+                "0123456\u044089abcdef",
+                "01234567\u04409abcdef",
+                "012345678\u0440abcdef",
+                "0123456789\u0440bcdef",
+                "0123456789a\u0440cdef",
+                "0123456789ab\u0440def",
+                "0123456789abc\u0440ef",
+                "0123456789abcd\u0440f",
+                "0123456789abcde\u0440",
+                // Category 6, ASCII strings:
+                //     "\u0443" + "0123456789abcdef".substring(1, index + 1)
+                "\u0443",
+                "\u04431",
+                "\u044312",
+                "\u0443123",
+                "\u04431234",
+                "\u044312345",
+                "\u0443123456",
+                "\u04431234567",
+                "\u044312345678",
+                "\u0443123456789",
+                "\u0443123456789a",
+                "\u0443123456789ab",
+                "\u0443123456789abc",
+                "\u0443123456789abcd",
+                "\u0443123456789abcde",
+                "\u0443123456789abcdef",
+                // Category 7, non-ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "\u0482"
+                "\u0482",
+                "0\u0482",
+                "01\u0482",
+                "012\u0482",
+                "0123\u0482",
+                "01234\u0482",
+                "012345\u0482",
+                "0123456\u0482",
+                "01234567\u0482",
+                "012345678\u0482",
+                "0123456789\u0482",
+                "0123456789a\u0482",
+                "0123456789ab\u0482",
+                "0123456789abc\u0482",
+                "0123456789abcd\u0482",
+                "0123456789abcde\u0482",
+                // Category 8, non-ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "\u0482" +
+                //     "0123456789abcdef".substring(index + 1)
+                "\u0482123456789abcdef",
+                "0\u048223456789abcdef",
+                "01\u04823456789abcdef",
+                "012\u0482456789abcdef",
+                "0123\u048256789abcdef",
+                "01234\u04826789abcdef",
+                "012345\u0482789abcdef",
+                "0123456\u048289abcdef",
+                "01234567\u04829abcdef",
+                "012345678\u0482abcdef",
+                "0123456789\u0482bcdef",
+                "0123456789a\u0482cdef",
+                "0123456789ab\u0482def",
+                "0123456789abc\u0482ef",
+                "0123456789abcd\u0482f",
+                "0123456789abcde\u0482",
+                // Category 9, ASCII strings:
+                //     "\u0489" + "0123456789abcdef".substring(1, index + 1)
+                "\u0489",
+                "\u04891",
+                "\u048912",
+                "\u0489123",
+                "\u04891234",
+                "\u048912345",
+                "\u0489123456",
+                "\u04891234567",
+                "\u048912345678",
+                "\u0489123456789",
+                "\u0489123456789a",
+                "\u0489123456789ab",
+                "\u0489123456789abc",
+                "\u0489123456789abcd",
+                "\u0489123456789abcde",
+                "\u0489123456789abcdef",
+        };
+        int length = strings.length;
+        Assert.assertEquals(1 + 16 * 10, length);
+        for (int i = 0; i != length; ++i) {
+            String lhs = strings[i];
+            for (int j = 0; j != length; ++j) {
+                String rhs = strings[j];
+                int result = $noinline$compareTo(lhs, rhs);
+                final int expected;
+                if (i == 0 || j == 0 || i == j) {
+                    // One of the strings is empty or the strings are the same.
+                    expected = lhs.length() - rhs.length();
+                } else {
+                    int i_category = (i - 1) / 16;
+                    int i_index = (i - 1) % 16;
+                    int j_category = (j - 1) / 16;
+                    int j_index = (j - 1) % 16;
+                    int min_ij_index = (i_index < j_index) ? i_index : j_index;
+                    if (i_category == j_category) {
+                        switch (i_category) {
+                            case 0: case 3: case 6: case 9:
+                                // Differs in length.
+                                expected = lhs.length() - rhs.length();
+                                break;
+                            case 1: case 2: case 4: case 5: case 7: case 8:
+                                // Differs in charAt(min_ij_index).
+                                expected = lhs.charAt(min_ij_index) - rhs.charAt(min_ij_index);
+                                break;
+                            default: throw new Error("Unexpected category.");
+                      }
+                    } else if (i_category == 3 || i_category == 6 || i_category == 9 ||
+                               j_category == 3 || j_category == 6 || j_category == 9) {
+                        // In these categories, charAt(0) differs from other categories' strings.
+                        expected = lhs.charAt(0) - rhs.charAt(0);
+                    } else if (// Category 0 string is a prefix to any longer string in
+                               // remaining categories.
+                               (i_category == 0 && i_index < j_index) ||
+                               (j_category == 0 && j_index < i_index) ||
+                               // Category 2 string is a prefix to category 3 string at the same
+                               // index. Similar for categories 4 and 5 and also 7 and 8.
+                               // This includes matching last strings of these pairs of categories.
+                               (i_index == j_index &&
+                                   ((i_category == 1 && j_category == 2) ||
+                                    (i_category == 2 && j_category == 1) ||
+                                    (i_category == 4 && j_category == 5) ||
+                                    (i_category == 5 && j_category == 4) ||
+                                    (i_category == 7 && j_category == 8) ||
+                                    (i_category == 8 && j_category == 7)))) {
+                        // Differs in length.
+                        expected = lhs.length() - rhs.length();
+                    } else {
+                        // The remaining cases differ in charAt(min_ij_index), the characters
+                        // before that are "0123456789abcdef".substring(0, min_ij_index).
+                        for (int k = 0; k < min_ij_index; ++k) {
+                          Assert.assertEquals("0123456789abcdef".charAt(k), lhs.charAt(k));
+                          Assert.assertEquals("0123456789abcdef".charAt(k), rhs.charAt(k));
+                        }
+                        expected = lhs.charAt(min_ij_index) - rhs.charAt(min_ij_index);
+                        Assert.assertFalse(expected == 0);
+                    }
+                }
+                if (expected != result) {
+                  throw new Error(
+                      "Mismatch at i=" + i + ", j=" + j + ", expected=" + expected +
+                      ", result=" + result);
+                }
+                boolean equalsExpected =
+                    (i == j) ||
+                    // Last string in categories 1 and 2.
+                    (i == 32 && j == 48) || (i == 48 && j == 32) ||
+                    // Last string in categories 4 and 5.
+                    (i == 80 && j == 96) || (i == 96 && j == 80) ||
+                    // Last string in categories 7 and 8.
+                    (i == 128 && j == 144) || (i == 144 && j == 128);
+                Assert.assertEquals(equalsExpected, $noinline$equals(lhs, rhs));
+            }
+        }
+
+        try {
+            $noinline$compareTo("", null);
+            Assert.fail();
+        } catch (NullPointerException expected) {
+        }
+        try {
+            $noinline$compareTo(null, "");
+            Assert.fail();
+        } catch (NullPointerException expected) {
+        }
+
+        Assert.assertFalse($noinline$equals("", null));
+        try {
+            $noinline$equals(null, "");
+            Assert.fail();
+        } catch (NullPointerException expected) {
+        }
+    }
+
+    public static void testIndexOf() {
+        String[] prefixes = {
+                "",
+                "0",
+                "01",
+                "012",
+                "0123",
+                "01234",
+                "012345",
+                "0123456",
+                "01234567",
+                "012345678",
+                "0123456789",
+                "0123456789a",
+                "0123456789ab",
+                "0123456789abc",
+                "0123456789abcd",
+                "0123456789abcdef",
+        };
+        String[] cores = {
+                "",
+                "x",
+                "xx",
+                "xxx",
+                "xxxx",
+                "xxxxx",
+                "xxxxxx",
+                "xxxxxxx",
+                "xxxxxxxx",
+                "xzx",
+                "xxzx",
+                "xxxzx",
+                "xxxxzx",
+                "xxxxxzx",
+                "xxxxxxzx",
+                "xxxxxxxzx",
+                "xxxxxxxxzx",
+                "\u0440",
+                "\u0440\u0440",
+                "\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440\u0440\u0440",
+                "\u0440z\u0440",
+                "\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+        };
+        String[] suffixes = {
+                "",
+                "y",
+                "yy",
+                "yyy",
+                "yyyy",
+                "yyyyy",
+                "yyyyyy",
+                "yyyyyyy",
+                "yyyyyyyy",
+                "\u0441",
+                "y\u0441",
+                "yy\u0441",
+                "yyy\u0441",
+                "yyyy\u0441",
+                "yyyyy\u0441",
+                "yyyyyy\u0441",
+                "yyyyyyy\u0441",
+                "yyyyyyyy\u0441",
+        };
+        for (String p : prefixes) {
+            for (String c : cores) {
+                for (String s : suffixes) {
+                    String full = p + c + s;
+                    int expX = (c.isEmpty() || c.charAt(0) != 'x') ? -1 : p.length();
+                    int exp0440 = (c.isEmpty() || c.charAt(0) != '\u0440') ? -1 : p.length();
+                    Assert.assertEquals(expX, $noinline$indexOf(full, 'x'));
+                    Assert.assertEquals(exp0440, $noinline$indexOf(full, '\u0440'));
+                    Assert.assertEquals(expX, $noinline$indexOf(full, 'x', -1));
+                    Assert.assertEquals(exp0440, $noinline$indexOf(full, '\u0440', -1));
+                    Assert.assertEquals(-1, $noinline$indexOf(full, 'x', full.length() + 1));
+                    Assert.assertEquals(-1, $noinline$indexOf(full, '\u0440', full.length() + 1));
+                    for (int from = 0; from != full.length(); ++from) {
+                        final int eX;
+                        final int e0440;
+                        if (from <= p.length()) {
+                            eX = expX;
+                            e0440 = exp0440;
+                        } else if (from >= p.length() + c.length()) {
+                            eX = -1;
+                            e0440 = -1;
+                        } else if (full.charAt(from) == 'z') {
+                            eX = (full.charAt(from + 1) != 'x') ? -1 : from + 1;
+                            e0440 = (full.charAt(from + 1) != '\u0440') ? -1 : from + 1;
+                        } else {
+                            eX = (full.charAt(from) != 'x') ? -1 : from;
+                            e0440 = (full.charAt(from) != '\u0440') ? -1 : from;
+                        }
+                        Assert.assertEquals(eX, $noinline$indexOf(full, 'x', from));
+                        Assert.assertEquals(e0440, $noinline$indexOf(full, '\u0440', from));
+                    }
+                }
+            }
+        }
+    }
+
+    public static int $noinline$compareTo(String lhs, String rhs) {
+        if (doThrow) { throw new Error(); }
+        return lhs.compareTo(rhs);
+    }
+
+    public static boolean $noinline$equals(String lhs, String rhs) {
+        if (doThrow) { throw new Error(); }
+        return lhs.equals(rhs);
+    }
+
+    public static int $noinline$indexOf(String lhs, int ch) {
+        if (doThrow) { throw new Error(); }
+        return lhs.indexOf(ch);
+    }
+
+    public static int $noinline$indexOf(String lhs, int ch, int fromIndex) {
+        if (doThrow) { throw new Error(); }
+        return lhs.indexOf(ch, fromIndex);
+    }
+
+    public static boolean doThrow = false;
 }
commit	9c9883bb03de05783d3e24dfc00dee4d03786fe3	[log] [tgz]
author	Vladimir Marko <vmarko@google.com>	Mon Oct 17 14:45:29 2016 +0100
committer	Vladimir Marko <vmarko@google.com>	Wed Oct 19 10:47:51 2016 +0100
tree	d765d341d46ad88c9bb8effdcfad9493dc9adb27
parent	fc7798ba8201ae44c5c11384232be7c51afd3727 [diff]