ART: Optimize StringBuilder append pattern.

Recognize appending with StringBuilder and replace the
entire expression with a runtime call that perfoms the
append in a more efficient manner.

For now, require the entire pattern to be in a single block
and be very strict about the StringBuilder environment uses.
Also, do not accept StringBuilder/char[]/Object/float/double
arguments as they throw non-OOME exceptions and/or require a
call from the entrypoint back to a helper function in Java;
these shall be implemented later.

Boot image size for aosp_taimen-userdebug:
 - before:
   arm/boot*.oat: 19653872
   arm64/boot*.oat: 23292784
   oat/arm64/services.odex: 22408664
 - after:
   arm/boot*.oat: 19432184 (-216KiB)
   arm64/boot*.oat: 22992488 (-293KiB)
   oat/arm64/services.odex: 22376776 (-31KiB)
Note that const-string in compiled boot image methods cannot
throw, but for apps it can and therefore its environment can
prevent the optimization for apps. We could implement either
a simple carve-out for const-string or generic environment
pruning to allow this pattern to be applied more often.

Results for the new StringBuilderAppendBenchmark on taimen:
  timeAppendLongStrings: ~700ns -> ~200ns
  timeAppendStringAndInt: ~220ns -> ~140ns
  timeAppendStrings: ~200ns -> 130ns

Bug: 19575890
Test: 697-checker-string-append
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: vogar --benchmark art/benchmark/stringbuilder-append/src/StringBuilderAppendBenchmark.java
Change-Id: I51789bf299f5219f68ada4c077b6a1d3fe083964
diff --git a/runtime/string_builder_append.cc b/runtime/string_builder_append.cc
new file mode 100644
index 0000000..5a34e93
--- /dev/null
+++ b/runtime/string_builder_append.cc
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "string_builder_append.h"
+
+#include "base/casts.h"
+#include "base/logging.h"
+#include "common_throws.h"
+#include "gc/heap.h"
+#include "mirror/string-alloc-inl.h"
+#include "obj_ptr-inl.h"
+#include "runtime.h"
+
+namespace art {
+
+class StringBuilderAppend::Builder {
+ public:
+  Builder(uint32_t format, const uint32_t* args, Thread* self)
+      : format_(format),
+        args_(args),
+        hs_(self) {}
+
+  int32_t CalculateLengthWithFlag() REQUIRES_SHARED(Locks::mutator_lock_);
+
+  void operator()(ObjPtr<mirror::Object> obj, size_t usable_size) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  static size_t Uint64Length(uint64_t value);
+
+  static size_t Int64Length(int64_t value) {
+    uint64_t v = static_cast<uint64_t>(value);
+    return (value >= 0) ? Uint64Length(v) : 1u + Uint64Length(-v);
+  }
+
+  static size_t RemainingSpace(ObjPtr<mirror::String> new_string, const uint8_t* data)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(new_string->IsCompressed());
+    DCHECK_GE(new_string->GetLength(), data - new_string->GetValueCompressed());
+    return new_string->GetLength() - (data - new_string->GetValueCompressed());
+  }
+
+  static size_t RemainingSpace(ObjPtr<mirror::String> new_string, const uint16_t* data)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(!new_string->IsCompressed());
+    DCHECK_GE(new_string->GetLength(), data - new_string->GetValue());
+    return new_string->GetLength() - (data - new_string->GetValue());
+  }
+
+  template <typename CharType, size_t size>
+  static CharType* AppendLiteral(ObjPtr<mirror::String> new_string,
+                                 CharType* data,
+                                 const char (&literal)[size]) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  template <typename CharType>
+  static CharType* AppendString(ObjPtr<mirror::String> new_string,
+                                CharType* data,
+                                ObjPtr<mirror::String> str) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  template <typename CharType>
+  static CharType* AppendInt64(ObjPtr<mirror::String> new_string,
+                               CharType* data,
+                               int64_t value) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  template <typename CharType>
+  void StoreData(ObjPtr<mirror::String> new_string, CharType* data) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static constexpr char kNull[] = "null";
+  static constexpr size_t kNullLength = sizeof(kNull) - 1u;
+  static constexpr char kTrue[] = "true";
+  static constexpr size_t kTrueLength = sizeof(kTrue) - 1u;
+  static constexpr char kFalse[] = "false";
+  static constexpr size_t kFalseLength = sizeof(kFalse) - 1u;
+
+  // The format and arguments to append.
+  const uint32_t format_;
+  const uint32_t* const args_;
+
+  // References are moved to the handle scope during CalculateLengthWithFlag().
+  StackHandleScope<kMaxArgs> hs_;
+
+  // The length and flag to store when the AppendBuilder is used as a pre-fence visitor.
+  int32_t length_with_flag_ = 0u;
+};
+
+inline size_t StringBuilderAppend::Builder::Uint64Length(uint64_t value)  {
+  if (value == 0u) {
+    return 1u;
+  }
+  // Calculate floor(log2(value)).
+  size_t log2_value = BitSizeOf<uint64_t>() - 1u - CLZ(value);
+  // Calculate an estimate of floor(log10(value)).
+  //   log10(2) = 0.301029996 > 0.296875 = 19/64
+  //   floor(log10(v)) == floor(log2(v) * log10(2))
+  //                   >= floor(log2(v) * 19/64)
+  //                   >= floor(floor(log2(v)) * 19/64)
+  // This estimate is no more that one off from the actual value because log2(value) < 64 and thus
+  //   log2(v) * log10(2) - log2(v) * 19/64 < 64*(log10(2) - 19/64)
+  // for the first approximation and
+  //   log2(v) * 19/64 - floor(log2(v)) * 19/64 < 19/64
+  // for the second one. Together,
+  //   64*(log10(2) - 19/64) + 19/64 = 0.56278 < 1 .
+  size_t log10_value_estimate = log2_value * 19u / 64u;
+  static constexpr uint64_t bounds[] = {
+      UINT64_C(9),
+      UINT64_C(99),
+      UINT64_C(999),
+      UINT64_C(9999),
+      UINT64_C(99999),
+      UINT64_C(999999),
+      UINT64_C(9999999),
+      UINT64_C(99999999),
+      UINT64_C(999999999),
+      UINT64_C(9999999999),
+      UINT64_C(99999999999),
+      UINT64_C(999999999999),
+      UINT64_C(9999999999999),
+      UINT64_C(99999999999999),
+      UINT64_C(999999999999999),
+      UINT64_C(9999999999999999),
+      UINT64_C(99999999999999999),
+      UINT64_C(999999999999999999),
+      UINT64_C(9999999999999999999),
+  };
+  // Add 1 for the lowest digit, add another 1 if the estimate was too low.
+  DCHECK_LT(log10_value_estimate, std::size(bounds));
+  size_t adjustment = (value > bounds[log10_value_estimate]) ? 2u : 1u;
+  return log10_value_estimate + adjustment;
+}
+
+template <typename CharType, size_t size>
+inline CharType* StringBuilderAppend::Builder::AppendLiteral(ObjPtr<mirror::String> new_string,
+                                                             CharType* data,
+                                                             const char (&literal)[size]) {
+  static_assert(size >= 2, "We need something to append.");
+
+  // Literals are zero-terminated.
+  constexpr size_t length = size - 1u;
+  DCHECK_EQ(literal[length], '\0');
+
+  DCHECK_LE(length, RemainingSpace(new_string, data));
+  for (size_t i = 0; i != length; ++i) {
+    data[i] = literal[i];
+  }
+  return data + length;
+}
+
+template <typename CharType>
+inline CharType* StringBuilderAppend::Builder::AppendString(ObjPtr<mirror::String> new_string,
+                                                            CharType* data,
+                                                            ObjPtr<mirror::String> str) {
+  size_t length = dchecked_integral_cast<size_t>(str->GetLength());
+  DCHECK_LE(length, RemainingSpace(new_string, data));
+  if (sizeof(CharType) == sizeof(uint8_t) || str->IsCompressed()) {
+    DCHECK(str->IsCompressed());
+    const uint8_t* value = str->GetValueCompressed();
+    for (size_t i = 0; i != length; ++i) {
+      data[i] = value[i];
+    }
+  } else {
+    const uint16_t* value = str->GetValue();
+    for (size_t i = 0; i != length; ++i) {
+      data[i] = dchecked_integral_cast<CharType>(value[i]);
+    }
+  }
+  return data + length;
+}
+
+template <typename CharType>
+inline CharType* StringBuilderAppend::Builder::AppendInt64(ObjPtr<mirror::String> new_string,
+                                                           CharType* data,
+                                                           int64_t value) {
+  DCHECK_GE(RemainingSpace(new_string, data), Int64Length(value));
+  uint64_t v = static_cast<uint64_t>(value);
+  if (value < 0) {
+    *data = '-';
+    ++data;
+    v = -v;
+  }
+  size_t length = Uint64Length(v);
+  // Write the digits from the end, do not write the most significant digit
+  // in the loop to avoid an unnecessary division.
+  for (size_t i = 1; i != length; ++i) {
+    uint64_t digit = v % UINT64_C(10);
+    v /= UINT64_C(10);
+    data[length - i] = '0' + static_cast<char>(digit);
+  }
+  DCHECK_LE(v, 10u);
+  *data = '0' + static_cast<char>(v);
+  return data + length;
+}
+
+inline int32_t StringBuilderAppend::Builder::CalculateLengthWithFlag() {
+  static_assert(static_cast<size_t>(Argument::kEnd) == 0u, "kEnd must be 0.");
+  bool compressible = mirror::kUseStringCompression;
+  uint64_t length = 0u;
+  const uint32_t* current_arg = args_;
+  for (uint32_t f = format_; f != 0u; f >>= kBitsPerArg) {
+    DCHECK_LE(f & kArgMask, static_cast<uint32_t>(Argument::kLast));
+    switch (static_cast<Argument>(f & kArgMask)) {
+      case Argument::kString: {
+        Handle<mirror::String> str =
+            hs_.NewHandle(reinterpret_cast32<mirror::String*>(*current_arg));
+        if (str != nullptr) {
+          length += str->GetLength();
+          compressible = compressible && str->IsCompressed();
+        } else {
+          length += kNullLength;
+        }
+        break;
+      }
+      case Argument::kBoolean: {
+        length += (*current_arg != 0u) ? kTrueLength : kFalseLength;
+        break;
+      }
+      case Argument::kChar: {
+        length += 1u;
+        compressible = compressible &&
+            mirror::String::IsASCII(reinterpret_cast<const uint16_t*>(current_arg)[0]);
+        break;
+      }
+      case Argument::kInt: {
+        length += Int64Length(static_cast<int32_t>(*current_arg));
+        break;
+      }
+      case Argument::kLong: {
+        current_arg = AlignUp(current_arg, sizeof(int64_t));
+        length += Int64Length(*reinterpret_cast<const int64_t*>(current_arg));
+        ++current_arg;  // Skip the low word, let the common code skip the high word.
+        break;
+      }
+
+      case Argument::kStringBuilder:
+      case Argument::kCharArray:
+      case Argument::kObject:
+      case Argument::kFloat:
+      case Argument::kDouble:
+        LOG(FATAL) << "Unimplemented arg format: 0x" << std::hex
+            << (f & kArgMask) << " full format: 0x" << std::hex << format_;
+        UNREACHABLE();
+      default:
+        LOG(FATAL) << "Unexpected arg format: 0x" << std::hex
+            << (f & kArgMask) << " full format: 0x" << std::hex << format_;
+        UNREACHABLE();
+    }
+    ++current_arg;
+    DCHECK_LE(hs_.NumberOfReferences(), kMaxArgs);
+  }
+
+  if (length > std::numeric_limits<int32_t>::max()) {
+    // We cannot allocate memory for the entire result.
+    hs_.Self()->ThrowNewException("Ljava/lang/OutOfMemoryError;",
+                                  "Out of memory for StringBuilder append.");
+    return -1;
+  }
+
+  length_with_flag_ = mirror::String::GetFlaggedCount(length, compressible);
+  return length_with_flag_;
+}
+
+template <typename CharType>
+inline void StringBuilderAppend::Builder::StoreData(ObjPtr<mirror::String> new_string,
+                                                    CharType* data) const {
+  size_t handle_index = 0u;
+  const uint32_t* current_arg = args_;
+  for (uint32_t f = format_; f != 0u; f >>= kBitsPerArg) {
+    DCHECK_LE(f & kArgMask, static_cast<uint32_t>(Argument::kLast));
+    switch (static_cast<Argument>(f & kArgMask)) {
+      case Argument::kString: {
+        ObjPtr<mirror::String> str =
+            ObjPtr<mirror::String>::DownCast(hs_.GetReference(handle_index));
+        ++handle_index;
+        if (str != nullptr) {
+          data = AppendString(new_string, data, str);
+        } else {
+          data = AppendLiteral(new_string, data, kNull);
+        }
+        break;
+      }
+      case Argument::kBoolean: {
+        if (*current_arg != 0u) {
+          data = AppendLiteral(new_string, data, kTrue);
+        } else {
+          data = AppendLiteral(new_string, data, kFalse);
+        }
+        break;
+      }
+      case Argument::kChar: {
+        DCHECK_GE(RemainingSpace(new_string, data), 1u);
+        *data = *reinterpret_cast<const CharType*>(current_arg);
+        ++data;
+        break;
+      }
+      case Argument::kInt: {
+        data = AppendInt64(new_string, data, static_cast<int32_t>(*current_arg));
+        break;
+      }
+      case Argument::kLong: {
+        current_arg = AlignUp(current_arg, sizeof(int64_t));
+        data = AppendInt64(new_string, data, *reinterpret_cast<const int64_t*>(current_arg));
+        ++current_arg;  // Skip the low word, let the common code skip the high word.
+        break;
+      }
+
+      case Argument::kStringBuilder:
+      case Argument::kCharArray:
+      case Argument::kFloat:
+      case Argument::kDouble:
+        LOG(FATAL) << "Unimplemented arg format: 0x" << std::hex
+            << (f & kArgMask) << " full format: 0x" << std::hex << format_;
+        UNREACHABLE();
+      default:
+        LOG(FATAL) << "Unexpected arg format: 0x" << std::hex
+            << (f & kArgMask) << " full format: 0x" << std::hex << format_;
+        UNREACHABLE();
+    }
+    ++current_arg;
+    DCHECK_LE(handle_index, hs_.NumberOfReferences());
+  }
+  DCHECK_EQ(RemainingSpace(new_string, data), 0u) << std::hex << format_;
+}
+
+inline void StringBuilderAppend::Builder::operator()(ObjPtr<mirror::Object> obj,
+                                                     size_t usable_size ATTRIBUTE_UNUSED) const {
+  ObjPtr<mirror::String> new_string = ObjPtr<mirror::String>::DownCast(obj);
+  new_string->SetCount(length_with_flag_);
+  if (mirror::String::IsCompressed(length_with_flag_)) {
+    StoreData(new_string, new_string->GetValueCompressed());
+  } else {
+    StoreData(new_string, new_string->GetValue());
+  }
+}
+
+ObjPtr<mirror::String> StringBuilderAppend::AppendF(uint32_t format,
+                                                    const uint32_t* args,
+                                                    Thread* self) {
+  Builder builder(format, args, self);
+  self->AssertNoPendingException();
+  int32_t length_with_flag = builder.CalculateLengthWithFlag();
+  if (self->IsExceptionPending()) {
+    return nullptr;
+  }
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  ObjPtr<mirror::String> result = mirror::String::Alloc</*kIsInstrumented=*/ true>(
+      self, length_with_flag, allocator_type, builder);
+
+  return result;
+}
+
+}  // namespace art