Merge "Add LoadString kind of kJitTableAddress for dump-cfg."
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 3b273a2..e297b4f 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -215,24 +215,9 @@
       $(4)TARGET_CORE_IMAGE_$(1)_$(2)_64 := $$(core_image_name)
     else
       $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
-      ifdef ART_USE_VIXL_ARM_BACKEND
-        ifeq ($(1),optimizing)
-          # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not
-          # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is
-          # defined.
-          core_compile_options += --compiler-filter=interpret-only
-        endif
-      endif
     endif
   else
     $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
-    ifdef ART_USE_VIXL_ARM_BACKEND
-      ifeq ($(1),optimizing)
-      # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not
-      # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is defined.
-      core_compile_options += --compiler-filter=interpret-only
-      endif
-    endif
   endif
   $(4)TARGET_CORE_IMG_OUTS += $$(core_image_name)
   $(4)TARGET_CORE_OAT_OUTS += $$(core_oat_name)
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 42e5db3..6b62110 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -71,6 +71,7 @@
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "transaction.h"
+#include "utils/atomic_method_ref_map-inl.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "utils/swap_space.h"
 #include "vdex_file.h"
@@ -287,8 +288,6 @@
       instruction_set_features_(instruction_set_features),
       requires_constructor_barrier_lock_("constructor barrier lock"),
       compiled_classes_lock_("compiled classes lock"),
-      compiled_methods_lock_("compiled method lock"),
-      compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
       image_classes_(image_classes),
       classes_to_compile_(compiled_classes),
@@ -326,12 +325,12 @@
     MutexLock mu(self, compiled_classes_lock_);
     STLDeleteValues(&compiled_classes_);
   }
-  {
-    MutexLock mu(self, compiled_methods_lock_);
-    for (auto& pair : compiled_methods_) {
-      CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, pair.second);
+  compiled_methods_.Visit([this](const MethodReference& ref ATTRIBUTE_UNUSED,
+                                 CompiledMethod* method) {
+    if (method != nullptr) {
+      CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, method);
     }
-  }
+  });
   compiler_->UnInit();
 }
 
@@ -575,8 +574,7 @@
                           const DexFile& dex_file,
                           optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level,
                           bool compilation_enabled,
-                          Handle<mirror::DexCache> dex_cache)
-    REQUIRES(!driver->compiled_methods_lock_) {
+                          Handle<mirror::DexCache> dex_cache) {
   DCHECK(driver != nullptr);
   CompiledMethod* compiled_method = nullptr;
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
@@ -940,6 +938,13 @@
                                 TimingLogger* timings) {
   CheckThreadPools();
 
+  for (const DexFile* dex_file : dex_files) {
+    // May already have been added if the caller is CompileOne. This happens for gtests.
+    if (!compiled_methods_.HaveDexFile(dex_file)) {
+      compiled_methods_.AddDexFile(dex_file);
+    }
+  }
+
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
@@ -2616,30 +2621,15 @@
                                        size_t non_relative_linker_patch_count) {
   DCHECK(GetCompiledMethod(method_ref) == nullptr)
       << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
-  {
-    MutexLock mu(Thread::Current(), compiled_methods_lock_);
-    compiled_methods_.Put(method_ref, compiled_method);
-    non_relative_linker_patch_count_ += non_relative_linker_patch_count;
-  }
+  MethodTable::InsertResult result = compiled_methods_.Insert(method_ref,
+                                                              /*expected*/ nullptr,
+                                                              compiled_method);
+  CHECK(result == MethodTable::kInsertResultSuccess);
+  non_relative_linker_patch_count_.FetchAndAddRelaxed(non_relative_linker_patch_count);
   DCHECK(GetCompiledMethod(method_ref) != nullptr)
       << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
 }
 
-void CompilerDriver::RemoveCompiledMethod(const MethodReference& method_ref) {
-  CompiledMethod* compiled_method = nullptr;
-  {
-    MutexLock mu(Thread::Current(), compiled_methods_lock_);
-    auto it = compiled_methods_.find(method_ref);
-    if (it != compiled_methods_.end()) {
-      compiled_method = it->second;
-      compiled_methods_.erase(it);
-    }
-  }
-  if (compiled_method != nullptr) {
-    CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, compiled_method);
-  }
-}
-
 CompiledClass* CompilerDriver::GetCompiledClass(ClassReference ref) const {
   MutexLock mu(Thread::Current(), compiled_classes_lock_);
   ClassTable::const_iterator it = compiled_classes_.find(ref);
@@ -2678,13 +2668,9 @@
 }
 
 CompiledMethod* CompilerDriver::GetCompiledMethod(MethodReference ref) const {
-  MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  MethodTable::const_iterator it = compiled_methods_.find(ref);
-  if (it == compiled_methods_.end()) {
-    return nullptr;
-  }
-  CHECK(it->second != nullptr);
-  return it->second;
+  CompiledMethod* compiled_method = nullptr;
+  compiled_methods_.Get(ref, &compiled_method);
+  return compiled_method;
 }
 
 bool CompilerDriver::IsMethodVerifiedWithoutFailures(uint32_t method_idx,
@@ -2713,8 +2699,7 @@
 }
 
 size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const {
-  MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  return non_relative_linker_patch_count_;
+  return non_relative_linker_patch_count_.LoadRelaxed();
 }
 
 void CompilerDriver::SetRequiresConstructorBarrier(Thread* self,
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 7418b00..cc50197 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -41,6 +41,7 @@
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
+#include "utils/atomic_method_ref_map.h"
 #include "utils/dex_cache_arrays_layout.h"
 
 namespace art {
@@ -131,7 +132,7 @@
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
+      REQUIRES(!compiled_classes_lock_, !dex_to_dex_references_lock_);
 
   VerificationResults* GetVerificationResults() const {
     DCHECK(Runtime::Current()->IsAotCompiler());
@@ -168,18 +169,12 @@
   CompiledClass* GetCompiledClass(ClassReference ref) const
       REQUIRES(!compiled_classes_lock_);
 
-  CompiledMethod* GetCompiledMethod(MethodReference ref) const
-      REQUIRES(!compiled_methods_lock_);
-  size_t GetNonRelativeLinkerPatchCount() const
-      REQUIRES(!compiled_methods_lock_);
-
+  CompiledMethod* GetCompiledMethod(MethodReference ref) const;
+  size_t GetNonRelativeLinkerPatchCount() const;
   // Add a compiled method.
   void AddCompiledMethod(const MethodReference& method_ref,
                          CompiledMethod* const compiled_method,
-                         size_t non_relative_linker_patch_count)
-      REQUIRES(!compiled_methods_lock_);
-  // Remove and delete a compiled method.
-  void RemoveCompiledMethod(const MethodReference& method_ref) REQUIRES(!compiled_methods_lock_);
+                         size_t non_relative_linker_patch_count);
 
   void SetRequiresConstructorBarrier(Thread* self,
                                      const DexFile* dex_file,
@@ -519,18 +514,15 @@
   mutable Mutex compiled_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
 
-  typedef SafeMap<const MethodReference, CompiledMethod*, MethodReferenceComparator> MethodTable;
-
- public:
-  // Lock is public so that non-members can have lock annotations.
-  mutable Mutex compiled_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  typedef AtomicMethodRefMap<CompiledMethod*> MethodTable;
 
  private:
   // All method references that this compiler has compiled.
-  MethodTable compiled_methods_ GUARDED_BY(compiled_methods_lock_);
+  MethodTable compiled_methods_;
+
   // Number of non-relative patches in all compiled methods. These patches need space
   // in the .oat_patches ELF section if requested in the compiler options.
-  size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_);
+  Atomic<size_t> non_relative_linker_patch_count_;
 
   // If image_ is true, specifies the classes that will be included in the image.
   // Note if image_classes_ is null, all classes are included in the image.
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 555baf6..9bd25d8 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -117,6 +117,12 @@
   V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromBytes", "([BIII)Ljava/lang/String;") \
   V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromChars", "(II[C)Ljava/lang/String;") \
   V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromString", "(Ljava/lang/String;)Ljava/lang/String;") \
+  V(StringBufferAppend, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuffer;", "append", "(Ljava/lang/String;)Ljava/lang/StringBuffer;") \
+  V(StringBufferLength, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kNoThrow, "Ljava/lang/StringBuffer;", "length", "()I") \
+  V(StringBufferToString, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuffer;", "toString", "()Ljava/lang/String;") \
+  V(StringBuilderAppend, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuilder;", "append", "(Ljava/lang/String;)Ljava/lang/StringBuilder;") \
+  V(StringBuilderLength, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/StringBuilder;", "length", "()I") \
+  V(StringBuilderToString, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringBuilder;", "toString", "()Ljava/lang/String;") \
   V(UnsafeCASInt, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapInt", "(Ljava/lang/Object;JII)Z") \
   V(UnsafeCASLong, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapLong", "(Ljava/lang/Object;JJJ)Z") \
   V(UnsafeCASObject, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "compareAndSwapObject", "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z") \
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index aa8a77e..1ca439e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1322,11 +1322,10 @@
       }
       break;
 
-    // TODO(VIXL): https://android-review.googlesource.com/#/c/252265/
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
       if (!cond->IsEmittedAtUseSite()) {
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
@@ -1346,13 +1345,20 @@
     return;
   }
 
+  Location right = cond->GetLocations()->InAt(1);
   vixl32::Register out = OutputRegister(cond);
   vixl32::Label true_label, false_label;
 
   switch (cond->InputAt(0)->GetType()) {
     default: {
       // Integer case.
-      __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+      if (right.IsRegister()) {
+        __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+      } else {
+        DCHECK(right.IsConstant());
+        __ Cmp(InputRegisterAt(cond, 0),
+               CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+      }
       AssemblerAccurateScope aas(GetVIXLAssembler(),
                                  kArmInstrMaxSizeInBytes * 3u,
                                  CodeBufferCheckScope::kMaximumSize);
@@ -2776,15 +2782,8 @@
 
 
 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
@@ -3956,15 +3955,8 @@
 }
 
 void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
-  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
-      ? LocationSummary::kCallOnSlowPath
-      : LocationSummary::kNoCall;
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
 }
 
 void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
@@ -4697,8 +4689,9 @@
 }
 
 void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
-  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ and related.
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5200,14 +5193,27 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
-  return kEmitCompilerReadBarrier &&
-      (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+// Returns the number of temps an InstanceOf needs; a temp is only used for read barriers.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+       (kUseBakerReadBarrier ||
+          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+          type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
 }
 
+// Interface case has 3 temps, one for holding the number of interfaces, one for the current
+// interface pointer, one for loading the current interface.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    return 3;
+  }
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
 
 void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
@@ -5238,11 +5244,7 @@
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  // When read barriers are enabled, we need a temporary register for
-  // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5253,9 +5255,9 @@
   vixl32::Register cls = InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   vixl32::Register out = OutputRegister(instruction);
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(0) :
-      Location::NoLocation();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5276,7 +5278,8 @@
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       __ Cmp(out, cls);
       // Classes must be equal for the instanceof to succeed.
       __ B(ne, &zero);
@@ -5291,13 +5294,18 @@
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl32::Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
       __ Cmp(out, cls);
@@ -5315,14 +5323,19 @@
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       vixl32::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -5340,14 +5353,19 @@
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       vixl32::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5360,12 +5378,14 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
       // /* HeapReference<Class> */ out = obj->klass_
       GenerateReferenceLoadTwoRegisters(instruction,
                                         out_loc,
                                         obj_loc,
                                         class_offset,
-                                        maybe_temp_loc);
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       __ Cmp(out, cls);
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
@@ -5449,13 +5469,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathARM uses this "temp" register too.
-  locations->AddTemp(Location::RequiresRegister());
-  // When read barriers are enabled, we need an additional temporary
-  // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
@@ -5466,20 +5480,31 @@
   vixl32::Register cls = InputRegisterAt(instruction, 1);
   Location temp_loc = locations->GetTemp(0);
   vixl32::Register temp = RegisterFrom(temp_loc);
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(1) :
-      Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 3u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
-  bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
   SlowPathCodeARMVIXL* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
                                                             is_type_check_slow_path_fatal);
@@ -5491,12 +5516,17 @@
     __ CompareAndBranchIfZero(obj, &done, /* far_target */ false);
   }
 
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       __ Cmp(temp, cls);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
@@ -5505,12 +5535,24 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl32::Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
@@ -5523,6 +5565,14 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Walk over the class hierarchy to find a match.
       vixl32::Label loop;
       __ Bind(&loop);
@@ -5530,7 +5580,11 @@
       __ B(eq, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is null, jump to the slow path to throw the
       // exception.
@@ -5541,13 +5595,25 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck:  {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Do an exact check.
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
       // If the component type is null, jump to the slow path to throw the exception.
       __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
       // Otherwise,the object is indeed an array, jump to label `check_non_primitive_component_type`
@@ -5559,10 +5625,7 @@
     }
 
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
-      //
+      // We always go into the type check slow path for the unresolved check case.
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
       // calling InvokeRuntime directly), as it would require to
@@ -5570,8 +5633,45 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
+
       __ B(type_check_slow_path->GetEntryLabel());
       break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve performance of the fast path. We cannot get false
+      // positives by doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset));
+      // Loop through the iftable and check if any class matches.
+      vixl32::Label start_loop;
+      __ Bind(&start_loop);
+      __ CompareAndBranchIfZero(RegisterFrom(maybe_temp2_loc),
+                                type_check_slow_path->GetEntryLabel());
+      __ Ldr(RegisterFrom(maybe_temp3_loc), MemOperand(temp, object_array_data_offset));
+      GetAssembler()->MaybeUnpoisonHeapReference(RegisterFrom(maybe_temp3_loc));
+      // Go to next interface.
+      __ Add(temp, temp, Operand::From(2 * kHeapReferenceSize));
+      __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
+      // Compare the classes and continue the loop if they do not match.
+      __ Cmp(cls, RegisterFrom(maybe_temp3_loc));
+      __ B(ne, &start_loop);
+      break;
+    }
   }
   __ Bind(&done);
 
@@ -5862,7 +5962,8 @@
     HInstruction* instruction ATTRIBUTE_UNUSED,
     Location out,
     uint32_t offset,
-    Location maybe_temp ATTRIBUTE_UNUSED) {
+    Location maybe_temp ATTRIBUTE_UNUSED,
+    ReadBarrierOption read_barrier_option ATTRIBUTE_UNUSED) {
   vixl32::Register out_reg = RegisterFrom(out);
   if (kEmitCompilerReadBarrier) {
     TODO_VIXL32(FATAL);
@@ -5879,7 +5980,8 @@
     Location out,
     Location obj,
     uint32_t offset,
-    Location maybe_temp ATTRIBUTE_UNUSED) {
+    Location maybe_temp ATTRIBUTE_UNUSED,
+    ReadBarrierOption read_barrier_option ATTRIBUTE_UNUSED) {
   vixl32::Register out_reg = RegisterFrom(out);
   vixl32::Register obj_reg = RegisterFrom(obj);
   if (kEmitCompilerReadBarrier) {
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 89fef43..bd91127 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -422,7 +422,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -437,7 +438,8 @@
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location maybe_temp);
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
 
   // Generate a GC root reference load:
   //
diff --git a/compiler/optimizing/escape.cc b/compiler/optimizing/escape.cc
index c80e19e..9df5bf1 100644
--- a/compiler/optimizing/escape.cc
+++ b/compiler/optimizing/escape.cc
@@ -23,16 +23,19 @@
 void CalculateEscape(HInstruction* reference,
                      bool (*no_escape)(HInstruction*, HInstruction*),
                      /*out*/ bool* is_singleton,
-                     /*out*/ bool* is_singleton_and_non_escaping) {
+                     /*out*/ bool* is_singleton_and_not_returned,
+                     /*out*/ bool* is_singleton_and_not_deopt_visible) {
   // For references not allocated in the method, don't assume anything.
   if (!reference->IsNewInstance() && !reference->IsNewArray()) {
     *is_singleton = false;
-    *is_singleton_and_non_escaping = false;
+    *is_singleton_and_not_returned = false;
+    *is_singleton_and_not_deopt_visible = false;
     return;
   }
   // Assume the best until proven otherwise.
   *is_singleton = true;
-  *is_singleton_and_non_escaping = true;
+  *is_singleton_and_not_returned = true;
+  *is_singleton_and_not_deopt_visible = true;
   // Visit all uses to determine if this reference can escape into the heap,
   // a method call, an alias, etc.
   for (const HUseListNode<HInstruction*>& use : reference->GetUses()) {
@@ -45,7 +48,8 @@
       // for the uncommon cases. Similarly, null checks are eventually eliminated for explicit
       // allocations, but if we see one before it is simplified, assume an alias.
       *is_singleton = false;
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
       return;
     } else if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
                (user->IsInstanceFieldSet() && (reference == user->InputAt(1))) ||
@@ -56,7 +60,8 @@
       // The reference is merged to HPhi/HSelect, passed to a callee, or stored to heap.
       // Hence, the reference is no longer the only name that can refer to its value.
       *is_singleton = false;
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
       return;
     } else if ((user->IsUnresolvedInstanceFieldGet() && (reference == user->InputAt(0))) ||
                (user->IsUnresolvedInstanceFieldSet() && (reference == user->InputAt(0)))) {
@@ -64,37 +69,35 @@
       // Note that we could optimize this case and still perform some optimizations until
       // we hit the unresolved access, but the conservative assumption is the simplest.
       *is_singleton = false;
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_returned = false;
+      *is_singleton_and_not_deopt_visible = false;
       return;
     } else if (user->IsReturn()) {
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_returned = false;
     }
   }
 
-  // Need for further analysis?
-  if (!*is_singleton_and_non_escaping) {
-    return;
-  }
-
-  // Look at the environment uses and if it's for HDeoptimize, it's treated the
-  // same as a return which escapes at the end of executing the compiled code.
-  // Other environment uses are fine, as long as all client optimizations that
-  // rely on this informations are disabled for debuggable.
+  // Check whether an HDeoptimize holds any environment use. Other environment uses are fine,
+  // as long as client optimizations that rely on this information are disabled for debuggable.
   for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
     HEnvironment* user = use.GetUser();
     if (user->GetHolder()->IsDeoptimize()) {
-      *is_singleton_and_non_escaping = false;
+      *is_singleton_and_not_deopt_visible = false;
       break;
     }
   }
 }
 
-bool IsNonEscapingSingleton(HInstruction* reference,
-                            bool (*no_escape)(HInstruction*, HInstruction*)) {
-  bool is_singleton = true;
-  bool is_singleton_and_non_escaping = true;
-  CalculateEscape(reference, no_escape, &is_singleton, &is_singleton_and_non_escaping);
-  return is_singleton_and_non_escaping;
+bool DoesNotEscape(HInstruction* reference, bool (*no_escape)(HInstruction*, HInstruction*)) {
+  // CalculateEscape() assigns all three flags on every path. Only the "not returned" flag
+  // is reported: being visible to an HDeoptimize is deliberately not treated as an escape
+  // by this helper.
+  bool singleton = false;
+  bool not_returned = false;
+  bool not_deopt_visible = false;
+  CalculateEscape(
+      reference, no_escape, &singleton, &not_returned, &not_deopt_visible);
+  return not_returned;
+}
 
 }  // namespace art
diff --git a/compiler/optimizing/escape.h b/compiler/optimizing/escape.h
index 6514843..75e37b0 100644
--- a/compiler/optimizing/escape.h
+++ b/compiler/optimizing/escape.h
@@ -31,9 +31,18 @@
  * allocation. The method assigns true to parameter 'is_singleton' if the reference
  * is the only name that can refer to its value during the lifetime of the method,
  * meaning that the reference is not aliased with something else, is not stored to
- * heap memory, and not passed to another method. The method assigns true to parameter
- * 'is_singleton_and_non_escaping' if the reference is a singleton and is not returned
- * to the caller or used as an environment local of an HDeoptimize instruction.
+ * heap memory, and not passed to another method. In addition, the method assigns
+ * true to parameter 'is_singleton_and_not_returned' if the reference is a singleton
+ * and not returned to the caller and to parameter 'is_singleton_and_not_deopt_visible'
+ * if the reference is a singleton and not used as an environment local of an
+ * HDeoptimize instruction (clients of the final value must run after BCE to ensure
+ * all such instructions have been introduced already).
+ *
+ * Note that being visible to a HDeoptimize instruction does not count for ordinary
+ * escape analysis, since switching between compiled code and interpreted code keeps
+ * non-escaping references restricted to the lifetime of the method and the thread
+ * executing it. This property only concerns optimizations that are interested in
+ * escape analysis with respect to the *compiled* code (such as LSE).
  *
  * When set, the no_escape function is applied to any use of the allocation instruction
  * prior to any built-in escape analysis. This allows clients to define better escape
@@ -45,14 +54,14 @@
 void CalculateEscape(HInstruction* reference,
                      bool (*no_escape)(HInstruction*, HInstruction*),
                      /*out*/ bool* is_singleton,
-                     /*out*/ bool* is_singleton_and_non_escaping);
+                     /*out*/ bool* is_singleton_and_not_returned,
+                     /*out*/ bool* is_singleton_and_not_deopt_visible);
 
 /*
- * Convenience method for testing singleton and non-escaping property at once.
+ * Convenience method for testing the singleton and not returned properties at once.
  * Callers should be aware that this method invokes the full analysis at each call.
  */
-bool IsNonEscapingSingleton(HInstruction* reference,
-                            bool (*no_escape)(HInstruction*, HInstruction*));
+bool DoesNotEscape(HInstruction* reference, bool (*no_escape)(HInstruction*, HInstruction*));
 
 }  // namespace art
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 85b461d..658b804 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -16,6 +16,7 @@
 
 #include "instruction_simplifier.h"
 
+#include "escape.h"
 #include "intrinsics.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change-inl.h"
@@ -107,6 +108,8 @@
   void SimplifyStringCharAt(HInvoke* invoke);
   void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
   void SimplifyNPEOnArgN(HInvoke* invoke, size_t);
+  void SimplifyReturnThis(HInvoke* invoke);
+  void SimplifyAllocationIntrinsic(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
@@ -1864,11 +1867,61 @@
 // is provably non-null, we can clear the flag.
 void InstructionSimplifierVisitor::SimplifyNPEOnArgN(HInvoke* invoke, size_t n) {
   HInstruction* arg = invoke->InputAt(n);
-  if (!arg->CanBeNull()) {
+  if (invoke->CanThrow() && !arg->CanBeNull()) {
     invoke->SetCanThrow(false);
   }
 }
 
+// An invoke of a method that returns "this" has its result replaced by the receiver.
+void InstructionSimplifierVisitor::SimplifyReturnThis(HInvoke* invoke) {
+  if (!invoke->HasUses()) {
+    return;  // Nothing consumes the result, so there is nothing to rewrite.
+  }
+  invoke->ReplaceWith(invoke->InputAt(0));
+  RecordSimplification();
+}
+
+// Helper for StringBuffer escape analysis: true if `user` does not make `reference` escape.
+static bool NoEscapeForStringBufferReference(HInstruction* reference, HInstruction* user) {
+  if (user->IsInvokeStaticOrDirect()) {
+    // Any constructor on StringBuffer is okay; a null resolved method is treated as escaping.
+    ArtMethod* method = user->AsInvokeStaticOrDirect()->GetResolvedMethod();
+    return method != nullptr && method->IsConstructor() && user->InputAt(0) == reference;
+  } else if (user->IsInvokeVirtual()) {
+    switch (user->AsInvokeVirtual()->GetIntrinsic()) {
+      case Intrinsics::kStringBufferLength:
+      case Intrinsics::kStringBufferToString:
+        DCHECK_EQ(user->InputAt(0), reference);
+        return true;
+      case Intrinsics::kStringBufferAppend:
+        // Returns "this", so only okay if no further uses.
+        DCHECK_EQ(user->InputAt(0), reference);
+        DCHECK_NE(user->InputAt(1), reference);
+        return !user->HasUses();
+      default:
+        break;
+    }
+  }
+  return false;
+}
+
+// Dead code elimination keeps certain allocation intrinsics alive because they may
+// throw an OOM exception or have other side effects. This method removes such an
+// intrinsic when its result is unused and the checks below allow it.
+void InstructionSimplifierVisitor::SimplifyAllocationIntrinsic(HInvoke* invoke) {
+  if (invoke->HasUses()) {
+    return;  // The result is still needed.
+  }
+  // An unsynchronized call can be removed right away, ignoring the potential OOM. For the
+  // synchronized StringBuffer variant, the receiver must not escape, since only
+  // thread-local synchronization may be removed.
+  const bool needs_escape_check = invoke->GetIntrinsic() == Intrinsics::kStringBufferToString;
+  if (!needs_escape_check || DoesNotEscape(invoke->InputAt(0), NoEscapeForStringBufferReference)) {
+    invoke->GetBlock()->RemoveInstruction(invoke);
+    RecordSimplification();
+  }
+}
+
 void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) {
   uint32_t dex_pc = invoke->GetDexPc();
   HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
@@ -1926,6 +1979,14 @@
     case Intrinsics::kStringStringIndexOfAfter:
       SimplifyNPEOnArgN(instruction, 1);  // 0th has own NullCheck
       break;
+    case Intrinsics::kStringBufferAppend:
+    case Intrinsics::kStringBuilderAppend:
+      SimplifyReturnThis(instruction);
+      break;
+    case Intrinsics::kStringBufferToString:
+    case Intrinsics::kStringBuilderToString:
+      SimplifyAllocationIntrinsic(instruction);
+      break;
     case Intrinsics::kUnsafeLoadFence:
       SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
       break;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 8234b24..8f64fae 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -2613,6 +2613,12 @@
 
 UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(ARM, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 17a97da..d8a896e 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2781,6 +2781,12 @@
 
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index c8e3534..9e72447 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -677,7 +677,10 @@
       vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
       vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
       if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
-        __ Ldrexd(trg_lo, trg_hi, MemOperand(base, offset));
+        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+        const vixl32::Register temp_reg = temps.Acquire();
+        __ Add(temp_reg, base, offset);
+        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
       } else {
         __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
       }
@@ -2703,6 +2706,12 @@
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 7c81588..9b5d7a0 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2497,6 +2497,12 @@
 
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 2d4f417..5a99886 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1949,6 +1949,12 @@
 
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 06ab46f..922c3bc 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3331,6 +3331,12 @@
 
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 2ea8670..05d270a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -3000,6 +3000,12 @@
 
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppend);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);
 
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index edecf17..2856c3e 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -37,8 +37,13 @@
       : reference_(reference),
         position_(pos),
         is_singleton_(true),
-        is_singleton_and_non_escaping_(true) {
-    CalculateEscape(reference_, nullptr, &is_singleton_, &is_singleton_and_non_escaping_);
+        is_singleton_and_not_returned_(true),
+        is_singleton_and_not_deopt_visible_(true) {
+    CalculateEscape(reference_,
+                    nullptr,
+                    &is_singleton_,
+                    &is_singleton_and_not_returned_,
+                    &is_singleton_and_not_deopt_visible_);
   }
 
   HInstruction* GetReference() const {
@@ -59,19 +64,17 @@
   // Returns true if reference_ is a singleton and not returned to the caller or
   // used as an environment local of an HDeoptimize instruction.
   // The allocation and stores into reference_ may be eliminated for such cases.
-  bool IsSingletonAndNonEscaping() const {
-    return is_singleton_and_non_escaping_;
+  bool IsSingletonAndRemovable() const {
+    return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
   }
 
  private:
   HInstruction* const reference_;
-  const size_t position_;     // position in HeapLocationCollector's ref_info_array_.
-  bool is_singleton_;         // can only be referred to by a single name in the method.
+  const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
 
-  // reference_ is singleton and does not escape in the end either by
-  // returning to the caller, or being used as an environment local of an
-  // HDeoptimize instruction.
-  bool is_singleton_and_non_escaping_;
+  bool is_singleton_;                        // can only be referred to by a single name in the method,
+  bool is_singleton_and_not_returned_;       // and not returned to caller,
+  bool is_singleton_and_not_deopt_visible_;  // and not used as an environment local of HDeoptimize.
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
@@ -623,7 +626,7 @@
       bool from_all_predecessors = true;
       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
       HInstruction* singleton_ref = nullptr;
-      if (ref_info->IsSingletonAndNonEscaping()) {
+      if (ref_info->IsSingletonAndRemovable()) {
         // We do more analysis of liveness when merging heap values for such
         // cases since stores into such references may potentially be eliminated.
         singleton_ref = ref_info->GetReference();
@@ -796,7 +799,7 @@
     } else if (index != nullptr) {
       // For array element, don't eliminate stores since it can be easily aliased
       // with non-constant index.
-    } else if (ref_info->IsSingletonAndNonEscaping()) {
+    } else if (ref_info->IsSingletonAndRemovable()) {
       // Store into a field of a singleton that's not returned. The value cannot be
       // killed due to aliasing/invocation. It can be redundant since future loads can
       // directly get the value set by this instruction. The value can still be killed due to
@@ -970,7 +973,7 @@
       // new_instance isn't used for field accesses. No need to process it.
       return;
     }
-    if (ref_info->IsSingletonAndNonEscaping() &&
+    if (ref_info->IsSingletonAndRemovable() &&
         !new_instance->IsFinalizable() &&
         !new_instance->NeedsAccessCheck()) {
       singleton_new_instances_.push_back(new_instance);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 165dce3..7ab04e1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2072,6 +2072,8 @@
 #undef INSTRUCTION_TYPE_CHECK
 
   // Returns whether the instruction can be moved within the graph.
+  // TODO: this method is used by LICM and GVN with possibly different
+  //       meanings? split and rename?
   virtual bool CanBeMoved() const { return false; }
 
   // Returns whether the two instructions are of the same kind.
@@ -3789,7 +3791,7 @@
 
   bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); }
 
-  bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); }
+  bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); }
 
   bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_;
@@ -4181,6 +4183,19 @@
                 kVirtual),
         vtable_index_(vtable_index) {}
 
+  // Reports the results of the intrinsics listed below as non-null and defers to
+  // HInvoke::CanBeNull() for every other invoke. Written as one boolean expression
+  // so the non-null set reads as a single list.
+  bool CanBeNull() const OVERRIDE {
+    const Intrinsics intrinsic = GetIntrinsic();
+    const bool never_null = intrinsic == Intrinsics::kThreadCurrentThread ||
+                            intrinsic == Intrinsics::kStringBufferAppend ||
+                            intrinsic == Intrinsics::kStringBufferToString ||
+                            intrinsic == Intrinsics::kStringBuilderAppend ||
+                            intrinsic == Intrinsics::kStringBuilderToString;
+    return !never_null && HInvoke::CanBeNull();
+  }
+
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     // TODO: Add implicit null checks in intrinsics.
     return (obj == InputAt(0)) && !GetLocations()->Intrinsified();
diff --git a/compiler/utils/atomic_method_ref_map.h b/compiler/utils/atomic_method_ref_map.h
index f0db231..11ab211 100644
--- a/compiler/utils/atomic_method_ref_map.h
+++ b/compiler/utils/atomic_method_ref_map.h
@@ -47,6 +47,10 @@
   // thread safe.
   void AddDexFile(const DexFile* dex_file);
 
+  bool HaveDexFile(const DexFile* dex_file) const {  // True iff `dex_file` is a key of arrays_.
+    return arrays_.find(dex_file) != arrays_.end();
+  }
+
   // Visit all of the dex files and elements.
   template <typename Visitor>
   void Visit(const Visitor& visitor);
diff --git a/compiler/utils/atomic_method_ref_map_test.cc b/compiler/utils/atomic_method_ref_map_test.cc
index c3e48ff..9e5bf4b 100644
--- a/compiler/utils/atomic_method_ref_map_test.cc
+++ b/compiler/utils/atomic_method_ref_map_test.cc
@@ -36,9 +36,11 @@
   int value = 123;
   // Error case: Not already inserted.
   EXPECT_FALSE(map.Get(MethodReference(dex.get(), 1), &value));
+  EXPECT_FALSE(map.HaveDexFile(dex.get()));
   // Error case: Dex file not registered.
   EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, 1) == Map::kInsertResultInvalidDexFile);
   map.AddDexFile(dex.get());
+  EXPECT_TRUE(map.HaveDexFile(dex.get()));
   EXPECT_GT(dex->NumMethodIds(), 10u);
   // After we have added the get should succeed but return the default value.
   EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value));
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 714a58c..b6b62a8 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -26,6 +26,7 @@
 #include "base/stringprintf.h"
 #include "dex_file-inl.h"
 #include "dex2oat_environment_test.h"
+#include "jit/offline_profiling_info.h"
 #include "oat.h"
 #include "oat_file.h"
 #include "utils.h"
@@ -552,26 +553,6 @@
   RunTest(CompilerFilter::kSpeed, true, { "--very-large-app-threshold=100" });
 }
 
-static const char kDexFileLayoutInputProfile[] = "cHJvADAwMgABAAwAAQABAOqMEeFEZXhOb09hdC5qYXIBAAEA";
-
-static void WriteFileBase64(const char* base64, const char* location) {
-  // Decode base64.
-  CHECK(base64 != nullptr);
-  size_t length;
-  std::unique_ptr<uint8_t[]> bytes(DecodeBase64(base64, &length));
-  CHECK(bytes.get() != nullptr);
-
-  // Write to provided file.
-  std::unique_ptr<File> file(OS::CreateEmptyFile(location));
-  CHECK(file.get() != nullptr);
-  if (!file->WriteFully(bytes.get(), length)) {
-    PLOG(FATAL) << "Failed to write base64 as file";
-  }
-  if (file->FlushCloseOrErase() != 0) {
-    PLOG(FATAL) << "Could not flush and close test file.";
-  }
-}
-
 class Dex2oatLayoutTest : public Dex2oatTest {
  protected:
   void CheckFilter(CompilerFilter::Filter input ATTRIBUTE_UNUSED,
@@ -579,13 +560,34 @@
     // Ignore, we'll do our own checks.
   }
 
+  // Writes to `test_profile` a profile that records one dex file, keyed by `dex_location` and
+  // `checksum`, containing only the class with type index 1.
+  void GenerateProfile(const std::string& test_profile,
+                       const std::string& dex_location,
+                       uint32_t checksum) {
+    const int fd = open(test_profile.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
+    CHECK_GE(fd, 0);
+    ProfileCompilationInfo info;
+    const std::string key = ProfileCompilationInfo::GetProfileDexFileKey(dex_location);
+    info.AddClassIndex(key, checksum, dex::TypeIndex(1));
+    const bool saved = info.Save(fd);
+    close(fd);  // Close before asserting so the descriptor is not leaked on failure.
+    ASSERT_TRUE(saved);
+  }
+
   void RunTest() {
     std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
     std::string profile_location = GetScratchDir() + "/primary.prof";
     std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
 
     Copy(GetDexSrc2(), dex_location);
-    WriteFileBase64(kDexFileLayoutInputProfile, profile_location.c_str());
+    const char* location = dex_location.c_str();
+    std::string error_msg;
+    std::vector<std::unique_ptr<const DexFile>> dex_files;
+    ASSERT_TRUE(DexFile::Open(location, location, true, &error_msg, &dex_files));
+    EXPECT_EQ(dex_files.size(), 1U);
+    std::unique_ptr<const DexFile>& dex_file = dex_files[0];
+    GenerateProfile(profile_location, dex_location, dex_file->GetLocationChecksum());
 
     const std::vector<std::string>& extra_args = { "--profile-file=" + profile_location };
     GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kLayoutProfile, extra_args);
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 3a83eaf..a71ab4b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1109,62 +1109,7 @@
      */
 
 ENTRY art_quick_resolve_string
-    push   {r10-r12, lr}
-    .cfi_adjust_cfa_offset 16
-    .cfi_rel_offset r10, 0
-    .cfi_rel_offset r11, 4
-    .cfi_rel_offset ip, 8
-    .cfi_rel_offset lr, 12
-    ldr    r10, [sp, #16]                                        @ load referrer
-    ldr    r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET]        @ load declaring class
-    ldr    r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
-    ubfx   r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS
-    add    r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
-    ldrd   r10, r11, [r10]                               @ load index into r11 and pointer into r10
-    cmp    r0, r11
-    bne    .Lart_quick_resolve_string_slow_path
-#ifdef USE_READ_BARRIER
-    ldr    r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   r0, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
-    mov    r0, r10
-    pop    {r10-r12, pc}
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr    r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    lsrs   r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1)
-    bcs    .Lart_quick_resolve_string_no_rb
-    mov    r0, r10
-    .cfi_remember_state
-    pop    {r10-r12, lr}
-    .cfi_adjust_cfa_offset -16
-    .cfi_restore r10
-    .cfi_restore r11
-    .cfi_restore r12
-    .cfi_restore lr
-    // Note: art_quick_read_barrier_mark_reg00 clobbers IP but the .Lslow_rb_* does not.
-    b      .Lslow_rb_art_quick_read_barrier_mark_reg00  @ Get the marked string back.
-    .cfi_restore_state
-#endif
-
-// Slow path case, the index did not match
-.Lart_quick_resolve_string_slow_path:
-    push {r0-r9}                  @ 10 words of callee saves and args; {r10-r12, lr} already saved.
-    .cfi_adjust_cfa_offset 40
-    .cfi_rel_offset r0, 0
-    .cfi_rel_offset r1, 4
-    .cfi_rel_offset r2, 8
-    .cfi_rel_offset r3, 12
-    .cfi_rel_offset r4, 16
-    .cfi_rel_offset r5, 20
-    .cfi_rel_offset r6, 24
-    .cfi_rel_offset r7, 28
-    .cfi_rel_offset r8, 32
-    .cfi_rel_offset r9, 36
-    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1   @ save callee saves in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME r1                   @ save everything in case of GC
     mov    r1, r9                                    @ pass Thread::Current
     bl     artResolveStringFromCode                  @ (uint32_t type_idx, Thread*)
     cbz    r0, 1f                                    @ If result is null, deliver the OOME.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 73bca03..b88515f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1317,6 +1317,7 @@
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
     ret
     .cfi_restore_state                // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 32            // workaround for clang bug: 31975598
 
 .Lthrow_class_cast_exception:
     // Restore
@@ -1484,6 +1485,7 @@
     strb w3, [x3, x0]
     ret
     .cfi_restore_state            // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 32        // workaround for clang bug: 31975598
 .Lthrow_array_store_exception:
     RESTORE_TWO_REGS x2, xLR, 16
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
@@ -1651,44 +1653,7 @@
      */
 
 ENTRY art_quick_resolve_string
-    SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    ldr   x29, [sp, #(2 * __SIZEOF_POINTER__)]                   // load referrer
-    ldr   w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET]         // load declaring class
-    ldr   x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]  // load string dex cache
-    ubfx  lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS                // get masked string index into LR
-    ldr   x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x29
-    cmp   x0, x29, lsr #32                                       // compare against upper 32 bits
-    bne   .Lart_quick_resolve_string_slow_path
-    ubfx  x0, x29, #0, #32                                       // extract lower 32 bits into x0
-#ifdef USE_READ_BARRIER
-    // Most common case: GC is not marking.
-    ldr    w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x29, .Lart_quick_resolve_string_marking
-.Lart_quick_resolve_string_no_rb:
-#endif
-    .cfi_remember_state
-    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    ret
-    .cfi_restore_state
-    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
-
-#ifdef USE_READ_BARRIER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr   x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbnz  x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
-    .cfi_remember_state
-    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
-    // Note: art_quick_read_barrier_mark_reg00 clobbers IP0 but the .Lslow_rb_* does not.
-    b     .Lslow_rb_art_quick_read_barrier_mark_reg00  // Get the marked string back.
-    .cfi_restore_state
-    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
-#endif
-
-// Slow path case, the index did not match.
-.Lart_quick_resolve_string_slow_path:
-    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)
-    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR  // save callee saves in case of GC
+    SETUP_SAVE_EVERYTHING_FRAME                     // save everything for stack crawl
     mov   x1, xSELF                                 // pass Thread::Current
     bl    artResolveStringFromCode                  // (int32_t string_idx, Thread* self)
     cbz   w0, 1f                                    // If result is null, deliver the OOME.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6fbc954..c6f4c03 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1148,51 +1148,17 @@
 END_FUNCTION art_quick_alloc_object_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    PUSH edi
-    PUSH esi
-    // Save xmm0 at an aligned address on the stack.
-    subl MACRO_LITERAL(12), %esp
-    CFI_ADJUST_CFA_OFFSET(12)
-    movsd %xmm0, 0(%esp)
-    movl 24(%esp), %edi                                          // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi           // get declaring class
-    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi    // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi
-    andl %eax, %esi
-    movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0    // load string idx and ptr to xmm0
-    movd %xmm0, %edi                                             // extract pointer
-    pshufd LITERAL(0x55), %xmm0, %xmm0                           // shuffle index into lowest bits
-    movd %xmm0, %esi                                             // extract index
-    // Restore xmm0 and remove it together with padding from the stack.
-    movsd 0(%esp), %xmm0
-    addl MACRO_LITERAL(12), %esp
-    CFI_ADJUST_CFA_OFFSET(-12)
-    cmp %esi, %eax
-    jne .Lart_quick_resolve_string_slow_path
-    movl %edi, %eax
-    CFI_REMEMBER_STATE
-    POP esi
-    POP edi
-#ifdef USE_READ_BARRIER
-    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
-    ret
-    CFI_RESTORE_STATE
-    CFI_DEF_CFA(esp, 24)                          // workaround for clang bug: 31975598
-
-.Lart_quick_resolve_string_slow_path:
+    SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
     // Outgoing argument set up
-    SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx
-    subl LITERAL(8), %esp                                        // push padding
+    subl LITERAL(8), %esp                                 // push padding
     CFI_ADJUST_CFA_OFFSET(8)
-    pushl %fs:THREAD_SELF_OFFSET                                 // pass Thread::Current()
+    pushl %fs:THREAD_SELF_OFFSET                          // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
-    PUSH eax                                                     // pass arg1
+    PUSH eax                                              // pass arg1
     call SYMBOL(artResolveStringFromCode)
-    addl LITERAL(16), %esp                                       // pop arguments
+    addl LITERAL(16), %esp                                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    testl %eax, %eax                                        // If result is null, deliver the OOME.
+    testl %eax, %eax                                      // If result is null, deliver the OOME.
     jz 1f
     CFI_REMEMBER_STATE
     RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index f8066e4..4c46b08 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1354,34 +1354,7 @@
 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    // Custom calling convention: RAX serves as both input and output.
-    PUSH r15
-    PUSH r14
-    movq 24(%rsp), %r15                                         // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d         // get declaring class
-    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15  // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d
-    andl %eax, %r14d
-    movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14
-    movl %r14d, %r15d
-    shrq LITERAL(32), %r14
-    cmpl %r14d, %eax
-    jne .Lart_quick_resolve_string_slow_path
-    movl %r15d, %eax
-    CFI_REMEMBER_STATE
-    POP r14
-    POP r15
-#ifdef USE_READ_BARRIER
-    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lnot_null_art_quick_read_barrier_mark_reg00
-#endif
-    ret
-    CFI_RESTORE_STATE
-    CFI_DEF_CFA(rsp, 24)                        // workaround for clang bug: 31975598
-
-// Slow path, the index did not match.
-.Lart_quick_resolve_string_slow_path:
-    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
+    SETUP_SAVE_EVERYTHING_FRAME
     // Outgoing argument set up
     movl %eax, %edi                             // pass string index
     movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 1e809d5..730a9c3 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -399,7 +399,11 @@
 
 inline mirror::DexCache* ArtMethod::GetDexCache() {
   DCHECK(!IsProxyMethod());
-  return GetDeclaringClass()->GetDexCache();
+  // Obsolete methods take the dedicated lookup instead of the declaring class's DexCache.
+  if (UNLIKELY(IsObsolete())) {
+    return GetObsoleteDexCache();
+  }
+  return GetDeclaringClass()->GetDexCache();
 }
 
 template<ReadBarrierOption kReadBarrierOption>
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index d1454b6..eeece90 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -35,6 +35,7 @@
 #include "jit/profiling_info.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/executable.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
@@ -57,6 +58,28 @@
   return executable->GetArtMethod();
 }
 
+// Returns the DexCache stored for this obsolete method in its declaring class's ClassExt.
+mirror::DexCache* ArtMethod::GetObsoleteDexCache() {
+  DCHECK(!Runtime::Current()->IsAotCompiler()) << PrettyMethod();
+  DCHECK(IsObsolete());
+  ObjPtr<mirror::ClassExt> class_ext(GetDeclaringClass()->GetExtData());
+  CHECK(!class_ext.IsNull());
+  ObjPtr<mirror::PointerArray> methods(class_ext->GetObsoleteMethods());
+  CHECK(!methods.IsNull());
+  DCHECK(class_ext->GetObsoleteDexCaches() != nullptr);
+  const int32_t count = methods->GetLength();
+  DCHECK_EQ(count, class_ext->GetObsoleteDexCaches()->GetLength());
+  // TODO I think kRuntimePointerSize is fine since images should never have obsolete methods.
+  DCHECK_EQ(kRuntimePointerSize, Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  for (int32_t idx = 0; idx < count; ++idx) {
+    if (methods->GetElementPtrSize<ArtMethod*>(idx, kRuntimePointerSize) == this) {
+      return class_ext->GetObsoleteDexCaches()->Get(idx);
+    }
+  }
+  LOG(FATAL) << "This method does not appear in the obsolete map of its class!";
+  UNREACHABLE();
+}
+
 mirror::String* ArtMethod::GetNameAsString(Thread* self) {
   CHECK(!IsProxyMethod());
   StackHandleScope<1> hs(self);
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 0e1d7e7..00fab65 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -227,6 +227,11 @@
     return (GetAccessFlags() & kAccDefault) != 0;
   }
 
+  bool IsObsolete() {
+    // TODO Should maybe make this IsIntrinsic check not needed
+    return !IsIntrinsic() && (GetAccessFlags() & kAccObsoleteMethod) != 0;
+  }
+
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsNative() {
     return (GetAccessFlags<kReadBarrierOption>() & kAccNative) != 0;
@@ -557,6 +562,7 @@
   mirror::ClassLoader* GetClassLoader() REQUIRES_SHARED(Locks::mutator_lock_);
 
   mirror::DexCache* GetDexCache() REQUIRES_SHARED(Locks::mutator_lock_);
+  mirror::DexCache* GetObsoleteDexCache() REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy(PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 9e17be2..7c06ffe 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -614,6 +614,9 @@
 
 struct ClassExtOffsets : public CheckOffsets<mirror::ClassExt> {
   ClassExtOffsets() : CheckOffsets<mirror::ClassExt>(false, "Ldalvik/system/ClassExt;") {
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, obsolete_dex_caches_), "obsoleteDexCaches");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, obsolete_methods_), "obsoleteMethods");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, original_dex_cache_), "originalDexCache");
     addOffset(OFFSETOF_MEMBER(mirror::ClassExt, verify_error_), "verifyError");
   }
 };
diff --git a/runtime/handle.h b/runtime/handle.h
index 3db3be2..e4b6d29 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -61,6 +61,10 @@
     return down_cast<T*>(reference_->AsMirrorPtr());
   }
 
+  ALWAYS_INLINE bool IsNull() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return Get() == nullptr;
+  }
+
   ALWAYS_INLINE jobject ToJObject() const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
       // Special case so that we work with null handles.
diff --git a/runtime/image.cc b/runtime/image.cc
index bd5ba93..52c9f4e 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '2', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '3', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index 9fbbbd3..441c1a1 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -31,11 +31,11 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xPROFILE, x27, [sp, #-80]!
-    stp     xIBASE, xREFS, [sp, #16]
-    stp     xSELF, xINST, [sp, #32]
-    stp     xPC, xFP, [sp, #48]
-    stp     fp, lr, [sp, #64]
+    SAVE_TWO_REGS_INCREASE_FRAME xPROFILE, x27, 80
+    SAVE_TWO_REGS                xIBASE, xREFS, 16
+    SAVE_TWO_REGS                xSELF, xINST, 32
+    SAVE_TWO_REGS                xPC, xFP, 48
+    SAVE_TWO_REGS                fp, lr, 64
     add     fp, sp, #64
 
     /* Remember the return register */
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index ada0326..6ffbd3f 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -285,12 +285,15 @@
  */
     cmp     wPROFILE, #0
     bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    .cfi_remember_state
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
+    .cfi_restore_state                              // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 80                          // workaround for clang bug: 31975598
 
 MterpProfileActive:
     mov     xINST, x0                               // stash return value
@@ -301,11 +304,11 @@
     strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
     bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
     mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
 
     .cfi_endproc
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index c791eb5..7125d5a 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -292,3 +292,41 @@
 .macro REFRESH_IBASE
   ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 .endm
+
+/*
+ * Save two registers to the stack.
+ */
+.macro SAVE_TWO_REGS reg1, reg2, offset
+    stp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_rel_offset \reg1, (\offset)
+    .cfi_rel_offset \reg2, (\offset) + 8
+.endm
+
+/*
+ * Restore two registers from the stack.
+ */
+.macro RESTORE_TWO_REGS reg1, reg2, offset
+    ldp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+.endm
+
+/*
+ * Increase frame size and save two registers to the bottom of the stack.
+ */
+.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
+    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
+    .cfi_adjust_cfa_offset (\frame_adjustment)
+    .cfi_rel_offset \reg1, 0
+    .cfi_rel_offset \reg2, 8
+.endm
+
+/*
+ * Restore two registers from the bottom of the stack and decrease frame size.
+ */
+.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
+    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+    .cfi_adjust_cfa_offset -(\frame_adjustment)
+.endm
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 42f8c1b..34d99a8 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -300,6 +300,44 @@
   ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 .endm
 
+/*
+ * Save two registers to the stack.
+ */
+.macro SAVE_TWO_REGS reg1, reg2, offset
+    stp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_rel_offset \reg1, (\offset)
+    .cfi_rel_offset \reg2, (\offset) + 8
+.endm
+
+/*
+ * Restore two registers from the stack.
+ */
+.macro RESTORE_TWO_REGS reg1, reg2, offset
+    ldp \reg1, \reg2, [sp, #(\offset)]
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+.endm
+
+/*
+ * Increase frame size and save two registers to the bottom of the stack.
+ */
+.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
+    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
+    .cfi_adjust_cfa_offset (\frame_adjustment)
+    .cfi_rel_offset \reg1, 0
+    .cfi_rel_offset \reg2, 8
+.endm
+
+/*
+ * Restore two registers from the bottom of the stack and decrease frame size.
+ */
+.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
+    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
+    .cfi_restore \reg1
+    .cfi_restore \reg2
+    .cfi_adjust_cfa_offset -(\frame_adjustment)
+.endm
+
 /* File: arm64/entry.S */
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -334,11 +372,11 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xPROFILE, x27, [sp, #-80]!
-    stp     xIBASE, xREFS, [sp, #16]
-    stp     xSELF, xINST, [sp, #32]
-    stp     xPC, xFP, [sp, #48]
-    stp     fp, lr, [sp, #64]
+    SAVE_TWO_REGS_INCREASE_FRAME xPROFILE, x27, 80
+    SAVE_TWO_REGS                xIBASE, xREFS, 16
+    SAVE_TWO_REGS                xSELF, xINST, 32
+    SAVE_TWO_REGS                xPC, xFP, 48
+    SAVE_TWO_REGS                fp, lr, 64
     add     fp, sp, #64
 
     /* Remember the return register */
@@ -7226,12 +7264,15 @@
  */
     cmp     wPROFILE, #0
     bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    .cfi_remember_state
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
+    .cfi_restore_state                              // Reset unwind info so following code unwinds.
+    .cfi_def_cfa_offset 80                          // workaround for clang bug: 31975598
 
 MterpProfileActive:
     mov     xINST, x0                               // stash return value
@@ -7242,11 +7283,11 @@
     strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
     bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
     mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    RESTORE_TWO_REGS                fp, lr, 64
+    RESTORE_TWO_REGS                xPC, xFP, 48
+    RESTORE_TWO_REGS                xSELF, xINST, 32
+    RESTORE_TWO_REGS                xIBASE, xREFS, 16
+    RESTORE_TWO_REGS_DECREASE_FRAME xPROFILE, x27, 80
     ret
 
     .cfi_endproc
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 4136488..53d0eea 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -180,6 +180,7 @@
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
   friend class ProfileAssistantTest;
+  friend class Dex2oatLayoutTest;
 
   DexFileToProfileInfoMap info_;
 };
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index b11dad8..7d7c1d7 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -424,6 +424,29 @@
   }
 }
 
+template<bool kUnchecked>
+void PointerArray::Memcpy(int32_t dst_pos, ObjPtr<PointerArray> src, int32_t src_pos,
+                          int32_t count, PointerSize ptr_size) {
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  DCHECK(!src.IsNull());
+  // Delegate to the Memcpy() of the primitive array type selected by |ptr_size|: LongArray for
+  // PointerSize::k64, IntArray otherwise. With kUnchecked the arrays are reinterpreted via
+  // down_cast instead of going through AsLongArray()/AsIntArray().
+  if (ptr_size != PointerSize::k64) {
+    IntArray* dst32 = (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(this))
+                                  : AsIntArray());
+    IntArray* src32 = (kUnchecked ? down_cast<IntArray*>(static_cast<Object*>(src.Ptr()))
+                                  : src->AsIntArray());
+    dst32->Memcpy(dst_pos, src32, src_pos, count);
+  } else {
+    LongArray* dst64 = (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(this))
+                                   : AsLongArray());
+    LongArray* src64 = (kUnchecked ? down_cast<LongArray*>(static_cast<Object*>(src.Ptr()))
+                                   : src->AsLongArray());
+    dst64->Memcpy(dst_pos, src64, src_pos, count);
+  }
+}
+
 template<typename T>
 inline void PrimitiveArray<T>::SetArrayClass(ObjPtr<Class> array_class) {
   CHECK(array_class_.IsNull());
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 994e9b2..19d300e 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -208,6 +208,17 @@
             typename Visitor>
   void Fixup(mirror::PointerArray* dest, PointerSize pointer_size, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Works like memcpy(), except we guarantee not to tear array values (i.e. never copy in units
+  // smaller than the element size). Arguments are assumed to be within the bounds of the arrays
+  // and the arrays non-null. Cannot be called in an active transaction.
+  template<bool kUnchecked = false>
+  void Memcpy(int32_t dst_pos,
+              ObjPtr<PointerArray> src,
+              int32_t src_pos,
+              int32_t count,
+              PointerSize pointer_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 };
 
 }  // namespace mirror
diff --git a/runtime/mirror/class_ext.cc b/runtime/mirror/class_ext.cc
index cc208e4..259bbbe 100644
--- a/runtime/mirror/class_ext.cc
+++ b/runtime/mirror/class_ext.cc
@@ -34,6 +34,71 @@
 
 GcRoot<Class> ClassExt::dalvik_system_ClassExt_;
 
+// Installs |methods| and |dex_caches| as the obsolete arrays; both or neither must be null.
+void ClassExt::SetObsoleteArrays(ObjPtr<PointerArray> methods,
+                                 ObjPtr<ObjectArray<DexCache>> dex_caches) {
+  DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId())
+      << "Obsolete arrays are set without synchronization!";
+  CHECK_EQ(methods.IsNull(), dex_caches.IsNull());
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  // NOTE(review): the <false> argument presumably means "no active transaction" - confirm.
+  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_dex_caches_), dex_caches.Ptr());
+  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_), methods.Ptr());
+}
+
+// Grows the obsolete method and dex cache arrays by |increase| entries, keeping old contents.
+// Returns false, with an OOME pending on |self| and the arrays untouched, if allocation fails.
+// The calling thread must hold this object's monitor (checked below).
+// TODO We really need to be careful how we update this. If we ever in the future make it so that
+// these arrays are written into without all threads being suspended we have a race condition!
+bool ClassExt::ExtendObsoleteArrays(Thread* self, uint32_t increase) {
+  DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId())
+      << "Obsolete arrays are set without synchronization!";
+  StackHandleScope<5> hs(self);
+  Handle<ClassExt> h_this(hs.NewHandle(this));
+  Handle<PointerArray> old_methods(hs.NewHandle(h_this->GetObsoleteMethods()));
+  Handle<ObjectArray<DexCache>> old_dex_caches(hs.NewHandle(h_this->GetObsoleteDexCaches()));
+  ClassLinker* cl = Runtime::Current()->GetClassLinker();
+  size_t new_len;
+  if (old_methods.IsNull()) {
+    CHECK(old_dex_caches.IsNull());
+    new_len = increase;
+  } else {
+    CHECK_EQ(old_methods->GetLength(), old_dex_caches->GetLength());
+    new_len = increase + old_methods->GetLength();
+  }
+  Handle<PointerArray> new_methods(hs.NewHandle<PointerArray>(
+      cl->AllocPointerArray(self, new_len)));
+  if (new_methods.IsNull()) {
+    // Allocating the method array failed; report it through the pending OOME.
+    self->AssertPendingOOMException();
+    return false;
+  }
+  Handle<ObjectArray<DexCache>> new_dex_caches(hs.NewHandle<ObjectArray<DexCache>>(
+      ObjectArray<DexCache>::Alloc(self,
+                                   cl->FindClass(self,
+                                                 "[Ljava/lang/DexCache;",
+                                                 ScopedNullHandle<ClassLoader>()),
+                                   new_len)));
+  if (new_dex_caches.IsNull()) {
+    // Allocating the dex cache array failed; report it through the pending OOME.
+    self->AssertPendingOOMException();
+    return false;
+  }
+
+  if (!old_methods.IsNull()) {
+    // Copy the old contents to the front of the new arrays; the new tail entries stay unset.
+    new_methods->Memcpy(
+        0, old_methods.Get(), 0, old_methods->GetLength(), cl->GetImagePointerSize());
+    new_dex_caches->AsObjectArray<Object>()->AssignableCheckingMemcpy<false>(
+        0, old_dex_caches->AsObjectArray<Object>(), 0, old_dex_caches->GetLength(), false);
+  }
+  // Publish both new arrays together so their lengths stay in sync.
+  h_this->SetObsoleteArrays(new_methods.Get(), new_dex_caches.Get());
+
+  return true;
+}
+
 ClassExt* ClassExt::Alloc(Thread* self) {
   DCHECK(dalvik_system_ClassExt_.Read() != nullptr);
   return down_cast<ClassExt*>(dalvik_system_ClassExt_.Read()->AllocObject(self).Ptr());
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
index 35eaae1..9104631 100644
--- a/runtime/mirror/class_ext.h
+++ b/runtime/mirror/class_ext.h
@@ -19,8 +19,11 @@
 
 #include "class-inl.h"
 
+#include "array.h"
+#include "dex_cache.h"
 #include "gc_root.h"
 #include "object.h"
+#include "object_array.h"
 #include "object_callbacks.h"
 #include "string.h"
 
@@ -49,6 +52,22 @@
     return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_));
   }
 
+  ObjectArray<DexCache>* GetObsoleteDexCaches() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<DexCache>>(
+        OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_dex_caches_));
+  }
+
+  PointerArray* GetObsoleteMethods() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<PointerArray>(OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_));
+  }
+
+  void SetObsoleteArrays(ObjPtr<PointerArray> methods, ObjPtr<ObjectArray<DexCache>> dex_caches)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Extend the obsolete arrays by the given amount.
+  bool ExtendObsoleteArrays(Thread* self, uint32_t increase)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static void SetClass(ObjPtr<Class> dalvik_system_ClassExt);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -57,6 +76,13 @@
 
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  HeapReference<ObjectArray<DexCache>> obsolete_dex_caches_;
+
+  HeapReference<PointerArray> obsolete_methods_;
+
+  HeapReference<DexCache> original_dex_cache_;
+
+  // The saved verification error of this class.
   HeapReference<Object> verify_error_;
 
   static GcRoot<Class> dalvik_system_ClassExt_;
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index dd32df6..a1110d9 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -67,6 +67,10 @@
 
 // Set by the verifier for a method that could not be verified to follow structured locking.
 static constexpr uint32_t kAccMustCountLocks =        0x02000000;  // method (runtime)
+// Set to indicate that the ArtMethod is obsolete and has a different DexCache from its declaring
+// class.
+// TODO Might want to re-arrange some of these so that we can have obsolete + intrinsic methods.
+static constexpr uint32_t kAccObsoleteMethod =        0x04000000;  // method (runtime)
 static constexpr uint32_t kAccIntrinsic  =            0x80000000;  // method (runtime)
 
 // Special runtime-only flags.
diff --git a/runtime/object_lock.cc b/runtime/object_lock.cc
index b8754a4..39ab52f 100644
--- a/runtime/object_lock.cc
+++ b/runtime/object_lock.cc
@@ -17,6 +17,7 @@
 #include "object_lock.h"
 
 #include "mirror/object-inl.h"
+#include "mirror/class_ext.h"
 #include "monitor.h"
 
 namespace art {
@@ -61,6 +62,7 @@
 }
 
 template class ObjectLock<mirror::Class>;
+template class ObjectLock<mirror::ClassExt>;
 template class ObjectLock<mirror::Object>;
 template class ObjectTryLock<mirror::Class>;
 template class ObjectTryLock<mirror::Object>;
diff --git a/runtime/openjdkjvmti/Android.bp b/runtime/openjdkjvmti/Android.bp
index b323aef..0f9fbb2 100644
--- a/runtime/openjdkjvmti/Android.bp
+++ b/runtime/openjdkjvmti/Android.bp
@@ -24,6 +24,7 @@
            "ti_heap.cc",
            "ti_method.cc",
            "ti_stack.cc",
+           "ti_redefine.cc",
            "transform.cc"],
     include_dirs: ["art/runtime"],
     shared_libs: [
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 6480843..d1c2293 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -40,15 +40,16 @@
 #include "base/mutex.h"
 #include "events-inl.h"
 #include "jni_env_ext-inl.h"
-#include "object_tagging.h"
 #include "obj_ptr-inl.h"
+#include "object_tagging.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread_list.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "ti_class.h"
 #include "ti_heap.h"
 #include "ti_method.h"
+#include "ti_redefine.h"
 #include "ti_stack.h"
 #include "transform.h"
 
@@ -1148,6 +1149,8 @@
     if (!IsValidEnv(env)) {
       return ERR(INVALID_ENVIRONMENT);
     }
+    jvmtiError res = OK;
+    std::string error;
     for (jclass klass : classes) {
       JNIEnv* jni_env = nullptr;
       jobject loader = nullptr;
@@ -1183,11 +1186,22 @@
            /*out*/&new_dex_data);
       // Check if anything actually changed.
       if ((new_data_len != 0 || new_dex_data != nullptr) && new_dex_data != dex_data) {
-        MoveTransformedFileIntoRuntime(klass, std::move(location), new_data_len, new_dex_data);
+        res = Redefiner::RedefineClass(env,
+                                       art::Runtime::Current(),
+                                       art::Thread::Current(),
+                                       klass,
+                                       location,
+                                       new_data_len,
+                                       new_dex_data,
+                                       &error);
         env->Deallocate(new_dex_data);
       }
       // Deallocate the old dex data.
       env->Deallocate(dex_data);
+      if (res != OK) {
+        LOG(ERROR) << "FAILURE TO REDEFINE " << error;
+        return res;
+      }
     }
     return OK;
   }
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
new file mode 100644
index 0000000..69bd887
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -0,0 +1,507 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "ti_redefine.h"
+
+#include <limits>
+
+#include "art_jvmti.h"
+#include "base/logging.h"
+#include "events-inl.h"
+#include "gc/allocation_listener.h"
+#include "instrumentation.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti_allocator.h"
+#include "mirror/class.h"
+#include "mirror/class_ext.h"
+#include "mirror/object.h"
+#include "object_lock.h"
+#include "runtime.h"
+#include "ScopedLocalRef.h"
+
+namespace openjdkjvmti {
+
+// Moves dex data to an anonymous, read-only mmap'd region.
+std::unique_ptr<art::MemMap> Redefiner::MoveDataToMemMap(const std::string& original_location,
+                                                         jint data_len,
+                                                         unsigned char* dex_data,
+                                                         std::string* error_msg) {
+  std::unique_ptr<art::MemMap> map(art::MemMap::MapAnonymous(
+      art::StringPrintf("%s-transformed", original_location.c_str()).c_str(),
+      nullptr,
+      data_len,
+      PROT_READ|PROT_WRITE,
+      /*low_4gb*/false,
+      /*reuse*/false,
+      error_msg));
+  if (map == nullptr) {
+    return map;
+  }
+  memcpy(map->Begin(), dex_data, data_len);
+  // Make the dex files mmap read only.
+  map->Protect(PROT_READ);
+  return map;
+}
+
+// Copies dex_data into an anonymous read-only mapping, opens it as a DexFile, then runs a
+// Redefiner on klass. Failures detected here store a description in *error_msg.
+jvmtiError Redefiner::RedefineClass(ArtJvmTiEnv* env,
+                                    art::Runtime* runtime,
+                                    art::Thread* self,
+                                    jclass klass,
+                                    const std::string& original_dex_location,
+                                    jint data_len,
+                                    unsigned char* dex_data,
+                                    std::string* error_msg) {
+  std::unique_ptr<art::MemMap> map(
+      MoveDataToMemMap(original_dex_location, data_len, dex_data, error_msg));
+  std::ostringstream os;
+  char* generic_ptr_unused = nullptr;
+  char* signature_ptr = nullptr;
+  // NOTE(review): JVMTI expects both strings to be freed with Deallocate; nothing here does.
+  if (env->GetClassSignature(klass, &signature_ptr, &generic_ptr_unused) != OK) {
+    signature_ptr = const_cast<char*>("<UNKNOWN CLASS>");
+  }
+  if (map.get() == nullptr) {
+    os << "Failed to create anonymous mmap for modified dex file of class " << signature_ptr
+       << " in dex file " << original_dex_location << " because: " << *error_msg;
+    *error_msg = os.str();
+    return ERR(OUT_OF_MEMORY);
+  }
+  if (map->Size() < sizeof(art::DexFile::Header)) {
+    *error_msg = "Could not read dex file header because dex_data was too short";
+    return ERR(INVALID_CLASS_FORMAT);
+  }
+  uint32_t checksum = reinterpret_cast<const art::DexFile::Header*>(map->Begin())->checksum_;
+  // Copy the name first: argument evaluation order is unspecified, so if Open takes the map by
+  // value it may already be moved-from when map->GetName() is evaluated in the argument list.
+  const std::string map_name(map->GetName());
+  std::unique_ptr<const art::DexFile> dex_file(art::DexFile::Open(
+      map_name, checksum, std::move(map), /*verify*/true, /*verify_checksum*/true, error_msg));
+  if (dex_file.get() == nullptr) {
+    os << "Unable to load modified dex file for " << signature_ptr << ": " << *error_msg;
+    *error_msg = os.str();
+    return ERR(INVALID_CLASS_FORMAT);
+  }
+  // Get shared mutator lock.
+  art::ScopedObjectAccess soa(self);
+  art::StackHandleScope<1> hs(self);
+  Redefiner r(runtime, self, klass, signature_ptr, dex_file, error_msg);
+  // Lock around this class to avoid races.
+  art::ObjectLock<art::mirror::Class> lock(self, hs.NewHandle(r.GetMirrorClass()));
+  return r.Run();
+}
+
+// TODO *MAJOR* This should return the actual source dalvik.system.DexFile object for the klass.
+// TODO Make mirror of DexFile and associated types to make this less hellish.
+// TODO Make mirror of BaseDexClassLoader and associated types to make this less hellish.
+art::mirror::Object* Redefiner::FindSourceDexFileObject(
+    art::Handle<art::mirror::ClassLoader> loader) {
+  const char* dex_path_list_element_array_name = "[Ldalvik/system/DexPathList$Element;";
+  const char* dex_path_list_element_name = "Ldalvik/system/DexPathList$Element;";
+  const char* dex_file_name = "Ldalvik/system/DexFile;";
+  const char* dex_path_list_name = "Ldalvik/system/DexPathList;";
+  const char* dex_class_loader_name = "Ldalvik/system/BaseDexClassLoader;";
+
+  CHECK(!self_->IsExceptionPending());
+  art::StackHandleScope<11> hs(self_);
+  art::ClassLinker* class_linker = runtime_->GetClassLinker();
+
+  art::Handle<art::mirror::ClassLoader> null_loader(hs.NewHandle<art::mirror::ClassLoader>(
+      nullptr));
+  art::Handle<art::mirror::Class> base_dex_loader_class(hs.NewHandle(class_linker->FindClass(
+      self_, dex_class_loader_name, null_loader)));
+
+  // Get all the ArtFields so we can look in the BaseDexClassLoader
+  art::ArtField* path_list_field = base_dex_loader_class->FindDeclaredInstanceField(
+      "pathList", dex_path_list_name);
+  CHECK(path_list_field != nullptr);
+
+  art::ArtField* dex_path_list_element_field =
+      class_linker->FindClass(self_, dex_path_list_name, null_loader)
+        ->FindDeclaredInstanceField("dexElements", dex_path_list_element_array_name);
+  CHECK(dex_path_list_element_field != nullptr);
+
+  art::ArtField* element_dex_file_field =
+      class_linker->FindClass(self_, dex_path_list_element_name, null_loader)
+        ->FindDeclaredInstanceField("dexFile", dex_file_name);
+  CHECK(element_dex_file_field != nullptr);
+
+  // Check if loader is a BaseDexClassLoader
+  art::Handle<art::mirror::Class> loader_class(hs.NewHandle(loader->GetClass()));
+  if (!loader_class->IsSubClass(base_dex_loader_class.Get())) {
+    LOG(ERROR) << "The classloader is not a BaseDexClassLoader which is currently the only "
+               << "supported class loader type!";
+    return nullptr;
+  }
+  // Start navigating the fields of the loader (now known to be a BaseDexClassLoader derivative)
+  art::Handle<art::mirror::Object> path_list(
+      hs.NewHandle(path_list_field->GetObject(loader.Get())));
+  CHECK(path_list.Get() != nullptr);
+  CHECK(!self_->IsExceptionPending());
+  art::Handle<art::mirror::ObjectArray<art::mirror::Object>> dex_elements_list(hs.NewHandle(
+      dex_path_list_element_field->GetObject(path_list.Get())->
+      AsObjectArray<art::mirror::Object>()));
+  CHECK(!self_->IsExceptionPending());
+  CHECK(dex_elements_list.Get() != nullptr);
+  size_t num_elements = dex_elements_list->GetLength();
+  art::MutableHandle<art::mirror::Object> current_element(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  art::MutableHandle<art::mirror::Object> first_dex_file(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  // Iterate over the DexPathList$Element to find the right one
+  // TODO Or not ATM just return the first one.
+  for (size_t i = 0; i < num_elements; i++) {
+    current_element.Assign(dex_elements_list->Get(i));
+    CHECK(current_element.Get() != nullptr);
+    CHECK(!self_->IsExceptionPending());
+    CHECK(dex_elements_list.Get() != nullptr);
+    CHECK_EQ(current_element->GetClass(), class_linker->FindClass(self_,
+                                                                  dex_path_list_element_name,
+                                                                  null_loader));
+    // TODO It would be cleaner to put the art::DexFile into the dalvik.system.DexFile the class
+    // comes from but it is more annoying because we would need to find this class. It is not
+    // necessary for proper function since we just need to be in front of the classes old dex file
+    // in the path.
+    first_dex_file.Assign(element_dex_file_field->GetObject(current_element.Get()));
+    if (first_dex_file.Get() != nullptr) {
+      return first_dex_file.Get();
+    }
+  }
+  return nullptr;
+}
+
+art::mirror::Class* Redefiner::GetMirrorClass() {
+  return self_->DecodeJObject(klass_)->AsClass();
+}
+
+art::mirror::ClassLoader* Redefiner::GetClassLoader() {
+  return GetMirrorClass()->GetClassLoader();
+}
+
+art::mirror::DexCache* Redefiner::CreateNewDexCache(art::Handle<art::mirror::ClassLoader> loader) {
+  return runtime_->GetClassLinker()->RegisterDexFile(*dex_file_, loader.Get());
+}
+
+// TODO Really wishing I had that mirror of dalvik.system.DexFile now.
+art::mirror::LongArray* Redefiner::AllocateDexFileCookie(
+    art::Handle<art::mirror::Object> java_dex_file_obj) {
+  art::StackHandleScope<2> hs(self_);
+  // mCookie is nulled out if the DexFile has been closed but mInternalCookie sticks around until
+  // the object is finalized. Since they always point to the same array if mCookie is not null we
+  // just use the mInternalCookie field. We will update one or both of these fields later.
+  // TODO Should I get the class from the classloader or directly?
+  art::ArtField* internal_cookie_field = java_dex_file_obj->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  // TODO Add check that mCookie is either null or same as mInternalCookie
+  CHECK(internal_cookie_field != nullptr);
+  art::Handle<art::mirror::LongArray> cookie(
+      hs.NewHandle(internal_cookie_field->GetObject(java_dex_file_obj.Get())->AsLongArray()));
+  // TODO Maybe make these non-fatal.
+  CHECK(cookie.Get() != nullptr);
+  CHECK_GE(cookie->GetLength(), 1);
+  art::Handle<art::mirror::LongArray> new_cookie(
+      hs.NewHandle(art::mirror::LongArray::Alloc(self_, cookie->GetLength() + 1)));
+  if (new_cookie.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    return nullptr;
+  }
+  // Copy the oat-dex field at the start.
+  // TODO Should I clear this field?
+  // TODO This is a really crappy thing here with the first element being different.
+  new_cookie->SetWithoutChecks<false>(0, cookie->GetWithoutChecks(0));
+  new_cookie->SetWithoutChecks<false>(
+      1, static_cast<int64_t>(reinterpret_cast<intptr_t>(dex_file_.get())));
+  new_cookie->Memcpy(2, cookie.Get(), 1, cookie->GetLength() - 1);
+  return new_cookie.Get();
+}
+
+void Redefiner::RecordFailure(jvmtiError result, const std::string& error_msg) {
+  *error_msg_ = art::StringPrintf("Unable to perform redefinition of '%s': %s",
+                                  class_sig_,
+                                  error_msg.c_str());
+  result_ = result;
+}
+
+// Allocates the new cookie array and DexCache; the out handles are only assigned on success.
+bool Redefiner::FinishRemainingAllocations(
+    /*out*/art::MutableHandle<art::mirror::ClassLoader>* source_class_loader,
+    /*out*/art::MutableHandle<art::mirror::Object>* java_dex_file_obj,
+    /*out*/art::MutableHandle<art::mirror::LongArray>* new_dex_file_cookie,
+    /*out*/art::MutableHandle<art::mirror::DexCache>* new_dex_cache) {
+  art::StackHandleScope<4> hs(self_);
+  // This shouldn't allocate
+  art::Handle<art::mirror::ClassLoader> loader(hs.NewHandle(GetClassLoader()));
+  if (loader.Get() == nullptr) {
+    // TODO Better error msg.
+    RecordFailure(ERR(INTERNAL), "Unable to find class loader!");
+    return false;
+  }
+  art::Handle<art::mirror::Object> dex_file_obj(hs.NewHandle(FindSourceDexFileObject(loader)));
+  if (dex_file_obj.Get() == nullptr) {
+    RecordFailure(ERR(INTERNAL), "Unable to find dex file!");
+    return false;
+  }
+  art::Handle<art::mirror::LongArray> new_cookie(hs.NewHandle(AllocateDexFileCookie(dex_file_obj)));
+  if (new_cookie.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate dex file array for class loader");
+    return false;
+  }
+  art::Handle<art::mirror::DexCache> dex_cache(hs.NewHandle(CreateNewDexCache(loader)));
+  if (dex_cache.Get() == nullptr) {
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate DexCache");
+    return false;
+  }
+  source_class_loader->Assign(loader.Get());
+  java_dex_file_obj->Assign(dex_file_obj.Get());
+  new_dex_file_cookie->Assign(new_cookie.Get());
+  new_dex_cache->Assign(dex_cache.Get());
+  return true;
+}
+
+// Runs the redefinition: pre-allocates, suspends all threads, swaps in the new data, resumes.
+jvmtiError Redefiner::Run() {
+  art::StackHandleScope<5> hs(self_);
+  // TODO We might want to have a global lock (or one based on the class being redefined at least)
+  // in order to make cleanup easier. Not a huge deal though.
+  //
+  // First we just allocate the ClassExt and its fields that we need. These can be updated
+  // atomically without any issues (since we allocate the map arrays as empty) so we don't bother
+  // doing a try loop. The other allocations we need to ensure that nothing has changed in the time
+  // between allocating them and pausing all threads before we can update them so we need to do a
+  // try loop.
+  if (!EnsureRedefinitionIsValid() || !EnsureClassAllocationsFinished()) {
+    return result_;
+  }
+  art::MutableHandle<art::mirror::ClassLoader> source_class_loader(
+      hs.NewHandle<art::mirror::ClassLoader>(nullptr));
+  art::MutableHandle<art::mirror::Object> java_dex_file(
+      hs.NewHandle<art::mirror::Object>(nullptr));
+  art::MutableHandle<art::mirror::LongArray> new_dex_file_cookie(
+      hs.NewHandle<art::mirror::LongArray>(nullptr));
+  art::MutableHandle<art::mirror::DexCache> new_dex_cache(
+      hs.NewHandle<art::mirror::DexCache>(nullptr));
+  if (!FinishRemainingAllocations(&source_class_loader,
+                                  &java_dex_file,
+                                  &new_dex_file_cookie,
+                                  &new_dex_cache)) {
+    // TODO Null out the ClassExt fields we allocated (if possible, might be racing with another
+    // redefineclass call which made it even bigger. Leak shouldn't be huge (2x array of size
+    // declared_methods_.length) but would be good to get rid of.
+    // new_dex_file_cookie & new_dex_cache should be cleaned up by the GC.
+    return result_;
+  }
+  // Get the mirror class now that we aren't allocating anymore.
+  art::Handle<art::mirror::Class> art_class(hs.NewHandle(GetMirrorClass()));
+  // Enable assertion that this thread isn't interrupted during this installation.
+  // After this we will need to do real cleanup in case of failure. Prior to this we could simply
+  // return and would let everything get cleaned up or harmlessly leaked.
+  // Do transition to final suspension
+  // TODO We might want to give this its own suspended state!
+  // TODO This isn't right. We need to change state without any chance of suspend ideally!
+  self_->TransitionFromRunnableToSuspended(art::ThreadState::kNative);
+  runtime_->GetThreadList()->SuspendAll(
+      "Final installation of redefined Class!", /*long_suspend*/true);
+  // TODO Might want to move this into a different type.
+  // Now we reach the part where we must do active cleanup if something fails.
+  // TODO We should really Retry if this fails instead of simply aborting.
+  // Setting the new DexFileCookie hands back the original so we can restore it on failure.
+  art::ObjPtr<art::mirror::LongArray> original_dex_file_cookie(nullptr);
+  if (!UpdateJavaDexFile(java_dex_file.Get(),
+                         new_dex_file_cookie.Get(),
+                         &original_dex_file_cookie)) {
+    // Release suspendAll
+    runtime_->GetThreadList()->ResumeAll();
+    // Get back shared mutator lock as expected for return.
+    self_->TransitionFromSuspendedToRunnable();
+    return result_;
+  }
+  if (!UpdateClass(art_class.Get(), new_dex_cache.Get())) {
+    // TODO Should have some form of scope to do this.
+    RestoreJavaDexFile(java_dex_file.Get(), original_dex_file_cookie);
+    // Release suspendAll
+    runtime_->GetThreadList()->ResumeAll();
+    // Get back shared mutator lock as expected for return.
+    self_->TransitionFromSuspendedToRunnable();
+    return result_;
+  }
+  // Update the ClassObjects. Keep the old DexCache (and other stuff) around so we can restore
+  // functions/fields.
+  // Verify the new Class.
+  //   Failure then undo updates to class
+  // Do stack walks and allocate obsolete methods
+  // Shrink the obsolete method maps if possible?
+  // TODO find appropriate class loader. Allocate new dex files array. Pause all java threads.
+  // Replace dex files array. Do stack scan + allocate obsoletes. Remove array if possible.
+  // TODO We might want to ensure that all threads are stopped for this!
+  // TODO AddDexToClassPath();
+  // Release suspendAll
+  // TODO Put this into a scoped thing.
+  runtime_->GetThreadList()->ResumeAll();
+  // Get back shared mutator lock as expected for return.
+  self_->TransitionFromSuspendedToRunnable();
+  // TODO Do this at a more reasonable place.
+  dex_file_.release();
+  return OK;
+}
+
+void Redefiner::RestoreJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                                   art::ObjPtr<art::mirror::LongArray> orig_cookie) {
+  art::ArtField* internal_cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  art::ArtField* cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mCookie", "Ljava/lang/Object;");
+  art::ObjPtr<art::mirror::LongArray> new_cookie(
+      cookie_field->GetObject(java_dex_file)->AsLongArray());
+  internal_cookie_field->SetObject<false>(java_dex_file, orig_cookie);
+  if (!new_cookie.IsNull()) {
+    cookie_field->SetObject<false>(java_dex_file, orig_cookie);
+  }
+}
+
+// Performs updates to class that will allow us to verify it.
+bool Redefiner::UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
+                            art::ObjPtr<art::mirror::DexCache> new_dex_cache) {
+  art::ClassLinker* linker = runtime_->GetClassLinker();
+  art::PointerSize image_pointer_size = linker->GetImagePointerSize();
+  const art::DexFile::ClassDef* class_def = art::OatFile::OatDexFile::FindClassDef(
+      *dex_file_, class_sig_, art::ComputeModifiedUtf8Hash(class_sig_));
+  if (class_def == nullptr) {
+    RecordFailure(ERR(INVALID_CLASS_FORMAT), "Unable to find ClassDef!");
+    return false;
+  }
+  const art::DexFile::TypeId& declaring_class_id = dex_file_->GetTypeId(class_def->class_idx_);
+  const art::DexFile& old_dex_file = mclass->GetDexFile();
+  for (art::ArtMethod& method : mclass->GetMethods(image_pointer_size)) {
+    const art::DexFile::StringId* new_name_id = dex_file_->FindStringId(method.GetName());
+    art::dex::TypeIndex method_return_idx =
+        dex_file_->GetIndexForTypeId(*dex_file_->FindTypeId(method.GetReturnTypeDescriptor()));
+    const auto* old_type_list = method.GetParameterTypeList();
+    std::vector<art::dex::TypeIndex> new_type_list;
+    for (uint32_t i = 0; old_type_list != nullptr && i < old_type_list->Size(); i++) {
+      new_type_list.push_back(
+          dex_file_->GetIndexForTypeId(
+              *dex_file_->FindTypeId(
+                  old_dex_file.GetTypeDescriptor(
+                      old_dex_file.GetTypeId(
+                          old_type_list->GetTypeItem(i).type_idx_)))));
+    }
+    const art::DexFile::ProtoId* proto_id = dex_file_->FindProtoId(method_return_idx,
+                                                                   new_type_list);
+    // Both are dereferenced below; abort cleanly instead of dereferencing a null pointer.
+    CHECK(new_name_id != nullptr && proto_id != nullptr);
+    // TODO Return false, cleanup.
+    const art::DexFile::MethodId* method_id =
+        dex_file_->FindMethodId(declaring_class_id, *new_name_id, *proto_id);
+    CHECK(method_id != nullptr);
+    // TODO Return false, cleanup.
+    uint32_t dex_method_idx = dex_file_->GetIndexForMethodId(*method_id);
+    method.SetDexMethodIndex(dex_method_idx);
+    linker->SetEntryPointsToInterpreter(&method);
+    method.SetCodeItemOffset(dex_file_->FindCodeItemOffset(*class_def, dex_method_idx));
+    method.SetDexCacheResolvedMethods(new_dex_cache->GetResolvedMethods(), image_pointer_size);
+    method.SetDexCacheResolvedTypes(new_dex_cache->GetResolvedTypes(), image_pointer_size);
+  }
+  // Update the class fields.
+  // Need to update class last since the ArtMethod gets its DexFile from the class (which is needed
+  // to call GetReturnTypeDescriptor and GetParameterTypeList above).
+  mclass->SetDexCache(new_dex_cache.Ptr());
+  mclass->SetDexCacheStrings(new_dex_cache->GetStrings());
+  mclass->SetDexClassDefIndex(dex_file_->GetIndexForClassDef(*class_def));
+  mclass->SetDexTypeIndex(dex_file_->GetIndexForTypeId(*dex_file_->FindTypeId(class_sig_)));
+  return true;
+}
+
+bool Redefiner::UpdateJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                                  art::ObjPtr<art::mirror::LongArray> new_cookie,
+                                  /*out*/art::ObjPtr<art::mirror::LongArray>* original_cookie) {
+  art::ArtField* internal_cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mInternalCookie", "Ljava/lang/Object;");
+  art::ArtField* cookie_field = java_dex_file->GetClass()->FindDeclaredInstanceField(
+      "mCookie", "Ljava/lang/Object;");
+  CHECK(internal_cookie_field != nullptr);
+  art::ObjPtr<art::mirror::LongArray> orig_internal_cookie(
+      internal_cookie_field->GetObject(java_dex_file)->AsLongArray());
+  art::ObjPtr<art::mirror::LongArray> orig_cookie(
+      cookie_field->GetObject(java_dex_file)->AsLongArray());
+  internal_cookie_field->SetObject<false>(java_dex_file, new_cookie);
+  *original_cookie = orig_internal_cookie;
+  if (!orig_cookie.IsNull()) {
+    cookie_field->SetObject<false>(java_dex_file, new_cookie);
+  }
+  return true;
+}
+
+// This function does all (java) allocations we need to do for the Class being redefined.
+// TODO Change this name maybe?
+bool Redefiner::EnsureClassAllocationsFinished() {
+  if (self_->DecodeJObject(klass_) == nullptr) {  // Check before AsClass() dereferences it.
+    RecordFailure(ERR(INVALID_CLASS), "Unable to decode class argument!");
+    return false;
+  }
+  art::StackHandleScope<2> hs(self_);
+  art::Handle<art::mirror::Class> klass(hs.NewHandle(self_->DecodeJObject(klass_)->AsClass()));
+  // Allocate the classExt
+  art::Handle<art::mirror::ClassExt> ext(hs.NewHandle(klass->EnsureExtDataPresent(self_)));
+  if (ext.Get() == nullptr) {
+    // No memory. Clear exception (it's not useful) and return error.
+    // TODO This doesn't need to be fatal. We could just not support obsolete methods after hitting
+    // this case.
+    self_->AssertPendingOOMException();
+    self_->ClearException();
+    RecordFailure(ERR(OUT_OF_MEMORY), "Could not allocate ClassExt");
+    return false;
+  }
+  // Allocate the 2 arrays that make up the obsolete methods map.  Since the contents of the arrays
+  // are only modified when all threads (other than the modifying one) are suspended we don't need
+  // to worry about missing the unsynchronized writes to the array. We do synchronize when setting
+  // it however, since that can happen at any time.
+  // TODO Clear these after we walk the stacks in order to free them in the (likely?) event there
+  // are no obsolete methods.
+  {
+    art::ObjectLock<art::mirror::ClassExt> lock(self_, ext);
+    if (!ext->ExtendObsoleteArrays(
+          self_, klass->GetDeclaredMethodsSlice(art::kRuntimePointerSize).size())) {
+      // OOM. Clear exception and return error.
+      self_->AssertPendingOOMException();
+      self_->ClearException();
+      RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate/extend obsolete methods map");
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_redefine.h b/runtime/openjdkjvmti/ti_redefine.h
new file mode 100644
index 0000000..f3a5834
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_redefine.h
@@ -0,0 +1,168 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
+#define ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
+
+#include <string>
+
+#include <jni.h>
+
+#include "art_jvmti.h"
+#include "art_method.h"
+#include "class_linker.h"
+#include "dex_file.h"
+#include "gc_root-inl.h"
+#include "globals.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti.h"
+#include "linear_alloc.h"
+#include "mem_map.h"
+#include "mirror/array-inl.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
+#include "mirror/class_loader-inl.h"
+#include "mirror/string-inl.h"
+#include "oat_file.h"
+#include "obj_ptr.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread_list.h"
+#include "transform.h"
+#include "utf.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace openjdkjvmti {
+
+// Class that can redefine a single class's methods.
+class Redefiner {
+ public:
+  // Redefine the given class with the given dex data. Note this function does not take ownership of
+  // the dex_data pointer. It is not used after this call however and may be freed if desired.
+  // The caller is responsible for freeing it. The runtime makes its own copy of the data.
+  static jvmtiError RedefineClass(ArtJvmTiEnv* env,
+                                  art::Runtime* runtime,
+                                  art::Thread* self,
+                                  jclass klass,
+                                  const std::string& original_dex_location,
+                                  jint data_len,
+                                  unsigned char* dex_data,
+                                  std::string* error_msg);
+
+ private:
+  jvmtiError result_;
+  art::Runtime* runtime_;
+  art::Thread* self_;
+  // Kept as a jclass since we have weird run-state changes that make keeping it around as a
+  // mirror::Class difficult and confusing.
+  jclass klass_;
+  std::unique_ptr<const art::DexFile> dex_file_;
+  std::string* error_msg_;
+  char* class_sig_;
+
+  // TODO Maybe change jclass to a mirror::Class
+  Redefiner(art::Runtime* runtime,
+            art::Thread* self,
+            jclass klass,
+            char* class_sig,
+            std::unique_ptr<const art::DexFile>& redefined_dex_file,
+            std::string* error_msg)
+      : result_(ERR(INTERNAL)),
+        runtime_(runtime),
+        self_(self),
+        klass_(klass),
+        dex_file_(std::move(redefined_dex_file)),
+        error_msg_(error_msg),
+        class_sig_(class_sig) { }
+
+  static std::unique_ptr<art::MemMap> MoveDataToMemMap(const std::string& original_location,
+                                                       jint data_len,
+                                                       unsigned char* dex_data,
+                                                       std::string* error_msg);
+
+  // TODO Put on all the lock qualifiers.
+  jvmtiError Run() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  bool FinishRemainingAllocations(
+        /*out*/art::MutableHandle<art::mirror::ClassLoader>* source_class_loader,
+        /*out*/art::MutableHandle<art::mirror::Object>* source_dex_file_obj,
+        /*out*/art::MutableHandle<art::mirror::LongArray>* new_dex_file_cookie,
+        /*out*/art::MutableHandle<art::mirror::DexCache>* new_dex_cache)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // Preallocates all needed allocations in klass so that we can pause execution safely.
+  // TODO We should be able to free the arrays if they end up not being used. Investigate doing this
+  // in the future. For now we will just take the memory hit.
+  bool EnsureClassAllocationsFinished() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  art::mirror::ClassLoader* GetClassLoader() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // This finds the java.lang.DexFile we will add the native DexFile to as part of the classpath.
+  // TODO Make sure the DexFile object returned is the one that the klass_ actually comes from.
+  art::mirror::Object* FindSourceDexFileObject(art::Handle<art::mirror::ClassLoader> loader)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  art::mirror::Class* GetMirrorClass() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  // Allocates and fills the new DexFileCookie
+  art::mirror::LongArray* AllocateDexFileCookie(art::Handle<art::mirror::Object> java_dex_file_obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  art::mirror::DexCache* CreateNewDexCache(art::Handle<art::mirror::ClassLoader> loader)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  void RecordFailure(jvmtiError result, const std::string& error_msg);
+
+  // TODO Actually write this.
+  // This will check that no constraints are violated (more than 1 class in dex file, any changes in
+  // number/declaration of methods & fields, changes in access flags, etc.)
+  bool EnsureRedefinitionIsValid() {
+    return true;
+  }
+
+  bool UpdateJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                         art::ObjPtr<art::mirror::LongArray> new_cookie,
+                         /*out*/art::ObjPtr<art::mirror::LongArray>* original_cookie)
+      REQUIRES(art::Locks::mutator_lock_);
+
+  void RestoreJavaDexFile(art::ObjPtr<art::mirror::Object> java_dex_file,
+                          art::ObjPtr<art::mirror::LongArray> original_cookie)
+      REQUIRES(art::Locks::mutator_lock_);
+
+  bool UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
+                   art::ObjPtr<art::mirror::DexCache> new_dex_cache)
+      REQUIRES(art::Locks::mutator_lock_);
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_REDEFINE_H_
diff --git a/runtime/openjdkjvmti/transform.cc b/runtime/openjdkjvmti/transform.cc
index 7bb5205..f7b8b92 100644
--- a/runtime/openjdkjvmti/transform.cc
+++ b/runtime/openjdkjvmti/transform.cc
@@ -29,8 +29,12 @@
  * questions.
  */
 
+#include <unordered_map>
+#include <unordered_set>
+
 #include "transform.h"
 
+#include "art_method.h"
 #include "class_linker.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
@@ -46,6 +50,7 @@
 #include "mirror/string-inl.h"
 #include "oat_file.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread_list.h"
 #include "transform.h"
 #include "utf.h"
@@ -53,196 +58,7 @@
 
 namespace openjdkjvmti {
 
-static bool ReadChecksum(jint data_len, const unsigned char* dex, /*out*/uint32_t* res) {
-  if (data_len < static_cast<jint>(sizeof(art::DexFile::Header))) {
-    return false;
-  }
-  *res = reinterpret_cast<const art::DexFile::Header*>(dex)->checksum_;
-  return true;
-}
-
-static std::unique_ptr<art::MemMap> MoveDataToMemMap(const std::string& original_location,
-                                                      jint data_len,
-                                                      unsigned char* dex_data) {
-  std::string error_msg;
-  std::unique_ptr<art::MemMap> map(art::MemMap::MapAnonymous(
-      art::StringPrintf("%s-transformed", original_location.c_str()).c_str(),
-      nullptr,
-      data_len,
-      PROT_READ|PROT_WRITE,
-      /*low_4gb*/false,
-      /*reuse*/false,
-      &error_msg));
-  if (map == nullptr) {
-    return map;
-  }
-  memcpy(map->Begin(), dex_data, data_len);
-  map->Protect(PROT_READ);
-  return map;
-}
-
-static void InvalidateExistingMethods(art::Thread* self,
-                                      art::Handle<art::mirror::Class> klass,
-                                      art::Handle<art::mirror::DexCache> cache,
-                                      const art::DexFile* dex_file)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  // Create new DexCache with new DexFile.
-  // reset dex_class_def_idx_
-  // for each method reset entry_point_from_quick_compiled_code_ to bridge
-  // for each method reset dex_code_item_offset_
-  // for each method reset dex_method_index_
-  // for each method set dex_cache_resolved_methods_ to new DexCache
-  // for each method set dex_cache_resolved_types_ to new DexCache
-  auto* runtime = art::Runtime::Current();
-  art::ClassLinker* linker = runtime->GetClassLinker();
-  art::PointerSize image_pointer_size = linker->GetImagePointerSize();
-  std::string descriptor_storage;
-  const char* descriptor = klass->GetDescriptor(&descriptor_storage);
-  // Get the new class def
-  const art::DexFile::ClassDef* class_def = art::OatFile::OatDexFile::FindClassDef(
-      *dex_file, descriptor, art::ComputeModifiedUtf8Hash(descriptor));
-  CHECK(class_def != nullptr);
-  const art::DexFile::TypeId& declaring_class_id = dex_file->GetTypeId(class_def->class_idx_);
-  art::StackHandleScope<6> hs(self);
-  const art::DexFile& old_dex_file = klass->GetDexFile();
-  for (art::ArtMethod& method : klass->GetMethods(image_pointer_size)) {
-    // Find the code_item for the method then find the dex_method_index and dex_code_item_offset to
-    // set.
-    const art::DexFile::StringId* new_name_id = dex_file->FindStringId(method.GetName());
-    art::dex::TypeIndex method_return_idx =
-        dex_file->GetIndexForTypeId(*dex_file->FindTypeId(method.GetReturnTypeDescriptor()));
-    const auto* old_type_list = method.GetParameterTypeList();
-    std::vector<art::dex::TypeIndex> new_type_list;
-    for (uint32_t i = 0; old_type_list != nullptr && i < old_type_list->Size(); i++) {
-      new_type_list.push_back(
-          dex_file->GetIndexForTypeId(
-              *dex_file->FindTypeId(
-                  old_dex_file.GetTypeDescriptor(
-                      old_dex_file.GetTypeId(
-                          old_type_list->GetTypeItem(i).type_idx_)))));
-    }
-    const art::DexFile::ProtoId* proto_id = dex_file->FindProtoId(method_return_idx,
-                                                                  new_type_list);
-    CHECK(proto_id != nullptr || old_type_list == nullptr);
-    const art::DexFile::MethodId* method_id = dex_file->FindMethodId(declaring_class_id,
-                                                                      *new_name_id,
-                                                                      *proto_id);
-    CHECK(method_id != nullptr);
-    uint32_t dex_method_idx = dex_file->GetIndexForMethodId(*method_id);
-    method.SetDexMethodIndex(dex_method_idx);
-    linker->SetEntryPointsToInterpreter(&method);
-    method.SetCodeItemOffset(dex_file->FindCodeItemOffset(*class_def, dex_method_idx));
-    method.SetDexCacheResolvedMethods(cache->GetResolvedMethods(), image_pointer_size);
-    method.SetDexCacheResolvedTypes(cache->GetResolvedTypes(), image_pointer_size);
-  }
-
-  // Update the class fields.
-  // Need to update class last since the ArtMethod gets its DexFile from the class (which is needed
-  // to call GetReturnTypeDescriptor and GetParameterTypeList above).
-  klass->SetDexCache(cache.Get());
-  klass->SetDexCacheStrings(cache->GetStrings());
-  klass->SetDexClassDefIndex(dex_file->GetIndexForClassDef(*class_def));
-  klass->SetDexTypeIndex(dex_file->GetIndexForTypeId(*dex_file->FindTypeId(descriptor)));
-}
-
-// Adds the dex file.
-static art::mirror::LongArray* InsertDexFileIntoArray(art::Thread* self,
-                                                      const art::DexFile* dex,
-                                                      art::Handle<art::mirror::LongArray>& orig)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  art::StackHandleScope<1> hs(self);
-  CHECK_GE(orig->GetLength(), 1);
-  art::Handle<art::mirror::LongArray> ret(
-      hs.NewHandle(art::mirror::LongArray::Alloc(self, orig->GetLength() + 1)));
-  CHECK(ret.Get() != nullptr);
-  // Copy the oat-dex.
-  // TODO Should I clear the oatdex element?
-  ret->SetWithoutChecks<false>(0, orig->GetWithoutChecks(0));
-  ret->SetWithoutChecks<false>(1, static_cast<int64_t>(reinterpret_cast<intptr_t>(dex)));
-  ret->Memcpy(2, orig.Get(), 1, orig->GetLength() - 1);
-  return ret.Get();
-}
-
-// TODO Handle all types of class loaders.
-static bool FindDalvikSystemDexFileAndLoaderForClass(
-    art::Handle<art::mirror::Class> klass,
-    /*out*/art::mirror::Object** dex_file,
-    /*out*/art::mirror::ClassLoader** loader)
-      REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  const char* dex_path_list_element_array_name = "[Ldalvik/system/DexPathList$Element;";
-  const char* dex_path_list_element_name = "Ldalvik/system/DexPathList$Element;";
-  const char* dex_file_name = "Ldalvik/system/DexFile;";
-  const char* dex_path_list_name = "Ldalvik/system/DexPathList;";
-  const char* dex_class_loader_name = "Ldalvik/system/BaseDexClassLoader;";
-
-  art::Thread* self = art::Thread::Current();
-  CHECK(!self->IsExceptionPending());
-  art::StackHandleScope<11> hs(self);
-  art::ClassLinker* class_linker = art::Runtime::Current()->GetClassLinker();
-
-  art::Handle<art::mirror::ClassLoader> null_loader(hs.NewHandle<art::mirror::ClassLoader>(
-      nullptr));
-  art::Handle<art::mirror::Class> base_dex_loader_class(hs.NewHandle(class_linker->FindClass(
-      self, dex_class_loader_name, null_loader)));
-
-  art::ArtField* path_list_field = base_dex_loader_class->FindDeclaredInstanceField(
-      "pathList", dex_path_list_name);
-  CHECK(path_list_field != nullptr);
-
-  art::ArtField* dex_path_list_element_field =
-      class_linker->FindClass(self, dex_path_list_name, null_loader)
-        ->FindDeclaredInstanceField("dexElements", dex_path_list_element_array_name);
-  CHECK(dex_path_list_element_field != nullptr);
-
-  art::ArtField* element_dex_file_field =
-      class_linker->FindClass(self, dex_path_list_element_name, null_loader)
-        ->FindDeclaredInstanceField("dexFile", dex_file_name);
-  CHECK(element_dex_file_field != nullptr);
-
-  art::Handle<art::mirror::ClassLoader> h_class_loader(hs.NewHandle(klass->GetClassLoader()));
-  art::Handle<art::mirror::Class> loader_class(hs.NewHandle(h_class_loader->GetClass()));
-  // Check if loader is a BaseDexClassLoader
-  if (!loader_class->IsSubClass(base_dex_loader_class.Get())) {
-    LOG(ERROR) << "The classloader is not a BaseDexClassLoader which is currently the only "
-               << "supported class loader type!";
-    return false;
-  }
-  art::Handle<art::mirror::Object> path_list(
-      hs.NewHandle(path_list_field->GetObject(h_class_loader.Get())));
-  CHECK(path_list.Get() != nullptr);
-  CHECK(!self->IsExceptionPending());
-  art::Handle<art::mirror::ObjectArray<art::mirror::Object>> dex_elements_list(hs.NewHandle(
-      dex_path_list_element_field->GetObject(path_list.Get())->
-      AsObjectArray<art::mirror::Object>()));
-  CHECK(!self->IsExceptionPending());
-  CHECK(dex_elements_list.Get() != nullptr);
-  size_t num_elements = dex_elements_list->GetLength();
-  art::MutableHandle<art::mirror::Object> current_element(
-      hs.NewHandle<art::mirror::Object>(nullptr));
-  art::MutableHandle<art::mirror::Object> first_dex_file(
-      hs.NewHandle<art::mirror::Object>(nullptr));
-  for (size_t i = 0; i < num_elements; i++) {
-    current_element.Assign(dex_elements_list->Get(i));
-    CHECK(current_element.Get() != nullptr);
-    CHECK(!self->IsExceptionPending());
-    CHECK(dex_elements_list.Get() != nullptr);
-    CHECK_EQ(current_element->GetClass(), class_linker->FindClass(self,
-                                                                  dex_path_list_element_name,
-                                                                  null_loader));
-    // TODO It would be cleaner to put the art::DexFile into the dalvik.system.DexFile the class
-    // comes from but it is more annoying because we would need to find this class. It is not
-    // necessary for proper function since we just need to be in front of the classes old dex file
-    // in the path.
-    first_dex_file.Assign(element_dex_file_field->GetObject(current_element.Get()));
-    if (first_dex_file.Get() != nullptr) {
-      *dex_file = first_dex_file.Get();
-      *loader = h_class_loader.Get();
-      return true;
-    }
-  }
-  return false;
-}
-
+// TODO Move this function somewhere more appropriate.
 // Gets the data surrounding the given class.
 jvmtiError GetTransformationData(ArtJvmTiEnv* env,
                                  jclass klass,
@@ -281,83 +97,4 @@
   return OK;
 }
 
-// Install the new dex file.
-// TODO do error checks for bad state (method in a stack, changes to number of methods/fields/etc).
-jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          const std::string& original_location,
-                                          jint data_len,
-                                          unsigned char* dex_data) {
-  const char* dex_file_name = "Ldalvik/system/DexFile;";
-  art::Thread* self = art::Thread::Current();
-  art::Runtime* runtime = art::Runtime::Current();
-  art::ThreadList* threads = runtime->GetThreadList();
-  art::ClassLinker* class_linker = runtime->GetClassLinker();
-  uint32_t checksum = 0;
-  if (!ReadChecksum(data_len, dex_data, &checksum)) {
-    return ERR(INVALID_CLASS_FORMAT);
-  }
-
-  std::unique_ptr<art::MemMap> map(MoveDataToMemMap(original_location, data_len, dex_data));
-  if (map.get() == nullptr) {
-    return ERR(INTERNAL);
-  }
-  std::string error_msg;
-  // Load the new dex_data in memory (mmap it, etc)
-  std::unique_ptr<const art::DexFile> new_dex_file = art::DexFile::Open(map->GetName(),
-                                                                        checksum,
-                                                                        std::move(map),
-                                                                        /*verify*/ true,
-                                                                        /*verify_checksum*/ true,
-                                                                        &error_msg);
-  CHECK(new_dex_file.get() != nullptr) << "Unable to load dex file! " << error_msg;
-
-  // Get mutator lock. We need the lifetimes of these variables (hs, the classes, etc.) to be longer
-  // then current lock (since there isn't upgrading of the lock) so we don't use soa.
-  art::ThreadState old_state = self->TransitionFromSuspendedToRunnable();
-  // This scope is needed to make sure that the HandleScope dies with mutator_lock_ since we need to
-  // upgrade the mutator_lock during the execution.
-  {
-    art::StackHandleScope<11> hs(self);
-    art::Handle<art::mirror::ClassLoader> null_loader(
-        hs.NewHandle<art::mirror::ClassLoader>(nullptr));
-    CHECK(null_loader.Get() == nullptr);
-    art::ArtField* dex_file_cookie_field = class_linker->
-        FindClass(self, dex_file_name, null_loader)->
-        FindDeclaredInstanceField("mCookie", "Ljava/lang/Object;");
-    art::ArtField* dex_file_internal_cookie_field =
-        class_linker->FindClass(self, dex_file_name, null_loader)
-          ->FindDeclaredInstanceField("mInternalCookie", "Ljava/lang/Object;");
-    CHECK(dex_file_cookie_field != nullptr);
-    art::Handle<art::mirror::Class> klass(hs.NewHandle(self->DecodeJObject(jklass)->AsClass()));
-    art::mirror::Object* dex_file_ptr = nullptr;
-    art::mirror::ClassLoader* class_loader_ptr = nullptr;
-    // Find dalvik.system.DexFile that represents the dex file we are changing.
-    if (!FindDalvikSystemDexFileAndLoaderForClass(klass, &dex_file_ptr, &class_loader_ptr)) {
-      self->TransitionFromRunnableToSuspended(old_state);
-      LOG(ERROR) << "Could not find DexFile.";
-      return ERR(INTERNAL);
-    }
-    art::Handle<art::mirror::Object> dex_file_obj(hs.NewHandle(dex_file_ptr));
-    art::Handle<art::mirror::ClassLoader> class_loader(hs.NewHandle(class_loader_ptr));
-    art::Handle<art::mirror::LongArray> art_dex_array(
-        hs.NewHandle<art::mirror::LongArray>(
-            dex_file_cookie_field->GetObject(dex_file_obj.Get())->AsLongArray()));
-    art::Handle<art::mirror::LongArray> new_art_dex_array(
-        hs.NewHandle<art::mirror::LongArray>(
-            InsertDexFileIntoArray(self, new_dex_file.get(), art_dex_array)));
-    art::Handle<art::mirror::DexCache> cache(
-        hs.NewHandle(class_linker->RegisterDexFile(*new_dex_file.get(), class_loader.Get())));
-    self->TransitionFromRunnableToSuspended(old_state);
-
-    threads->SuspendAll("moving dex file into runtime", /*long_suspend*/true);
-    // Change the mCookie field. Old value will be GC'd as normal.
-    dex_file_cookie_field->SetObject<false>(dex_file_obj.Get(), new_art_dex_array.Get());
-    dex_file_internal_cookie_field->SetObject<false>(dex_file_obj.Get(), new_art_dex_array.Get());
-    // Invalidate existing methods.
-    InvalidateExistingMethods(self, klass, cache, new_dex_file.release());
-  }
-  threads->ResumeAll();
-  return OK;
-}
-
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/transform.h b/runtime/openjdkjvmti/transform.h
index a76ed93..35b990b 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/runtime/openjdkjvmti/transform.h
@@ -52,12 +52,6 @@
                                  /*out*/jint* data_len,
                                  /*out*/unsigned char** dex_data);
 
-// Install the new dex file.
-jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          const std::string& original_location,
-                                          jint data_len,
-                                          unsigned char* dex_data);
-
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_TRANSFORM_H_
diff --git a/test/624-checker-stringops/src/Main.java b/test/624-checker-stringops/src/Main.java
index 34e8283..d965e3f 100644
--- a/test/624-checker-stringops/src/Main.java
+++ b/test/624-checker-stringops/src/Main.java
@@ -98,9 +98,170 @@
     return k;
   }
 
+  //
+  // append() returns "this", so each chained receiver can be replaced by the NewInstance itself.
+  // Also ensures that the two similar-looking append("x") calls are not merged into one.
+  //
+  /// CHECK-START: int Main.bufferLen2() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>]   intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<Append1>>]
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null1>>,<<String2>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append2>>]
+  /// CHECK-DAG:                  InvokeVirtual [<<Null2>>]             intrinsic:StringBufferLength
+  //
+  /// CHECK-START: int Main.bufferLen2() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBufferAppend
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBufferLength
+  static int bufferLen2() {
+    StringBuffer s = new StringBuffer();
+    return s.append("x").append("x").length();
+  }
+
+  //
+  // append() returns "this", so each chained receiver can be replaced by the NewInstance itself.
+  // Also ensures that the two similar-looking append("x") calls are not merged into one.
+  //
+  /// CHECK-START: int Main.builderLen2() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>]   intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]
+  /// CHECK-DAG:                  InvokeVirtual [<<Null3>>]             intrinsic:StringBuilderLength
+  //
+  /// CHECK-START: int Main.builderLen2() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance
+  /// CHECK-DAG: <<String1:l\d+>> LoadString
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG: <<String2:l\d+>> LoadString
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBuilderAppend
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBuilderLength
+  static int builderLen2() {
+    StringBuilder s = new StringBuilder();
+    return s.append("x").append("x").length();
+  }
+
+  //
+  // Similar situation in a loop: every chained append() ends up using the NewInstance directly.
+  //
+  /// CHECK-START: int Main.bufferLoopAppender() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                         loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                          loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<New>>]                                             loop:<<Loop>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<Null1>>,<<String1>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<Null3>>,<<String3>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<Null4:l\d+>>   NullCheck     [<<New>>]                                             loop:none
+  /// CHECK-DAG:                  InvokeVirtual [<<Null4>>]             intrinsic:StringBufferLength  loop:none
+  //
+  /// CHECK-START: int Main.bufferLoopAppender() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                       loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                        loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<New>>,<<String3>>] intrinsic:StringBufferAppend  loop:<<Loop>>
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBufferLength  loop:none
+  static int bufferLoopAppender() {
+    StringBuffer b = new StringBuffer();
+    for (int i = 0; i < 10; i++) {
+      b.append("x").append("y").append("z");
+    }
+    return b.length();
+  }
+
+  //
+  // Similar situation in a loop: every chained append() ends up using the NewInstance directly.
+  //
+  /// CHECK-START: int Main.builderLoopAppender() instruction_simplifier (before)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                         loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                          loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Null1:l\d+>>   NullCheck     [<<New>>]                                             loop:<<Loop>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<Null1>>,<<String1>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null2:l\d+>>   NullCheck     [<<Append1>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<Null2>>,<<String2>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                          loop:<<Loop>>
+  /// CHECK-DAG: <<Null3:l\d+>>   NullCheck     [<<Append2>>]                                         loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<Null3>>,<<String3>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<Null4:l\d+>>   NullCheck     [<<New>>]                                             loop:none
+  /// CHECK-DAG:                  InvokeVirtual [<<Null4>>]             intrinsic:StringBuilderLength loop:none
+  //
+  /// CHECK-START: int Main.builderLoopAppender() instruction_simplifier (after)
+  /// CHECK-DAG: <<New:l\d+>>     NewInstance                                                       loop:none
+  /// CHECK-DAG: <<String1:l\d+>> LoadString                                                        loop:<<Loop:B\d+>>
+  /// CHECK-DAG: <<Append1:l\d+>> InvokeVirtual [<<New>>,<<String1>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String2:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append2:l\d+>> InvokeVirtual [<<New>>,<<String2>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG: <<String3:l\d+>> LoadString                                                        loop:<<Loop>>
+  /// CHECK-DAG: <<Append3:l\d+>> InvokeVirtual [<<New>>,<<String3>>] intrinsic:StringBuilderAppend loop:<<Loop>>
+  /// CHECK-DAG:                  InvokeVirtual [<<New>>]             intrinsic:StringBuilderLength loop:none
+  static int builderLoopAppender() {
+    StringBuilder b = new StringBuilder();
+    for (int i = 0; i < 10; i++) {
+      b.append("x").append("y").append("z");
+    }
+    return b.length();
+  }
+
+  //
+  // The result d is unused, so all calls in the loop body, and thus the loop, can be eliminated.
+  //
+  /// CHECK-START: int Main.bufferDeadLoop() instruction_simplifier (before)
+  /// CHECK-DAG: Phi                                              loop:<<Loop:B\d+>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringBufferToString     loop:<<Loop>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.bufferDeadLoop() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringBufferToString
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOfAfter
+  static int bufferDeadLoop() {
+    StringBuffer b = new StringBuffer();
+    for (int i = 0; i < 10; i++) {
+      int d = b.toString().indexOf("x", 1);
+    }
+    return b.length();
+  }
+
+  //
+  // The result d is unused, so all calls in the loop body, and thus the loop, can be eliminated.
+  //
+  /// CHECK-START: int Main.builderDeadLoop() instruction_simplifier (before)
+  /// CHECK-DAG: Phi                                              loop:<<Loop:B\d+>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringBuilderToString    loop:<<Loop>>
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.builderDeadLoop() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringBuilderToString
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOfAfter
+  static int builderDeadLoop() {
+    StringBuilder b = new StringBuilder();
+    for (int i = 0; i < 10; i++) {
+      int d = b.toString().indexOf("x", 1);
+    }
+    return b.length();
+  }
+
   public static void main(String[] args) {
     expectEquals(1865, liveIndexOf());
     expectEquals(29, deadIndexOf());
+
     try {
       indexOfExceptions(null, XYZ);
       throw new Error("Expected: NPE");
@@ -113,6 +274,13 @@
     }
     expectEquals(598, indexOfExceptions(ABC, XYZ));
 
+    expectEquals(2, bufferLen2());
+    expectEquals(2, builderLen2());
+    expectEquals(30, bufferLoopAppender());
+    expectEquals(30, builderLoopAppender());
+    expectEquals(0, bufferDeadLoop());
+    expectEquals(0, builderDeadLoop());
+
     System.out.println("passed");
   }
 
diff --git a/test/Android.arm_vixl.mk b/test/Android.arm_vixl.mk
index 72616a1..5ae961a 100644
--- a/test/Android.arm_vixl.mk
+++ b/test/Android.arm_vixl.mk
@@ -16,9 +16,6 @@
 
 # Known broken tests for the ARM VIXL backend.
 TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS := \
-  103-string-append \
-  137-cfi \
   488-checker-inline-recursive-calls \
   552-checker-sharpening \
   562-checker-no-intermediate \
-  602-deoptimizeable \