Merge "Disable the CC collector / read barrier checks in non-debug build."
diff --git a/Android.mk b/Android.mk
index bb1334a..5da9bdd 100644
--- a/Android.mk
+++ b/Android.mk
@@ -360,6 +360,19 @@
 
 endif  # art_test_bother
 
+# Valgrind.
+.PHONY: valgrind-test-art-target
+valgrind-test-art-target: valgrind-test-art-target-gtest
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
+.PHONY: valgrind-test-art-target32
+valgrind-test-art-target32: valgrind-test-art-target-gtest32
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
+.PHONY: valgrind-test-art-target64
+valgrind-test-art-target64: valgrind-test-art-target-gtest64
+	$(hide) $(call ART_TEST_PREREQ_FINISHED,$@)
+
 ########################################################################
 # oat-target and oat-target-sync rules
 
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 0235a30..fc4dd55 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -347,7 +347,7 @@
       ifdef SANITIZE_TARGET
         art_target_non_debug_cflags += -Wframe-larger-than=6400
       else
-        art_target_non_debug_cflags += -Wframe-larger-than=1728
+        art_target_non_debug_cflags += -Wframe-larger-than=1736
       endif
     endif
   endif
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 3b459c3..a14265e 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -279,6 +279,8 @@
   compiler/utils/intrusive_forward_list_test.cc \
   compiler/utils/swap_space_test.cc \
   compiler/utils/test_dex_file_builder_test.cc \
+  compiler/utils/transform_array_ref_test.cc \
+  compiler/utils/transform_iterator_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_all := \
   compiler/jni/jni_cfi_test.cc \
@@ -426,6 +428,9 @@
 ART_TEST_TARGET_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST_RULES :=
 ART_TEST_HOST_GTEST_DEPENDENCIES :=
 
 ART_GTEST_TARGET_ANDROID_ROOT := '/system'
@@ -433,6 +438,28 @@
   ART_GTEST_TARGET_ANDROID_ROOT := $(ART_TEST_ANDROID_ROOT)
 endif
 
+ART_VALGRIND_TARGET_DEPENDENCIES := \
+  $(TARGET_OUT_EXECUTABLES)/valgrind \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/memcheck-$(TARGET_ARCH)-linux \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_core-$(TARGET_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_memcheck-$(TARGET_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/default.supp
+
+ifdef TARGET_2ND_ARCH
+ART_VALGRIND_TARGET_DEPENDENCIES += \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/memcheck-$(TARGET_2ND_ARCH)-linux \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_core-$(TARGET_2ND_ARCH)-linux.so \
+  $(TARGET_OUT_SHARED_LIBRARIES)/valgrind/vgpreload_memcheck-$(TARGET_2ND_ARCH)-linux.so
+endif
+
+include $(CLEAR_VARS)
+LOCAL_MODULE := valgrind-target-suppressions.txt
+LOCAL_MODULE_CLASS := ETC
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := test/valgrind-target-suppressions.txt
+LOCAL_MODULE_PATH := $(ART_TARGET_TEST_OUT)
+include $(BUILD_PREBUILT)
+
 # Define a make rule for a target device gtest.
 # $(1): gtest name - the name of the test we're building such as leb128_test.
 # $(2): 2ND_ or undefined - used to differentiate between the primary and secondary architecture.
@@ -449,7 +476,8 @@
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libjavacore.so \
     $$($(2)TARGET_OUT_SHARED_LIBRARIES)/libopenjdkd.so \
     $$(TARGET_OUT_JAVA_LIBRARIES)/core-libart-testdex.jar \
-    $$(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar
+    $$(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar \
+    $$(ART_TARGET_TEST_OUT)/valgrind-target-suppressions.txt
 
 .PHONY: $$(gtest_rule)
 $$(gtest_rule): test-art-target-sync
@@ -468,7 +496,27 @@
   ART_TEST_TARGET_GTEST_RULES += $$(gtest_rule)
   ART_TEST_TARGET_GTEST_$(1)_RULES += $$(gtest_rule)
 
+.PHONY: valgrind-$$(gtest_rule)
+valgrind-$$(gtest_rule): $(ART_VALGRIND_TARGET_DEPENDENCIES) test-art-target-sync
+	$(hide) adb shell touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID
+	$(hide) adb shell rm $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID
+	$(hide) adb shell chmod 755 $(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1)
+	$(hide) $$(call ART_TEST_SKIP,$$@) && \
+	  (adb shell "$(GCOV_ENV) LD_LIBRARY_PATH=$(3) ANDROID_ROOT=$(ART_GTEST_TARGET_ANDROID_ROOT) \
+	    valgrind --leak-check=full --error-exitcode=1 --workaround-gcc296-bugs=yes \
+	    --suppressions=$(ART_TARGET_TEST_DIR)/valgrind-target-suppressions.txt \
+	    $(ART_TARGET_NATIVETEST_DIR)/$(TARGET_$(2)ARCH)/$(1) && touch $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID" \
+	  && (adb pull $(ART_TARGET_TEST_DIR)/$(TARGET_$(2)ARCH)/$$@-$$$$PPID /tmp/ \
+	      && $$(call ART_TEST_PASSED,$$@)) \
+	  || $$(call ART_TEST_FAILED,$$@))
+	$(hide) rm -f /tmp/$$@-$$$$PPID
+
+  ART_TEST_TARGET_VALGRIND_GTEST$$($(2)ART_PHONY_TEST_TARGET_SUFFIX)_RULES += valgrind-$$(gtest_rule)
+  ART_TEST_TARGET_VALGRIND_GTEST_RULES += valgrind-$$(gtest_rule)
+  ART_TEST_TARGET_VALGRIND_GTEST_$(1)_RULES += valgrind-$$(gtest_rule)
+
   # Clear locally defined variables.
+  valgrind_gtest_rule :=
   gtest_rule :=
 endef  # define-art-gtest-rule-target
 
@@ -591,6 +639,7 @@
     endif
 
     ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES :=
+    ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES :=
     ifdef TARGET_2ND_ARCH
       $$(eval $$(call define-art-gtest-rule-target,$$(art_gtest_name),2ND_,$$(2nd_library_path)))
     endif
@@ -601,8 +650,13 @@
 test-art-target-gtest-$$(art_gtest_name): $$(ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES)
 	$$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
 
+.PHONY: valgrind-test-art-target-gtest-$$(art_gtest_name)
+valgrind-test-art-target-gtest-$$(art_gtest_name): $$(ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES)
+	$$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
+
     # Clear locally defined variables.
     ART_TEST_TARGET_GTEST_$$(art_gtest_name)_RULES :=
+    ART_TEST_TARGET_VALGRIND_GTEST_$$(art_gtest_name)_RULES :=
   else # host
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
@@ -685,9 +739,6 @@
 
   rule_name := $(3)test-art-$(1)-gtest$(4)
   ifeq ($(3),valgrind-)
-    ifneq ($(1),host)
-      $$(error valgrind tests only wired up for the host)
-    endif
     dependencies := $$(ART_TEST_$(2)_VALGRIND_GTEST$(4)_RULES)
   else
     dependencies := $$(ART_TEST_$(2)_GTEST$(4)_RULES)
@@ -703,9 +754,12 @@
 endef  # define-test-art-gtest-combination
 
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,))
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,$(ART_PHONY_TEST_TARGET_SUFFIX)))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,$(ART_PHONY_TEST_TARGET_SUFFIX)))
 ifdef TARGET_2ND_ARCH
 $(eval $(call define-test-art-gtest-combination,target,TARGET,,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
+$(eval $(call define-test-art-gtest-combination,target,TARGET,valgrind-,$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)))
 endif
 $(eval $(call define-test-art-gtest-combination,host,HOST,,))
 $(eval $(call define-test-art-gtest-combination,host,HOST,valgrind-,))
@@ -737,6 +791,9 @@
 ART_TEST_TARGET_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
 ART_TEST_TARGET_GTEST_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)_RULES :=
+ART_TEST_TARGET_VALGRIND_GTEST_RULES :=
 ART_GTEST_TARGET_ANDROID_ROOT :=
 ART_GTEST_class_linker_test_DEX_DEPS :=
 ART_GTEST_compiler_driver_test_DEX_DEPS :=
@@ -755,6 +812,7 @@
 ART_GTEST_stub_test_DEX_DEPS :=
 ART_GTEST_transaction_test_DEX_DEPS :=
 ART_VALGRIND_DEPENDENCIES :=
+ART_VALGRIND_TARGET_DEPENDENCIES :=
 $(foreach dir,$(GTEST_DEX_DIRECTORIES), $(eval ART_TEST_TARGET_GTEST_$(dir)_DEX :=))
 $(foreach dir,$(GTEST_DEX_DIRECTORIES), $(eval ART_TEST_HOST_GTEST_$(dir)_DEX :=))
 ART_TEST_HOST_GTEST_MainStripped_DEX :=
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 4a98342..951b075 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -606,13 +606,13 @@
     INTRINSIC(SunMiscUnsafe, Get ## type, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
               type_flags), \
     INTRINSIC(SunMiscUnsafe, Get ## type ## Volatile, ObjectJ_ ## code, kIntrinsicUnsafeGet, \
-              type_flags | kIntrinsicFlagIsVolatile), \
+              (type_flags) | kIntrinsicFlagIsVolatile), \
     INTRINSIC(SunMiscUnsafe, Put ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
               type_flags), \
     INTRINSIC(SunMiscUnsafe, Put ## type ## Volatile, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
-              type_flags | kIntrinsicFlagIsVolatile), \
+              (type_flags) | kIntrinsicFlagIsVolatile), \
     INTRINSIC(SunMiscUnsafe, PutOrdered ## type, ObjectJ ## code ## _V, kIntrinsicUnsafePut, \
-              type_flags | kIntrinsicFlagIsOrdered)
+              (type_flags) | kIntrinsicFlagIsOrdered)
 
     UNSAFE_GET_PUT(Int, I, kIntrinsicFlagNone),
     UNSAFE_GET_PUT(Long, J, kIntrinsicFlagIsLong),
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 606302b..d87762d 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -80,15 +80,6 @@
   return (it != verified_methods_.end()) ? it->second : nullptr;
 }
 
-void VerificationResults::RemoveVerifiedMethod(MethodReference ref) {
-  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
-  auto it = verified_methods_.find(ref);
-  if (it != verified_methods_.end()) {
-    delete it->second;
-    verified_methods_.erase(it);
-  }
-}
-
 void VerificationResults::AddRejectedClass(ClassReference ref) {
   {
     WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
@@ -104,7 +95,7 @@
 
 bool VerificationResults::IsCandidateForCompilation(MethodReference&,
                                                     const uint32_t access_flags) {
-  if (!compiler_options_->IsCompilationEnabled()) {
+  if (!compiler_options_->IsBytecodeCompilationEnabled()) {
     return false;
   }
   // Don't compile class initializers unless kEverything.
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index da80bf0..1af11a8 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -48,7 +48,6 @@
 
     const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
         REQUIRES(!verified_methods_lock_);
-    void RemoveVerifiedMethod(MethodReference ref) REQUIRES(!verified_methods_lock_);
 
     void AddRejectedClass(ClassReference ref) REQUIRES(!rejected_classes_lock_);
     bool IsClassRejected(ClassReference ref) REQUIRES(!rejected_classes_lock_);
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 3cb63e7..94f5acc 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -390,9 +390,8 @@
           *devirt_target->dex_file, devirt_target->dex_method_index, dex_cache, class_loader,
           nullptr, kVirtual);
     } else {
-      auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
-          *devirt_target->dex_file,
-          class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()))));
+      auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(*devirt_target->dex_file,
+                                                                       class_loader.Get())));
       called_method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           *devirt_target->dex_file, devirt_target->dex_method_index, target_dex_cache,
           class_loader, nullptr, kVirtual);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 1ab1d31..a4b4889 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -26,6 +26,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/bit_vector.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
@@ -66,6 +67,7 @@
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "transaction.h"
+#include "utils/array_ref.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "utils/swap_space.h"
 #include "verifier/method_verifier.h"
@@ -333,6 +335,24 @@
   DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats);
 };
 
+class CompilerDriver::DexFileMethodSet {
+ public:
+  explicit DexFileMethodSet(const DexFile& dex_file)
+    : dex_file_(dex_file),
+      method_indexes_(dex_file.NumMethodIds(), false, Allocator::GetMallocAllocator()) {
+  }
+  DexFileMethodSet(DexFileMethodSet&& other) = default;
+
+  const DexFile& GetDexFile() const { return dex_file_; }
+
+  BitVector& GetMethodIndexes() { return method_indexes_; }
+  const BitVector& GetMethodIndexes() const { return method_indexes_; }
+
+ private:
+  const DexFile& dex_file_;
+  BitVector method_indexes_;
+};
+
 CompilerDriver::CompilerDriver(
     const CompilerOptions* compiler_options,
     VerificationResults* verification_results,
@@ -379,7 +399,10 @@
       dex_files_for_oat_file_(nullptr),
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info),
-      max_arena_alloc_(0) {
+      max_arena_alloc_(0),
+      dex_to_dex_references_lock_("dex-to-dex references lock"),
+      dex_to_dex_references_(),
+      current_dex_to_dex_methods_(nullptr) {
   DCHECK(compiler_options_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
 
@@ -552,9 +575,31 @@
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
   MethodReference method_ref(&dex_file, method_idx);
 
-  if ((access_flags & kAccNative) != 0) {
-    // Are we interpreting only and have support for generic JNI down calls?
-    if (!driver->GetCompilerOptions().IsCompilationEnabled() &&
+  if (driver->GetCurrentDexToDexMethods() != nullptr) {
+    // This is the second pass when we dex-to-dex compile previously marked methods.
+    // TODO: Refactor the compilation to avoid having to distinguish the two passes
+    // here. That should be done on a higher level. http://b/29089975
+    if (driver->GetCurrentDexToDexMethods()->IsBitSet(method_idx)) {
+      const VerifiedMethod* verified_method =
+          driver->GetVerificationResults()->GetVerifiedMethod(method_ref);
+      // Do not optimize if a VerifiedMethod is missing. SafeCast elision,
+      // for example, relies on it.
+      compiled_method = optimizer::ArtCompileDEX(
+          driver,
+          code_item,
+          access_flags,
+          invoke_type,
+          class_def_idx,
+          method_idx,
+          class_loader,
+          dex_file,
+          (verified_method != nullptr)
+              ? dex_to_dex_compilation_level
+              : optimizer::DexToDexCompilationLevel::kRequired);
+    }
+  } else if ((access_flags & kAccNative) != 0) {
+    // Are we extracting only and have support for generic JNI down calls?
+    if (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
         InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
       // Leaving this empty will trigger the generic JNI version
     } else {
@@ -588,21 +633,9 @@
     }
     if (compiled_method == nullptr &&
         dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) {
+      DCHECK(!Runtime::Current()->UseJitCompilation());
       // TODO: add a command-line option to disable DEX-to-DEX compilation ?
-      // Do not optimize if a VerifiedMethod is missing. SafeCast elision, for example, relies on
-      // it.
-      compiled_method = optimizer::ArtCompileDEX(
-          driver,
-          code_item,
-          access_flags,
-          invoke_type,
-          class_def_idx,
-          method_idx,
-          class_loader,
-          dex_file,
-          (verified_method != nullptr)
-              ? dex_to_dex_compilation_level
-              : optimizer::DexToDexCompilationLevel::kRequired);
+      driver->MarkForDexToDexCompilation(self, method_ref);
     }
   }
   if (kTimeCompileMethod) {
@@ -628,12 +661,6 @@
     driver->AddCompiledMethod(method_ref, compiled_method, non_relative_linker_patch_count);
   }
 
-  // Done compiling, delete the verified method to reduce native memory usage. Do not delete in
-  // optimizing compiler, which may need the verified method again for inlining.
-  if (driver->GetCompilerKind() != Compiler::kOptimizing) {
-    driver->GetVerificationResults()->RemoveVerifiedMethod(method_ref);
-  }
-
   if (self->IsExceptionPending()) {
     ScopedObjectAccess soa(self);
     LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n"
@@ -680,6 +707,7 @@
                                   *dex_file,
                                   dex_file->GetClassDef(class_def_idx));
 
+  DCHECK(current_dex_to_dex_methods_ == nullptr);
   CompileMethod(self,
                 this,
                 code_item,
@@ -693,6 +721,34 @@
                 true,
                 dex_cache);
 
+  ArrayRef<DexFileMethodSet> dex_to_dex_references;
+  {
+    // From this point on, we shall not modify dex_to_dex_references_, so
+    // just grab a reference to it that we use without holding the mutex.
+    MutexLock lock(Thread::Current(), dex_to_dex_references_lock_);
+    dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
+  }
+  if (!dex_to_dex_references.empty()) {
+    DCHECK_EQ(dex_to_dex_references.size(), 1u);
+    DCHECK(&dex_to_dex_references[0].GetDexFile() == dex_file);
+    current_dex_to_dex_methods_ = &dex_to_dex_references.front().GetMethodIndexes();
+    DCHECK(current_dex_to_dex_methods_->IsBitSet(method_idx));
+    DCHECK_EQ(current_dex_to_dex_methods_->NumSetBits(), 1u);
+    CompileMethod(self,
+                  this,
+                  code_item,
+                  access_flags,
+                  invoke_type,
+                  class_def_idx,
+                  method_idx,
+                  jclass_loader,
+                  *dex_file,
+                  dex_to_dex_compilation_level,
+                  true,
+                  dex_cache);
+    current_dex_to_dex_methods_ = nullptr;
+  }
+
   FreeThreadPools();
 
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
@@ -1057,9 +1113,8 @@
       uint16_t exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
       StackHandleScope<2> hs2(self);
-      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(
-          *dex_file,
-          Runtime::Current()->GetLinearAlloc())));
+      Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(*dex_file,
+                                                                                     nullptr)));
       Handle<mirror::Class> klass(hs2.NewHandle(
           class_linker->ResolveType(*dex_file,
                                     exception_type_idx,
@@ -1285,6 +1340,17 @@
   return IsImageClass(descriptor);
 }
 
+void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref) {
+  MutexLock lock(self, dex_to_dex_references_lock_);
+  // Since we're compiling one dex file at a time, we need to look for the
+  // current dex file entry only at the end of dex_to_dex_references_.
+  if (dex_to_dex_references_.empty() ||
+      &dex_to_dex_references_.back().GetDexFile() != method_ref.dex_file) {
+    dex_to_dex_references_.emplace_back(*method_ref.dex_file);
+  }
+  dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.dex_method_index);
+}
+
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
                                                       uint32_t type_idx) {
   bool result = false;
@@ -2089,7 +2155,7 @@
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(manager_->GetClassLoader())));
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
         dex_file,
-        class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()))));
+        class_loader.Get())));
     mirror::Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
 
     if (klass == nullptr) {
@@ -2496,8 +2562,9 @@
             ? "null"
             : profile_compilation_info_->DumpInfo(&dex_files));
   }
-  for (size_t i = 0; i != dex_files.size(); ++i) {
-    const DexFile* dex_file = dex_files[i];
+
+  DCHECK(current_dex_to_dex_methods_ == nullptr);
+  for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
     CompileDexFile(class_loader,
                    *dex_file,
@@ -2510,6 +2577,25 @@
     max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_);
     Runtime::Current()->ReclaimArenaPoolMemory();
   }
+
+  ArrayRef<DexFileMethodSet> dex_to_dex_references;
+  {
+    // From this point on, we shall not modify dex_to_dex_references_, so
+    // just grab a reference to it that we use without holding the mutex.
+    MutexLock lock(Thread::Current(), dex_to_dex_references_lock_);
+    dex_to_dex_references = ArrayRef<DexFileMethodSet>(dex_to_dex_references_);
+  }
+  for (const auto& method_set : dex_to_dex_references) {
+    current_dex_to_dex_methods_ = &method_set.GetMethodIndexes();
+    CompileDexFile(class_loader,
+                   method_set.GetDexFile(),
+                   dex_files,
+                   parallel_thread_pool_.get(),
+                   parallel_thread_count_,
+                   timings);
+  }
+  current_dex_to_dex_methods_ = nullptr;
+
   VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 19a1ecc..2dd4651 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -52,6 +52,7 @@
 class MethodVerifier;
 }  // namespace verifier
 
+class BitVector;
 class CompiledClass;
 class CompiledMethod;
 class CompilerOptions;
@@ -120,12 +121,12 @@
   void CompileAll(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
                   TimingLogger* timings)
-      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
+      REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
 
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
       SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_);
+      REQUIRES(!compiled_methods_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
 
   VerificationResults* GetVerificationResults() const {
     DCHECK(Runtime::Current()->IsAotCompiler());
@@ -475,6 +476,13 @@
     return true;
   }
 
+  void MarkForDexToDexCompilation(Thread* self, const MethodReference& method_ref)
+      REQUIRES(!dex_to_dex_references_lock_);
+
+  const BitVector* GetCurrentDexToDexMethods() const {
+    return current_dex_to_dex_methods_;
+  }
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -601,7 +609,7 @@
 
   void Compile(jobject class_loader,
                const std::vector<const DexFile*>& dex_files,
-               TimingLogger* timings);
+               TimingLogger* timings) REQUIRES(!dex_to_dex_references_lock_);
   void CompileDexFile(jobject class_loader,
                       const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
@@ -702,6 +710,16 @@
   const ProfileCompilationInfo* const profile_compilation_info_;
 
   size_t max_arena_alloc_;
+
+  // Data for delaying dex-to-dex compilation.
+  Mutex dex_to_dex_references_lock_;
+  // In the first phase, dex_to_dex_references_ collects methods for dex-to-dex compilation.
+  class DexFileMethodSet;
+  std::vector<DexFileMethodSet> dex_to_dex_references_ GUARDED_BY(dex_to_dex_references_lock_);
+  // In the second phase, current_dex_to_dex_methods_ points to the BitVector with method
+  // indexes for dex-to-dex compilation in the current dex file.
+  const BitVector* current_dex_to_dex_methods_;
+
   friend class CompileClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 6bbd3c5..60b700a 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -88,8 +88,12 @@
     return compiler_filter_ == CompilerFilter::kVerifyAtRuntime;
   }
 
-  bool IsCompilationEnabled() const {
-    return CompilerFilter::IsCompilationEnabled(compiler_filter_);
+  bool IsBytecodeCompilationEnabled() const {
+    return CompilerFilter::IsBytecodeCompilationEnabled(compiler_filter_);
+  }
+
+  bool IsJniCompilationEnabled() const {
+    return CompilerFilter::IsJniCompilationEnabled(compiler_filter_);
   }
 
   bool IsVerificationEnabled() const {
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index be720ad..da10568 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -437,6 +437,9 @@
       continue;
     }
     const DexFile* dex_file = dex_cache->GetDexFile();
+    CHECK(dex_file_oat_index_map_.find(dex_file) != dex_file_oat_index_map_.end())
+        << "Dex cache should have been pruned " << dex_file->GetLocation()
+        << "; possibly in class path";
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
     DCHECK(layout.Valid());
     size_t oat_index = GetOatIndexForDexCache(dex_cache);
@@ -839,6 +842,10 @@
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
 
+  // Clear class table strong roots so that dex caches can get pruned. We require pruning the class
+  // path dex caches.
+  class_linker->ClearClassTableStrongRoots();
+
   // Make a list of classes we would like to prune.
   NonImageClassesVisitor visitor(this);
   class_linker->VisitClasses(&visitor);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 5b19284..6d1f944 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -199,7 +199,7 @@
     for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
       dex_files.push_back(dex_file.get());
       ScopedObjectAccess soa(Thread::Current());
-      class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+      class_linker->RegisterDexFile(*dex_file, nullptr);
     }
     linker::MultiOatRelativePatcher patcher(compiler_driver_->GetInstructionSet(),
                                             instruction_set_features_.get());
@@ -448,23 +448,23 @@
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
-    InstructionSet insn_set = kX86;
-    std::string error_msg;
-    std::unique_ptr<const InstructionSetFeatures> insn_features(
-        InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
-    ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
-    std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
-                                                            insn_features.get(),
-                                                            0u,
-                                                            nullptr));
-    ASSERT_NE(oat_header.get(), nullptr);
-    ASSERT_TRUE(oat_header->IsValid());
+  InstructionSet insn_set = kX86;
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> insn_features(
+    InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
+  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
+  std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
+                                                          insn_features.get(),
+                                                          0u,
+                                                          nullptr));
+  ASSERT_NE(oat_header.get(), nullptr);
+  ASSERT_TRUE(oat_header->IsValid());
 
-    char* magic = const_cast<char*>(oat_header->GetMagic());
-    strcpy(magic, "");  // bad magic
-    ASSERT_FALSE(oat_header->IsValid());
-    strcpy(magic, "oat\n000");  // bad version
-    ASSERT_FALSE(oat_header->IsValid());
+  char* magic = const_cast<char*>(oat_header->GetMagic());
+  strcpy(magic, "");  // bad magic
+  ASSERT_FALSE(oat_header->IsValid());
+  strcpy(magic, "oat\n000");  // bad version
+  ASSERT_FALSE(oat_header->IsValid());
 }
 
 TEST_F(OatTest, EmptyTextSection) {
@@ -491,10 +491,7 @@
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   for (const DexFile* dex_file : dex_files) {
     ScopedObjectAccess soa(Thread::Current());
-    class_linker->RegisterDexFile(
-        *dex_file,
-        class_linker->GetOrCreateAllocatorForClassLoader(
-            soa.Decode<mirror::ClassLoader*>(class_loader)));
+    class_linker->RegisterDexFile(*dex_file, soa.Decode<mirror::ClassLoader*>(class_loader));
   }
   compiler_driver_->SetDexFilesForOatFile(dex_files);
   compiler_driver_->CompileAll(class_loader, dex_files, &timings);
@@ -766,4 +763,28 @@
   TestZipFileInput(true);
 }
 
+TEST_F(OatTest, UpdateChecksum) {
+  InstructionSet insn_set = kX86;
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> insn_features(
+    InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
+  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
+  std::unique_ptr<OatHeader> oat_header(OatHeader::Create(insn_set,
+                                                          insn_features.get(),
+                                                          0u,
+                                                          nullptr));
+  // The starting adler32 value is 1.
+  EXPECT_EQ(1U, oat_header->GetChecksum());
+
+  oat_header->UpdateChecksum(OatHeader::kOatMagic, sizeof(OatHeader::kOatMagic));
+  EXPECT_EQ(64291151U, oat_header->GetChecksum());
+
+  // Make sure that null data does not reset the checksum.
+  oat_header->UpdateChecksum(nullptr, 0);
+  EXPECT_EQ(64291151U, oat_header->GetChecksum());
+
+  oat_header->UpdateChecksum(OatHeader::kOatMagic, sizeof(OatHeader::kOatMagic));
+  EXPECT_EQ(216138397U, oat_header->GetChecksum());
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 8da9f06..4232002 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -63,6 +63,29 @@
     return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);
 }
 
+class ChecksumUpdatingOutputStream : public OutputStream {
+ public:
+  ChecksumUpdatingOutputStream(OutputStream* out, OatHeader* oat_header)
+      : OutputStream(out->GetLocation()), out_(out), oat_header_(oat_header) { }
+
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    oat_header_->UpdateChecksum(buffer, byte_count);
+    return out_->WriteFully(buffer, byte_count);
+  }
+
+  off_t Seek(off_t offset, Whence whence) OVERRIDE {
+    return out_->Seek(offset, whence);
+  }
+
+  bool Flush() OVERRIDE {
+    return out_->Flush();
+  }
+
+ private:
+  OutputStream* const out_;
+  OatHeader* const oat_header_;
+};
+
 }  // anonymous namespace
 
 // Defines the location of the raw dex file to write.
@@ -422,13 +445,21 @@
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
     oat_dex_file.ReserveClassOffsets(this);
   }
-  if (!WriteOatDexFiles(rodata) ||
+  ChecksumUpdatingOutputStream checksum_updating_rodata(rodata, oat_header_.get());
+  if (!WriteOatDexFiles(&checksum_updating_rodata) ||
       !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) ||
       !OpenDexFiles(file, verify, &dex_files_map, &dex_files) ||
       !WriteTypeLookupTables(dex_files_map.get(), dex_files)) {
     return false;
   }
 
+  // Do a bulk checksum update for Dex[] and TypeLookupTable[]. Doing it piece by
+  // piece would be difficult because we're not using the OutputStream directly.
+  if (!oat_dex_files_.empty()) {
+    size_t size = size_after_type_lookup_tables - oat_dex_files_[0].dex_file_offset_;
+    oat_header_->UpdateChecksum(dex_files_map->Begin(), size);
+  }
+
   *opened_dex_files_map = std::move(dex_files_map);
   *opened_dex_files = std::move(dex_files);
   write_state_ = WriteState::kPrepareLayout;
@@ -996,7 +1027,7 @@
             << PrettyMethod(it.GetMemberIndex(), *dex_file_);
         const OatQuickMethodHeader& method_header =
             oat_class->method_headers_[method_offsets_index_];
-        if (!writer_->WriteData(out, &method_header, sizeof(method_header))) {
+        if (!out->WriteFully(&method_header, sizeof(method_header))) {
           ReportWriteFailure("method header", it);
           return false;
         }
@@ -1063,7 +1094,7 @@
           }
         }
 
-        if (!writer_->WriteData(out, quick_code.data(), code_size)) {
+        if (!out->WriteFully(quick_code.data(), code_size)) {
           ReportWriteFailure("method code", it);
           return false;
         }
@@ -1279,7 +1310,7 @@
         size_t map_size = map.size() * sizeof(map[0]);
         if (map_offset == offset_) {
           // Write deduplicated map (code info for Optimizing or transformation info for dex2dex).
-          if (UNLIKELY(!writer_->WriteData(out, map.data(), map_size))) {
+          if (UNLIKELY(!out->WriteFully(map.data(), map_size))) {
             ReportWriteFailure(it);
             return false;
           }
@@ -1413,8 +1444,8 @@
       offset = CompiledCode::AlignCode(offset, instruction_set); \
       adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
       oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
-      field = compiler_driver_->Create ## fn_name(); \
-      offset += field->size();
+      (field) = compiler_driver_->Create ## fn_name(); \
+      offset += (field)->size();
 
     DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup);
     DO_TRAMPOLINE(quick_generic_jni_trampoline_, QuickGenericJniTrampoline);
@@ -1457,6 +1488,10 @@
 bool OatWriter::WriteRodata(OutputStream* out) {
   CHECK(write_state_ == WriteState::kWriteRoData);
 
+  // Wrap out to update checksum with each write.
+  ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get());
+  out = &checksum_updating_out;
+
   if (!WriteClassOffsets(out)) {
     LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
     return false;
@@ -1499,6 +1534,10 @@
 bool OatWriter::WriteCode(OutputStream* out) {
   CHECK(write_state_ == WriteState::kWriteText);
 
+  // Wrap out to update checksum with each write.
+  ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get());
+  out = &checksum_updating_out;
+
   SetMultiOatRelativePatcherAdjustment();
 
   const size_t file_offset = oat_data_offset_;
@@ -1526,8 +1565,8 @@
   if (kIsDebugBuild) {
     uint32_t size_total = 0;
     #define DO_STAT(x) \
-      VLOG(compiler) << #x "=" << PrettySize(x) << " (" << x << "B)"; \
-      size_total += x;
+      VLOG(compiler) << #x "=" << PrettySize(x) << " (" << (x) << "B)"; \
+      size_total += (x);
 
     DO_STAT(size_dex_file_alignment_);
     DO_STAT(size_executable_offset_alignment_);
@@ -1683,12 +1722,12 @@
         uint32_t alignment_padding = aligned_offset - relative_offset; \
         out->Seek(alignment_padding, kSeekCurrent); \
         size_trampoline_alignment_ += alignment_padding; \
-        if (!WriteData(out, field->data(), field->size())) { \
+        if (!out->WriteFully((field)->data(), (field)->size())) { \
           PLOG(ERROR) << "Failed to write " # field " to " << out->GetLocation(); \
           return false; \
         } \
-        size_ ## field += field->size(); \
-        relative_offset += alignment_padding + field->size(); \
+        size_ ## field += (field)->size(); \
+        relative_offset += alignment_padding + (field)->size(); \
         DCHECK_OFFSET(); \
       } while (false)
 
@@ -2200,11 +2239,6 @@
   return true;
 }
 
-bool OatWriter::WriteData(OutputStream* out, const void* data, size_t size) {
-  oat_header_->UpdateChecksum(data, size);
-  return out->WriteFully(data, size);
-}
-
 void OatWriter::SetMultiOatRelativePatcherAdjustment() {
   DCHECK(dex_files_ != nullptr);
   DCHECK(relative_patcher_ != nullptr);
@@ -2274,39 +2308,37 @@
   const size_t file_offset = oat_writer->oat_data_offset_;
   DCHECK_OFFSET_();
 
-  if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
+  if (!out->WriteFully(&dex_file_location_size_, sizeof(dex_file_location_size_))) {
     PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
 
-  if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
+  if (!out->WriteFully(dex_file_location_data_, dex_file_location_size_)) {
     PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
 
-  if (!oat_writer->WriteData(out,
-                             &dex_file_location_checksum_,
-                             sizeof(dex_file_location_checksum_))) {
+  if (!out->WriteFully(&dex_file_location_checksum_, sizeof(dex_file_location_checksum_))) {
     PLOG(ERROR) << "Failed to write dex file location checksum to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
 
-  if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
+  if (!out->WriteFully(&dex_file_offset_, sizeof(dex_file_offset_))) {
     PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
 
-  if (!oat_writer->WriteData(out, &class_offsets_offset_, sizeof(class_offsets_offset_))) {
+  if (!out->WriteFully(&class_offsets_offset_, sizeof(class_offsets_offset_))) {
     PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_);
 
-  if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
+  if (!out->WriteFully(&lookup_table_offset_, sizeof(lookup_table_offset_))) {
     PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
     return false;
   }
@@ -2316,7 +2348,7 @@
 }
 
 bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) {
-  if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
+  if (!out->WriteFully(class_offsets_.data(), GetClassOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation()
                 << " to " << out->GetLocation();
     return false;
@@ -2405,13 +2437,13 @@
                                 OutputStream* out,
                                 const size_t file_offset) const {
   DCHECK_OFFSET_();
-  if (!oat_writer->WriteData(out, &status_, sizeof(status_))) {
+  if (!out->WriteFully(&status_, sizeof(status_))) {
     PLOG(ERROR) << "Failed to write class status to " << out->GetLocation();
     return false;
   }
   oat_writer->size_oat_class_status_ += sizeof(status_);
 
-  if (!oat_writer->WriteData(out, &type_, sizeof(type_))) {
+  if (!out->WriteFully(&type_, sizeof(type_))) {
     PLOG(ERROR) << "Failed to write oat class type to " << out->GetLocation();
     return false;
   }
@@ -2419,20 +2451,20 @@
 
   if (method_bitmap_size_ != 0) {
     CHECK_EQ(kOatClassSomeCompiled, type_);
-    if (!oat_writer->WriteData(out, &method_bitmap_size_, sizeof(method_bitmap_size_))) {
+    if (!out->WriteFully(&method_bitmap_size_, sizeof(method_bitmap_size_))) {
       PLOG(ERROR) << "Failed to write method bitmap size to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += sizeof(method_bitmap_size_);
 
-    if (!oat_writer->WriteData(out, method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
+    if (!out->WriteFully(method_bitmap_->GetRawStorage(), method_bitmap_size_)) {
       PLOG(ERROR) << "Failed to write method bitmap to " << out->GetLocation();
       return false;
     }
     oat_writer->size_oat_class_method_bitmaps_ += method_bitmap_size_;
   }
 
-  if (!oat_writer->WriteData(out, method_offsets_.data(), GetMethodOffsetsRawSize())) {
+  if (!out->WriteFully(method_offsets_.data(), GetMethodOffsetsRawSize())) {
     PLOG(ERROR) << "Failed to write method offsets to " << out->GetLocation();
     return false;
   }
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 3862798..cc81f39 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -271,7 +271,6 @@
   bool WriteTypeLookupTables(MemMap* opened_dex_files_map,
                              const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
-  bool WriteData(OutputStream* out, const void* data, size_t size);
   void SetMultiOatRelativePatcherAdjustment();
 
   enum class WriteState {
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 6c6e5af..e9fcfe2 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -912,14 +912,15 @@
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
     DCHECK(phi->IsLoopHeaderPhi());
+    auto&& inputs = phi->GetInputs();
     // Start with input 1. Input 0 is from the incoming block.
-    HInstruction* input1 = phi->InputAt(1);
+    HInstruction* input1 = inputs[1];
     DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
         *phi->GetBlock()->GetPredecessors()[1]));
-    for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
+    for (size_t i = 2; i < inputs.size(); ++i) {
       DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
           *phi->GetBlock()->GetPredecessors()[i]));
-      if (input1 != phi->InputAt(i)) {
+      if (input1 != inputs[i]) {
         return false;
       }
     }
@@ -1169,7 +1170,11 @@
           loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
         SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
         if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
-          HoistToPreHeaderOrDeoptBlock(loop, array_get);
+          // We can hoist ArrayGet only if its execution is guaranteed on every iteration.
+          // In other words, only if the block containing array_get dominates all back edges.
+          if (loop->DominatesAllBackEdges(array_get->GetBlock())) {
+            HoistToPreHeaderOrDeoptBlock(loop, array_get);
+          }
         }
       }
     }
@@ -1394,13 +1399,7 @@
       }
       // Does the current basic block dominate all back edges? If not,
       // don't apply dynamic bce to something that may not be executed.
-      for (HBasicBlock* back_edge : loop->GetBackEdges()) {
-        if (!block->Dominates(back_edge)) {
-          return false;
-        }
-      }
-      // Success!
-      return true;
+      return loop->DominatesAllBackEdges(block);
     }
     return false;
   }
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 08670a0..6e851bf 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -111,10 +111,10 @@
         << " " << locations->Out();
   }
 
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    DCHECK(CheckType(instruction->InputAt(i)->GetType(), locations->InAt(i)))
-      << instruction->InputAt(i)->GetType()
-      << " " << locations->InAt(i);
+  auto&& inputs = instruction->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    DCHECK(CheckType(inputs[i]->GetType(), locations->InAt(i)))
+      << inputs[i]->GetType() << " " << locations->InAt(i);
   }
 
   HEnvironment* environment = instruction->GetEnvironment();
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e010662..6e74d08 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,8 @@
 
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
-#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
 
 class NullCheckSlowPathARM : public SlowPathCode {
@@ -674,7 +675,8 @@
 };
 
 #undef __
-#define __ down_cast<ArmAssembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
 
 inline Condition ARMCondition(IfCondition cond) {
   switch (cond) {
@@ -3695,7 +3697,7 @@
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 261c04f..5560ae2 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -132,7 +132,8 @@
   return ARM64ReturnLocation(return_type);
 }
 
-#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()
 
 // Calculate memory accessing operand for save/restore live registers.
@@ -4399,7 +4400,7 @@
 
 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index fb50680..ed0767e 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -141,7 +141,8 @@
   return MipsReturnLocation(type);
 }
 
-#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
@@ -478,7 +479,8 @@
 }
 
 #undef __
-#define __ down_cast<MipsAssembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, x).Int32Value()
 
 void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
@@ -663,6 +665,18 @@
   }
 }
 
+void CodeGeneratorMIPS::ComputeSpillMask() {
+  core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+  fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
+  DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  // If there are FPU callee-saved registers and there's an odd number of GPR callee-saved
+  // registers, include the ZERO register to force alignment of FPU callee-saved registers
+  // within the stack frame.
+  if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) {
+    core_spill_mask_ |= (1 << ZERO);
+  }
+}
+
 static dwarf::Reg DWARFReg(Register reg) {
   return dwarf::Reg::MipsCore(static_cast<int>(reg));
 }
@@ -692,105 +706,61 @@
   }
 
   // Spill callee-saved registers.
-  // Note that their cumulative size is small and they can be indexed using
-  // 16-bit offsets.
 
-  // TODO: increment/decrement SP in one step instead of two or remove this comment.
-
-  uint32_t ofs = FrameEntrySpillSize();
-  bool unaligned_float = ofs & 0x7;
-  bool fpu_32bit = isa_features_.Is32BitFloatingPoint();
+  uint32_t ofs = GetFrameSize();
   __ IncreaseFrameSize(ofs);
 
-  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
-    Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg)) {
-      ofs -= kMipsWordSize;
-      __ Sw(reg, SP, ofs);
+  for (uint32_t mask = core_spill_mask_; mask != 0; ) {
+    Register reg = static_cast<Register>(MostSignificantBit(mask));
+    mask ^= 1u << reg;
+    ofs -= kMipsWordSize;
+    // The ZERO register is only included for alignment.
+    if (reg != ZERO) {
+      __ StoreToOffset(kStoreWord, reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
   }
 
-  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
-    FRegister reg = kFpuCalleeSaves[i];
-    if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-      ofs -= kMipsDoublewordSize;
-      // TODO: Change the frame to avoid unaligned accesses for fpu registers.
-      if (unaligned_float) {
-        if (fpu_32bit) {
-          __ Swc1(reg, SP, ofs);
-          __ Swc1(static_cast<FRegister>(reg + 1), SP, ofs + 4);
-        } else {
-          __ Mfhc1(TMP, reg);
-          __ Swc1(reg, SP, ofs);
-          __ Sw(TMP, SP, ofs + 4);
-        }
-      } else {
-        __ Sdc1(reg, SP, ofs);
-      }
-      // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
-    }
+  for (uint32_t mask = fpu_spill_mask_; mask != 0; ) {
+    FRegister reg = static_cast<FRegister>(MostSignificantBit(mask));
+    mask ^= 1u << reg;
+    ofs -= kMipsDoublewordSize;
+    __ StoreDToOffset(reg, SP, ofs);
+    // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
   }
 
-  // Allocate the rest of the frame and store the current method pointer
-  // at its end.
-
-  __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
-  static_assert(IsInt<16>(kCurrentMethodStackOffset),
-                "kCurrentMethodStackOffset must fit into int16_t");
-  __ Sw(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+  // Store the current method pointer.
+  __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
 }
 
 void CodeGeneratorMIPS::GenerateFrameExit() {
   __ cfi().RememberState();
 
   if (!HasEmptyFrame()) {
-    // Deallocate the rest of the frame.
-
-    __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
     // Restore callee-saved registers.
-    // Note that their cumulative size is small and they can be indexed using
-    // 16-bit offsets.
 
-    // TODO: increment/decrement SP in one step instead of two or remove this comment.
-
-    uint32_t ofs = 0;
-    bool unaligned_float = FrameEntrySpillSize() & 0x7;
-    bool fpu_32bit = isa_features_.Is32BitFloatingPoint();
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      FRegister reg = kFpuCalleeSaves[i];
-      if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-        if (unaligned_float) {
-          if (fpu_32bit) {
-            __ Lwc1(reg, SP, ofs);
-            __ Lwc1(static_cast<FRegister>(reg + 1), SP, ofs + 4);
-          } else {
-            __ Lwc1(reg, SP, ofs);
-            __ Lw(TMP, SP, ofs + 4);
-            __ Mthc1(TMP, reg);
-          }
-        } else {
-          __ Ldc1(reg, SP, ofs);
-        }
-        ofs += kMipsDoublewordSize;
-        // TODO: __ cfi().Restore(DWARFReg(reg));
-      }
-    }
-
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-      Register reg = kCoreCalleeSaves[i];
-      if (allocated_registers_.ContainsCoreRegister(reg)) {
-        __ Lw(reg, SP, ofs);
-        ofs += kMipsWordSize;
+    // For better instruction scheduling restore RA before other registers.
+    uint32_t ofs = GetFrameSize();
+    for (uint32_t mask = core_spill_mask_; mask != 0; ) {
+      Register reg = static_cast<Register>(MostSignificantBit(mask));
+      mask ^= 1u << reg;
+      ofs -= kMipsWordSize;
+      // The ZERO register is only included for alignment.
+      if (reg != ZERO) {
+        __ LoadFromOffset(kLoadWord, reg, SP, ofs);
         __ cfi().Restore(DWARFReg(reg));
       }
     }
 
-    DCHECK_EQ(ofs, FrameEntrySpillSize());
-    __ DecreaseFrameSize(ofs);
+    for (uint32_t mask = fpu_spill_mask_; mask != 0; ) {
+      FRegister reg = static_cast<FRegister>(MostSignificantBit(mask));
+      mask ^= 1u << reg;
+      ofs -= kMipsDoublewordSize;
+      __ LoadDFromOffset(reg, SP, ofs);
+      // TODO: __ cfi().Restore(DWARFReg(reg));
+    }
+
+    __ DecreaseFrameSize(GetFrameSize());
   }
 
   __ Jr(RA);
@@ -4437,7 +4407,7 @@
 
 void LocationsBuilderMIPS::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 435a869..8c0bae6 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -262,6 +262,7 @@
                     OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGeneratorMIPS() {}
 
+  void ComputeSpillMask() OVERRIDE;
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index e67d8d0..8c73e35 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -102,7 +102,8 @@
   return Mips64ReturnLocation(type);
 }
 
-#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
@@ -424,7 +425,8 @@
 }
 
 #undef __
-#define __ down_cast<Mips64Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
@@ -3592,7 +3594,7 @@
 
 void LocationsBuilderMIPS64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index bdbafcd..8c643a0 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -47,7 +47,8 @@
 
 static constexpr int kFakeReturnRegister = Register(8);
 
-#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86WordSize, x).Int32Value()
 
 class NullCheckSlowPathX86 : public SlowPathCode {
@@ -691,7 +692,8 @@
 };
 
 #undef __
-#define __ down_cast<X86Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
 
 inline Condition X86Condition(IfCondition cond) {
   switch (cond) {
@@ -4227,7 +4229,7 @@
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 30eca2c..72de3e6 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -51,7 +51,8 @@
 
 static constexpr int kC2ConditionMask = 0x400;
 
-#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
 
 class NullCheckSlowPathX86_64 : public SlowPathCode {
@@ -710,7 +711,8 @@
 };
 
 #undef __
-#define __ down_cast<X86_64Assembler*>(GetAssembler())->
+// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
 
 inline Condition X86_64IntegerCondition(IfCondition cond) {
   switch (cond) {
@@ -4027,7 +4029,7 @@
 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
     locations->SetInAt(i, Location::Any());
   }
   locations->SetOut(Location::Any());
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 968e267..2bd2403 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -335,9 +335,7 @@
   }
 
   // Ensure the inputs of `instruction` are defined in a block of the graph.
-  for (HInputIterator input_it(instruction); !input_it.Done();
-       input_it.Advance()) {
-    HInstruction* input = input_it.Current();
+  for (HInstruction* input : instruction->GetInputs()) {
     const HInstructionList& list = input->IsPhi()
         ? input->GetBlock()->GetPhis()
         : input->GetBlock()->GetInstructions();
@@ -364,7 +362,8 @@
                             instruction->GetId()));
     }
     size_t use_index = use.GetIndex();
-    if ((use_index >= user->InputCount()) || (user->InputAt(use_index) != instruction)) {
+    auto&& user_inputs = user->GetInputs();
+    if ((use_index >= user_inputs.size()) || (user_inputs[use_index] != instruction)) {
       AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
                             "UseListNode index.",
                             user->DebugName(),
@@ -387,8 +386,9 @@
   }
 
   // Ensure 'instruction' has pointers to its inputs' use entries.
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
+  auto&& input_records = instruction->GetInputRecords();
+  for (size_t i = 0; i < input_records.size(); ++i) {
+    const HUserRecord<HInstruction*>& input_record = input_records[i];
     HInstruction* input = input_record.GetInstruction();
     if ((input_record.GetBeforeUseNode() == input->GetUses().end()) ||
         (input_record.GetUseNode() == input->GetUses().end()) ||
@@ -490,8 +490,7 @@
   VisitInstruction(invoke);
 
   if (invoke->IsStaticWithExplicitClinitCheck()) {
-    size_t last_input_index = invoke->InputCount() - 1;
-    HInstruction* last_input = invoke->InputAt(last_input_index);
+    HInstruction* last_input = invoke->GetInputs().back();
     if (last_input == nullptr) {
       AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
                             "has a null pointer as last input.",
@@ -673,16 +672,21 @@
 
 static bool IsConstantEquivalent(HInstruction* insn1, HInstruction* insn2, BitVector* visited) {
   if (insn1->IsPhi() &&
-      insn1->AsPhi()->IsVRegEquivalentOf(insn2) &&
-      insn1->InputCount() == insn2->InputCount()) {
+      insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
+    auto&& insn1_inputs = insn1->GetInputs();
+    auto&& insn2_inputs = insn2->GetInputs();
+    if (insn1_inputs.size() != insn2_inputs.size()) {
+      return false;
+    }
+
     // Testing only one of the two inputs for recursion is sufficient.
     if (visited->IsBitSet(insn1->GetId())) {
       return true;
     }
     visited->SetBit(insn1->GetId());
 
-    for (size_t i = 0, e = insn1->InputCount(); i < e; ++i) {
-      if (!IsConstantEquivalent(insn1->InputAt(i), insn2->InputAt(i), visited)) {
+    for (size_t i = 0; i < insn1_inputs.size(); ++i) {
+      if (!IsConstantEquivalent(insn1_inputs[i], insn2_inputs[i], visited)) {
         return false;
       }
     }
@@ -698,15 +702,16 @@
   VisitInstruction(phi);
 
   // Ensure the first input of a phi is not itself.
-  if (phi->InputAt(0) == phi) {
+  ArrayRef<HUserRecord<HInstruction*>> input_records = phi->GetInputRecords();
+  if (input_records[0].GetInstruction() == phi) {
     AddError(StringPrintf("Loop phi %d in block %d is its own first input.",
                           phi->GetId(),
                           phi->GetBlock()->GetBlockId()));
   }
 
   // Ensure that the inputs have the same primitive kind as the phi.
-  for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-    HInstruction* input = phi->InputAt(i);
+  for (size_t i = 0; i < input_records.size(); ++i) {
+    HInstruction* input = input_records[i].GetInstruction();
     if (Primitive::PrimitiveKind(input->GetType()) != Primitive::PrimitiveKind(phi->GetType())) {
         AddError(StringPrintf(
             "Input %d at index %zu of phi %d from block %d does not have the "
@@ -729,8 +734,7 @@
     // because we do not remove the corresponding inputs when we prove that an
     // instruction cannot throw. Instead, we at least test that all phis have the
     // same, non-zero number of inputs (b/24054676).
-    size_t input_count_this = phi->InputCount();
-    if (input_count_this == 0u) {
+    if (input_records.empty()) {
       AddError(StringPrintf("Phi %d in catch block %d has zero inputs.",
                             phi->GetId(),
                             phi->GetBlock()->GetBlockId()));
@@ -738,12 +742,12 @@
       HInstruction* next_phi = phi->GetNext();
       if (next_phi != nullptr) {
         size_t input_count_next = next_phi->InputCount();
-        if (input_count_this != input_count_next) {
+        if (input_records.size() != input_count_next) {
           AddError(StringPrintf("Phi %d in catch block %d has %zu inputs, "
                                 "but phi %d has %zu inputs.",
                                 phi->GetId(),
                                 phi->GetBlock()->GetBlockId(),
-                                input_count_this,
+                                input_records.size(),
                                 next_phi->GetId(),
                                 input_count_next));
         }
@@ -753,17 +757,17 @@
     // Ensure the number of inputs of a non-catch phi is the same as the number
     // of its predecessors.
     const ArenaVector<HBasicBlock*>& predecessors = phi->GetBlock()->GetPredecessors();
-    if (phi->InputCount() != predecessors.size()) {
+    if (input_records.size() != predecessors.size()) {
       AddError(StringPrintf(
           "Phi %d in block %d has %zu inputs, "
           "but block %d has %zu predecessors.",
-          phi->GetId(), phi->GetBlock()->GetBlockId(), phi->InputCount(),
+          phi->GetId(), phi->GetBlock()->GetBlockId(), input_records.size(),
           phi->GetBlock()->GetBlockId(), predecessors.size()));
     } else {
       // Ensure phi input at index I either comes from the Ith
       // predecessor or from a block that dominates this predecessor.
-      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-        HInstruction* input = phi->InputAt(i);
+      for (size_t i = 0; i < input_records.size(); ++i) {
+        HInstruction* input = input_records[i].GetInstruction();
         HBasicBlock* predecessor = predecessors[i];
         if (!(input->GetBlock() == predecessor
               || input->GetBlock()->Dominates(predecessor))) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 6aec463..3084a4f 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -497,12 +497,13 @@
 
   void PrintInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
-    if (instruction->InputCount() > 0) {
-      StringList inputs;
-      for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
-        inputs.NewEntryStream() << GetTypeId(it.Current()->GetType()) << it.Current()->GetId();
+    auto&& inputs = instruction->GetInputs();
+    if (!inputs.empty()) {
+      StringList input_list;
+      for (const HInstruction* input : inputs) {
+        input_list.NewEntryStream() << GetTypeId(input->GetType()) << input->GetId();
       }
-      StartAttributeStream() << inputs;
+      StartAttributeStream() << input_list;
     }
     instruction->Accept(this);
     if (instruction->HasEnvironment()) {
@@ -544,12 +545,12 @@
       StartAttributeStream("liveness") << instruction->GetLifetimePosition();
       LocationSummary* locations = instruction->GetLocations();
       if (locations != nullptr) {
-        StringList inputs;
-        for (size_t i = 0; i < instruction->InputCount(); ++i) {
-          DumpLocation(inputs.NewEntryStream(), locations->InAt(i));
+        StringList input_list;
+        for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+          DumpLocation(input_list.NewEntryStream(), locations->InAt(i));
         }
         std::ostream& attr = StartAttributeStream("locations");
-        attr << inputs << "->";
+        attr << input_list << "->";
         DumpLocation(attr, locations->Out());
       }
     }
@@ -739,8 +740,8 @@
       HInstruction* instruction = it.Current();
       output_ << instruction->GetId() << " " << GetTypeId(instruction->GetType())
               << instruction->GetId() << "[ ";
-      for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) {
-        output_ << inputs.Current()->GetId() << " ";
+      for (const HInstruction* input : instruction->GetInputs()) {
+        output_ << input->GetId() << " ";
       }
       output_ << "]\n";
     }
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index c06d19d..0a5cf80 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -152,8 +152,8 @@
 
   // Visit all descendants.
   uint32_t low = d1;
-  for (size_t i = 0, count = instruction->InputCount(); i < count; ++i) {
-    low = std::min(low, VisitDescendant(loop, instruction->InputAt(i)));
+  for (HInstruction* input : instruction->GetInputs()) {
+    low = std::min(low, VisitDescendant(loop, input));
   }
 
   // Lower or found SCC?
@@ -341,11 +341,11 @@
                                                                          HInstruction* phi,
                                                                          size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  const size_t count = phi->InputCount();
-  DCHECK_LT(input_index, count);
-  InductionInfo* a = LookupInfo(loop, phi->InputAt(input_index));
-  for (size_t i = input_index + 1; i < count; i++) {
-    InductionInfo* b = LookupInfo(loop, phi->InputAt(i));
+  auto&& inputs = phi->GetInputs();
+  DCHECK_LT(input_index, inputs.size());
+  InductionInfo* a = LookupInfo(loop, inputs[input_index]);
+  for (size_t i = input_index + 1; i < inputs.size(); i++) {
+    InductionInfo* b = LookupInfo(loop, inputs[i]);
     if (!InductionEqual(a, b)) {
       return nullptr;
     }
@@ -464,12 +464,12 @@
 HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi,
                                                                       size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  const size_t count = phi->InputCount();
-  DCHECK_LT(input_index, count);
-  auto ita = cycle_.find(phi->InputAt(input_index));
+  auto&& inputs = phi->GetInputs();
+  DCHECK_LT(input_index, inputs.size());
+  auto ita = cycle_.find(inputs[input_index]);
   if (ita != cycle_.end()) {
-    for (size_t i = input_index + 1; i < count; i++) {
-      auto itb = cycle_.find(phi->InputAt(i));
+    for (size_t i = input_index + 1; i < inputs.size(); i++) {
+      auto itb = cycle_.find(inputs[i]);
       if (itb == cycle_.end() ||
           !HInductionVarAnalysis::InductionEqual(ita->second, itb->second)) {
         return nullptr;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index aaddc01..f2286e4 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -381,10 +381,11 @@
   // If the operation requests a specific type, we make sure its input is of that type.
   if (type != value->GetType()) {
     if (Primitive::IsFloatingPointType(type)) {
-      return ssa_builder_->GetFloatOrDoubleEquivalent(value, type);
+      value = ssa_builder_->GetFloatOrDoubleEquivalent(value, type);
     } else if (type == Primitive::kPrimNot) {
-      return ssa_builder_->GetReferenceTypeEquivalent(value);
+      value = ssa_builder_->GetReferenceTypeEquivalent(value);
     }
+    DCHECK(value != nullptr);
   }
 
   return value;
@@ -832,7 +833,8 @@
                         register_index,
                         is_range,
                         descriptor,
-                        nullptr /* clinit_check */);
+                        nullptr, /* clinit_check */
+                        true /* is_unresolved */);
   }
 
   // Potential class initialization check, in the case of a static method call.
@@ -897,7 +899,8 @@
                       register_index,
                       is_range,
                       descriptor,
-                      clinit_check);
+                      clinit_check,
+                      false /* is_unresolved */);
 }
 
 bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
@@ -1090,14 +1093,17 @@
                                        uint32_t register_index,
                                        bool is_range,
                                        const char* descriptor,
-                                       HClinitCheck* clinit_check) {
+                                       HClinitCheck* clinit_check,
+                                       bool is_unresolved) {
   DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
 
   size_t start_index = 0;
   size_t argument_index = 0;
   if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
-    HInstruction* arg = LoadNullCheckedLocal(is_range ? register_index : args[0],
-                                             invoke->GetDexPc());
+    uint32_t obj_reg = is_range ? register_index : args[0];
+    HInstruction* arg = is_unresolved
+        ? LoadLocal(obj_reg, Primitive::kPrimNot)
+        : LoadNullCheckedLocal(obj_reg, invoke->GetDexPc());
     invoke->SetArgumentAt(0, arg);
     start_index = 1;
     argument_index = 1;
@@ -1204,7 +1210,12 @@
       compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
 
 
-  HInstruction* object = LoadNullCheckedLocal(obj_reg, dex_pc);
+  // Generate an explicit null check on the reference, unless the field access
+  // is unresolved. In that case, we rely on the runtime to perform various
+  // checks first, followed by a null check.
+  HInstruction* object = (resolved_field == nullptr)
+      ? LoadLocal(obj_reg, Primitive::kPrimNot)
+      : LoadNullCheckedLocal(obj_reg, dex_pc);
 
   Primitive::Type field_type = (resolved_field == nullptr)
       ? GetFieldAccessType(*dex_file_, field_index)
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 0e3e5a7..9cfc065 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -237,7 +237,8 @@
                     uint32_t register_index,
                     bool is_range,
                     const char* descriptor,
-                    HClinitCheck* clinit_check);
+                    HClinitCheck* clinit_check,
+                    bool is_unresolved);
 
   bool HandleStringInit(HInvoke* invoke,
                         uint32_t number_of_vreg_arguments,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index fd79901..eb1d156 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -250,6 +250,7 @@
       //    src
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
     }
   }
 }
@@ -278,6 +279,7 @@
   if (!shl->GetRight()->HasUses()) {
     shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight());
   }
+  RecordSimplification();
   return true;
 }
 
@@ -907,6 +909,7 @@
     if (Primitive::IsIntegralType(instruction->GetType())) {
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
       return;
     }
   }
@@ -999,6 +1002,7 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1116,6 +1120,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1176,6 +1181,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1216,6 +1222,7 @@
       //    0
       instruction->ReplaceWith(input_cst);
       instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
     } else if (IsPowerOfTwo(factor)) {
       // Replace code looking like
       //    MUL dst, src, pow_of_2
@@ -1334,6 +1341,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1347,6 +1355,7 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1382,6 +1391,7 @@
     // yields `-0.0`.
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1460,6 +1470,7 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
     return;
   }
 
@@ -1539,7 +1550,7 @@
   HRor* ror = new (GetGraph()->GetArena()) HRor(type, value, distance);
   invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror);
   // Remove ClinitCheck and LoadClass, if possible.
-  HInstruction* clinit = invoke->InputAt(invoke->InputCount() - 1);
+  HInstruction* clinit = invoke->GetInputs().back();
   if (clinit->IsClinitCheck() && !clinit->HasUses()) {
     clinit->GetBlock()->RemoveInstruction(clinit);
     HInstruction* ldclass = clinit->InputAt(0);
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 214250f..83a5127 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -165,7 +165,7 @@
 void Set##name() { SetBit(k##name); }                                 \
 bool Get##name() const { return IsBitSet(k##name); }                  \
 private:                                                              \
-static constexpr size_t k##name = bit + kNumberOfGenericOptimizations
+static constexpr size_t k##name = (bit) + kNumberOfGenericOptimizations
 
 class StringEqualsOptimizations : public IntrinsicOptimizations {
  public:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 97b8839..29f7672 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1150,17 +1150,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ CompareAndBranchIfZero(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ CompareAndBranchIfZero(arg, &return_false);
+  }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ ldr(temp, Address(str, class_offset));
-  __ ldr(temp1, Address(arg, class_offset));
-  __ cmp(temp, ShifterOperand(temp1));
-  __ b(&return_false, NE);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ ldr(temp, Address(str, class_offset));
+    __ ldr(temp1, Address(arg, class_offset));
+    __ cmp(temp, ShifterOperand(temp1));
+    __ b(&return_false, NE);
+  }
 
   // Load lengths of this and argument strings.
   __ ldr(temp, Address(str, count_offset));
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 07d9d87..d776fb4 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1327,21 +1327,26 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ Cbz(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Cbz(arg, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ Cmp(str, arg);
   __ B(&return_true, eq);
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ Ldr(temp, MemOperand(str.X(), class_offset));
-  __ Ldr(temp1, MemOperand(arg.X(), class_offset));
-  __ Cmp(temp, temp1);
-  __ B(&return_false, ne);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Ldr(temp, MemOperand(str.X(), class_offset));
+    __ Ldr(temp1, MemOperand(arg.X(), class_offset));
+    __ Cmp(temp, temp1);
+    __ B(&return_false, ne);
+  }
 
   // Load lengths of this and argument strings.
   __ Ldr(temp, MemOperand(str.X(), count_offset));
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index d66940f..05377f9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1319,11 +1319,11 @@
     __ j(kEqual, &return_false);
   }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
   if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
     __ movl(ecx, Address(str, class_offset));
     __ cmpl(ecx, Address(arg, class_offset));
     __ j(kNotEqual, &return_false);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 2a86769..67c2f3a 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1416,17 +1416,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ testl(arg, arg);
-  __ j(kEqual, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ testl(arg, arg);
+    __ j(kEqual, &return_false);
+  }
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ movl(rcx, Address(str, class_offset));
-  __ cmpl(rcx, Address(arg, class_offset));
-  __ j(kNotEqual, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ movl(rcx, Address(str, class_offset));
+    __ cmpl(rcx, Address(arg, class_offset));
+    __ j(kNotEqual, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ cmpl(str, arg);
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 7543cd6..a0ded74 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -30,8 +30,8 @@
 static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) {
   DCHECK(instruction->IsInLoop());
   HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
-  for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
-    HLoopInformation* input_loop = it.Current()->GetBlock()->GetLoopInformation();
+  for (const HInstruction* input : instruction->GetInputs()) {
+    HLoopInformation* input_loop = input->GetBlock()->GetLoopInformation();
     // We only need to check whether the input is defined in the loop. If it is not
     // it is defined before the loop.
     if (input_loop != nullptr && input_loop->IsIn(*info)) {
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 60329cc..ae3c4b0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -101,10 +101,7 @@
 }
 
 static void RemoveAsUser(HInstruction* instruction) {
-  for (size_t i = 0; i < instruction->InputCount(); i++) {
-    instruction->RemoveAsUserOfInput(i);
-  }
-
+  instruction->RemoveAsUserOfAllInputs();
   RemoveEnvironmentUses(instruction);
 }
 
@@ -734,6 +731,15 @@
   return false;
 }
 
+bool HLoopInformation::DominatesAllBackEdges(HBasicBlock* block) {
+  for (HBasicBlock* back_edge : GetBackEdges()) {
+    if (!block->Dominates(back_edge)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 bool HBasicBlock::Dominates(HBasicBlock* other) const {
   // Walk up the dominator tree from `other`, to find out if `this`
   // is an ancestor.
@@ -748,8 +754,9 @@
 }
 
 static void UpdateInputsUsers(HInstruction* instruction) {
-  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
-    instruction->InputAt(i)->AddUseAt(instruction, i);
+  auto&& inputs = instruction->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    inputs[i]->AddUseAt(instruction, i);
   }
   // Environment should be created later.
   DCHECK(!instruction->HasEnvironment());
@@ -778,22 +785,6 @@
   RemoveInstruction(initial);
 }
 
-void HBasicBlock::MoveInstructionBefore(HInstruction* insn, HInstruction* cursor) {
-  DCHECK(!cursor->IsPhi());
-  DCHECK(!insn->IsPhi());
-  DCHECK(!insn->IsControlFlow());
-  DCHECK(insn->CanBeMoved());
-  DCHECK(!insn->HasSideEffects());
-
-  HBasicBlock* from_block = insn->GetBlock();
-  HBasicBlock* to_block = cursor->GetBlock();
-  DCHECK(from_block != to_block);
-
-  from_block->RemoveInstruction(insn, /* ensure_safety */ false);
-  insn->SetBlock(to_block);
-  to_block->instructions_.InsertInstructionBefore(insn, cursor);
-}
-
 static void Add(HInstructionList* instruction_list,
                 HBasicBlock* block,
                 HInstruction* instruction) {
@@ -1117,9 +1108,10 @@
 void HPhi::RemoveInputAt(size_t index) {
   RemoveAsUserOfInput(index);
   inputs_.erase(inputs_.begin() + index);
-  for (size_t i = index, e = InputCount(); i < e; ++i) {
-    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u);
-    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  // Update indexes in use nodes of inputs that have been pulled forward by the erase().
+  for (size_t i = index, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i + 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
   }
 }
 
@@ -1315,16 +1307,18 @@
   return this == instruction->GetPreviousDisregardingMoves();
 }
 
-bool HInstruction::Equals(HInstruction* other) const {
+bool HInstruction::Equals(const HInstruction* other) const {
   if (!InstructionTypeEquals(other)) return false;
   DCHECK_EQ(GetKind(), other->GetKind());
   if (!InstructionDataEquals(other)) return false;
   if (GetType() != other->GetType()) return false;
-  if (InputCount() != other->InputCount()) return false;
-
-  for (size_t i = 0, e = InputCount(); i < e; ++i) {
-    if (InputAt(i) != other->InputAt(i)) return false;
+  auto&& inputs = GetInputs();
+  auto&& other_inputs = other->GetInputs();
+  if (inputs.size() != other_inputs.size()) return false;
+  for (size_t i = 0; i != inputs.size(); ++i) {
+    if (inputs[i] != other_inputs[i]) return false;
   }
+
   DCHECK_EQ(ComputeHashCode(), other->ComputeHashCode());
   return true;
 }
@@ -1342,6 +1336,11 @@
 }
 
 void HInstruction::MoveBefore(HInstruction* cursor) {
+  DCHECK(!IsPhi());
+  DCHECK(!IsControlFlow());
+  DCHECK(CanBeMoved());
+  DCHECK(!cursor->IsPhi());
+
   next_->previous_ = previous_;
   if (previous_ != nullptr) {
     previous_->next_ = next_;
@@ -2383,9 +2382,9 @@
   inputs_.insert(inputs_.begin() + index, HUserRecord<HInstruction*>(input));
   input->AddUseAt(this, index);
   // Update indexes in use nodes of inputs that have been pushed further back by the insert().
-  for (size_t i = index + 1u, size = inputs_.size(); i != size; ++i) {
-    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i - 1u);
-    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  for (size_t i = index + 1u, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i - 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
   }
 }
 
@@ -2393,9 +2392,9 @@
   RemoveAsUserOfInput(index);
   inputs_.erase(inputs_.begin() + index);
   // Update indexes in use nodes of inputs that have been pulled forward by the erase().
-  for (size_t i = index, e = InputCount(); i < e; ++i) {
-    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u);
-    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  for (size_t i = index, e = inputs_.size(); i < e; ++i) {
+    DCHECK_EQ(inputs_[i].GetUseNode()->GetIndex(), i + 1u);
+    inputs_[i].GetUseNode()->SetIndex(i);
   }
 }
 
@@ -2433,8 +2432,8 @@
   }
 }
 
-bool HLoadString::InstructionDataEquals(HInstruction* other) const {
-  HLoadString* other_load_string = other->AsLoadString();
+bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
+  const HLoadString* other_load_string = other->AsLoadString();
   if (string_index_ != other_load_string->string_index_ ||
       GetPackedFields() != other_load_string->GetPackedFields()) {
     return false;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c08323a..711a6c1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -37,6 +37,7 @@
 #include "primitive.h"
 #include "utils/array_ref.h"
 #include "utils/intrusive_forward_list.h"
+#include "utils/transform_array_ref.h"
 
 namespace art {
 
@@ -733,6 +734,8 @@
     return blocks_.GetHighestBitSet() != -1;
   }
 
+  bool DominatesAllBackEdges(HBasicBlock* block);
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -1046,7 +1049,6 @@
   // Replace instruction `initial` with `replacement` within this block.
   void ReplaceAndRemoveInstructionWith(HInstruction* initial,
                                        HInstruction* replacement);
-  void MoveInstructionBefore(HInstruction* insn, HInstruction* cursor);
   void AddPhi(HPhi* phi);
   void InsertPhiAfter(HPhi* instruction, HPhi* cursor);
   // RemoveInstruction and RemovePhi delete a given instruction from the respective
@@ -1331,12 +1333,12 @@
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
 #undef FORWARD_DECLARATION
 
-#define DECLARE_INSTRUCTION(type)                                       \
-  InstructionKind GetKindInternal() const OVERRIDE { return k##type; }  \
-  const char* DebugName() const OVERRIDE { return #type; }              \
-  bool InstructionTypeEquals(HInstruction* other) const OVERRIDE {      \
-    return other->Is##type();                                           \
-  }                                                                     \
+#define DECLARE_INSTRUCTION(type)                                         \
+  InstructionKind GetKindInternal() const OVERRIDE { return k##type; }    \
+  const char* DebugName() const OVERRIDE { return #type; }                \
+  bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE {  \
+    return other->Is##type();                                             \
+  }                                                                       \
   void Accept(HGraphVisitor* visitor) OVERRIDE
 
 #define DECLARE_ABSTRACT_INSTRUCTION(type)                              \
@@ -1779,16 +1781,41 @@
     return IsLoopHeaderPhi() && GetBlock()->GetLoopInformation()->IsIrreducible();
   }
 
-  virtual size_t InputCount() const = 0;
+  virtual ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() = 0;
+
+  ArrayRef<const HUserRecord<HInstruction*>> GetInputRecords() const {
+    // One virtual method is enough, just const_cast<> and then re-add the const.
+    return ArrayRef<const HUserRecord<HInstruction*>>(
+        const_cast<HInstruction*>(this)->GetInputRecords());
+  }
+
+  auto GetInputs() {
+    return MakeTransformArrayRef(
+        GetInputRecords(),
+        [](HUserRecord<HInstruction*>& record) -> HInstruction* {
+            return record.GetInstruction();
+        });
+  }
+
+  auto GetInputs() const {
+    return MakeTransformArrayRef(
+        GetInputRecords(),
+        [](const HUserRecord<HInstruction*>& record) -> const HInstruction* {
+            return record.GetInstruction();
+        });
+  }
+
+  size_t InputCount() const { return GetInputRecords().size(); }
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
 
+  void SetRawInputAt(size_t index, HInstruction* input) {
+    SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
+  }
+
   virtual void Accept(HGraphVisitor* visitor) = 0;
   virtual const char* DebugName() const = 0;
 
   virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; }
-  void SetRawInputAt(size_t index, HInstruction* input) {
-    SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
-  }
 
   virtual bool NeedsEnvironment() const { return false; }
 
@@ -1853,6 +1880,14 @@
     input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
   }
 
+  void RemoveAsUserOfAllInputs() {
+    for (const HUserRecord<HInstruction*>& input_use : GetInputRecords()) {
+      HUseList<HInstruction*>::iterator before_use_node = input_use.GetBeforeUseNode();
+      input_use.GetInstruction()->uses_.erase_after(before_use_node);
+      input_use.GetInstruction()->FixUpUserRecordsAfterUseRemoval(before_use_node);
+    }
+  }
+
   const HUseList<HInstruction*>& GetUses() const { return uses_; }
   const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; }
 
@@ -1957,21 +1992,21 @@
   virtual bool CanBeMoved() const { return false; }
 
   // Returns whether the two instructions are of the same kind.
-  virtual bool InstructionTypeEquals(HInstruction* other ATTRIBUTE_UNUSED) const {
+  virtual bool InstructionTypeEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
     return false;
   }
 
   // Returns whether any data encoded in the two instructions is equal.
   // This method does not look at the inputs. Both instructions must be
   // of the same type, otherwise the method has undefined behavior.
-  virtual bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const {
+  virtual bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
     return false;
   }
 
   // Returns whether two instructions are equal, that is:
   // 1) They have the same type and contain the same data (InstructionDataEquals).
   // 2) Their inputs are identical.
-  bool Equals(HInstruction* other) const;
+  bool Equals(const HInstruction* other) const;
 
   // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744)
   // is adopted and implemented by our C++ compiler(s). Fow now, we need to hide
@@ -1982,8 +2017,8 @@
 
   virtual size_t ComputeHashCode() const {
     size_t result = GetKind();
-    for (size_t i = 0, e = InputCount(); i < e; ++i) {
-      result = (result * 31) + InputAt(i)->GetId();
+    for (const HInstruction* input : GetInputs()) {
+      result = (result * 31) + input->GetId();
     }
     return result;
   }
@@ -2033,8 +2068,14 @@
   static constexpr size_t kNumberOfGenericPackedBits = kFlagReferenceTypeIsExact + 1;
   static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte;
 
-  virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
-  virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
+  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const {
+    return GetInputRecords()[i];
+  }
+
+  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) {
+    ArrayRef<HUserRecord<HInstruction*>> input_records = GetInputRecords();
+    input_records[index] = input;
+  }
 
   uint32_t GetPackedFields() const {
     return packed_fields_;
@@ -2155,21 +2196,6 @@
 };
 std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs);
 
-class HInputIterator : public ValueObject {
- public:
-  explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) {}
-
-  bool Done() const { return index_ == instruction_->InputCount(); }
-  HInstruction* Current() const { return instruction_->InputAt(index_); }
-  void Advance() { index_++; }
-
- private:
-  HInstruction* instruction_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HInputIterator);
-};
-
 class HInstructionIterator : public ValueObject {
  public:
   explicit HInstructionIterator(const HInstructionList& instructions)
@@ -2219,17 +2245,9 @@
       : HInstruction(side_effects, dex_pc), inputs_() {}
   virtual ~HTemplateInstruction() {}
 
-  size_t InputCount() const OVERRIDE { return N; }
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
-    DCHECK_LT(i, N);
-    return inputs_[i];
-  }
-
-  void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    DCHECK_LT(i, N);
-    inputs_[i] = input;
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
   }
 
  private:
@@ -2247,18 +2265,9 @@
 
   virtual ~HTemplateInstruction() {}
 
-  size_t InputCount() const OVERRIDE { return 0; }
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i ATTRIBUTE_UNUSED) const OVERRIDE {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
-  }
-
-  void SetRawInputRecordAt(size_t i ATTRIBUTE_UNUSED,
-                           const HUserRecord<HInstruction*>& input ATTRIBUTE_UNUSED) OVERRIDE {
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>();
   }
 
  private:
@@ -2346,7 +2355,10 @@
 
   bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
 
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
+  }
 
   void AddInput(HInstruction* input);
   void RemoveInputAt(size_t index);
@@ -2396,15 +2408,6 @@
 
   DECLARE_INSTRUCTION(Phi);
 
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
-
  private:
   static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
   static constexpr size_t kFieldTypeSize =
@@ -2415,7 +2418,7 @@
   static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
   using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
 
-  ArenaVector<HUserRecord<HInstruction*> > inputs_;
+  ArenaVector<HUserRecord<HInstruction*>> inputs_;
   const uint32_t reg_number_;
 
   DISALLOW_COPY_AND_ASSIGN(HPhi);
@@ -2479,7 +2482,7 @@
 
 class HNullConstant FINAL : public HConstant {
  public:
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -2509,7 +2512,7 @@
     return static_cast<uint64_t>(static_cast<uint32_t>(value_));
   }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     DCHECK(other->IsIntConstant()) << other->DebugName();
     return other->AsIntConstant()->value_ == value_;
   }
@@ -2548,7 +2551,7 @@
 
   uint64_t GetValueAsUint64() const OVERRIDE { return value_; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     DCHECK(other->IsLongConstant()) << other->DebugName();
     return other->AsLongConstant()->value_ == value_;
   }
@@ -2580,7 +2583,7 @@
     return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
   }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     DCHECK(other->IsFloatConstant()) << other->DebugName();
     return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
   }
@@ -2631,7 +2634,7 @@
 
   uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     DCHECK(other->IsDoubleConstant()) << other->DebugName();
     return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
   }
@@ -2775,7 +2778,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool NeedsEnvironment() const OVERRIDE { return true; }
@@ -2822,7 +2825,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return other->AsClassTableGet()->GetIndex() == index_ &&
         other->AsClassTableGet()->GetPackedFields() == GetPackedFields();
   }
@@ -2892,7 +2895,7 @@
   Primitive::Type GetResultType() const { return GetType(); }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -2964,7 +2967,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -3037,7 +3040,7 @@
   ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); }
   void SetBias(ComparisonBias bias) { SetPackedField<ComparisonBiasField>(bias); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return GetPackedFields() == other->AsCondition()->GetPackedFields();
   }
 
@@ -3541,7 +3544,7 @@
     return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
   }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return GetPackedFields() == other->AsCompare()->GetPackedFields();
   }
 
@@ -3671,10 +3674,13 @@
 
 class HInvoke : public HInstruction {
  public:
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
-
   bool NeedsEnvironment() const OVERRIDE;
 
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+    return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
+  }
+
   void SetArgumentAt(size_t index, HInstruction* argument) {
     SetRawInputAt(index, argument);
   }
@@ -3711,7 +3717,7 @@
 
   bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_;
   }
 
@@ -3762,14 +3768,6 @@
     SetPackedFlag<kFlagCanThrow>(true);
   }
 
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
-
   void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
 
   uint32_t number_of_arguments_;
@@ -3936,6 +3934,25 @@
     InsertInputAt(GetSpecialInputIndex(), input);
   }
 
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+    ArrayRef<HUserRecord<HInstruction*>> input_records = HInvoke::GetInputRecords();
+    if (kIsDebugBuild && IsStaticWithExplicitClinitCheck()) {
+      DCHECK(!input_records.empty());
+      DCHECK_GT(input_records.size(), GetNumberOfArguments());
+      HInstruction* last_input = input_records.back().GetInstruction();
+      // Note: `last_input` may be null during arguments setup.
+      if (last_input != nullptr) {
+        // `last_input` is the last input of a static invoke marked as having
+        // an explicit clinit check. It must either be:
+        // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or
+        // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
+        DCHECK(last_input->IsClinitCheck() || last_input->IsLoadClass()) << last_input->DebugName();
+      }
+    }
+    return input_records;
+  }
+
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // We access the method via the dex cache so we can't do an implicit null check.
     // TODO: for intrinsics we can generate implicit null checks.
@@ -4020,8 +4037,8 @@
   // instruction; only relevant for static calls with explicit clinit check.
   void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) {
     DCHECK(IsStaticWithExplicitClinitCheck());
-    size_t last_input_index = InputCount() - 1;
-    HInstruction* last_input = InputAt(last_input_index);
+    size_t last_input_index = inputs_.size() - 1u;
+    HInstruction* last_input = inputs_.back().GetInstruction();
     DCHECK(last_input != nullptr);
     DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName();
     RemoveAsUserOfInput(last_input_index);
@@ -4050,20 +4067,6 @@
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
  protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE {
-    const HUserRecord<HInstruction*> input_record = HInvoke::InputRecordAt(i);
-    if (kIsDebugBuild && IsStaticWithExplicitClinitCheck() && (i == InputCount() - 1)) {
-      HInstruction* input = input_record.GetInstruction();
-      // `input` is the last input of a static invoke marked as having
-      // an explicit clinit check. It must either be:
-      // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or
-      // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
-      DCHECK(input != nullptr);
-      DCHECK(input->IsClinitCheck() || input->IsLoadClass()) << input->DebugName();
-    }
-    return input_record;
-  }
-
   void InsertInputAt(size_t index, HInstruction* input);
   void RemoveInputAt(size_t index);
 
@@ -4435,7 +4438,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4800,7 +4803,7 @@
       : HUnaryOperation(result_type, input, dex_pc) {}
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4833,7 +4836,7 @@
       : HUnaryOperation(Primitive::Type::kPrimBoolean, input, dex_pc) {}
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4881,7 +4884,9 @@
   Primitive::Type GetResultType() const { return GetType(); }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
 
   // Try to statically evaluate the conversion and return a HConstant
   // containing the result.  If the input cannot be converted, return nullptr.
@@ -4917,7 +4922,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -4995,8 +5000,8 @@
 
   bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    HInstanceFieldGet* other_get = other->AsInstanceFieldGet();
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    const HInstanceFieldGet* other_get = other->AsInstanceFieldGet();
     return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
   }
 
@@ -5081,7 +5086,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
@@ -5228,7 +5233,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -5266,7 +5271,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -5350,7 +5355,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     // Note that we don't need to test for generate_clinit_check_.
     // Whether or not we need to generate the clinit check is processed in
     // prepare_for_register_allocator based on existing HInvokes and HClinitChecks.
@@ -5428,7 +5433,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLoadClass);
 };
 
-class HLoadString FINAL : public HExpression<1> {
+class HLoadString FINAL : public HInstruction {
  public:
   // Determines how to load the String.
   enum class LoadKind {
@@ -5467,12 +5472,12 @@
               uint32_t string_index,
               const DexFile& dex_file,
               uint32_t dex_pc)
-      : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
+      : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc),
+        special_input_(HUserRecord<HInstruction*>(current_method)),
         string_index_(string_index) {
     SetPackedFlag<kFlagIsInDexCache>(false);
     SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
     load_data_.ref.dex_file = &dex_file;
-    SetRawInputAt(0, current_method);
   }
 
   void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
@@ -5519,7 +5524,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE;
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE;
 
   size_t ComputeHashCode() const OVERRIDE { return string_index_; }
 
@@ -5555,16 +5560,22 @@
     SetSideEffects(SideEffects::None());
   }
 
-  size_t InputCount() const OVERRIDE {
-    return (InputAt(0) != nullptr) ? 1u : 0u;
+  void AddSpecialInput(HInstruction* special_input);
+
+  using HInstruction::GetInputRecords;  // Keep the const version visible.
+  ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+    return ArrayRef<HUserRecord<HInstruction*>>(
+        &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
   }
 
-  void AddSpecialInput(HInstruction* special_input);
+  Primitive::Type GetType() const OVERRIDE {
+    return Primitive::kPrimNot;
+  }
 
   DECLARE_INSTRUCTION(LoadString);
 
  private:
-  static constexpr size_t kFlagIsInDexCache = kNumberOfExpressionPackedBits;
+  static constexpr size_t kFlagIsInDexCache = kNumberOfGenericPackedBits;
   static constexpr size_t kFieldLoadKind = kFlagIsInDexCache + 1;
   static constexpr size_t kFieldLoadKindSize =
       MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
@@ -5588,6 +5599,8 @@
 
   void SetLoadKindInternal(LoadKind load_kind);
 
+  HUserRecord<HInstruction*> special_input_;
+
   // String index serves also as the hash code and it's also needed for slow-paths,
   // so it must not be overwritten with other load data.
   uint32_t string_index_;
@@ -5622,8 +5635,10 @@
   // The special input is used for PC-relative loads on some architectures.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
          GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind();
-  DCHECK(InputAt(0) == nullptr);
-  SetRawInputAt(0u, special_input);
+  // HLoadString::GetInputRecords() returns an empty array at this point,
+  // so use the GetInputRecords() from the base class to set the input record.
+  DCHECK(special_input_.GetInstruction() == nullptr);
+  special_input_ = HUserRecord<HInstruction*>(special_input);
   special_input->AddUseAt(this, 0);
 }
 
@@ -5641,7 +5656,7 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -5687,8 +5702,8 @@
 
   bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
 
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    HStaticFieldGet* other_get = other->AsStaticFieldGet();
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    const HStaticFieldGet* other_get = other->AsStaticFieldGet();
     return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
   }
 
@@ -5960,7 +5975,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -6056,7 +6071,7 @@
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
     return true;
   }
 
@@ -6179,7 +6194,9 @@
   HInstruction* GetCondition() const { return InputAt(2); }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
 
   bool CanBeNull() const OVERRIDE {
     return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull();
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
index 371e8ef..d9f9740e 100644
--- a/compiler/optimizing/nodes_arm.h
+++ b/compiler/optimizing/nodes_arm.h
@@ -26,6 +26,8 @@
         dex_file_(&dex_file),
         element_offset_(static_cast<size_t>(-1)) { }
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+
   void UpdateElementOffset(size_t element_offset) {
     // Use the lowest offset from the requested elements so that all offsets from
     // this base are non-negative because our assemblers emit negative-offset loads
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 737aece..06b073c 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -56,8 +56,8 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE {
-    HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
+  bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE {
+    const HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
     return instr_kind_ == other->instr_kind_ &&
         op_kind_ == other->op_kind_ &&
         shift_amount_ == other->shift_amount_;
@@ -106,7 +106,9 @@
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
   bool IsActualObject() const OVERRIDE { return false; }
 
   HInstruction* GetBaseAddress() const { return InputAt(0); }
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index bdcf54a..f2d5cf3 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -38,7 +38,7 @@
   static constexpr int kInputMulRightIndex = 2;
 
   bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
     return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
   }
 
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index c3696b5..fa47976 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -26,6 +26,8 @@
   HX86ComputeBaseMethodAddress()
       : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {}
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+
   DECLARE_INSTRUCTION(X86ComputeBaseMethodAddress);
 
  private:
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index fc66823..764160a 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -140,53 +140,43 @@
 // 0x0000002d: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips[] = {
-    0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF,
-    0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7,
-    0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27,
-    0x00, 0x00, 0xA4, 0xAF, 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7,
-    0x04, 0x00, 0xB5, 0xC7, 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7,
-    0x10, 0x00, 0xB0, 0x8F, 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F,
-    0x1C, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
+    0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
+    0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F,
+    0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7,
+    0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
 };
 static constexpr uint8_t expected_cfi_kMips[] = {
-    0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
-    0x54, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1,
-    0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+    0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48,
+    0x0B, 0x0E, 0x40,
 };
-// 0x00000000: addiu r29, r29, -28
-// 0x00000004: .cfi_def_cfa_offset: 28
-// 0x00000004: sw r31, +24(r29)
+// 0x00000000: addiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sw r31, +60(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r17, +20(r29)
+// 0x00000008: sw r17, +56(r29)
 // 0x0000000c: .cfi_offset: r17 at cfa-8
-// 0x0000000c: sw r16, +16(r29)
+// 0x0000000c: sw r16, +52(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-12
-// 0x00000010: swc1 f22, +8(r29)
-// 0x00000014: swc1 f23, +12(r29)
-// 0x00000018: swc1 f20, +0(r29)
-// 0x0000001c: swc1 f21, +4(r29)
-// 0x00000020: addiu r29, r29, -36
-// 0x00000024: .cfi_def_cfa_offset: 64
-// 0x00000024: sw r4, +0(r29)
-// 0x00000028: .cfi_remember_state
-// 0x00000028: addiu r29, r29, 36
-// 0x0000002c: .cfi_def_cfa_offset: 28
-// 0x0000002c: lwc1 f20, +0(r29)
-// 0x00000030: lwc1 f21, +4(r29)
-// 0x00000034: lwc1 f22, +8(r29)
-// 0x00000038: lwc1 f23, +12(r29)
-// 0x0000003c: lw r16, +16(r29)
-// 0x00000040: .cfi_restore: r16
-// 0x00000040: lw r17, +20(r29)
-// 0x00000044: .cfi_restore: r17
-// 0x00000044: lw r31, +24(r29)
-// 0x00000048: .cfi_restore: r31
-// 0x00000048: addiu r29, r29, 28
-// 0x0000004c: .cfi_def_cfa_offset: 0
-// 0x0000004c: jr r31
-// 0x00000050: nop
-// 0x00000054: .cfi_restore_state
-// 0x00000054: .cfi_def_cfa_offset: 64
+// 0x00000010: sdc1 f22, +40(r29)
+// 0x00000014: sdc1 f20, +32(r29)
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: lw r31, +60(r29)
+// 0x00000020: .cfi_restore: r31
+// 0x00000020: lw r17, +56(r29)
+// 0x00000024: .cfi_restore: r17
+// 0x00000024: lw r16, +52(r29)
+// 0x00000028: .cfi_restore: r16
+// 0x00000028: ldc1 f22, +40(r29)
+// 0x0000002c: ldc1 f20, +32(r29)
+// 0x00000030: addiu r29, r29, 64
+// 0x00000034: .cfi_def_cfa_offset: 0
+// 0x00000034: jr r31
+// 0x00000038: nop
+// 0x0000003c: .cfi_restore_state
+// 0x0000003c: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64[] = {
     0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
@@ -349,75 +339,65 @@
 // 0x00000098: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
-    0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF,
-    0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7,
-    0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27,
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
+    0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
     0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
     0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
     0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
     0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
 };
 static constexpr uint8_t expected_asm_kMips_adjust_tail[] = {
-    0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7, 0x04, 0x00, 0xB5, 0xC7,
-    0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7, 0x10, 0x00, 0xB0, 0x8F,
-    0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F, 0x1C, 0x00, 0xBD, 0x27,
+    0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F,
+    0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x40, 0x00, 0xBD, 0x27,
     0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
 };
 static constexpr uint8_t expected_cfi_kMips_adjust[] = {
-    0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
-    0x54, 0x0E, 0x40, 0x4C, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00,
-    0x02, 0x00, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
-    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+    0x54, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
+    0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
+    0x40,
 };
-// 0x00000000: addiu r29, r29, -28
-// 0x00000004: .cfi_def_cfa_offset: 28
-// 0x00000004: sw r31, +24(r29)
+// 0x00000000: addiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sw r31, +60(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r17, +20(r29)
+// 0x00000008: sw r17, +56(r29)
 // 0x0000000c: .cfi_offset: r17 at cfa-8
-// 0x0000000c: sw r16, +16(r29)
+// 0x0000000c: sw r16, +52(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-12
-// 0x00000010: swc1 f22, +8(r29)
-// 0x00000014: swc1 f23, +12(r29)
-// 0x00000018: swc1 f20, +0(r29)
-// 0x0000001c: swc1 f21, +4(r29)
-// 0x00000020: addiu r29, r29, -36
-// 0x00000024: .cfi_def_cfa_offset: 64
-// 0x00000024: sw r4, +0(r29)
-// 0x00000028: bne r0, r4, 0x0000004c ; +36
-// 0x0000002c: addiu r29, r29, -4
-// 0x00000030: .cfi_def_cfa_offset: 68
-// 0x00000030: sw r31, +0(r29)
-// 0x00000034: bltzal r0, 0x00000038 ; +4
-// 0x00000038: lui r1, 0x20000
-// 0x0000003c: ori r1, r1, 24
-// 0x00000040: addu r1, r1, r31
-// 0x00000044: lw r31, +0(r29)
-// 0x00000048: jr r1
-// 0x0000004c: addiu r29, r29, 4
-// 0x00000050: .cfi_def_cfa_offset: 64
-// 0x00000050: nop
+// 0x00000010: sdc1 f22, +40(r29)
+// 0x00000014: sdc1 f20, +32(r29)
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: bne r0, r4, 0x00000040 ; +36
+// 0x00000020: addiu r29, r29, -4
+// 0x00000024: .cfi_def_cfa_offset: 68
+// 0x00000024: sw r31, +0(r29)
+// 0x00000028: bltzal r0, 0x0000002c ; +4
+// 0x0000002c: lui r1, 0x20000
+// 0x00000030: ori r1, r1, 24
+// 0x00000034: addu r1, r1, r31
+// 0x00000038: lw r31, +0(r29)
+// 0x0000003c: jr r1
+// 0x00000040: addiu r29, r29, 4
+// 0x00000044: .cfi_def_cfa_offset: 64
+// 0x00000044: nop
 //             ...
-// 0x00020050: nop
-// 0x00020054: .cfi_remember_state
-// 0x00020054: addiu r29, r29, 36
-// 0x00020058: .cfi_def_cfa_offset: 28
-// 0x00020058: lwc1 f20, +0(r29)
-// 0x0002005c: lwc1 f21, +4(r29)
-// 0x00020060: lwc1 f22, +8(r29)
-// 0x00020064: lwc1 f23, +12(r29)
-// 0x00020068: lw r16, +16(r29)
-// 0x0002006c: .cfi_restore: r16
-// 0x0002006c: lw r17, +20(r29)
-// 0x00020070: .cfi_restore: r17
-// 0x00020070: lw r31, +24(r29)
-// 0x00020074: .cfi_restore: r31
-// 0x00020074: addiu r29, r29, 28
-// 0x00020078: .cfi_def_cfa_offset: 0
-// 0x00020078: jr r31
-// 0x0002007c: nop
-// 0x00020080: .cfi_restore_state
-// 0x00020080: .cfi_def_cfa_offset: 64
+// 0x00020044: nop
+// 0x00020048: .cfi_remember_state
+// 0x00020048: lw r31, +60(r29)
+// 0x0002004c: .cfi_restore: r31
+// 0x0002004c: lw r17, +56(r29)
+// 0x00020050: .cfi_restore: r17
+// 0x00020050: lw r16, +52(r29)
+// 0x00020054: .cfi_restore: r16
+// 0x00020054: ldc1 f22, +40(r29)
+// 0x00020058: ldc1 f20, +32(r29)
+// 0x0002005c: addiu r29, r29, 64
+// 0x00020060: .cfi_def_cfa_offset: 0
+// 0x00020060: jr r31
+// 0x00020064: nop
+// 0x00020068: .cfi_restore_state
+// 0x00020068: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
     0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 37197af..c9a4bfe 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -321,6 +321,7 @@
                             jobject class_loader,
                             const DexFile& dex_file,
                             Handle<mirror::DexCache> dex_cache,
+                            ArtMethod* method,
                             bool osr) const;
 
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
@@ -614,6 +615,7 @@
                                               jobject class_loader,
                                               const DexFile& dex_file,
                                               Handle<mirror::DexCache> dex_cache,
+                                              ArtMethod* method,
                                               bool osr) const {
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
@@ -679,17 +681,30 @@
       osr);
 
   const uint8_t* interpreter_metadata = nullptr;
-  {
+  if (method == nullptr) {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::ClassLoader> loader(hs.NewHandle(
         soa.Decode<mirror::ClassLoader*>(class_loader)));
-    ArtMethod* art_method = compiler_driver->ResolveMethod(
+    method = compiler_driver->ResolveMethod(
         soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
-    // We may not get a method, for example if its class is erroneous.
-    if (art_method != nullptr) {
-      graph->SetArtMethod(art_method);
-      interpreter_metadata = art_method->GetQuickenedInfo();
+  }
+  // For AOT compilation, we may not get a method, for example if its class is erroneous.
+  // JIT should always have a method.
+  DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr);
+  if (method != nullptr) {
+    graph->SetArtMethod(method);
+    ScopedObjectAccess soa(Thread::Current());
+    interpreter_metadata = method->GetQuickenedInfo();
+    uint16_t type_index = method->GetDeclaringClass()->GetDexTypeIndex();
+
+    // Update the dex cache if the type is not in it yet. Note that under AOT,
+    // the verifier must have set it, but under JIT, there's no guarantee, as we
+    // don't necessarily run the verifier.
+    // The compiler and the compiler driver assume the compiling class is
+    // in the dex cache.
+    if (dex_cache->GetResolvedType(type_index) == nullptr) {
+      dex_cache->SetResolvedType(type_index, method->GetDeclaringClass());
     }
   }
 
@@ -798,6 +813,7 @@
                    jclass_loader,
                    dex_file,
                    dex_cache,
+                   nullptr,
                    /* osr */ false));
     if (codegen.get() != nullptr) {
       MaybeRecordStat(MethodCompilationStat::kCompiled);
@@ -884,6 +900,7 @@
                    jclass_loader,
                    *dex_file,
                    dex_cache,
+                   method,
                    osr));
     if (codegen.get() == nullptr) {
       return false;
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index dafbd3d..cb2fc0a 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -202,8 +202,9 @@
     }
 
     // Ensure that we can load FP arguments from the constant area.
-    for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
-      HConstant* input = invoke->InputAt(i)->AsConstant();
+    auto&& inputs = invoke->GetInputs();
+    for (size_t i = 0; i < inputs.size(); i++) {
+      HConstant* input = inputs[i]->AsConstant();
       if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) {
         ReplaceInput(invoke, input, i, true);
       }
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index dcc89e8..c941c0c 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -169,8 +169,7 @@
 
 void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   if (invoke->IsStaticWithExplicitClinitCheck()) {
-    size_t last_input_index = invoke->InputCount() - 1;
-    HLoadClass* last_input = invoke->InputAt(last_input_index)->AsLoadClass();
+    HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
     DCHECK(last_input != nullptr)
         << "Last input is not HLoadClass. It is " << last_input->DebugName();
 
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index ee32518..f9bef68 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -39,16 +39,17 @@
   }
 
   void PrintPostInstruction(HInstruction* instruction) {
-    if (instruction->InputCount() != 0) {
+    auto&& inputs = instruction->GetInputs();
+    if (!inputs.empty()) {
       PrintString("(");
       bool first = true;
-      for (HInputIterator it(instruction); !it.Done(); it.Advance()) {
+      for (const HInstruction* input : inputs) {
         if (first) {
           first = false;
         } else {
           PrintString(", ");
         }
-        PrintInt(it.Current()->GetId());
+        PrintInt(input->GetId());
       }
       PrintString(")");
     }
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index f2394f6..2a281dd 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -809,7 +809,11 @@
   // Make sure that we don't go over the bounded type.
   ReferenceTypeInfo upper_bound_rti = instr->GetUpperBound();
   if (!upper_bound_rti.IsSupertypeOf(new_rti)) {
-    new_rti = upper_bound_rti;
+    // Note that the input might be exact, in which case we know the branch leading
+    // to the bound type is dead. We play it safe by not marking the bound type as
+    // exact.
+    bool is_exact = upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes();
+    new_rti = ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), is_exact);
   }
   instr->SetReferenceTypeInfo(new_rti);
 }
@@ -819,13 +823,13 @@
 void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
   DCHECK(instr->IsLive());
 
-  size_t input_count = instr->InputCount();
+  auto&& inputs = instr->GetInputs();
   size_t first_input_index_not_null = 0;
-  while (first_input_index_not_null < input_count &&
-      instr->InputAt(first_input_index_not_null)->IsNullConstant()) {
+  while (first_input_index_not_null < inputs.size() &&
+      inputs[first_input_index_not_null]->IsNullConstant()) {
     first_input_index_not_null++;
   }
-  if (first_input_index_not_null == input_count) {
+  if (first_input_index_not_null == inputs.size()) {
     // All inputs are NullConstants, set the type to object.
     // This may happen in the presence of inlining.
     instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
@@ -840,11 +844,11 @@
     return;
   }
 
-  for (size_t i = first_input_index_not_null + 1; i < input_count; i++) {
-    if (instr->InputAt(i)->IsNullConstant()) {
+  for (size_t i = first_input_index_not_null + 1; i < inputs.size(); i++) {
+    if (inputs[i]->IsNullConstant()) {
       continue;
     }
-    new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo());
+    new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo());
     if (new_rti.IsValid() && new_rti.IsObjectClass()) {
       if (!new_rti.IsExact()) {
         break;
@@ -875,8 +879,8 @@
   if (instr->IsPhi()) {
     HPhi* phi = instr->AsPhi();
     bool new_can_be_null = false;
-    for (size_t i = 0; i < phi->InputCount(); i++) {
-      if (phi->InputAt(i)->CanBeNull()) {
+    for (HInstruction* input : phi->GetInputs()) {
+      if (input->CanBeNull()) {
         new_can_be_null = true;
         break;
       }
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 4405b80..4a6b835 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -305,7 +305,7 @@
     BlockRegisters(position, position + 1, /* caller_save_only */ true);
   }
 
-  for (size_t i = 0; i < instruction->InputCount(); ++i) {
+  for (size_t i = 0; i < locations->GetInputCount(); ++i) {
     Location input = locations->InAt(i);
     if (input.IsRegister() || input.IsFpuRegister()) {
       BlockRegister(input, position, position + 1);
@@ -753,10 +753,11 @@
   if (defined_by != nullptr && !current->IsSplit()) {
     LocationSummary* locations = defined_by->GetLocations();
     if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
-      for (size_t i = 0, e = defined_by->InputCount(); i < e; ++i) {
+      auto&& inputs = defined_by->GetInputs();
+      for (size_t i = 0; i < inputs.size(); ++i) {
         // Take the last interval of the input. It is the location of that interval
         // that will be used at `defined_by`.
-        LiveInterval* interval = defined_by->InputAt(i)->GetLiveInterval()->GetLastSibling();
+        LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
         // Note that interval may have not been processed yet.
         // TODO: Handle non-split intervals last in the work list.
         if (locations->InAt(i).IsValid()
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index e52476e..e409035 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -96,10 +96,10 @@
     // TODO(dbrazdil): This puts an instruction between If and its condition.
     //                 Implement moving of conditions to first users if possible.
     if (!true_block->IsSingleGoto()) {
-      true_block->MoveInstructionBefore(true_block->GetFirstInstruction(), if_instruction);
+      true_block->GetFirstInstruction()->MoveBefore(if_instruction);
     }
     if (!false_block->IsSingleGoto()) {
-      false_block->MoveInstructionBefore(false_block->GetFirstInstruction(), if_instruction);
+      false_block->GetFirstInstruction()->MoveBefore(if_instruction);
     }
     DCHECK(true_block->IsSingleGoto());
     DCHECK(false_block->IsSingleGoto());
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index f96ca32..ed50c69 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -123,8 +123,7 @@
 static bool TypePhiFromInputs(HPhi* phi) {
   Primitive::Type common_type = phi->GetType();
 
-  for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-    HInstruction* input = it.Current();
+  for (HInstruction* input : phi->GetInputs()) {
     if (input->IsPhi() && input->AsPhi()->IsDead()) {
       // Phis are constructed live so if an input is a dead phi, it must have
       // been made dead due to type conflict. Mark this phi conflicting too.
@@ -169,8 +168,7 @@
     // or `common_type` is integral and we do not need to retype ambiguous inputs
     // because they are always constructed with the integral type candidate.
     if (kIsDebugBuild) {
-      for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-        HInstruction* input = phi->InputAt(i);
+      for (HInstruction* input : phi->GetInputs()) {
         if (common_type == Primitive::kPrimVoid) {
           DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid);
         } else {
@@ -183,8 +181,9 @@
     return true;
   } else {
     DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type));
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      HInstruction* input = phi->InputAt(i);
+    auto&& inputs = phi->GetInputs();
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      HInstruction* input = inputs[i];
       if (input->GetType() != common_type) {
         // Input type does not match phi's type. Try to retype the input or
         // generate a suitably typed equivalent.
@@ -618,11 +617,14 @@
       || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
       || (next->GetType() != type)) {
     ArenaAllocator* allocator = graph_->GetArena();
-    HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
-    for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-      // Copy the inputs. Note that the graph may not be correctly typed
-      // by doing this copy, but the type propagation phase will fix it.
-      new_phi->SetRawInputAt(i, phi->InputAt(i));
+    auto&& inputs = phi->GetInputs();
+    HPhi* new_phi =
+        new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type);
+    // Copy the inputs. Note that the graph may not be correctly typed
+    // by doing this copy, but the type propagation phase will fix it.
+    ArrayRef<HUserRecord<HInstruction*>> new_input_records = new_phi->GetInputRecords();
+    for (size_t i = 0; i < inputs.size(); ++i) {
+      new_input_records[i] = HUserRecord<HInstruction*>(inputs[i]);
     }
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
     DCHECK(new_phi->IsLive());
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 36e0d99..212d935 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -177,8 +177,9 @@
 static void RecursivelyProcessInputs(HInstruction* current,
                                      HInstruction* actual_user,
                                      BitVector* live_in) {
-  for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
-    HInstruction* input = current->InputAt(i);
+  auto&& inputs = current->GetInputs();
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    HInstruction* input = inputs[i];
     bool has_in_location = current->GetLocations()->InAt(i).IsValid();
     bool has_out_location = input->GetLocations()->Out().IsValid();
 
@@ -430,12 +431,12 @@
         // If the instruction dies at the phi assignment, we can try having the
         // same register.
         if (end == user->GetBlock()->GetPredecessors()[input_index]->GetLifetimeEnd()) {
-          for (size_t i = 0, e = user->InputCount(); i < e; ++i) {
+          auto&& inputs = user->GetInputs();
+          for (size_t i = 0; i < inputs.size(); ++i) {
             if (i == input_index) {
               continue;
             }
-            HInstruction* input = user->InputAt(i);
-            Location location = input->GetLiveInterval()->GetLocationAt(
+            Location location = inputs[i]->GetLiveInterval()->GetLocationAt(
                 user->GetBlock()->GetPredecessors()[i]->GetLifetimeEnd() - 1);
             if (location.IsRegisterKind()) {
               int reg = RegisterOrLowRegister(location);
@@ -471,10 +472,10 @@
   if (defined_by_->IsPhi()) {
     // Try to use the same register as one of the inputs.
     const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
-    for (size_t i = 0, e = defined_by_->InputCount(); i < e; ++i) {
-      HInstruction* input = defined_by_->InputAt(i);
+    auto&& inputs = defined_by_->GetInputs();
+    for (size_t i = 0; i < inputs.size(); ++i) {
       size_t end = predecessors[i]->GetLifetimeEnd();
-      LiveInterval* input_interval = input->GetLiveInterval()->GetSiblingAt(end - 1);
+      LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(end - 1);
       if (input_interval->GetEnd() == end) {
         // If the input dies at the end of the predecessor, we know its register can
         // be reused.
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 1fcba8b..dc98864 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -797,8 +797,8 @@
   bool IsUsingInputRegister() const {
     CHECK(kIsDebugBuild) << "Function should be used only for DCHECKs";
     if (defined_by_ != nullptr && !IsSplit()) {
-      for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) {
-        LiveInterval* interval = it.Current()->GetLiveInterval();
+      for (const HInstruction* input : defined_by_->GetInputs()) {
+        LiveInterval* interval = input->GetLiveInterval();
 
         // Find the interval that covers `defined_by`_. Calls to this function
         // are made outside the linear scan, hence we need to use CoversSlow.
@@ -828,8 +828,8 @@
       if (locations->OutputCanOverlapWithInputs()) {
         return false;
       }
-      for (HInputIterator it(defined_by_); !it.Done(); it.Advance()) {
-        LiveInterval* interval = it.Current()->GetLiveInterval();
+      for (const HInstruction* input : defined_by_->GetInputs()) {
+        LiveInterval* interval = input->GetLiveInterval();
 
         // Find the interval that covers `defined_by`_. Calls to this function
         // are made outside the linear scan, hence we need to use CoversSlow.
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index c67612e..b1ec99a 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -67,8 +67,8 @@
   while (!worklist_.empty()) {
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
-    for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-      HPhi* input = it.Current()->AsPhi();
+    for (HInstruction* raw_input : phi->GetInputs()) {
+      HPhi* input = raw_input->AsPhi();
       if (input != nullptr && input->IsDead()) {
         // Input is a dead phi. Revive it and add to the worklist. We make sure
         // that the phi was not dead initially (see definition of `initially_live`).
@@ -102,9 +102,7 @@
           }
         }
         // Remove the phi from use lists of its inputs.
-        for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
-          phi->RemoveAsUserOfInput(i);
-        }
+        phi->RemoveAsUserOfAllInputs();
         // Remove the phi from environments that use it.
         for (const HUseListNode<HEnvironment*>& use : phi->GetEnvUses()) {
           HEnvironment* user = use.GetUser();
@@ -159,8 +157,7 @@
     bool irreducible_loop_phi_in_cycle = phi->IsIrreducibleLoopHeaderPhi();
 
     // First do a simple loop over inputs and check if they are all the same.
-    for (size_t j = 0; j < phi->InputCount(); ++j) {
-      HInstruction* input = phi->InputAt(j);
+    for (HInstruction* input : phi->GetInputs()) {
       if (input == phi) {
         continue;
       } else if (candidate == nullptr) {
@@ -181,8 +178,7 @@
         DCHECK(!current->IsLoopHeaderPhi() ||
                current->GetBlock()->IsLoopPreHeaderFirstPredecessor());
 
-        for (size_t j = 0; j < current->InputCount(); ++j) {
-          HInstruction* input = current->InputAt(j);
+        for (HInstruction* input : current->GetInputs()) {
           if (input == current) {
             continue;
           } else if (input->IsPhi()) {
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
index 5c33639..8dc9ab4 100644
--- a/compiler/utils/array_ref.h
+++ b/compiler/utils/array_ref.h
@@ -39,9 +39,6 @@
  */
 template <typename T>
 class ArrayRef {
- private:
-  struct tag { };
-
  public:
   typedef T value_type;
   typedef T& reference;
@@ -63,14 +60,14 @@
 
   template <size_t size>
   explicit constexpr ArrayRef(T (&array)[size])
-    : array_(array), size_(size) {
+      : array_(array), size_(size) {
   }
 
-  template <typename U, size_t size>
-  explicit constexpr ArrayRef(U (&array)[size],
-                              typename std::enable_if<std::is_same<T, const U>::value, tag>::type
-                                  t ATTRIBUTE_UNUSED = tag())
-    : array_(array), size_(size) {
+  template <typename U,
+            size_t size,
+            typename = typename std::enable_if<std::is_same<T, const U>::value>::type>
+  explicit constexpr ArrayRef(U (&array)[size])
+      : array_(array), size_(size) {
   }
 
   constexpr ArrayRef(T* array_in, size_t size_in)
@@ -165,13 +162,21 @@
   value_type* data() { return array_; }
   const value_type* data() const { return array_; }
 
-  ArrayRef SubArray(size_type pos) const {
-    return SubArray(pos, size_ - pos);
+  ArrayRef SubArray(size_type pos) {
+    return SubArray(pos, size() - pos);
   }
-  ArrayRef SubArray(size_type pos, size_type length) const {
+  ArrayRef<const T> SubArray(size_type pos) const {
+    return SubArray(pos, size() - pos);
+  }
+  ArrayRef SubArray(size_type pos, size_type length) {
     DCHECK_LE(pos, size());
     DCHECK_LE(length, size() - pos);
-    return ArrayRef(array_ + pos, length);
+    return ArrayRef(data() + pos, length);
+  }
+  ArrayRef<const T> SubArray(size_type pos, size_type length) const {
+    DCHECK_LE(pos, size());
+    DCHECK_LE(length, size() - pos);
+    return ArrayRef<const T>(data() + pos, length);
   }
 
  private:
diff --git a/compiler/utils/transform_array_ref.h b/compiler/utils/transform_array_ref.h
new file mode 100644
index 0000000..6297b88
--- /dev/null
+++ b/compiler/utils/transform_array_ref.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
+#define ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
+
+#include <type_traits>
+
+#include "utils/array_ref.h"
+#include "utils/transform_iterator.h"
+
+namespace art {
+
+/**
+ * @brief An ArrayRef<> wrapper that uses a transformation function for element access.
+ */
+template <typename BaseType, typename Function>
+class TransformArrayRef {
+ private:
+  using Iter = TransformIterator<typename ArrayRef<BaseType>::iterator, Function>;
+
+  // The Function may take a non-const reference, so const_iterator may not exist.
+  using FallbackConstIter = std::iterator<std::random_access_iterator_tag, void, void, void, void>;
+  using PreferredConstIter =
+      TransformIterator<typename ArrayRef<BaseType>::const_iterator, Function>;
+  template <typename F, typename = typename std::result_of<F(const BaseType&)>::type>
+  static PreferredConstIter ConstIterHelper(int&);
+  template <typename F>
+  static FallbackConstIter ConstIterHelper(const int&);
+
+  using ConstIter = decltype(ConstIterHelper<Function>(*reinterpret_cast<int*>(0)));
+
+ public:
+  using value_type = typename Iter::value_type;
+  using reference = typename Iter::reference;
+  using const_reference = typename ConstIter::reference;
+  using pointer = typename Iter::pointer;
+  using const_pointer = typename ConstIter::pointer;
+  using iterator = Iter;
+  using const_iterator = typename std::conditional<
+      std::is_same<ConstIter, FallbackConstIter>::value,
+      void,
+      ConstIter>::type;
+  using reverse_iterator = std::reverse_iterator<Iter>;
+  using const_reverse_iterator = typename std::conditional<
+      std::is_same<ConstIter, FallbackConstIter>::value,
+      void,
+      std::reverse_iterator<ConstIter>>::type;
+  using difference_type = typename ArrayRef<BaseType>::difference_type;
+  using size_type = typename ArrayRef<BaseType>::size_type;
+
+  // Constructors.
+
+  TransformArrayRef(const TransformArrayRef& other) = default;
+
+  template <typename OtherBT>
+  TransformArrayRef(const ArrayRef<OtherBT>& base, Function fn)
+      : data_(base, fn) { }
+
+  // Assignment operators.
+
+  TransformArrayRef& operator=(const TransformArrayRef& other) = default;
+
+  template <typename OtherBT,
+            typename = typename std::enable_if<std::is_same<BaseType, const OtherBT>::value>::type>
+  TransformArrayRef& operator=(const TransformArrayRef<OtherBT, Function>& other) {
+    return *this = TransformArrayRef(other.base(), other.GetFunction());
+  }
+
+  // Destructor.
+  ~TransformArrayRef() = default;
+
+  // Iterators.
+  iterator begin() { return MakeIterator(base().begin()); }
+  const_iterator begin() const { return MakeIterator(base().cbegin()); }
+  const_iterator cbegin() const { return MakeIterator(base().cbegin()); }
+  iterator end() { return MakeIterator(base().end()); }
+  const_iterator end() const { return MakeIterator(base().cend()); }
+  const_iterator cend() const { return MakeIterator(base().cend()); }
+  reverse_iterator rbegin() { return reverse_iterator(end()); }
+  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
+  const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); }
+  reverse_iterator rend() { return reverse_iterator(begin()); }
+  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
+  const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); }
+
+  // Size.
+  size_type size() const { return base().size(); }
+  bool empty() const { return base().empty(); }
+
+  // Element access. NOTE: Not providing data().
+
+  reference operator[](size_type n) { return GetFunction()(base()[n]); }
+  const_reference operator[](size_type n) const { return GetFunction()(base()[n]); }
+
+  reference front() { return GetFunction()(base().front()); }
+  const_reference front() const { return GetFunction()(base().front()); }
+
+  reference back() { return GetFunction()(base().back()); }
+  const_reference back() const { return GetFunction()(base().back()); }
+
+  TransformArrayRef SubArray(size_type pos) {
+    return TransformArrayRef(base().SubArray(pos), GetFunction());
+  }
+  TransformArrayRef SubArray(size_type pos) const {
+    return TransformArrayRef(base().SubArray(pos), GetFunction());
+  }
+  TransformArrayRef SubArray(size_type pos, size_type length) const {
+    return TransformArrayRef(base().SubArray(pos, length), GetFunction());
+  }
+
+  // Retrieve the base ArrayRef<>.
+  ArrayRef<BaseType> base() {
+    return data_.base_;
+  }
+  ArrayRef<const BaseType> base() const {
+    return ArrayRef<const BaseType>(data_.base_);
+  }
+
+ private:
+  // Allow EBO for state-less Function.
+  struct Data : Function {
+   public:
+    Data(ArrayRef<BaseType> base, Function fn) : Function(fn), base_(base) { }
+
+    ArrayRef<BaseType> base_;
+  };
+
+  const Function& GetFunction() const {
+    return static_cast<const Function&>(data_);
+  }
+
+  template <typename BaseIterator>
+  auto MakeIterator(BaseIterator base) const {
+    return MakeTransformIterator(base, GetFunction());
+  }
+
+  Data data_;
+};
+
+template <typename BaseType, typename Function>
+bool operator==(const TransformArrayRef<BaseType, Function>& lhs,
+                const TransformArrayRef<BaseType, Function>& rhs) {
+  return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+}
+
+template <typename BaseType, typename Function>
+bool operator!=(const TransformArrayRef<BaseType, Function>& lhs,
+                const TransformArrayRef<BaseType, Function>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename ValueType, typename Function>
+TransformArrayRef<ValueType, Function> MakeTransformArrayRef(
+    ArrayRef<ValueType> container, Function f) {
+  return TransformArrayRef<ValueType, Function>(container, f);
+}
+
+template <typename Container, typename Function>
+TransformArrayRef<typename Container::value_type, Function> MakeTransformArrayRef(
+    Container& container, Function f) {
+  return TransformArrayRef<typename Container::value_type, Function>(
+      ArrayRef<typename Container::value_type>(container.data(), container.size()), f);
+}
+
+template <typename Container, typename Function>
+TransformArrayRef<const typename Container::value_type, Function> MakeTransformArrayRef(
+    const Container& container, Function f) {
+  return TransformArrayRef<const typename Container::value_type, Function>(
+      ArrayRef<const typename Container::value_type>(container.data(), container.size()), f);
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TRANSFORM_ARRAY_REF_H_
diff --git a/compiler/utils/transform_array_ref_test.cc b/compiler/utils/transform_array_ref_test.cc
new file mode 100644
index 0000000..2593fad
--- /dev/null
+++ b/compiler/utils/transform_array_ref_test.cc
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "utils/transform_array_ref.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+struct ValueHolder {
+  // Deliberately not explicit.
+  ValueHolder(int v) : value(v) { }  // NOLINT
+  int value;
+};
+
+ATTRIBUTE_UNUSED bool operator==(const ValueHolder& lhs, const ValueHolder& rhs) {
+  return lhs.value == rhs.value;
+}
+
+}  // anonymous namespace
+
+TEST(TransformArrayRef, ConstRefAdd1) {
+  auto add1 = [](const ValueHolder& h) { return h.value + 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 6, 4, 0 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, add1);
+  using TarefIter = decltype(taref)::iterator;
+  using ConstTarefIter = decltype(taref)::const_iterator;
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<TarefIter, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<ConstTarefIter, decltype(taref)::const_pointer>::value,
+                "const_pointer");
+  static_assert(std::is_same<int, decltype(taref)::const_reference>::value, "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 7, 5, 1 }), output);
+  output.clear();
+
+  std::copy(taref.cbegin(), taref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 7, 5, 1 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 5, 7, 8 }), output);
+  output.clear();
+
+  std::copy(taref.crbegin(), taref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 5, 7, 8 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value + 1, taref.front());
+  ASSERT_EQ(input.back().value + 1, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value + 1, taref[i]);
+  }
+}
+
+TEST(TransformArrayRef, NonConstRefSub1) {
+  auto sub1 = [](ValueHolder& h) { return h.value - 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 4, 4, 5, 7, 10 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, sub1);
+  using TarefIter = decltype(taref)::iterator;
+  static_assert(std::is_same<void, decltype(taref)::const_iterator>::value, "const_iterator");
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<TarefIter, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<void, decltype(taref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<void, decltype(taref)::const_reference>::value, "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 4, 6, 9 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 6, 4, 3, 3 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value - 1, taref.front());
+  ASSERT_EQ(input.back().value - 1, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value - 1, taref[i]);
+  }
+}
+
+TEST(TransformArrayRef, ConstAndNonConstRef) {
+  struct Ref {
+    int& operator()(ValueHolder& h) const { return h.value; }
+    const int& operator()(const ValueHolder& h) const { return h.value; }
+  };
+  Ref ref;
+  std::vector<ValueHolder> input({ 1, 0, 1, 0, 3, 1 });
+  std::vector<int> output;
+
+  auto taref = MakeTransformArrayRef(input, ref);
+  static_assert(std::is_same<int, decltype(taref)::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, decltype(taref)::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, decltype(taref)::reference>::value, "reference");
+  static_assert(std::is_same<const int*, decltype(taref)::const_pointer>::value, "const_pointer");
+  static_assert(std::is_same<const int&, decltype(taref)::const_reference>::value,
+                "const_reference");
+
+  std::copy(taref.begin(), taref.end(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(taref.cbegin(), taref.cend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output);
+  output.clear();
+
+  std::copy(taref.rbegin(), taref.rend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  std::copy(taref.crbegin(), taref.crend(), std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output);
+  output.clear();
+
+  ASSERT_EQ(input.size(), taref.size());
+  ASSERT_EQ(input.empty(), taref.empty());
+  ASSERT_EQ(input.front().value, taref.front());
+  ASSERT_EQ(input.back().value, taref.back());
+
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, taref[i]);
+  }
+
+  // Test writing through the transform iterator.
+  std::vector<int> transform_input({ 24, 37, 11, 71 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  taref = MakeTransformArrayRef(transformed, ref);
+  for (size_t i = 0; i != transform_input.size(); ++i) {
+    taref[i] = transform_input[i];
+  }
+  ASSERT_EQ(std::vector<ValueHolder>({ 24, 37, 11, 71 }), transformed);
+}
+
+}  // namespace art
diff --git a/compiler/utils/transform_iterator.h b/compiler/utils/transform_iterator.h
new file mode 100644
index 0000000..f0769d4
--- /dev/null
+++ b/compiler/utils/transform_iterator.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
+#define ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
+
+#include <iterator>
+#include <type_traits>
+
+#include "base/iteration_range.h"
+
+namespace art {
+
+// The transform iterator transforms values from the base iterator with a given
+// transformation function. It can serve as a replacement for std::transform(), i.e.
+//    std::copy(MakeTransformIterator(begin, f), MakeTransformIterator(end, f), out)
+// is equivalent to
+//    std::transform(begin, end, f)
+// If the function returns an l-value reference or a wrapper that supports assignment,
+// the TransformIterator can be used also as an output iterator, i.e.
+//    std::copy(begin, end, MakeTransformIterator(out, f))
+// is equivalent to
+//    for (auto it = begin; it != end; ++it) {
+//      f(*out++) = *it;
+//    }
+template <typename BaseIterator, typename Function>
+class TransformIterator {
+ private:
+  static_assert(std::is_base_of<
+                    std::input_iterator_tag,
+                    typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+                "Transform iterator base must be an input iterator.");
+
+  // Argument type passed to Function: the base iterator's reference type, or
+  // its value_type for iterators whose reference type is void.
+  using InputType =
+      typename std::conditional<
+          std::is_same<void, typename std::iterator_traits<BaseIterator>::reference>::value,
+          typename std::iterator_traits<BaseIterator>::value_type,
+          typename std::iterator_traits<BaseIterator>::reference>::type;
+  using ResultType = typename std::result_of<Function(InputType)>::type;
+
+ public:
+  using iterator_category = typename std::iterator_traits<BaseIterator>::iterator_category;
+  using value_type =
+      typename std::remove_const<typename std::remove_reference<ResultType>::type>::type;
+  using difference_type = typename std::iterator_traits<BaseIterator>::difference_type;
+  // Expose a real pointer only when the function yields a reference;
+  // otherwise there is no persistent object to point to, so fall back to
+  // the iterator type itself.
+  using pointer = typename std::conditional<
+      std::is_reference<ResultType>::value,
+      typename std::add_pointer<typename std::remove_reference<ResultType>::type>::type,
+      TransformIterator>::type;
+  using reference = ResultType;
+
+  TransformIterator(BaseIterator base, Function fn)
+      : data_(base, fn) { }
+
+  // Converting constructor, e.g. iterator -> const_iterator.
+  template <typename OtherBI>
+  TransformIterator(const TransformIterator<OtherBI, Function>& other)
+      : data_(other.base(), other.GetFunction()) {
+  }
+
+  TransformIterator& operator++() {
+    ++data_.base_;
+    return *this;
+  }
+
+  // Post-increment returns the old iterator *by value*. (Returning a
+  // reference here would dangle: `tmp` is a local.)
+  TransformIterator operator++(int) {
+    TransformIterator tmp(*this);
+    ++*this;
+    return tmp;
+  }
+
+  TransformIterator& operator--() {
+    static_assert(
+        std::is_base_of<std::bidirectional_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be bidirectional iterator to use operator--()");
+    --data_.base_;
+    return *this;
+  }
+
+  // Post-decrement returns the old iterator by value, as above.
+  TransformIterator operator--(int) {
+    TransformIterator tmp(*this);
+    --*this;
+    return tmp;
+  }
+
+  reference operator*() const {
+    return GetFunction()(*base());
+  }
+
+  reference operator[](difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator[]");
+    return GetFunction()(base()[n]);
+  }
+
+  TransformIterator operator+(difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator+");
+    return TransformIterator(base() + n, GetFunction());
+  }
+
+  TransformIterator operator-(difference_type n) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator-");
+    return TransformIterator(base() - n, GetFunction());
+  }
+
+  difference_type operator-(const TransformIterator& other) const {
+    static_assert(
+        std::is_base_of<std::random_access_iterator_tag,
+                        typename std::iterator_traits<BaseIterator>::iterator_category>::value,
+        "BaseIterator must be random access iterator to use operator-");
+    return base() - other.base();
+  }
+
+  // Retrieve the base iterator.
+  BaseIterator base() const {
+    return data_.base_;
+  }
+
+  // Retrieve the transformation function.
+  const Function& GetFunction() const {
+    return static_cast<const Function&>(data_);
+  }
+
+ private:
+  // Allow EBO for state-less Function.
+  struct Data : Function {
+   public:
+    Data(BaseIterator base, Function fn) : Function(fn), base_(base) { }
+
+    BaseIterator base_;
+  };
+
+  Data data_;
+};
+
+// Transform iterators compare equal iff their base iterators do; the two
+// functions are assumed to be equivalent and are not compared.
+template <typename BaseIterator1, typename BaseIterator2, typename Function>
+bool operator==(const TransformIterator<BaseIterator1, Function>& lhs,
+                const TransformIterator<BaseIterator2, Function>& rhs) {
+  return lhs.base() == rhs.base();
+}
+
+template <typename BaseIterator1, typename BaseIterator2, typename Function>
+bool operator!=(const TransformIterator<BaseIterator1, Function>& lhs,
+                const TransformIterator<BaseIterator2, Function>& rhs) {
+  return !(lhs == rhs);
+}
+
+// Helper that deduces the TransformIterator template arguments.
+template <typename BaseIterator, typename Function>
+TransformIterator<BaseIterator, Function> MakeTransformIterator(BaseIterator base, Function f) {
+  return TransformIterator<BaseIterator, Function>(base, f);
+}
+
+// Wraps a whole range so that each element is transformed by `f` on access.
+template <typename BaseRange, typename Function>
+auto MakeTransformRange(BaseRange& range, Function f) {
+  return MakeIterationRange(MakeTransformIterator(range.begin(), f),
+                            MakeTransformIterator(range.end(), f));
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_TRANSFORM_ITERATOR_H_
diff --git a/compiler/utils/transform_iterator_test.cc b/compiler/utils/transform_iterator_test.cc
new file mode 100644
index 0000000..dbb4779
--- /dev/null
+++ b/compiler/utils/transform_iterator_test.cc
@@ -0,0 +1,533 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <forward_list>
+#include <list>
+#include <type_traits>
+#include <vector>
+
+#include <array>
+
+#include "gtest/gtest.h"
+
+#include "utils/transform_iterator.h"
+
+namespace art {
+
+namespace {  // anonymous namespace
+
+// Simple int wrapper used as the element type the tests transform from/to.
+struct ValueHolder {
+  // Deliberately not explicit.
+  ValueHolder(int v) : value(v) { }  // NOLINT
+  int value;
+};
+
+// Equality on the wrapped value; used by the ASSERT_EQ comparisons below.
+bool operator==(const ValueHolder& lhs, const ValueHolder& rhs) {
+  return lhs.value == rhs.value;
+}
+
+}  // anonymous namespace
+
+// Transforms a std::vector with a value-returning lambda: checks the deduced
+// iterator traits, all four traversal flavors, random-access arithmetic and
+// indexing, and iterator -> const_iterator conversion/comparison.
+TEST(TransformIterator, VectorAdd1) {
+  auto add1 = [](const ValueHolder& h) { return h.value + 1; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 1, 7, 3, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_titer, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_ctiter, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_rtiter, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), add1));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<vector_crtiter, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), add1),
+            MakeTransformIterator(input.end(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 2, 8, 4, 9 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), add1),
+            MakeTransformIterator(input.cend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 2, 8, 4, 9 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), add1),
+            MakeTransformIterator(input.rend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 4, 8, 2 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), add1),
+            MakeTransformIterator(input.crend(), add1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 9, 4, 8, 2 }), output);
+  output.clear();
+
+  // operator[] with positive offsets from (c)begin/(c)rbegin and negative
+  // offsets from (c)end/(c)rend, plus consistency of iterator +/- arithmetic.
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.begin(), add1)[i]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.cbegin(), add1)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.rbegin(), add1)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.crbegin(), add1)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.end(), add1)[index_from_end]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.cend(), add1)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.rend(), add1)[index_from_rend]);
+    ASSERT_EQ(input[i].value + 1, MakeTransformIterator(input.crend(), add1)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), add1) + i,
+              MakeTransformIterator(input.begin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), add1) + i,
+              MakeTransformIterator(input.cbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), add1) + i,
+              MakeTransformIterator(input.rbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), add1) + i,
+              MakeTransformIterator(input.crbegin() + i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.end(), add1) - i,
+              MakeTransformIterator(input.end() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), add1) - i,
+              MakeTransformIterator(input.cend() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), add1) - i,
+              MakeTransformIterator(input.rend() - i, add1));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), add1) - i,
+              MakeTransformIterator(input.crend() - i, add1));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), add1) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), add1) - MakeTransformIterator(input.begin(), add1),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), add1);
+  decltype(MakeTransformIterator(input.cbegin(), add1)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+  auto rit = MakeTransformIterator(input.rbegin(), add1);
+  decltype(MakeTransformIterator(input.crbegin(), add1)) crit(rit);
+  static_assert(!std::is_same<decltype(rit), decltype(crit)>::value, "Types must be different");
+  ASSERT_EQ(rit, crit);
+}
+
+// Transforms a std::list: the base iterator is only bidirectional, so the
+// deduced category must be bidirectional_iterator_tag and no random-access
+// operations are exercised.
+TEST(TransformIterator, ListSub1) {
+  auto sub1 = [](const ValueHolder& h) { return h.value - 1; };  // NOLINT [readability/braces]
+  std::list<ValueHolder> input({ 2, 3, 5, 7, 11 });
+  std::vector<int> output;
+
+  using list_titer = decltype(MakeTransformIterator(input.begin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<list_titer, list_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_titer::reference>::value, "reference");
+
+  using list_ctiter = decltype(MakeTransformIterator(input.cbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_ctiter, list_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_ctiter::reference>::value, "reference");
+
+  using list_rtiter = decltype(MakeTransformIterator(input.rbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_rtiter, list_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_rtiter::reference>::value, "reference");
+
+  using list_crtiter = decltype(MakeTransformIterator(input.crbegin(), sub1));
+  static_assert(std::is_same<std::bidirectional_iterator_tag,
+                             list_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, list_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<list_crtiter, list_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, list_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), sub1),
+            MakeTransformIterator(input.end(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 2, 4, 6, 10 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), sub1),
+            MakeTransformIterator(input.cend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 1, 2, 4, 6, 10 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), sub1),
+            MakeTransformIterator(input.rend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 10, 6, 4, 2, 1 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), sub1),
+            MakeTransformIterator(input.crend(), sub1),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 10, 6, 4, 2, 1  }), output);
+  output.clear();
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), sub1);
+  decltype(MakeTransformIterator(input.cbegin(), sub1)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+}
+
+// Transforms a std::forward_list: forward-only base iterator, so only
+// forward traversal is exercised and the category must be forward_iterator_tag.
+// NOTE(review): the test name says "Sub1" but the lambda multiplies by 3 —
+// the name appears to be a copy-paste from ListSub1; confirm before renaming
+// (renaming changes the gtest filter name).
+TEST(TransformIterator, ForwardListSub1) {
+  auto mul3 = [](const ValueHolder& h) { return h.value * 3; };  // NOLINT [readability/braces]
+  std::forward_list<ValueHolder> input({ 1, 1, 2, 3, 5, 8 });
+  std::vector<int> output;
+
+  using flist_titer = decltype(MakeTransformIterator(input.begin(), mul3));
+  static_assert(std::is_same<std::forward_iterator_tag,
+                             flist_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, flist_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<flist_titer, flist_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int, flist_titer::reference>::value, "reference");
+
+  using flist_ctiter = decltype(MakeTransformIterator(input.cbegin(), mul3));
+  static_assert(std::is_same<std::forward_iterator_tag,
+                             flist_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, flist_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<flist_ctiter, flist_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int, flist_ctiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), mul3),
+            MakeTransformIterator(input.end(), mul3),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 6, 9, 15, 24 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), mul3),
+            MakeTransformIterator(input.cend(), mul3),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 3, 3, 6, 9, 15, 24 }), output);
+  output.clear();
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), mul3);
+  decltype(MakeTransformIterator(input.cbegin(), mul3)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+}
+
+// The lambda returns `const int&`: the iterator's reference/pointer types
+// must become const reference/pointer while value_type stays plain `int`.
+TEST(TransformIterator, VectorConstReference) {
+  auto ref = [](const ValueHolder& h) -> const int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), ref),
+            MakeTransformIterator(input.cend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), ref),
+            MakeTransformIterator(input.crend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  // operator[] with positive and negative offsets, and +/- arithmetic
+  // consistency, for all four traversal flavors.
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cbegin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cend(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), ref) + i,
+              MakeTransformIterator(input.cbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), ref) + i,
+              MakeTransformIterator(input.crbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), ref) - i,
+              MakeTransformIterator(input.cend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), ref) - i,
+              MakeTransformIterator(input.crend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+}
+
+// The lambda returns a non-const `int&`, so the transform iterator can also
+// be written through (used as an output iterator at the end of the test).
+TEST(TransformIterator, VectorNonConstReference) {
+  auto ref = [](ValueHolder& h) -> int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_titer::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_rtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  // operator[] with positive and negative offsets, and +/- arithmetic
+  // consistency (non-const flavors only, since the lambda takes ValueHolder&).
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test writing through the transform iterator.
+  std::list<int> transform_input({ 1, -1, 2, -2, 3, -3 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  std::transform(transform_input.begin(),
+                 transform_input.end(),
+                 MakeTransformIterator(transformed.begin(), ref),
+                 [](int v) { return -2 * v; });
+  ASSERT_EQ(std::vector<ValueHolder>({ -2, 2, -4, 4, -6, 6 }), transformed);
+}
+
+// Uses a functor with both const and non-const operator() overloads: the
+// deduced reference/pointer types must follow the constness of the base
+// iterator's element access.
+TEST(TransformIterator, VectorConstAndNonConstReference) {
+  struct Ref {
+    int& operator()(ValueHolder& h) const { return h.value; }
+    const int& operator()(const ValueHolder& h) const { return h.value; }
+  };
+  Ref ref;
+  std::vector<ValueHolder> input({ 7, 3, 1, 2, 4, 8 });
+  std::vector<int> output;
+
+  using vector_titer = decltype(MakeTransformIterator(input.begin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_titer::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_titer::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_titer::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_titer::reference>::value, "reference");
+
+  using vector_ctiter = decltype(MakeTransformIterator(input.cbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_ctiter::iterator_category>::value, "category");
+  // NOTE(review): the value_type assert below is disabled in the original;
+  // presumably the deduction does not yield plain `int` here — confirm why
+  // before re-enabling.
+  // static_assert(std::is_same<int, vector_ctiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_ctiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_ctiter::reference>::value, "reference");
+
+  using vector_rtiter = decltype(MakeTransformIterator(input.rbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_rtiter::iterator_category>::value, "category");
+  static_assert(std::is_same<int, vector_rtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<int*, vector_rtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<int&, vector_rtiter::reference>::value, "reference");
+
+  using vector_crtiter = decltype(MakeTransformIterator(input.crbegin(), ref));
+  static_assert(std::is_same<std::random_access_iterator_tag,
+                             vector_crtiter::iterator_category>::value, "category");
+  // static_assert(std::is_same<int, vector_crtiter::value_type>::value, "value_type");
+  static_assert(std::is_same<const int*, vector_crtiter::pointer>::value, "pointer");
+  static_assert(std::is_same<const int&, vector_crtiter::reference>::value, "reference");
+
+  std::copy(MakeTransformIterator(input.begin(), ref),
+            MakeTransformIterator(input.end(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.cbegin(), ref),
+            MakeTransformIterator(input.cend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 7, 3, 1, 2, 4, 8 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.rbegin(), ref),
+            MakeTransformIterator(input.rend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  std::copy(MakeTransformIterator(input.crbegin(), ref),
+            MakeTransformIterator(input.crend(), ref),
+            std::back_inserter(output));
+  ASSERT_EQ(std::vector<int>({ 8, 4, 2, 1, 3, 7 }), output);
+  output.clear();
+
+  // operator[] with positive and negative offsets, and +/- arithmetic
+  // consistency, for all four traversal flavors.
+  for (size_t i = 0; i != input.size(); ++i) {
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.begin(), ref)[i]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cbegin(), ref)[i]);
+    ptrdiff_t index_from_rbegin = static_cast<ptrdiff_t>(input.size() - i - 1u);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rbegin(), ref)[index_from_rbegin]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crbegin(), ref)[index_from_rbegin]);
+    ptrdiff_t index_from_end = -static_cast<ptrdiff_t>(input.size() - i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.end(), ref)[index_from_end]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.cend(), ref)[index_from_end]);
+    ptrdiff_t index_from_rend = -1 - static_cast<ptrdiff_t>(i);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.rend(), ref)[index_from_rend]);
+    ASSERT_EQ(input[i].value, MakeTransformIterator(input.crend(), ref)[index_from_rend]);
+
+    ASSERT_EQ(MakeTransformIterator(input.begin(), ref) + i,
+              MakeTransformIterator(input.begin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cbegin(), ref) + i,
+              MakeTransformIterator(input.cbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rbegin(), ref) + i,
+              MakeTransformIterator(input.rbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crbegin(), ref) + i,
+              MakeTransformIterator(input.crbegin() + i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.end(), ref) - i,
+              MakeTransformIterator(input.end() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.cend(), ref) - i,
+              MakeTransformIterator(input.cend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.rend(), ref) - i,
+              MakeTransformIterator(input.rend() - i, ref));
+    ASSERT_EQ(MakeTransformIterator(input.crend(), ref) - i,
+              MakeTransformIterator(input.crend() - i, ref));
+  }
+  ASSERT_EQ(input.end(),
+            (MakeTransformIterator(input.begin(), ref) + input.size()).base());
+  ASSERT_EQ(MakeTransformIterator(input.end(), ref) - MakeTransformIterator(input.begin(), ref),
+            static_cast<ptrdiff_t>(input.size()));
+
+  // Test iterator->const_iterator conversion and comparison.
+  auto it = MakeTransformIterator(input.begin(), ref);
+  decltype(MakeTransformIterator(input.cbegin(), ref)) cit = it;
+  static_assert(!std::is_same<decltype(it), decltype(cit)>::value, "Types must be different");
+  ASSERT_EQ(it, cit);
+  auto rit = MakeTransformIterator(input.rbegin(), ref);
+  decltype(MakeTransformIterator(input.crbegin(), ref)) crit(rit);
+  static_assert(!std::is_same<decltype(rit), decltype(crit)>::value, "Types must be different");
+  ASSERT_EQ(rit, crit);
+
+  // Test writing through the transform iterator.
+  std::list<int> transform_input({ 42, 73, 11, 17 });
+  std::vector<ValueHolder> transformed(transform_input.size(), 0);
+  std::transform(transform_input.begin(),
+                 transform_input.end(),
+                 MakeTransformIterator(transformed.begin(), ref),
+                 [](int v) { return -v; });
+  ASSERT_EQ(std::vector<ValueHolder>({ -42, -73, -11, -17 }), transformed);
+}
+
+// MakeTransformRange in a range-based for loop: with a reference-returning
+// function, mutations through the range must write back to the underlying
+// container.
+TEST(TransformIterator, TransformRange) {
+  auto ref = [](ValueHolder& h) -> int& { return h.value; };  // NOLINT [readability/braces]
+  std::vector<ValueHolder> data({ 1, 0, 1, 3, 1, 0 });
+
+  for (int& v : MakeTransformRange(data, ref)) {
+    v += 11;
+  }
+  ASSERT_EQ(std::vector<ValueHolder>({ 12, 11, 12, 14, 12, 11 }), data);
+}
+
+}  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index cb274dc..2dce2f1 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -685,12 +685,6 @@
       Usage("Can't have both --image and (--app-image-fd or --app-image-file)");
     }
 
-    if (IsBootImage()) {
-      // We need the boot image to always be debuggable.
-      // TODO: Remove this once we better deal with full frame deoptimization.
-      compiler_options_->debuggable_ = true;
-    }
-
     if (oat_filenames_.empty() && oat_fd_ == -1) {
       Usage("Output must be supplied with either --oat-file or --oat-fd");
     }
@@ -1232,18 +1226,6 @@
       unlink(swap_file_name_.c_str());
     }
 
-    // If we use a swap file, ensure we are above the threshold to make it necessary.
-    if (swap_fd_ != -1) {
-      if (!UseSwap(IsBootImage(), dex_files_)) {
-        close(swap_fd_);
-        swap_fd_ = -1;
-        VLOG(compiler) << "Decided to run without swap.";
-      } else {
-        LOG(INFO) << "Large app, accepted running with swap.";
-      }
-    }
-    // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
-
     return true;
   }
 
@@ -1269,6 +1251,21 @@
       CHECK(runtime != nullptr);
       std::set<DexCacheResolvedClasses> resolved_classes(
           profile_compilation_info_->GetResolvedClasses());
+
+      // Filter out class path classes since we don't want to include these in the image.
+      std::unordered_set<std::string> dex_files_locations;
+      for (const DexFile* dex_file : dex_files_) {
+        dex_files_locations.insert(dex_file->GetLocation());
+      }
+      for (auto it = resolved_classes.begin(); it != resolved_classes.end(); ) {
+        if (dex_files_locations.find(it->GetDexLocation()) == dex_files_locations.end()) {
+          VLOG(compiler) << "Removed profile samples for non-app dex file " << it->GetDexLocation();
+          it = resolved_classes.erase(it);
+        } else {
+          ++it;
+        }
+      }
+
       image_classes_.reset(new std::unordered_set<std::string>(
           runtime->GetClassLinker()->GetClassDescriptorsForProfileKeys(resolved_classes)));
       VLOG(compiler) << "Loaded " << image_classes_->size()
@@ -1399,6 +1396,24 @@
     }
 
     dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
+
+    // We had to postpone the swap decision till now, as this is the point when we actually
+    // know about the dex files we're going to use.
+
+    // Make sure that we didn't create the driver, yet.
+    CHECK(driver_ == nullptr);
+    // If we use a swap file, ensure we are above the threshold to make it necessary.
+    if (swap_fd_ != -1) {
+      if (!UseSwap(IsBootImage(), dex_files_)) {
+        close(swap_fd_);
+        swap_fd_ = -1;
+        VLOG(compiler) << "Decided to run without swap.";
+      } else {
+        LOG(INFO) << "Large app, accepted running with swap.";
+      }
+    }
+    // Note that dex2oat won't close the swap_fd_. The compiler driver's swap space will do that.
+
     if (IsBootImage()) {
       // For boot image, pass opened dex files to the Runtime::Create().
       // Note: Runtime acquires ownership of these dex files.
@@ -1447,26 +1462,8 @@
     for (const auto& dex_file : dex_files_) {
       ScopedObjectAccess soa(self);
       dex_caches_.push_back(soa.AddLocalReference<jobject>(
-          class_linker->RegisterDexFile(*dex_file, Runtime::Current()->GetLinearAlloc())));
-    }
-
-    /*
-     * If we're not in interpret-only or verify-none or verify-at-runtime or verify-profile mode,
-     * go ahead and compile small applications.  Don't bother to check if we're doing the image.
-     */
-    if (!IsBootImage() &&
-        compiler_options_->IsCompilationEnabled() &&
-        compiler_kind_ == Compiler::kQuick) {
-      size_t num_methods = 0;
-      for (size_t i = 0; i != dex_files_.size(); ++i) {
-        const DexFile* dex_file = dex_files_[i];
-        CHECK(dex_file != nullptr);
-        num_methods += dex_file->NumMethodIds();
-      }
-      if (num_methods <= compiler_options_->GetNumDexMethodsThreshold()) {
-        compiler_options_->SetCompilerFilter(CompilerFilter::kSpeed);
-        VLOG(compiler) << "Below method threshold, compiling anyways";
-      }
+          class_linker->RegisterDexFile(*dex_file,
+                                        soa.Decode<mirror::ClassLoader*>(class_loader_))));
     }
 
     return true;
@@ -2462,6 +2459,7 @@
   bool multi_image_;
   bool is_host_;
   std::string android_root_;
+  // Dex files we are compiling, does not include the class path dex files.
   std::vector<const DexFile*> dex_files_;
   std::string no_inline_from_string_;
   std::vector<jobject> dex_caches_;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index f5458c0..7239a47 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1118,8 +1118,7 @@
       ScopedObjectAccess soa(Thread::Current());
       Runtime* const runtime = Runtime::Current();
       Handle<mirror::DexCache> dex_cache(
-          hs->NewHandle(runtime->GetClassLinker()->RegisterDexFile(*dex_file,
-                                                                   runtime->GetLinearAlloc())));
+          hs->NewHandle(runtime->GetClassLinker()->RegisterDexFile(*dex_file, nullptr)));
       DCHECK(options_.class_loader_ != nullptr);
       return verifier::MethodVerifier::VerifyMethodAndDump(
           soa.Self(), vios, dex_method_idx, dex_file, dex_cache, *options_.class_loader_,
@@ -1979,7 +1978,7 @@
       size_t sum_of_expansion = 0;
       size_t sum_of_expansion_squared = 0;
       size_t n = method_outlier_size.size();
-      if (n == 0) {
+      if (n <= 1) {
         return;
       }
       for (size_t i = 0; i < n; i++) {
@@ -2283,7 +2282,7 @@
     std::string error_msg;
     const DexFile* const dex_file = OpenDexFile(odf, &error_msg);
     CHECK(dex_file != nullptr) << error_msg;
-    class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+    class_linker->RegisterDexFile(*dex_file, nullptr);
     class_path.push_back(dex_file);
   }
 
diff --git a/profman/profman.cc b/profman/profman.cc
index 4d9276f..754e431 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -31,8 +31,10 @@
 #include "base/stringprintf.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
+#include "dex_file.h"
 #include "jit/offline_profiling_info.h"
 #include "utils.h"
+#include "zip_archive.h"
 #include "profile_assistant.h"
 
 namespace art {
@@ -48,6 +50,12 @@
   return Join(command, ' ');
 }
 
+static constexpr int kInvalidFd = -1;
+
+static bool FdIsValid(int fd) {
+  return fd != kInvalidFd;
+}
+
 static void UsageErrorV(const char* fmt, va_list ap) {
   std::string error;
   StringAppendV(&error, fmt, ap);
@@ -70,8 +78,11 @@
   UsageError("Command: %s", CommandLine().c_str());
   UsageError("Usage: profman [options]...");
   UsageError("");
-  UsageError("  --dump-info-for=<filename>: dumps the content of the profile file");
-  UsageError("      to standard output in a human readable form.");
+  UsageError("  --dump-only: dumps the content of the specified profile files");
+  UsageError("      to standard output (default) in a human readable form.");
+  UsageError("");
+  UsageError("  --dump-output-to-fd=<number>: redirects --dump-info-for output to a file");
+  UsageError("      descriptor.");
   UsageError("");
   UsageError("  --profile-file=<filename>: specify profiler output file to use for compilation.");
   UsageError("      Can be specified multiple time, in which case the data from the different");
@@ -90,6 +101,12 @@
   UsageError("      accepts a file descriptor. Cannot be used together with");
   UsageError("      --reference-profile-file.");
   UsageError("");
+  UsageError("  --dex-location=<string>: location string to use with corresponding");
+  UsageError("      apk-fd to find dex files");
+  UsageError("");
+  UsageError("  --apk-fd=<number>: file descriptor containing an open APK to");
+  UsageError("      search for dex files");
+  UsageError("");
 
   exit(EXIT_FAILURE);
 }
@@ -97,7 +114,9 @@
 class ProfMan FINAL {
  public:
   ProfMan() :
-      reference_profile_file_fd_(-1),
+      reference_profile_file_fd_(kInvalidFd),
+      dump_only_(false),
+      dump_output_to_fd_(kInvalidFd),
       start_ns_(NanoTime()) {}
 
   ~ProfMan() {
@@ -124,8 +143,10 @@
       if (log_options) {
         LOG(INFO) << "profman: option[" << i << "]=" << argv[i];
       }
-      if (option.starts_with("--dump-info-for=")) {
-        dump_info_for_ = option.substr(strlen("--dump-info-for=")).ToString();
+      if (option == "--dump-only") {
+        dump_only_ = true;
+      } else if (option.starts_with("--dump-output-to-fd=")) {
+        ParseUintOption(option, "--dump-output-to-fd", &dump_output_to_fd_, Usage);
       } else if (option.starts_with("--profile-file=")) {
         profile_files_.push_back(option.substr(strlen("--profile-file=")).ToString());
       } else if (option.starts_with("--profile-file-fd=")) {
@@ -134,32 +155,37 @@
         reference_profile_file_ = option.substr(strlen("--reference-profile-file=")).ToString();
       } else if (option.starts_with("--reference-profile-file-fd=")) {
         ParseUintOption(option, "--reference-profile-file-fd", &reference_profile_file_fd_, Usage);
+      } else if (option.starts_with("--dex-location=")) {
+        dex_locations_.push_back(option.substr(strlen("--dex-location=")).ToString());
+      } else if (option.starts_with("--apk-fd=")) {
+        ParseFdForCollection(option, "--apk-fd", &apks_fd_);
       } else {
-        Usage("Unknown argument %s", option.data());
+        Usage("Unknown argument '%s'", option.data());
       }
     }
 
     bool has_profiles = !profile_files_.empty() || !profile_files_fd_.empty();
     bool has_reference_profile = !reference_profile_file_.empty() ||
-        (reference_profile_file_fd_ != -1);
+        FdIsValid(reference_profile_file_fd_);
 
-    if (!dump_info_for_.empty()) {
-      if (has_profiles || has_reference_profile) {
-        Usage("dump-info-for cannot be specified together with other options");
-      }
-      return;
-    }
-    if (!has_profiles) {
+    // --dump-only may be specified with only --reference-profiles present.
+    if (!dump_only_ && !has_profiles) {
       Usage("No profile files specified.");
     }
     if (!profile_files_.empty() && !profile_files_fd_.empty()) {
       Usage("Profile files should not be specified with both --profile-file-fd and --profile-file");
     }
-    if (!has_reference_profile) {
-      Usage("--reference-profile-file-fd should only be supplied with --profile-file-fd");
+    if (!dump_only_ && !has_reference_profile) {
+      Usage("No reference profile file specified.");
     }
-    if (reference_profile_file_.empty() && (reference_profile_file_fd_ == -1)) {
-      Usage("Reference profile file not specified");
+    if (!reference_profile_file_.empty() && FdIsValid(reference_profile_file_fd_)) {
+      Usage("Reference profile should not be specified with both "
+            "--reference-profile-file-fd and --reference-profile-file");
+    }
+    if ((!profile_files_.empty() && FdIsValid(reference_profile_file_fd_)) ||
+        (!dump_only_ && !profile_files_fd_.empty() && !FdIsValid(reference_profile_file_fd_))) {
+      Usage("Options --profile-file-fd and --reference-profile-file-fd "
+            "should only be used together");
     }
   }
 
@@ -177,24 +203,116 @@
     return result;
   }
 
-  int DumpProfileInfo() {
-    int fd = open(dump_info_for_.c_str(), O_RDWR);
-    if (fd < 0) {
-      std::cerr << "Cannot open " << dump_info_for_ << strerror(errno);
-      return -1;
+  int DumpOneProfile(const std::string& banner, const std::string& filename, int fd,
+                     const std::vector<const DexFile*>* dex_files, std::string* dump) {
+    if (!filename.empty()) {
+      fd = open(filename.c_str(), O_RDWR);
+      if (fd < 0) {
+        std::cerr << "Cannot open " << filename << strerror(errno);
+        return -1;
+      }
     }
     ProfileCompilationInfo info;
     if (!info.Load(fd)) {
-      std::cerr << "Cannot load profile info from " << dump_info_for_;
+      std::cerr << "Cannot load profile info from fd=" << fd << "\n";
       return -1;
     }
-    std::string dump = info.DumpInfo(/*dex_files*/ nullptr);
-    std::cout << dump << "\n";
+    std::string this_dump = banner + "\n" + info.DumpInfo(dex_files) + "\n";
+    *dump += this_dump;
+    if (close(fd) < 0) {
+      PLOG(WARNING) << "Failed to close descriptor";
+    }
+    return 0;
+  }
+
+  int DumpProfileInfo() {
+    static const char* kEmptyString = "";
+    static const char* kOrdinaryProfile = "=== profile ===";
+    static const char* kReferenceProfile = "=== reference profile ===";
+
+    // Open apk/zip files and and read dex files.
+    MemMap::Init();  // for ZipArchive::OpenFromFd
+    std::vector<const DexFile*> dex_files;
+    assert(dex_locations_.size() == apks_fd_.size());
+    for (size_t i = 0; i < dex_locations_.size(); ++i) {
+      std::string error_msg;
+      std::vector<std::unique_ptr<const DexFile>> dex_files_for_location;
+      std::unique_ptr<ZipArchive> zip_archive(ZipArchive::OpenFromFd(apks_fd_[i],
+                                                                     dex_locations_[i].c_str(),
+                                                                     &error_msg));
+      if (zip_archive == nullptr) {
+        LOG(WARNING) << "OpenFromFd failed for '" << dex_locations_[i] << "' " << error_msg;
+        continue;
+      }
+      if (DexFile::OpenFromZip(*zip_archive,
+                               dex_locations_[i],
+                               &error_msg,
+                               &dex_files_for_location)) {
+      } else {
+        LOG(WARNING) << "OpenFromZip failed for '" << dex_locations_[i] << "' " << error_msg;
+        continue;
+      }
+      for (std::unique_ptr<const DexFile>& dex_file : dex_files_for_location) {
+        dex_files.push_back(dex_file.release());
+      }
+    }
+
+    std::string dump;
+    // Dump individual profile files.
+    if (!profile_files_fd_.empty()) {
+      for (int profile_file_fd : profile_files_fd_) {
+        int ret = DumpOneProfile(kOrdinaryProfile,
+                                 kEmptyString,
+                                 profile_file_fd,
+                                 &dex_files,
+                                 &dump);
+        if (ret != 0) {
+          return ret;
+        }
+      }
+    }
+    if (!profile_files_.empty()) {
+      for (const std::string& profile_file : profile_files_) {
+        int ret = DumpOneProfile(kOrdinaryProfile, profile_file, kInvalidFd, &dex_files, &dump);
+        if (ret != 0) {
+          return ret;
+        }
+      }
+    }
+    // Dump reference profile file.
+    if (FdIsValid(reference_profile_file_fd_)) {
+      int ret = DumpOneProfile(kReferenceProfile,
+                               kEmptyString,
+                               reference_profile_file_fd_,
+                               &dex_files,
+                               &dump);
+      if (ret != 0) {
+        return ret;
+      }
+    }
+    if (!reference_profile_file_.empty()) {
+      int ret = DumpOneProfile(kReferenceProfile,
+                               reference_profile_file_,
+                               kInvalidFd,
+                               &dex_files,
+                               &dump);
+      if (ret != 0) {
+        return ret;
+      }
+    }
+    if (!FdIsValid(dump_output_to_fd_)) {
+      std::cout << dump;
+    } else {
+      unix_file::FdFile out_fd(dump_output_to_fd_, false /*check_usage*/);
+      if (!out_fd.WriteFully(dump.c_str(), dump.length())) {
+        return -1;
+      }
+    }
     return 0;
   }
 
   bool ShouldOnlyDumpProfile() {
-    return !dump_info_for_.empty();
+    return dump_only_;
   }
 
  private:
@@ -224,10 +342,13 @@
 
   std::vector<std::string> profile_files_;
   std::vector<int> profile_files_fd_;
+  std::vector<std::string> dex_locations_;
+  std::vector<int> apks_fd_;
   std::string reference_profile_file_;
   int reference_profile_file_fd_;
+  bool dump_only_;
+  int dump_output_to_fd_;
   uint64_t start_ns_;
-  std::string dump_info_for_;
 };
 
 // See ProfileAssistant::ProcessingResult for return codes.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 1bba4f9..5209bb6 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -88,6 +88,36 @@
 #endif
 .endm
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * and preserves the value of rTemp2 at entry.
+     */
+.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_RTEMP2 rTemp1, rTemp2
+    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
+    .cfi_adjust_cfa_offset 28
+    .cfi_rel_offset r5, 0
+    .cfi_rel_offset r6, 4
+    .cfi_rel_offset r7, 8
+    .cfi_rel_offset r8, 12
+    .cfi_rel_offset r10, 16
+    .cfi_rel_offset r11, 20
+    .cfi_rel_offset lr, 24
+    sub sp, #4                                    @ bottom word will hold Method*
+    .cfi_adjust_cfa_offset 4
+    str \rTemp2, [sp, #0]                         @ save rTemp2
+    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
+    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
+    ldr \rTemp2, [sp, #0]                         @ restore rTemp2
+    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
+    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
+
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
+#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#endif
+.endm
+
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     add sp, #4               @ bottom word holds Method*
     .cfi_adjust_cfa_offset -4
@@ -831,23 +861,13 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    sub    sp, #12                    @ alignment padding
-    .cfi_adjust_cfa_offset 12
-    push   {r3}                       @ Save r3 as is it used as a temp register in the
-    .cfi_adjust_cfa_offset 4          @   expansion of the SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
-    .cfi_rel_offset r3, 0             @   macro below, which clobbers its arguments.
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
-    ldr    r3, [sp, 32]               @ restore r3
-    .cfi_restore r3
-
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_RTEMP2  r12, r3  @ save callee saves in case of GC
     str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint
     add    sp, #16                    @ strip the extra frame
     .cfi_adjust_cfa_offset -16
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    add    sp, #16                    @ pop r3 + padding
-    .cfi_adjust_cfa_offset -16
     \return
 END \name
 .endm
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 485da9f..2d7f664 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -73,6 +73,38 @@
 #endif
 END_MACRO
 
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     * and preserves the value of got_reg at entry.
+     */
+MACRO2(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_GOT_REG, got_reg, temp_reg)
+    PUSH edi  // Save callee saves (ebx is saved/restored by the upcall)
+    PUSH esi
+    PUSH ebp
+    pushl REG_VAR(got_reg)  // Save got_reg
+    subl MACRO_LITERAL(8), %esp  // Grow stack by 2 words.
+    CFI_ADJUST_CFA_OFFSET(8)
+
+    SETUP_GOT_NOSAVE RAW_VAR(got_reg)
+    // Load Runtime::instance_ from GOT.
+    movl SYMBOL(_ZN3art7Runtime9instance_E)@GOT(REG_VAR(got_reg)), REG_VAR(temp_reg)
+    movl (REG_VAR(temp_reg)), REG_VAR(temp_reg)
+    // Push save all callee-save method.
+    pushl RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(REG_VAR(temp_reg))
+    CFI_ADJUST_CFA_OFFSET(4)
+    // Store esp as the top quick frame.
+    movl %esp, %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+    // Restore got_reg.
+    movl 12(%esp), REG_VAR(got_reg)
+
+    // Ugly compile-time check, but we only have the preprocessor.
+    // Last +4: implicit return address pushed on stack when caller made call.
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 3*4 + 16 + 4)
+#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86) size not as expected."
+#endif
+END_MACRO
+
 MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(16), %esp  // Unwind stack up to saved values
     CFI_ADJUST_CFA_OFFSET(-16)
@@ -686,14 +718,7 @@
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
-    subl MACRO_LITERAL(12), %esp                 // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH ebx                                     // Save ebx as the expansion of the
-                                                 //   SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
-                                                 //   macro below clobbers it.
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
-    movl 28(%esp), %ebx                          // restore ebx
-    CFI_RESTORE_REG ebx
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_GOT_REG  ebx, ebx  // save ref containing registers for GC
 
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp                 // alignment padding
@@ -708,8 +733,6 @@
     addl MACRO_LITERAL(32), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
-    addl MACRO_LITERAL(16), %esp                 // pop ebx + padding
-    CFI_ADJUST_CFA_OFFSET(-16)
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index eddd172..48bec73 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -19,8 +19,8 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE (64 + 4*8)
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE (64 + 4*8)
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE (176 + 4*8)
 
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index d911497..98d3345 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -122,21 +122,21 @@
 
 #define FIELD_GET(object, type) \
   DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK(object != nullptr) << PrettyField(this); \
-  DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
+  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
-    return object->GetField ## type ## Volatile(GetOffset()); \
+    return (object)->GetField ## type ## Volatile(GetOffset()); \
   } \
-  return object->GetField ## type(GetOffset());
+  return (object)->GetField ## type(GetOffset());
 
 #define FIELD_SET(object, type, value) \
   DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK(object != nullptr) << PrettyField(this); \
-  DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
+  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
-    object->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
+    (object)->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
   } else { \
-    object->SetField ## type<kTransactionActive>(GetOffset(), value); \
+    (object)->SetField ## type<kTransactionActive>(GetOffset(), value); \
   }
 
 inline uint8_t ArtField::GetBoolean(mirror::Object* object) {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 1790df6..f86cb13 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -79,10 +79,10 @@
 
 InvokeType ArtMethod::GetInvokeType() {
   // TODO: kSuper?
-  if (GetDeclaringClass()->IsInterface()) {
-    return kInterface;
-  } else if (IsStatic()) {
+  if (IsStatic()) {
     return kStatic;
+  } else if (GetDeclaringClass()->IsInterface()) {
+    return kInterface;
   } else if (IsDirect()) {
     return kDirect;
   } else {
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 21725d3..8eb3742 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -396,10 +396,10 @@
 #define THREAD_CHECKPOINT_REQUEST 2
 ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
 
-#define JIT_CHECK_OSR -1
+#define JIT_CHECK_OSR (-1)
 ADD_TEST_EQ(JIT_CHECK_OSR, static_cast<int32_t>(art::jit::kJitCheckForOSR))
 
-#define JIT_HOTNESS_DISABLE -2
+#define JIT_HOTNESS_DISABLE (-2)
 ADD_TEST_EQ(JIT_HOTNESS_DISABLE, static_cast<int32_t>(art::jit::kJitHotnessDisabled))
 
 #if defined(__cplusplus)
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index 424ebb7..5609067 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -111,6 +111,20 @@
     const BitVector* const bit_vector_;
   };
 
+  // MoveConstructible but not MoveAssignable, CopyConstructible or CopyAssignable.
+
+  BitVector(const BitVector& other) = delete;
+  BitVector& operator=(const BitVector& other) = delete;
+
+  BitVector(BitVector&& other)
+      : storage_(other.storage_),
+        storage_size_(other.storage_size_),
+        allocator_(other.allocator_),
+        expandable_(other.expandable_) {
+    other.storage_ = nullptr;
+    other.storage_size_ = 0u;
+  }
+
   BitVector(uint32_t start_bits,
             bool expandable,
             Allocator* allocator);
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index c7a0ba2..4af47d1 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -202,9 +202,13 @@
 
 template <class Value>
 inline void Histogram<Value>::PrintMemoryUse(std::ostream &os) const {
-  os << Name()
-     << ": Avg: " << PrettySize(Mean()) << " Max: "
-     << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n";
+  os << Name();
+  if (sample_size_ != 0u) {
+    os << ": Avg: " << PrettySize(Mean()) << " Max: "
+       << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n";
+  } else {
+    os << ": <no data>\n";
+  }
 }
 
 template <class Value>
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index e9e97b8..6323eee 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -140,11 +140,11 @@
 
 // Helper for CHECK_STRxx(s1,s2) macros.
 #define CHECK_STROP(s1, s2, sense) \
-  if (UNLIKELY((strcmp(s1, s2) == 0) != sense)) \
+  if (UNLIKELY((strcmp(s1, s2) == 0) != (sense))) \
     LOG(::art::FATAL) << "Check failed: " \
-        << "\"" << s1 << "\"" \
-        << (sense ? " == " : " != ") \
-        << "\"" << s2 << "\""
+        << "\"" << (s1) << "\"" \
+        << ((sense) ? " == " : " != ") \
+        << "\"" << (s2) << "\""
 
 // Check for string (const char*) equality between s1 and s2, LOG(FATAL) if not.
 #define CHECK_STREQ(s1, s2) CHECK_STROP(s1, s2, true)
@@ -156,7 +156,7 @@
     int rc = call args; \
     if (rc != 0) { \
       errno = rc; \
-      PLOG(::art::FATAL) << # call << " failed for " << what; \
+      PLOG(::art::FATAL) << # call << " failed for " << (what); \
     } \
   } while (false)
 
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index 7a293c7..3c43253 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -75,7 +75,7 @@
     ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; } \
     ALWAYS_INLINE void operator delete(void*, void*) noexcept { } \
   private: \
-    void* operator new(size_t) = delete
+    void* operator new(size_t) = delete // NOLINT
 
 // The arraysize(arr) macro returns the # of elements in an array arr.
 // The expression is a compile-time constant, and therefore can be
@@ -135,7 +135,7 @@
 #define ARRAYSIZE_UNSAFE(a) \
   ((sizeof(a) / sizeof(*(a))) / static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
 
-#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f)
+#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f) // NOLINT
 
 #define OFFSETOF_MEMBER(t, f) \
   (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16u)) // NOLINT
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 620bf9c..6f689d7 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -49,7 +49,7 @@
 MutatorMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
 ReaderWriterMutex* Locks::oat_file_manager_lock_ = nullptr;
-ReaderWriterMutex* Locks::oat_file_count_lock_ = nullptr;
+Mutex* Locks::host_dlopen_handles_lock_ = nullptr;
 Mutex* Locks::reference_processor_lock_ = nullptr;
 Mutex* Locks::reference_queue_cleared_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_finalizer_references_lock_ = nullptr;
@@ -953,7 +953,7 @@
     DCHECK(deoptimization_lock_ != nullptr);
     DCHECK(heap_bitmap_lock_ != nullptr);
     DCHECK(oat_file_manager_lock_ != nullptr);
-    DCHECK(oat_file_count_lock_ != nullptr);
+    DCHECK(host_dlopen_handles_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
     DCHECK(jni_libraries_lock_ != nullptr);
     DCHECK(logging_lock_ != nullptr);
@@ -971,7 +971,7 @@
     instrument_entrypoints_lock_ = new Mutex("instrument entrypoint lock", current_lock_level);
 
     #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
-      if (new_level >= current_lock_level) { \
+      if ((new_level) >= current_lock_level) { \
         /* Do not use CHECKs or FATAL here, abort_lock_ is not setup yet. */ \
         fprintf(stderr, "New local level %d is not less than current level %d\n", \
                 new_level, current_lock_level); \
@@ -1042,9 +1042,9 @@
     DCHECK(oat_file_manager_lock_ == nullptr);
     oat_file_manager_lock_ = new ReaderWriterMutex("OatFile manager lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kOatFileCountLock);
-    DCHECK(oat_file_count_lock_ == nullptr);
-    oat_file_count_lock_ = new ReaderWriterMutex("OatFile count lock", current_lock_level);
+    UPDATE_CURRENT_LOCK_LEVEL(kHostDlOpenHandlesLock);
+    DCHECK(host_dlopen_handles_lock_ == nullptr);
+    host_dlopen_handles_lock_ = new Mutex("host dlopen handles lock", current_lock_level);
 
     UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock);
     DCHECK(intern_table_lock_ == nullptr);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3dca12a..3d7624d 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -83,7 +83,7 @@
   kDexFileToMethodInlinerMapLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
-  kOatFileCountLock,
+  kHostDlOpenHandlesLock,
   kOatFileManagerLock,
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
@@ -651,11 +651,11 @@
   // Guards opened oat files in OatFileManager.
   static ReaderWriterMutex* oat_file_manager_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
 
-  // Guards opened oat files in OatFileManager.
-  static ReaderWriterMutex* oat_file_count_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
+  // Guards dlopen_handles_ in DlOpenOatFile.
+  static Mutex* host_dlopen_handles_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
 
   // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(oat_file_count_lock_);
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(host_dlopen_handles_lock_);
 
   // Guards reference processor.
   static Mutex* reference_processor_lock_ ACQUIRED_AFTER(intern_table_lock_);
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 639f913..96fa53c 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -2429,19 +2429,20 @@
                                                      Primitive::kPrimDouble));
   }
 
+// NOLINT added to avoid wrong warning/fix from clang-tidy.
 #define PRIMITIVE_ARRAY_FUNCTIONS(ctype, name, ptype) \
-  static ctype* Get##name##ArrayElements(JNIEnv* env, ctype##Array array, jboolean* is_copy) { \
-    return reinterpret_cast<ctype*>( \
+  static ctype* Get##name##ArrayElements(JNIEnv* env, ctype##Array array, jboolean* is_copy) { /* NOLINT */ \
+    return reinterpret_cast<ctype*>( /* NOLINT */ \
         GetPrimitiveArrayElements(__FUNCTION__, ptype, env, array, is_copy)); \
   } \
   \
-  static void Release##name##ArrayElements(JNIEnv* env, ctype##Array array, ctype* elems, \
+  static void Release##name##ArrayElements(JNIEnv* env, ctype##Array array, ctype* elems, /* NOLINT */ \
                                            jint mode) { \
     ReleasePrimitiveArrayElements(__FUNCTION__, ptype, env, array, elems, mode); \
   } \
   \
   static void Get##name##ArrayRegion(JNIEnv* env, ctype##Array array, jsize start, jsize len, \
-                                     ctype* buf) { \
+                                     ctype* buf) { /* NOLINT */ \
     GetPrimitiveArrayRegion(__FUNCTION__, ptype, env, array, start, len, buf); \
   } \
   \
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d03b57c..db0e9ac 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1348,7 +1348,8 @@
         for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
           // The image space is not yet added to the heap, avoid read barriers.
           mirror::Class* klass = types[j].Read();
-          if (klass != nullptr) {
+          // There may also be boot image classes.
+          if (space->HasAddress(klass)) {
             DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
             // Update the class loader from the one in the image class loader to the one that loaded
             // the app image.
@@ -1387,6 +1388,9 @@
                 VLOG(image) << PrettyMethod(&m);
               }
             }
+          } else {
+            DCHECK(klass == nullptr || heap->ObjectIsInBootImageSpace(klass))
+                << klass << " " << PrettyClass(klass);
           }
         }
       }
@@ -1394,10 +1398,10 @@
         for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
           // The image space is not yet added to the heap, avoid read barriers.
           mirror::Class* klass = types[j].Read();
-          if (klass != nullptr) {
+          if (space->HasAddress(klass)) {
             DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
             if (kIsDebugBuild) {
-              if (new_class_set != nullptr)   {
+              if (new_class_set != nullptr) {
                 auto it = new_class_set->Find(GcRoot<mirror::Class>(klass));
                 DCHECK(it != new_class_set->end());
                 DCHECK_EQ(it->Read(), klass);
@@ -1662,6 +1666,10 @@
     // resolve the same way, simply flatten the hierarchy in the way the resolution order would be,
     // and check that the dex file names are the same.
     for (mirror::ClassLoader* image_class_loader : image_class_loaders) {
+      if (IsBootClassLoader(soa, image_class_loader)) {
+        // The dex cache can reference types from the boot class loader.
+        continue;
+      }
       std::list<mirror::String*> image_dex_file_names;
       std::string temp_error_msg;
       if (!FlattenPathClassLoader(image_class_loader, &image_dex_file_names, &temp_error_msg)) {
@@ -2084,6 +2092,21 @@
       reinterpret_cast<ArtMethod**>(raw_arrays + layout.MethodsOffset());
   ArtField** fields = (dex_file.NumFieldIds() == 0u) ? nullptr :
       reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+  if (kIsDebugBuild) {
+    // Sanity check to make sure all the dex cache arrays are empty. b/28992179
+    for (size_t i = 0; i < dex_file.NumStringIds(); ++i) {
+      CHECK(strings[i].Read<kWithoutReadBarrier>() == nullptr);
+    }
+    for (size_t i = 0; i < dex_file.NumTypeIds(); ++i) {
+      CHECK(types[i].Read<kWithoutReadBarrier>() == nullptr);
+    }
+    for (size_t i = 0; i < dex_file.NumMethodIds(); ++i) {
+      CHECK(mirror::DexCache::GetElementPtrSize(methods, i, image_pointer_size_) == nullptr);
+    }
+    for (size_t i = 0; i < dex_file.NumFieldIds(); ++i) {
+      CHECK(mirror::DexCache::GetElementPtrSize(fields, i, image_pointer_size_) == nullptr);
+    }
+  }
   dex_cache->Init(&dex_file,
                   location.Get(),
                   strings,
@@ -2445,9 +2468,7 @@
     self->AssertPendingOOMException();
     return nullptr;
   }
-  mirror::DexCache* dex_cache = RegisterDexFile(
-      dex_file,
-      GetOrCreateAllocatorForClassLoader(class_loader.Get()));
+  mirror::DexCache* dex_cache = RegisterDexFile(dex_file, class_loader.Get());
   if (dex_cache == nullptr) {
     self->AssertPendingOOMException();
     return nullptr;
@@ -3206,7 +3227,8 @@
   dex_caches_.push_back(data);
 }
 
-mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file, LinearAlloc* linear_alloc) {
+mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file,
+                                               mirror::ClassLoader* class_loader) {
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
@@ -3215,21 +3237,31 @@
       return dex_cache;
     }
   }
+  LinearAlloc* const linear_alloc = GetOrCreateAllocatorForClassLoader(class_loader);
+  DCHECK(linear_alloc != nullptr);
+  ClassTable* table;
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    table = InsertClassTableForClassLoader(class_loader);
+  }
   // Don't alloc while holding the lock, since allocation may need to
   // suspend all threads and another thread may need the dex_lock_ to
   // get to a suspend point.
   StackHandleScope<1> hs(self);
   Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(AllocDexCache(self, dex_file, linear_alloc)));
-  WriterMutexLock mu(self, dex_lock_);
-  mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
-  if (dex_cache != nullptr) {
-    return dex_cache;
+  {
+    WriterMutexLock mu(self, dex_lock_);
+    mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
+    if (dex_cache != nullptr) {
+      return dex_cache;
+    }
+    if (h_dex_cache.Get() == nullptr) {
+      self->AssertPendingOOMException();
+      return nullptr;
+    }
+    RegisterDexFileLocked(dex_file, h_dex_cache);
   }
-  if (h_dex_cache.Get() == nullptr) {
-    self->AssertPendingOOMException();
-    return nullptr;
-  }
-  RegisterDexFileLocked(dex_file, h_dex_cache);
+  table->InsertStrongRoot(h_dex_cache.Get());
   return h_dex_cache.Get();
 }
 
@@ -4945,6 +4977,7 @@
   DCHECK(c.Get() != nullptr);
   if (c->IsInitialized()) {
     EnsureSkipAccessChecksMethods(c);
+    self->AssertNoPendingException();
     return true;
   }
   const bool success = InitializeClass(self, c, can_init_fields, can_init_parents);
@@ -7949,6 +7982,16 @@
   find_array_class_cache_next_victim_ = 0;
 }
 
+void ClassLinker::ClearClassTableStrongRoots() const {
+  Thread* const self = Thread::Current();
+  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  for (const ClassLoaderData& data : class_loaders_) {
+    if (data.class_table != nullptr) {
+      data.class_table->ClearStrongRoots();
+    }
+  }
+}
+
 void ClassLinker::VisitClassLoaders(ClassLoaderVisitor* visitor) const {
   Thread* const self = Thread::Current();
   for (const ClassLoaderData& data : class_loaders_) {
@@ -7967,7 +8010,7 @@
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
   ClassTable* const table = ClassTableForClassLoader(class_loader);
   DCHECK(table != nullptr);
-  if (table->InsertDexFile(dex_file) && class_loader != nullptr) {
+  if (table->InsertStrongRoot(dex_file) && class_loader != nullptr) {
     // It was not already inserted, perform the write barrier to let the GC know the class loader's
     // class table was modified.
     Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index f6ce545..cd1ca7f 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -377,7 +377,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
-  mirror::DexCache* RegisterDexFile(const DexFile& dex_file, LinearAlloc* linear_alloc)
+  mirror::DexCache* RegisterDexFile(const DexFile& dex_file,
+                                    mirror::ClassLoader* class_loader)
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void RegisterDexFile(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
@@ -634,6 +635,17 @@
   // Create the IMT and conflict tables for a class.
   void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Clear class table strong roots (other than classes themselves). This is done by dex2oat to
+  // allow pruning dex caches.
+  void ClearClassTableStrongRoots() const
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Throw the class initialization failure recorded when first trying to initialize the given
+  // class.
+  void ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def = false)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   struct DexCacheData {
     // Weak root to the DexCache. Note: Do not decode this unnecessarily or else class unloading may
@@ -1051,12 +1063,6 @@
   // Return the quick generic JNI stub for testing.
   const void* GetRuntimeQuickGenericJniStub() const;
 
-  // Throw the class initialization failure recorded when first trying to initialize the given
-  // class.
-  void ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def = false)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
-
   bool CanWeInitializeClass(mirror::Class* klass, bool can_init_statics, bool can_init_parents)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index 42e320a..d52365d 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -29,7 +29,7 @@
       visitor.VisitRoot(root.AddressWithoutBarrier());
     }
   }
-  for (GcRoot<mirror::Object>& root : dex_files_) {
+  for (GcRoot<mirror::Object>& root : strong_roots_) {
     visitor.VisitRoot(root.AddressWithoutBarrier());
   }
 }
@@ -42,7 +42,7 @@
       visitor.VisitRoot(root.AddressWithoutBarrier());
     }
   }
-  for (GcRoot<mirror::Object>& root : dex_files_) {
+  for (GcRoot<mirror::Object>& root : strong_roots_) {
     visitor.VisitRoot(root.AddressWithoutBarrier());
   }
 }
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 8267c68..e9154cb 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -146,15 +146,15 @@
   return ComputeModifiedUtf8Hash(descriptor);
 }
 
-bool ClassTable::InsertDexFile(mirror::Object* dex_file) {
+bool ClassTable::InsertStrongRoot(mirror::Object* obj) {
   WriterMutexLock mu(Thread::Current(), lock_);
-  DCHECK(dex_file != nullptr);
-  for (GcRoot<mirror::Object>& root : dex_files_) {
-    if (root.Read() == dex_file) {
+  DCHECK(obj != nullptr);
+  for (GcRoot<mirror::Object>& root : strong_roots_) {
+    if (root.Read() == obj) {
       return false;
     }
   }
-  dex_files_.push_back(GcRoot<mirror::Object>(dex_file));
+  strong_roots_.push_back(GcRoot<mirror::Object>(obj));
   return true;
 }
 
@@ -189,4 +189,8 @@
   classes_.insert(classes_.begin(), std::move(set));
 }
 
+void ClassTable::ClearStrongRoots() {
+  WriterMutexLock mu(Thread::Current(), lock_);
+  strong_roots_.clear();
+}
 }  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 686381d..6fb4206 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -133,8 +133,8 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Return true if we inserted the dex file, false if it already exists.
-  bool InsertDexFile(mirror::Object* dex_file)
+  // Return true if we inserted the strong root, false if it already exists.
+  bool InsertStrongRoot(mirror::Object* obj)
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -153,6 +153,11 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Clear strong roots (other than classes themselves).
+  void ClearStrongRoots()
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   ReaderWriterMutex& GetLock() {
     return lock_;
   }
@@ -162,9 +167,10 @@
   mutable ReaderWriterMutex lock_;
   // We have a vector to help prevent dirty pages after the zygote forks by calling FreezeSnapshot.
   std::vector<ClassSet> classes_ GUARDED_BY(lock_);
-  // Dex files used by the class loader which may not be owned by the class loader. We keep these
-  // live so that we do not have issues closing any of the dex files.
-  std::vector<GcRoot<mirror::Object>> dex_files_ GUARDED_BY(lock_);
+  // Extra strong roots that can be either dex files or dex caches. Dex files used by the class
+  // loader which may not be owned by the class loader must be held strongly live. Also dex caches
+  // are held live to prevent them being unloaded once they have classes in them.
+  std::vector<GcRoot<mirror::Object>> strong_roots_ GUARDED_BY(lock_);
 };
 
 }  // namespace art
diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc
index d617caf..dc197c1 100644
--- a/runtime/compiler_filter.cc
+++ b/runtime/compiler_filter.cc
@@ -20,7 +20,7 @@
 
 namespace art {
 
-bool CompilerFilter::IsCompilationEnabled(Filter filter) {
+bool CompilerFilter::IsBytecodeCompilationEnabled(Filter filter) {
   switch (filter) {
     case CompilerFilter::kVerifyNone:
     case CompilerFilter::kVerifyAtRuntime:
@@ -39,6 +39,25 @@
   UNREACHABLE();
 }
 
+bool CompilerFilter::IsJniCompilationEnabled(Filter filter) {
+  switch (filter) {
+    case CompilerFilter::kVerifyNone:
+    case CompilerFilter::kVerifyAtRuntime: return false;
+
+    case CompilerFilter::kVerifyProfile:
+    case CompilerFilter::kInterpretOnly:
+    case CompilerFilter::kSpaceProfile:
+    case CompilerFilter::kSpace:
+    case CompilerFilter::kBalanced:
+    case CompilerFilter::kTime:
+    case CompilerFilter::kSpeedProfile:
+    case CompilerFilter::kSpeed:
+    case CompilerFilter::kEverythingProfile:
+    case CompilerFilter::kEverything: return true;
+  }
+  UNREACHABLE();
+}
+
 bool CompilerFilter::IsVerificationEnabled(Filter filter) {
   switch (filter) {
     case CompilerFilter::kVerifyNone:
diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h
index e8d74dd..37631cc 100644
--- a/runtime/compiler_filter.h
+++ b/runtime/compiler_filter.h
@@ -30,10 +30,10 @@
   // Note: Order here matters. Later filter choices are considered "as good
   // as" earlier filter choices.
   enum Filter {
-    kVerifyNone,          // Skip verification and compile nothing except JNI stubs.
-    kVerifyAtRuntime,     // Only compile JNI stubs and verify at runtime.
-    kVerifyProfile,       // Verify only the classes in the profile.
-    kInterpretOnly,       // Verify, and compile only JNI stubs.
+    kVerifyNone,          // Skip verification but mark all classes as verified anyway.
+    kVerifyAtRuntime,     // Delay verification to runtime, do not compile anything.
+    kVerifyProfile,       // Verify only the classes in the profile, compile only JNI stubs.
+    kInterpretOnly,       // Verify everything, compile only JNI stubs.
     kTime,                // Compile methods, but minimize compilation time.
     kSpaceProfile,        // Maximize space savings based on profile.
     kSpace,               // Maximize space savings.
@@ -47,8 +47,12 @@
   static const Filter kDefaultCompilerFilter = kSpeed;
 
   // Returns true if an oat file with this compiler filter contains
-  // compiled executable code.
-  static bool IsCompilationEnabled(Filter filter);
+  // compiled executable code for bytecode.
+  static bool IsBytecodeCompilationEnabled(Filter filter);
+
+  // Returns true if an oat file with this compiler filter contains
+  // compiled executable code for JNI methods.
+  static bool IsJniCompilationEnabled(Filter filter);
 
   // Returns true if this compiler filter requires running verification.
   static bool IsVerificationEnabled(Filter filter);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 8005642..5b54f7d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -644,8 +644,7 @@
 
   LOG(INFO) << "Debugger is no longer active";
 
-  // Suspend all threads and exclusively acquire the mutator lock. Set the state of the thread
-  // to kRunnable to avoid scoped object access transitions. Remove the debugger as a listener
+  // Suspend all threads and exclusively acquire the mutator lock. Remove the debugger as a listener
   // and clear the object registry.
   Runtime* runtime = Runtime::Current();
   Thread* self = Thread::Current();
@@ -655,7 +654,6 @@
                                     gc::kGcCauseInstrumentation,
                                     gc::kCollectorTypeInstrumentation);
     ScopedSuspendAll ssa(__FUNCTION__);
-    ThreadState old_state = self->SetStateUnsafe(kRunnable);
     // Debugger may not be active at this point.
     if (IsDebuggerActive()) {
       {
@@ -676,7 +674,6 @@
       }
       gDebuggerActive = false;
     }
-    CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
   }
 
   {
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index af12abf..05c95e0 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -1163,6 +1163,18 @@
   return val;
 }
 
+// Checks that visibility is as expected. Includes special behavior for M and
+// before to allow runtime and build visibility when expecting runtime.
+static bool IsVisibilityCompatible(uint32_t actual, uint32_t expected) {
+  if (expected == DexFile::kDexVisibilityRuntime) {
+    int32_t sdk_version = Runtime::Current()->GetTargetSdkVersion();
+    if (sdk_version > 0 && sdk_version <= 23) {
+      return actual == DexFile::kDexVisibilityRuntime || actual == DexFile::kDexVisibilityBuild;
+    }
+  }
+  return actual == expected;
+}
+
 const DexFile::AnnotationSetItem* DexFile::FindAnnotationSetForField(ArtField* field) const {
   mirror::Class* klass = field->GetDeclaringClass();
   const AnnotationsDirectoryItem* annotations_dir = GetAnnotationsDirectory(*klass->GetClassDef());
@@ -1640,7 +1652,7 @@
     Handle<mirror::Class> annotation_class) const {
   for (uint32_t i = 0; i < annotation_set->size_; ++i) {
     const AnnotationItem* annotation_item = GetAnnotationItem(annotation_set, i);
-    if (annotation_item->visibility_ != visibility) {
+    if (!IsVisibilityCompatible(annotation_item->visibility_, visibility)) {
       continue;
     }
     const uint8_t* annotation = annotation_item->annotation_;
@@ -1758,6 +1770,8 @@
   uint32_t dest_index = 0;
   for (uint32_t i = 0; i < size; ++i) {
     const AnnotationItem* annotation_item = GetAnnotationItem(annotation_set, i);
+    // Note that we do not use IsVisibilityCompatible here because older code
+    // was correct for this case.
     if (annotation_item->visibility_ != visibility) {
       continue;
     }
@@ -2146,7 +2160,7 @@
   const AnnotationItem* result = nullptr;
   for (uint32_t i = 0; i < annotation_set->size_; ++i) {
     const AnnotationItem* annotation_item = GetAnnotationItem(annotation_set, i);
-    if (annotation_item->visibility_ != visibility) {
+    if (!IsVisibilityCompatible(annotation_item->visibility_, visibility)) {
       continue;
     }
     const uint8_t* annotation = annotation_item->annotation_;
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index ce7f62a..638821b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -57,7 +57,11 @@
 // TODO: move all of the macro functionality into the DexCache class.
 class DexFile {
  public:
+  // First Dex format version supporting default methods.
   static const uint32_t kDefaultMethodsVersion = 37;
+  // First Dex format version enforcing class definition ordering rules.
+  static const uint32_t kClassDefinitionOrderEnforcedVersion = 37;
+
   static const uint8_t kDexMagic[];
   static constexpr size_t kNumDexVersions = 2;
   static constexpr size_t kDexVersionLen = 4;
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index bbffbbb..1d24349 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -101,31 +101,31 @@
 }
 
 // Helper macro to load string and return false on error.
-#define LOAD_STRING(var, idx, error)                  \
-  const char* var = CheckLoadStringByIdx(idx, error); \
-  if (UNLIKELY(var == nullptr)) {                     \
-    return false;                                     \
+#define LOAD_STRING(var, idx, error)                    \
+  const char* (var) = CheckLoadStringByIdx(idx, error); \
+  if (UNLIKELY((var) == nullptr)) {                     \
+    return false;                                       \
   }
 
 // Helper macro to load string by type idx and return false on error.
-#define LOAD_STRING_BY_TYPE(var, type_idx, error)              \
-  const char* var = CheckLoadStringByTypeIdx(type_idx, error); \
-  if (UNLIKELY(var == nullptr)) {                              \
-    return false;                                              \
+#define LOAD_STRING_BY_TYPE(var, type_idx, error)                \
+  const char* (var) = CheckLoadStringByTypeIdx(type_idx, error); \
+  if (UNLIKELY((var) == nullptr)) {                              \
+    return false;                                                \
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_METHOD(var, idx, error_string, error_stmt)                 \
-  const DexFile::MethodId* var  = CheckLoadMethodId(idx, error_string); \
-  if (UNLIKELY(var == nullptr)) {                                       \
-    error_stmt;                                                         \
+#define LOAD_METHOD(var, idx, error_string, error_stmt)                   \
+  const DexFile::MethodId* (var)  = CheckLoadMethodId(idx, error_string); \
+  if (UNLIKELY((var) == nullptr)) {                                       \
+    error_stmt;                                                           \
   }
 
 // Helper macro to load method id. Return last parameter on error.
-#define LOAD_FIELD(var, idx, fmt, error_stmt)               \
-  const DexFile::FieldId* var = CheckLoadFieldId(idx, fmt); \
-  if (UNLIKELY(var == nullptr)) {                           \
-    error_stmt;                                             \
+#define LOAD_FIELD(var, idx, fmt, error_stmt)                 \
+  const DexFile::FieldId* (var) = CheckLoadFieldId(idx, fmt); \
+  if (UNLIKELY((var) == nullptr)) {                           \
+    error_stmt;                                               \
   }
 
 bool DexFileVerifier::Verify(const DexFile* dex_file, const uint8_t* begin, size_t size,
@@ -1956,6 +1956,31 @@
   }
 
   if (item->superclass_idx_ != DexFile::kDexNoIndex16) {
+    if (header_->GetVersion() >= DexFile::kClassDefinitionOrderEnforcedVersion) {
+      // Check that a class does not inherit from itself directly (by having
+      // the same type idx as its super class).
+      if (UNLIKELY(item->superclass_idx_ == item->class_idx_)) {
+        ErrorStringPrintf("Class with same type idx as its superclass: '%d'", item->class_idx_);
+        return false;
+      }
+
+      // Check that a class is defined after its super class (if the
+      // latter is defined in the same Dex file).
+      const DexFile::ClassDef* superclass_def = dex_file_->FindClassDef(item->superclass_idx_);
+      if (superclass_def != nullptr) {
+        // The superclass is defined in this Dex file.
+        if (superclass_def > item) {
+          // ClassDef item for super class appearing after the class' ClassDef item.
+          ErrorStringPrintf("Invalid class definition ordering:"
+                            " class with type idx: '%d' defined before"
+                            " superclass with type idx: '%d'",
+                            item->class_idx_,
+                            item->superclass_idx_);
+          return false;
+        }
+      }
+    }
+
     LOAD_STRING_BY_TYPE(superclass_descriptor, item->superclass_idx_,
                         "inter_class_def_item superclass_idx")
     if (UNLIKELY(!IsValidDescriptor(superclass_descriptor) || superclass_descriptor[0] != 'L')) {
@@ -1964,12 +1989,39 @@
     }
   }
 
+  // Check interfaces.
   const DexFile::TypeList* interfaces = dex_file_->GetInterfacesList(*item);
   if (interfaces != nullptr) {
     uint32_t size = interfaces->Size();
-
-    // Ensure that all interfaces refer to classes (not arrays or primitives).
     for (uint32_t i = 0; i < size; i++) {
+      if (header_->GetVersion() >= DexFile::kClassDefinitionOrderEnforcedVersion) {
+        // Check that a class does not implement itself directly (by having the
+        // same type idx as one of its immediate implemented interfaces).
+        if (UNLIKELY(interfaces->GetTypeItem(i).type_idx_ == item->class_idx_)) {
+          ErrorStringPrintf("Class with same type idx as implemented interface: '%d'",
+                            item->class_idx_);
+          return false;
+        }
+
+        // Check that a class is defined after the interfaces it implements
+        // (if they are defined in the same Dex file).
+        const DexFile::ClassDef* interface_def =
+            dex_file_->FindClassDef(interfaces->GetTypeItem(i).type_idx_);
+        if (interface_def != nullptr) {
+          // The interface is defined in this Dex file.
+          if (interface_def > item) {
+            // ClassDef item for interface appearing after the class' ClassDef item.
+            ErrorStringPrintf("Invalid class definition ordering:"
+                              " class with type idx: '%d' defined before"
+                              " implemented interface with type idx: '%d'",
+                              item->class_idx_,
+                              interfaces->GetTypeItem(i).type_idx_);
+            return false;
+          }
+        }
+      }
+
+      // Ensure that the interface refers to a class (not an array nor a primitive type).
       LOAD_STRING_BY_TYPE(inf_descriptor, interfaces->GetTypeItem(i).type_idx_,
                           "inter_class_def_item interface type_idx")
       if (UNLIKELY(!IsValidDescriptor(inf_descriptor) || inf_descriptor[0] != 'L')) {
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 3741c1e..4e53914 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -184,6 +184,12 @@
   return dex_file;
 }
 
+// To generate a base64 encoded Dex file (such as kGoodTestDex, below)
+// from Smali files, use:
+//
+//   smali -o classes.dex class1.smali [class2.smali ...]
+//   base64 classes.dex >classes.dex.base64
+
 // For reference.
 static const char kGoodTestDex[] =
     "ZGV4CjAzNQDrVbyVkxX1HljTznNf95AglkUAhQuFtmKkAgAAcAAAAHhWNBIAAAAAAAAAAAQCAAAN"
@@ -1521,4 +1527,174 @@
   }
 }
 
+// To generate a base64 encoded Dex file version 037 from Smali files, use:
+//
+//   smali --api-level 24 -o classes.dex class1.smali [class2.smali ...]
+//   base64 classes.dex >classes.dex.base64
+
+// Dex file version 037 generated from:
+//
+//   .class public LB28685551;
+//   .super LB28685551;
+
+static const char kClassExtendsItselfTestDex[] =
+    "ZGV4CjAzNwDeGbgRg1kb6swszpcTWrrOAALB++F4OPT0AAAAcAAAAHhWNBIAAAAAAAAAAKgAAAAB"
+    "AAAAcAAAAAEAAAB0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAHgAAABcAAAAmAAAAJgA"
+    "AAAAAAAAAAAAAAEAAAAAAAAAAAAAAP////8AAAAAAAAAAAAAAAALTEIyODY4NTU1MTsAAAAABgAA"
+    "AAAAAAABAAAAAAAAAAEAAAABAAAAcAAAAAIAAAABAAAAdAAAAAYAAAABAAAAeAAAAAIgAAABAAAA"
+    "mAAAAAAQAAABAAAAqAAAAA==";
+
+TEST_F(DexFileVerifierTest, ClassExtendsItself) {
+  VerifyModification(
+      kClassExtendsItselfTestDex,
+      "class_extends_itself",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Class with same type idx as its superclass: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public LFoo;
+//   .super LBar;
+//
+// and:
+//
+//    .class public LBar;
+//    .super LFoo;
+
+static const char kClassesExtendOneAnotherTestDex[] =
+    "ZGV4CjAzNwBXHSrwpDMwRBkg+L+JeQCuFNRLhQ86duEcAQAAcAAAAHhWNBIAAAAAAAAAANAAAAAC"
+    "AAAAcAAAAAIAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAIAAAABcAAAAwAAAAMAA"
+    "AADHAAAAAAAAAAEAAAABAAAAAQAAAAAAAAAAAAAA/////wAAAAAAAAAAAAAAAAAAAAABAAAAAQAA"
+    "AAAAAAD/////AAAAAAAAAAAAAAAABUxCYXI7AAVMRm9vOwAAAAYAAAAAAAAAAQAAAAAAAAABAAAA"
+    "AgAAAHAAAAACAAAAAgAAAHgAAAAGAAAAAgAAAIAAAAACIAAAAgAAAMAAAAAAEAAAAQAAANAAAAA=";
+
+TEST_F(DexFileVerifierTest, ClassesExtendOneAnother) {
+  VerifyModification(
+      kClassesExtendOneAnotherTestDex,
+      "classes_extend_one_another",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " superclass with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public LAll;
+//   .super LYour;
+//
+// and:
+//
+//   .class public LYour;
+//   .super LBase;
+//
+// and:
+//
+//   .class public LBase;
+//   .super LAll;
+
+static const char kCircularClassInheritanceTestDex[] =
+    "ZGV4CjAzNwBMJxgP0SJz6oLXnKfl+J7lSEORLRwF5LNMAQAAcAAAAHhWNBIAAAAAAAAAAAABAAAD"
+    "AAAAcAAAAAMAAAB8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAIgAAABkAAAA6AAAAOgA"
+    "AADvAAAA9wAAAAAAAAABAAAAAgAAAAEAAAABAAAAAAAAAAAAAAD/////AAAAAAAAAAAAAAAAAgAA"
+    "AAEAAAABAAAAAAAAAP////8AAAAAAAAAAAAAAAAAAAAAAQAAAAIAAAAAAAAA/////wAAAAAAAAAA"
+    "AAAAAAVMQWxsOwAGTEJhc2U7AAZMWW91cjsAAAYAAAAAAAAAAQAAAAAAAAABAAAAAwAAAHAAAAAC"
+    "AAAAAwAAAHwAAAAGAAAAAwAAAIgAAAACIAAAAwAAAOgAAAAAEAAAAQAAAAABAAA=";
+
+TEST_F(DexFileVerifierTest, CircularClassInheritance) {
+  VerifyModification(
+      kCircularClassInheritanceTestDex,
+      "circular_class_inheritance",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " superclass with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LInterfaceImplementsItself;
+//   .super Ljava/lang/Object;
+//   .implements LInterfaceImplementsItself;
+
+static const char kInterfaceImplementsItselfTestDex[] =
+    "ZGV4CjAzNwCKKrjatp8XbXl5S/bEVJnqaBhjZkQY4440AQAAcAAAAHhWNBIAAAAAAAAAANwAAAAC"
+    "AAAAcAAAAAIAAAB4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAIAAAACUAAAAoAAAAKAA"
+    "AAC9AAAAAAAAAAEAAAAAAAAAAQYAAAEAAADUAAAA/////wAAAAAAAAAAAAAAABtMSW50ZXJmYWNl"
+    "SW1wbGVtZW50c0l0c2VsZjsAEkxqYXZhL2xhbmcvT2JqZWN0OwAAAAABAAAAAAAAAAcAAAAAAAAA"
+    "AQAAAAAAAAABAAAAAgAAAHAAAAACAAAAAgAAAHgAAAAGAAAAAQAAAIAAAAACIAAAAgAAAKAAAAAB"
+    "EAAAAQAAANQAAAAAEAAAAQAAANwAAAA=";
+
+TEST_F(DexFileVerifierTest, InterfaceImplementsItself) {
+  VerifyModification(
+      kInterfaceImplementsItselfTestDex,
+      "interface_implements_itself",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Class with same type idx as implemented interface: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LPing;
+//   .super Ljava/lang/Object;
+//   .implements LPong;
+//
+// and:
+//
+//   .class public abstract interface LPong;
+//   .super Ljava/lang/Object;
+//   .implements LPing;
+
+static const char kInterfacesImplementOneAnotherTestDex[] =
+    "ZGV4CjAzNwD0Kk9sxlYdg3Dy1Cff0gQCuJAQfEP6ohZUAQAAcAAAAHhWNBIAAAAAAAAAAPwAAAAD"
+    "AAAAcAAAAAMAAAB8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAIgAAACMAAAAyAAAAMgA"
+    "AADQAAAA2AAAAAAAAAABAAAAAgAAAAEAAAABBgAAAgAAAOwAAAD/////AAAAAAAAAAAAAAAAAAAA"
+    "AAEGAAACAAAA9AAAAP////8AAAAAAAAAAAAAAAAGTFBpbmc7AAZMUG9uZzsAEkxqYXZhL2xhbmcv"
+    "T2JqZWN0OwABAAAAAAAAAAEAAAABAAAABwAAAAAAAAABAAAAAAAAAAEAAAADAAAAcAAAAAIAAAAD"
+    "AAAAfAAAAAYAAAACAAAAiAAAAAIgAAADAAAAyAAAAAEQAAACAAAA7AAAAAAQAAABAAAA/AAAAA==";
+
+TEST_F(DexFileVerifierTest, InterfacesImplementOneAnother) {
+  VerifyModification(
+      kInterfacesImplementOneAnotherTestDex,
+      "interfaces_implement_one_another",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '1' defined before"
+      " implemented interface with type idx: '0'");
+}
+
+// Dex file version 037 generated from:
+//
+//   .class public abstract interface LA;
+//   .super Ljava/lang/Object;
+//   .implements LB;
+//
+// and:
+//
+//   .class public abstract interface LB;
+//   .super Ljava/lang/Object;
+//   .implements LC;
+//
+// and:
+//
+//   .class public abstract interface LC;
+//   .super Ljava/lang/Object;
+//   .implements LA;
+
+static const char kCircularInterfaceImplementationTestDex[] =
+    "ZGV4CjAzNwCzKmD5Fol6XAU6ichYHcUTIP7Z7MdTcEmEAQAAcAAAAHhWNBIAAAAAAAAAACwBAAAE"
+    "AAAAcAAAAAQAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAJAAAACUAAAA8AAAAPAA"
+    "AAD1AAAA+gAAAP8AAAAAAAAAAQAAAAIAAAADAAAAAgAAAAEGAAADAAAAHAEAAP////8AAAAAAAAA"
+    "AAAAAAABAAAAAQYAAAMAAAAUAQAA/////wAAAAAAAAAAAAAAAAAAAAABBgAAAwAAACQBAAD/////"
+    "AAAAAAAAAAAAAAAAA0xBOwADTEI7AANMQzsAEkxqYXZhL2xhbmcvT2JqZWN0OwAAAQAAAAIAAAAB"
+    "AAAAAAAAAAEAAAABAAAABwAAAAAAAAABAAAAAAAAAAEAAAAEAAAAcAAAAAIAAAAEAAAAgAAAAAYA"
+    "AAADAAAAkAAAAAIgAAAEAAAA8AAAAAEQAAADAAAAFAEAAAAQAAABAAAALAEAAA==";
+
+TEST_F(DexFileVerifierTest, CircularInterfaceImplementation) {
+  VerifyModification(
+      kCircularInterfaceImplementationTestDex,
+      "circular_interface_implementation",
+      [](DexFile* dex_file ATTRIBUTE_UNUSED) { /* empty */ },
+      "Invalid class definition ordering: class with type idx: '2' defined before"
+      " implemented interface with type idx: '0'");
+}
+
 }  // namespace art
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 3f62124..300e618 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -69,11 +69,11 @@
 
 int const Instruction::kInstructionSizeInCodeUnits[] = {
 #define INSTRUCTION_SIZE(opcode, c, p, format, r, i, a, v) \
-    ((opcode == NOP)                        ? -1 : \
-     ((format >= k10x) && (format <= k10t)) ?  1 : \
-     ((format >= k20t) && (format <= k25x)) ?  2 : \
-     ((format >= k32x) && (format <= k3rc)) ?  3 : \
-      (format == k51l)                      ?  5 : -1),
+    (((opcode) == NOP)                        ? -1 :       \
+     (((format) >= k10x) && ((format) <= k10t)) ?  1 :     \
+     (((format) >= k20t) && ((format) <= k25x)) ?  2 :     \
+     (((format) >= k32x) && ((format) <= k3rc)) ?  3 :     \
+      ((format) == k51l)                      ?  5 : -1),
 #include "dex_instruction_list.h"
   DEX_INSTRUCTION_LIST(INSTRUCTION_SIZE)
 #undef DEX_INSTRUCTION_LIST
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 035230e..89c3db6 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -80,7 +80,7 @@
   };
 
   enum Code {  // private marker to avoid generate-operator-out.py from processing.
-#define INSTRUCTION_ENUM(opcode, cname, p, f, r, i, a, v) cname = opcode,
+#define INSTRUCTION_ENUM(opcode, cname, p, f, r, i, a, v) cname = (opcode),
 #include "dex_instruction_list.h"
     DEX_INSTRUCTION_LIST(INSTRUCTION_ENUM)
 #undef DEX_INSTRUCTION_LIST
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 4e4f851..c3b3ac0 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -32,7 +32,7 @@
     uint32_t type_idx, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, sizeof(void*)); \
     if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
       size_t byte_count = klass->GetObjectSize(); \
@@ -59,7 +59,7 @@
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     if (LIKELY(klass->IsInitialized())) { \
       size_t byte_count = klass->GetObjectSize(); \
       byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
@@ -85,7 +85,7 @@
     mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
     size_t byte_count = klass->GetObjectSize(); \
     byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
     mirror::Object* obj; \
@@ -136,7 +136,7 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (!instrumented_bool) { \
+  if (!(instrumented_bool)) { \
     return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, false, allocator_type); \
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, false, allocator_type); \
@@ -146,7 +146,7 @@
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   ScopedQuickEntrypointChecks sqec(self); \
-  if (!instrumented_bool) { \
+  if (!(instrumented_bool)) { \
     return CheckAndAllocArrayFromCode(type_idx, component_count, method, self, true, allocator_type); \
   } else { \
     return CheckAndAllocArrayFromCodeInstrumented(type_idx, component_count, method, self, true, allocator_type); \
@@ -170,7 +170,7 @@
   return mirror::String::AllocFromCharArray<instrumented_bool>(self, char_count, handle_array, \
                                                                offset, allocator_type); \
 } \
-extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( \
+extern "C" mirror::String* artAllocStringFromStringFromCode##suffix##suffix2( /* NOLINT */ \
     mirror::String* string, Thread* self) \
     SHARED_REQUIRES(Locks::mutator_lock_) { \
   StackHandleScope<1> hs(self); \
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index c019cae..f35c2fe 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -29,39 +29,51 @@
 
 namespace art {
 
-extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-
+NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
   if (VLOG_IS_ON(deopt)) {
-    LOG(INFO) << "Deopting:";
-    self->Dump(LOG(INFO));
+    if (single_frame) {
+      // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
+      // specialized visitor that will show whether a method is Quick or Shadow.
+    } else {
+      LOG(INFO) << "Deopting:";
+      self->Dump(LOG(INFO));
+    }
   }
 
   self->AssertHasDeoptimizationContext();
-  self->SetException(Thread::GetDeoptimizationException());
-  self->QuickDeliverException();
+  QuickExceptionHandler exception_handler(self, true);
+  if (single_frame) {
+    exception_handler.DeoptimizeSingleFrame();
+  } else {
+    exception_handler.DeoptimizeStack();
+  }
+  uintptr_t return_pc = exception_handler.UpdateInstrumentationStack();
+  if (exception_handler.IsFullFragmentDone()) {
+    exception_handler.DoLongJump(true);
+  } else {
+    exception_handler.DeoptimizePartialFragmentFixup(return_pc);
+    // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
+    // be caller-saved. This has the downside that we cannot track incorrect register usage down the
+    // line.
+    exception_handler.DoLongJump(false);
+  }
 }
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+  ScopedQuickEntrypointChecks sqec(self);
+  artDeoptimizeImpl(self, false);
+}
+
+// This is called directly from compiled code by an HDeoptimize.
 extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-
-  // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the
-  // specialized visitor that will show whether a method is Quick or Shadow.
-
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
   self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
-
-  QuickExceptionHandler exception_handler(self, true);
-  exception_handler.DeoptimizeSingleFrame();
-  exception_handler.UpdateInstrumentationStack();
-  exception_handler.DeoptimizeSingleFrameArchDependentFixup();
-  // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would
-  // be caller-saved. This has the downside that we cannot track incorrect register usage down the
-  // line.
-  exception_handler.DoLongJump(false);
+  artDeoptimizeImpl(self, true);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index e9cdbb7..03771aa 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -654,7 +654,7 @@
 
   JValue tmp_value;
   ShadowFrame* deopt_frame = self->PopStackedShadowFrame(
-      StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false);
+      StackedShadowFrameType::kDeoptimizationShadowFrame, false);
   ManagedStack fragment;
 
   DCHECK(!method->IsNative()) << PrettyMethod(method);
@@ -667,7 +667,7 @@
   JValue result;
 
   if (deopt_frame != nullptr) {
-    // Coming from single-frame deopt.
+    // Coming from partial-fragment deopt.
 
     if (kIsDebugBuild) {
       // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom
@@ -681,7 +681,7 @@
     }
 
     if (VLOG_IS_ON(deopt)) {
-      // Print out the stack to verify that it was a single-frame deopt.
+      // Print out the stack to verify that it was a partial-fragment deopt.
       LOG(INFO) << "Continue-ing from deopt. Stack is:";
       QuickExceptionHandler::DumpFramesWithType(self, true);
     }
@@ -689,7 +689,6 @@
     mirror::Throwable* pending_exception = nullptr;
     bool from_code = false;
     self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
-    CHECK(from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -755,7 +754,12 @@
 
   // Request a stack deoptimization if needed
   ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
-  if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
+  uintptr_t caller_pc = QuickArgumentVisitor::GetCallingPc(sp);
+  // If caller_pc is the instrumentation exit stub, the stub will check to see if deoptimization
+  // should be done and it knows the real return pc.
+  if (UNLIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()) &&
+               Dbg::IsForcedInterpreterNeededForUpcall(self, caller) &&
+               Runtime::Current()->IsDeoptimizeable(caller_pc))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
     self->PushDeoptimizationContext(
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 6aed61a..b0ca18e 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -49,6 +49,7 @@
     // done in the runnable state where suspension is expected.
     CHECK_EQ(self->GetState(), kRunnable);
     self->AssertThreadSuspensionIsAllowable();
+    self->AssertNoPendingException();
   }
   // Need to check that we arent the large object allocator since the large object allocation code
   // path this function. If we didn't check we would have an infinite loop.
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 2a1a4a1..6fb048a 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -128,7 +128,7 @@
   static constexpr size_t kDefaultMinFree = kDefaultMaxFree / 4;
   static constexpr size_t kDefaultLongPauseLogThreshold = MsToNs(5);
   static constexpr size_t kDefaultLongGCLogThreshold = MsToNs(100);
-  static constexpr size_t kDefaultTLABSize = 256 * KB;
+  static constexpr size_t kDefaultTLABSize = 32 * KB;
   static constexpr double kDefaultTargetUtilization = 0.5;
   static constexpr double kDefaultHeapGrowthMultiplier = 2.0;
   // Primitive arrays larger than this size are put in the large object space.
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 4e56c4a..c6b2870 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -39,7 +39,7 @@
     int rc = call args; \
     if (UNLIKELY(rc != 0)) { \
       errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
+      PLOG(FATAL) << # call << " failed for " << (what); \
     } \
   } while (false)
 
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 5d710bf..2d71294 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -126,15 +126,20 @@
   } else {
     bool is_live_percent_valid = live_bytes_ != static_cast<size_t>(-1);
     if (is_live_percent_valid) {
-      uint live_percent = GetLivePercent();
+      DCHECK(IsInToSpace());
+      DCHECK(!IsLargeTail());
+      DCHECK_NE(live_bytes_, static_cast<size_t>(-1));
+      DCHECK_LE(live_bytes_, BytesAllocated());
+      const size_t bytes_allocated = RoundUp(BytesAllocated(), kRegionSize);
+      DCHECK_LE(live_bytes_, bytes_allocated);
       if (IsAllocated()) {
         // Side node: live_percent == 0 does not necessarily mean
         // there's no live objects due to rounding (there may be a
         // few).
-        result = live_percent < kEvaculateLivePercentThreshold;
+        result = live_bytes_ * 100U < kEvaculateLivePercentThreshold * bytes_allocated;
       } else {
         DCHECK(IsLarge());
-        result = live_percent == 0U;
+        result = live_bytes_ == 0U;
       }
     } else {
       result = false;
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 4e8dfe8..823aa38 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -395,18 +395,6 @@
       return live_bytes_;
     }
 
-    uint GetLivePercent() const {
-      DCHECK(IsInToSpace());
-      DCHECK(!IsLargeTail());
-      DCHECK_NE(live_bytes_, static_cast<size_t>(-1));
-      DCHECK_LE(live_bytes_, BytesAllocated());
-      size_t bytes_allocated = RoundUp(BytesAllocated(), kRegionSize);
-      DCHECK_GE(bytes_allocated, 0U);
-      uint result = (live_bytes_ * 100U) / bytes_allocated;
-      DCHECK_LE(result, 100U);
-      return result;
-    }
-
     size_t BytesAllocated() const {
       if (IsLarge()) {
         DCHECK_LT(begin_ + kRegionSize, top_);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 61119f8..7dfc83f 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1088,7 +1088,7 @@
   bool deoptimize = (visitor.caller != nullptr) &&
                     (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
                     Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
-  if (deoptimize) {
+  if (deoptimize && Runtime::Current()->IsDeoptimizeable(*return_pc)) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
                                 PrettyMethod(visitor.caller).c_str(),
@@ -1110,7 +1110,7 @@
   }
 }
 
-void Instrumentation::PopMethodForUnwind(Thread* self, bool is_deoptimization) const {
+uintptr_t Instrumentation::PopMethodForUnwind(Thread* self, bool is_deoptimization) const {
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -1134,6 +1134,7 @@
     uint32_t dex_pc = DexFile::kDexNoIndex;
     MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
   }
+  return instrumentation_frame.return_pc_;
 }
 
 std::string InstrumentationStackFrame::Dump() const {
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index ce6ead4..49dd060 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -402,7 +402,8 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
   // Pops an instrumentation frame from the current thread and generate an unwind event.
-  void PopMethodForUnwind(Thread* self, bool is_deoptimization) const
+  // Returns the return pc for the instrumentation frame that's popped.
+  uintptr_t PopMethodForUnwind(Thread* self, bool is_deoptimization) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Call back for configure stubs.
diff --git a/runtime/interpreter/mterp/mips/bincmp.S b/runtime/interpreter/mterp/mips/bincmp.S
index 70057f6..68df5c3 100644
--- a/runtime/interpreter/mterp/mips/bincmp.S
+++ b/runtime/interpreter/mterp/mips/bincmp.S
@@ -1,7 +1,6 @@
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -9,29 +8,11 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    b${revcmp} a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_${opcode}_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
-%break
-
-.L_${opcode}_finish:
+    b${condition} a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/entry.S b/runtime/interpreter/mterp/mips/entry.S
index 5771a4f..c806a67 100644
--- a/runtime/interpreter/mterp/mips/entry.S
+++ b/runtime/interpreter/mterp/mips/entry.S
@@ -60,6 +60,12 @@
     /* Starting ibase */
     lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
 
+    /* Set up for backwards branches & osr profiling */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpSetUpHotnessCountdown)        # (method, shadow_frame)
+    move    rPROFILE, v0                   # Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST()                           # load rINST from rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mips/footer.S b/runtime/interpreter/mterp/mips/footer.S
index 083dc15..1363751 100644
--- a/runtime/interpreter/mterp/mips/footer.S
+++ b/runtime/interpreter/mterp/mips/footer.S
@@ -112,20 +112,110 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
  */
-MterpCheckSuspendAndContinue:
-    lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)  # refresh rIBASE
+MterpCommonTakenBranchNoFlags:
+    bgtz    rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    li      t0, JIT_CHECK_OSR
+    beq     rPROFILE, t0, .L_osr_check
+    blt     rPROFILE, t0, .L_resume_backward_branch
+    subu    rPROFILE, 1
+    beqz    rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    REFRESH_IBASE()
+    addu    a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
     and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bnez    ra, 1f
+    bnez    ra, .L_suspend_request_pending
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
-1:
+
+.L_suspend_request_pending:
     EXPORT_PC()
     move    a0, rSELF
     JAL(MterpSuspendCheck)              # (self)
     bnez    v0, MterpFallback
+    REFRESH_IBASE()                     # might have changed during suspend
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_no_count_backwards:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bne     rPROFILE, t0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beq     rPROFILE, t0, .L_check_osr_forward
+.L_resume_forward_branch:
+    add     a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    lw      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST(2)
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
 
@@ -172,6 +262,26 @@
     sw      v1, 4(a2)
     li      v0, 1                       # signal return to caller.
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if > 0, we may have some counts to report.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
 /* Restore from the stack and return. Frame size = STACK_SIZE */
     STACK_LOAD_FULL()
     jalr    zero, ra
diff --git a/runtime/interpreter/mterp/mips/header.S b/runtime/interpreter/mterp/mips/header.S
index 37ab21d..a3a6744 100644
--- a/runtime/interpreter/mterp/mips/header.S
+++ b/runtime/interpreter/mterp/mips/header.S
@@ -51,7 +51,11 @@
    s2   rSELF     self (Thread) pointer
    s3   rIBASE    interpreted instruction base pointer, used for computed goto
    s4   rINST     first 16-bit code unit of current instruction
+   s5   rOBJ      object pointer
    s6   rREFS     base of object references in shadow frame (ideally, we'll get rid of this later).
+   s7   rTEMP     used as temp storage that can survive a function call
+   s8   rPROFILE  branch profiling countdown
+
 */
 
 /* single-purpose registers, given names for clarity */
@@ -63,6 +67,7 @@
 #define rOBJ s5
 #define rREFS s6
 #define rTEMP s7
+#define rPROFILE s8
 
 #define rARG0 a0
 #define rARG1 a1
@@ -160,7 +165,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -482,3 +487,6 @@
     STACK_LOAD(s8, 120); \
     STACK_LOAD(ra, 124); \
     DELETE_STACK(STACK_SIZE)
+
+#define REFRESH_IBASE() \
+    lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
diff --git a/runtime/interpreter/mterp/mips/op_goto.S b/runtime/interpreter/mterp/mips/op_goto.S
index d6f21c9..57182a5 100644
--- a/runtime/interpreter/mterp/mips/op_goto.S
+++ b/runtime/interpreter/mterp/mips/op_goto.S
@@ -5,34 +5,6 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-#if MTERP_PROFILE_BRANCHES
     sll       a0, rINST, 16                #  a0 <- AAxx0000
     sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a2, rINST, rINST             #  a2 <- byte offset
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    /* If backwards branch refresh rIBASE */
-    bgez      a2, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    sll       a0, rINST, 16                #  a0 <- AAxx0000
-    sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
-    addu      a2, rINST, rINST             #  a2 <- byte offset
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    /* If backwards branch refresh rIBASE */
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_goto_16.S b/runtime/interpreter/mterp/mips/op_goto_16.S
index cec4432..06c96cd 100644
--- a/runtime/interpreter/mterp/mips/op_goto_16.S
+++ b/runtime/interpreter/mterp/mips/op_goto_16.S
@@ -5,30 +5,5 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset, flags set
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
-    addu      a1, rINST, rINST             #  a1 <- byte offset, flags set
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_goto_32.S b/runtime/interpreter/mterp/mips/op_goto_32.S
index 083acd1..67f52e9 100644
--- a/runtime/interpreter/mterp/mips/op_goto_32.S
+++ b/runtime/interpreter/mterp/mips/op_goto_32.S
@@ -8,36 +8,8 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
     sll       a1, a1, 16
     or        rINST, a0, a1                #  rINST <- AAAAaaaa
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
-    FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b         MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_if_eq.S b/runtime/interpreter/mterp/mips/op_if_eq.S
index e7190d8..d6f9987 100644
--- a/runtime/interpreter/mterp/mips/op_if_eq.S
+++ b/runtime/interpreter/mterp/mips/op_if_eq.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"ne" }
+%include "mips/bincmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/mips/op_if_eqz.S b/runtime/interpreter/mterp/mips/op_if_eqz.S
index 0a78fd9..c52b76a 100644
--- a/runtime/interpreter/mterp/mips/op_if_eqz.S
+++ b/runtime/interpreter/mterp/mips/op_if_eqz.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"ne" }
+%include "mips/zcmp.S" { "condition":"eq" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ge.S b/runtime/interpreter/mterp/mips/op_if_ge.S
index b2629ba..bd06ff5 100644
--- a/runtime/interpreter/mterp/mips/op_if_ge.S
+++ b/runtime/interpreter/mterp/mips/op_if_ge.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"lt" }
+%include "mips/bincmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gez.S b/runtime/interpreter/mterp/mips/op_if_gez.S
index b02f677..549231a 100644
--- a/runtime/interpreter/mterp/mips/op_if_gez.S
+++ b/runtime/interpreter/mterp/mips/op_if_gez.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"lt" }
+%include "mips/zcmp.S" { "condition":"ge" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gt.S b/runtime/interpreter/mterp/mips/op_if_gt.S
index f620d4a..0be3091 100644
--- a/runtime/interpreter/mterp/mips/op_if_gt.S
+++ b/runtime/interpreter/mterp/mips/op_if_gt.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"le" }
+%include "mips/bincmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_gtz.S b/runtime/interpreter/mterp/mips/op_if_gtz.S
index 5e5dd70..5c7bcc4 100644
--- a/runtime/interpreter/mterp/mips/op_if_gtz.S
+++ b/runtime/interpreter/mterp/mips/op_if_gtz.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"le" }
+%include "mips/zcmp.S" { "condition":"gt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_le.S b/runtime/interpreter/mterp/mips/op_if_le.S
index a4e8b1a..c35c1a2 100644
--- a/runtime/interpreter/mterp/mips/op_if_le.S
+++ b/runtime/interpreter/mterp/mips/op_if_le.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"gt" }
+%include "mips/bincmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/mips/op_if_lez.S b/runtime/interpreter/mterp/mips/op_if_lez.S
index af551a6..3dc6543 100644
--- a/runtime/interpreter/mterp/mips/op_if_lez.S
+++ b/runtime/interpreter/mterp/mips/op_if_lez.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"gt" }
+%include "mips/zcmp.S" { "condition":"le" }
diff --git a/runtime/interpreter/mterp/mips/op_if_lt.S b/runtime/interpreter/mterp/mips/op_if_lt.S
index f33b9a4..3f3386c 100644
--- a/runtime/interpreter/mterp/mips/op_if_lt.S
+++ b/runtime/interpreter/mterp/mips/op_if_lt.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"ge" }
+%include "mips/bincmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ltz.S b/runtime/interpreter/mterp/mips/op_if_ltz.S
index 18fcb1d..e6d6ed6 100644
--- a/runtime/interpreter/mterp/mips/op_if_ltz.S
+++ b/runtime/interpreter/mterp/mips/op_if_ltz.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"ge" }
+%include "mips/zcmp.S" { "condition":"lt" }
diff --git a/runtime/interpreter/mterp/mips/op_if_ne.S b/runtime/interpreter/mterp/mips/op_if_ne.S
index e0a102b..3d7bf35 100644
--- a/runtime/interpreter/mterp/mips/op_if_ne.S
+++ b/runtime/interpreter/mterp/mips/op_if_ne.S
@@ -1 +1 @@
-%include "mips/bincmp.S" { "revcmp":"eq" }
+%include "mips/bincmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/mips/op_if_nez.S b/runtime/interpreter/mterp/mips/op_if_nez.S
index d1866a0..d121eae 100644
--- a/runtime/interpreter/mterp/mips/op_if_nez.S
+++ b/runtime/interpreter/mterp/mips/op_if_nez.S
@@ -1 +1 @@
-%include "mips/zcmp.S" { "revcmp":"eq" }
+%include "mips/zcmp.S" { "condition":"ne" }
diff --git a/runtime/interpreter/mterp/mips/op_packed_switch.S b/runtime/interpreter/mterp/mips/op_packed_switch.S
index 93fae97..ffa4f47 100644
--- a/runtime/interpreter/mterp/mips/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips/op_packed_switch.S
@@ -9,7 +9,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
@@ -19,39 +18,4 @@
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL($func)                             #  a0 <- code-unit branch offset
     move      rINST, v0
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, .L${opcode}_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-#else
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
-    GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
-    GET_VREG(a1, a3)                       #  a1 <- vAA
-    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
-    JAL($func)                             #  a0 <- code-unit branch offset
-    move      rINST, v0
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
-
-%break
-
-.L${opcode}_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    b         MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/zcmp.S b/runtime/interpreter/mterp/mips/zcmp.S
index 1fa1385..8d3a198 100644
--- a/runtime/interpreter/mterp/mips/zcmp.S
+++ b/runtime/interpreter/mterp/mips/zcmp.S
@@ -1,32 +1,16 @@
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    b${revcmp} a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    b${condition} a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/bincmp.S b/runtime/interpreter/mterp/mips64/bincmp.S
index aa5e74b..07b1210 100644
--- a/runtime/interpreter/mterp/mips64/bincmp.S
+++ b/runtime/interpreter/mterp/mips64/bincmp.S
@@ -12,21 +12,9 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    b${condition}c a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    b${condition}c a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips64/entry.S b/runtime/interpreter/mterp/mips64/entry.S
index ae6c26b..cc48d45 100644
--- a/runtime/interpreter/mterp/mips64/entry.S
+++ b/runtime/interpreter/mterp/mips64/entry.S
@@ -57,6 +57,8 @@
     .cfi_rel_offset 20, STACK_OFFSET_S4
     sd      s5, STACK_OFFSET_S5(sp)
     .cfi_rel_offset 21, STACK_OFFSET_S5
+    sd      s6, STACK_OFFSET_S6(sp)
+    .cfi_rel_offset 22, STACK_OFFSET_S6
 
     /* Remember the return register */
     sd      a3, SHADOWFRAME_RESULT_REGISTER_OFFSET(a2)
@@ -77,6 +79,12 @@
     /* Starting ibase */
     REFRESH_IBASE
 
+    /* Set up for backwards branches & osr profiling */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpSetUpHotnessCountdown
+    move    rPROFILE, v0                # Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST
     GET_INST_OPCODE v0
diff --git a/runtime/interpreter/mterp/mips64/footer.S b/runtime/interpreter/mterp/mips64/footer.S
index 14d5fe0..9994169 100644
--- a/runtime/interpreter/mterp/mips64/footer.S
+++ b/runtime/interpreter/mterp/mips64/footer.S
@@ -71,23 +71,110 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in ra.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 64 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-    .extern MterpSuspendCheck
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranchNoFlags:
+    bgtzc   rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+    li      v0, JIT_CHECK_OSR
+    beqc    rPROFILE, v0, .L_osr_check
+    bltc    rPROFILE, v0, .L_resume_backward_branch
+    dsubu   rPROFILE, 1
+    beqzc   rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     REFRESH_IBASE
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bnez    ra, check1
-    GET_INST_OPCODE v0                              # extract opcode from rINST
-    GOTO_OPCODE v0                                  # jump to next instruction
-check1:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bnezc   ra, .L_suspend_request_pending
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_suspend_request_pending:
     EXPORT_PC
     move    a0, rSELF
-    jal     MterpSuspendCheck                       # (self)
-    bnezc   v0, MterpFallback                       # Something in the environment changed, switch interpreters
-    GET_INST_OPCODE v0                              # extract opcode from rINST
-    GOTO_OPCODE v0                                  # jump to next instruction
+    jal     MterpSuspendCheck           # (self)
+    bnezc   v0, MterpFallback
+    REFRESH_IBASE                       # might have changed during suspend
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_no_count_backwards:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bnec    rPROFILE, v0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal MterpMaybeDoOnStackReplacement  # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beqc    rPROFILE, v0, .L_check_osr_forward
+.L_resume_forward_branch:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    ld      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
 
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
@@ -143,6 +230,28 @@
 check2:
     li      v0, 1                                   # signal return to caller.
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if > 0, we may have some counts to report.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
+    ld      s6, STACK_OFFSET_S6(sp)
+    .cfi_restore 22
     ld      s5, STACK_OFFSET_S5(sp)
     .cfi_restore 21
     ld      s4, STACK_OFFSET_S4(sp)
@@ -169,4 +278,5 @@
     .cfi_adjust_cfa_offset -STACK_SIZE
 
     .cfi_endproc
+    .set    reorder
     .size ExecuteMterpImpl, .-ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/mips64/header.S b/runtime/interpreter/mterp/mips64/header.S
index dd0fbe0..b67df20 100644
--- a/runtime/interpreter/mterp/mips64/header.S
+++ b/runtime/interpreter/mterp/mips64/header.S
@@ -51,16 +51,18 @@
   s3  rINST     first 16-bit code unit of current instruction
   s4  rIBASE    interpreted instruction base pointer, used for computed goto
   s5  rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  s6  rPROFILE  jit profile hotness countdown
 */
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC     s0
-#define rFP     s1
-#define rSELF   s2
-#define rINST   s3
-#define rIBASE  s4
-#define rREFS   s5
+#define rPC      s0
+#define rFP      s1
+#define rSELF    s2
+#define rINST    s3
+#define rIBASE   s4
+#define rREFS    s5
+#define rPROFILE s6
 
 /*
  * This is a #include, not a %include, because we want the C pre-processor
@@ -80,7 +82,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -121,6 +123,17 @@
 .endm
 
 /*
+ * Fetch the next instruction from an offset specified by _reg, advancing
+ * rPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.  Must not set flags.
+ *
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    daddu   rPC, rPC, \reg
+    FETCH_INST
+.endm
+
+/*
  * Fetch the next instruction from the specified offset.  Advances rPC
  * to point to the next instruction.
  *
@@ -267,7 +280,8 @@
 #define STACK_OFFSET_S3 40
 #define STACK_OFFSET_S4 48
 #define STACK_OFFSET_S5 56
-#define STACK_SIZE      64
+#define STACK_OFFSET_S6 64
+#define STACK_SIZE      80    /* needs 16 byte alignment */
 
 /* Constants for float/double_to_int/long conversions */
 #define INT_MIN             0x80000000
diff --git a/runtime/interpreter/mterp/mips64/op_goto.S b/runtime/interpreter/mterp/mips64/op_goto.S
index 7c7d0ec..68fc83d 100644
--- a/runtime/interpreter/mterp/mips64/op_goto.S
+++ b/runtime/interpreter/mterp/mips64/op_goto.S
@@ -5,21 +5,6 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    .extern MterpProfileBranch
     srl     rINST, rINST, 8
     seb     rINST, rINST                # rINST <- offset (sign-extended AA)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_goto_16.S b/runtime/interpreter/mterp/mips64/op_goto_16.S
index 566e3a7..ae56066 100644
--- a/runtime/interpreter/mterp/mips64/op_goto_16.S
+++ b/runtime/interpreter/mterp/mips64/op_goto_16.S
@@ -5,20 +5,5 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-    .extern MterpProfileBranch
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended AAAA)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_goto_32.S b/runtime/interpreter/mterp/mips64/op_goto_32.S
index b260083..498b6d6 100644
--- a/runtime/interpreter/mterp/mips64/op_goto_32.S
+++ b/runtime/interpreter/mterp/mips64/op_goto_32.S
@@ -8,22 +8,7 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    .extern MterpProfileBranch
     lh      rINST, 2(rPC)               # rINST <- aaaa (low)
     lh      a1, 4(rPC)                  # a1 <- AAAA (high)
     ins     rINST, a1, 16, 16           # rINST <- offset (sign-extended AAAAaaaa)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/op_packed_switch.S b/runtime/interpreter/mterp/mips64/op_packed_switch.S
index 2c6eb2f..27ce580 100644
--- a/runtime/interpreter/mterp/mips64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips64/op_packed_switch.S
@@ -19,18 +19,4 @@
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     $func                       # v0 <- code-unit branch offset
     move    rINST, v0
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips64/zcmp.S b/runtime/interpreter/mterp/mips64/zcmp.S
index 0e0477f..75db49e 100644
--- a/runtime/interpreter/mterp/mips64/zcmp.S
+++ b/runtime/interpreter/mterp/mips64/zcmp.S
@@ -6,25 +6,12 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    b${condition}zc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    b${condition}zc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index b134129..daa6f2a 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -58,7 +58,11 @@
    s2   rSELF     self (Thread) pointer
    s3   rIBASE    interpreted instruction base pointer, used for computed goto
    s4   rINST     first 16-bit code unit of current instruction
+   s5   rOBJ      object pointer
    s6   rREFS     base of object references in shadow frame (ideally, we'll get rid of this later).
+   s7   rTEMP     used as temp storage that can survive a function call
+   s8   rPROFILE  branch profiling countdown
+
 */
 
 /* single-purpose registers, given names for clarity */
@@ -70,6 +74,7 @@
 #define rOBJ s5
 #define rREFS s6
 #define rTEMP s7
+#define rPROFILE s8
 
 #define rARG0 a0
 #define rARG1 a1
@@ -167,7 +172,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -490,6 +495,9 @@
     STACK_LOAD(ra, 124); \
     DELETE_STACK(STACK_SIZE)
 
+#define REFRESH_IBASE() \
+    lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+
 /* File: mips/entry.S */
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -553,6 +561,12 @@
     /* Starting ibase */
     lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
 
+    /* Set up for backwards branches & osr profiling */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    JAL(MterpSetUpHotnessCountdown)        # (method, shadow_frame)
+    move    rPROFILE, v0                   # Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST()                           # load rINST from rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -1284,37 +1298,9 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-#if MTERP_PROFILE_BRANCHES
     sll       a0, rINST, 16                #  a0 <- AAxx0000
     sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a2, rINST, rINST             #  a2 <- byte offset
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    /* If backwards branch refresh rIBASE */
-    bgez      a2, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    sll       a0, rINST, 16                #  a0 <- AAxx0000
-    sra       rINST, a0, 24                #  rINST <- ssssssAA (sign-extended)
-    addu      a2, rINST, rINST             #  a2 <- byte offset
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    /* If backwards branch refresh rIBASE */
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1327,33 +1313,8 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset, flags set
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    FETCH_S(rINST, 1)                      #  rINST <- ssssAAAA (sign-extended)
-    addu      a1, rINST, rINST             #  a1 <- byte offset, flags set
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1369,39 +1330,11 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
     sll       a1, a1, 16
     or        rINST, a0, a1                #  rINST <- AAAAaaaa
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#else
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
-    FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
+    b         MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1417,7 +1350,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
@@ -1427,37 +1359,7 @@
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoPackedSwitch)                             #  a0 <- code-unit branch offset
     move      rINST, v0
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, .Lop_packed_switch_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-#else
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
-    GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
-    GET_VREG(a1, a3)                       #  a1 <- vAA
-    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
-    JAL(MterpDoPackedSwitch)                             #  a0 <- code-unit branch offset
-    move      rINST, v0
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
-
+    b         MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1474,7 +1376,6 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
-#if MTERP_PROFILE_BRANCHES
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
@@ -1484,37 +1385,7 @@
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoSparseSwitch)                             #  a0 <- code-unit branch offset
     move      rINST, v0
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, .Lop_sparse_switch_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-#else
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
-    GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
-    GET_VREG(a1, a3)                       #  a1 <- vAA
-    EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
-    JAL(MterpDoSparseSwitch)                             #  a0 <- code-unit branch offset
-    move      rINST, v0
-    addu      a1, rINST, rINST             #  a1 <- byte offset
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgtz      a1, 1f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-1:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-#endif
-
+    b         MterpCommonTakenBranchNoFlags
 
 
 /* ------------------------------ */
@@ -1772,9 +1643,8 @@
 /* File: mips/op_if_eq.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1782,27 +1652,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    bne a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_eq_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    beq a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1811,9 +1668,8 @@
 /* File: mips/op_if_ne.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1821,27 +1677,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    beq a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_ne_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    bne a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1850,9 +1693,8 @@
 /* File: mips/op_if_lt.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1860,27 +1702,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    bge a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_lt_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    blt a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1889,9 +1718,8 @@
 /* File: mips/op_if_ge.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1899,27 +1727,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    blt a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_ge_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    bge a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1928,9 +1743,8 @@
 /* File: mips/op_if_gt.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1938,27 +1752,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    ble a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_gt_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    bgt a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -1967,9 +1768,8 @@
 /* File: mips/op_if_le.S */
 /* File: mips/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic two-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1977,27 +1777,14 @@
     GET_OPA4(a0)                           #  a0 <- A+
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a3, a1)                       #  a3 <- vB
-    GET_VREG(a2, a0)                       #  a2 <- vA
-    bgt a2, a3, 1f                  #  branch to 1 if comparison failed
+    GET_VREG(a0, a0)                       #  a0 <- vA
     FETCH_S(rINST, 1)                      #  rINST<- branch offset, in code units
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a2, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a2)              #  update rPC, load rINST
-    bgez      a2, .L_op_if_le_finish
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-
+    ble a0, a3, MterpCommonTakenBranchNoFlags  #  compare (vA, vB)
+    li        t0, JIT_CHECK_OSR
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2006,35 +1793,19 @@
 /* File: mips/op_if_eqz.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    bne a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    beq a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2045,35 +1816,19 @@
 /* File: mips/op_if_nez.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    beq a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    bne a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2084,35 +1839,19 @@
 /* File: mips/op_if_ltz.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    bge a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    blt a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2123,35 +1862,19 @@
 /* File: mips/op_if_gez.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    blt a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    bge a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2162,35 +1885,19 @@
 /* File: mips/op_if_gtz.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    ble a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    bgt a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -2201,35 +1908,19 @@
 /* File: mips/op_if_lez.S */
 /* File: mips/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
-     * fragment that specifies the *reverse* comparison to perform, e.g.
-     * for "if-le" you would use "gt".
+     * Generic one-operand compare-and-branch operation.  Provide a "condition"
+     * fragment that specifies the comparison to perform.
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     GET_OPA(a0)                            #  a0 <- AA
-    GET_VREG(a2, a0)                       #  a2 <- vAA
+    GET_VREG(a0, a0)                       #  a0 <- vAA
     FETCH_S(rINST, 1)                      #  rINST <- branch offset, in code units
-    bgt a2, zero, 1f                #  branch to 1 if comparison failed
-    b 2f
-1:
-    li        rINST, 2                     #  rINST- BYTE branch dist for not-taken
-2:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC()
-    move      a0, rSELF
-    addu      a1, rFP, OFF_FP_SHADOWFRAME
-    move      a2, rINST
-    JAL(MterpProfileBranch)                #  (self, shadow_frame, offset)
-    bnez      v0, MterpOnStackReplacement  #  Note: offset must be in rINST
-#endif
-    addu      a1, rINST, rINST             #  convert to bytes
-    FETCH_ADVANCE_INST_RB(a1)              #  update rPC, load rINST
-    bgez      a1, 3f
-    lw        ra, THREAD_FLAGS_OFFSET(rSELF)
-    b         MterpCheckSuspendAndContinue
-3:
+    ble a0, zero, MterpCommonTakenBranchNoFlags
+    li        t0, JIT_CHECK_OSR            # possible OSR re-entry?
+    beq       rPROFILE, t0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     GOTO_OPCODE(t0)                        #  jump to next instruction
 
@@ -7983,18 +7674,6 @@
     .balign 4
 artMterpAsmSisterStart:
 
-/* continuation for op_packed_switch */
-
-.Lop_packed_switch_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_sparse_switch */
-
-.Lop_sparse_switch_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
 /* continuation for op_cmpl_float */
 
 .Lop_cmpl_float_nan:
@@ -8039,42 +7718,6 @@
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
-/* continuation for op_if_eq */
-
-.L_op_if_eq_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_ne */
-
-.L_op_if_ne_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_lt */
-
-.L_op_if_lt_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_ge */
-
-.L_op_if_ge_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_gt */
-
-.L_op_if_gt_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_if_le */
-
-.L_op_if_le_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
 /* continuation for op_float_to_int */
 
 /*
@@ -13089,20 +12732,110 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in lr.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
  */
-MterpCheckSuspendAndContinue:
-    lw      rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)  # refresh rIBASE
+MterpCommonTakenBranchNoFlags:
+    bgtz    rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+#if JIT_CHECK_OSR != -1
+#  error "JIT_CHECK_OSR must be -1."
+#endif
+    li      t0, JIT_CHECK_OSR
+    beq     rPROFILE, t0, .L_osr_check
+    blt     rPROFILE, t0, .L_resume_backward_branch
+    subu    rPROFILE, 1
+    beqz    rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
+    REFRESH_IBASE()
+    addu    a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
     and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bnez    ra, 1f
+    bnez    ra, .L_suspend_request_pending
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
-1:
+
+.L_suspend_request_pending:
     EXPORT_PC()
     move    a0, rSELF
     JAL(MterpSuspendCheck)              # (self)
     bnez    v0, MterpFallback
+    REFRESH_IBASE()                     # might have changed during suspend
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_no_count_backwards:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bne     rPROFILE, t0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      t0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beq     rPROFILE, t0, .L_check_osr_forward
+.L_resume_forward_branch:
+    add     a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
+    GET_INST_OPCODE(t0)                 # extract opcode from rINST
+    GOTO_OPCODE(t0)                     # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    lw      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC()
+    JAL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
+    bnez    v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST(2)
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
 
@@ -13149,6 +12882,26 @@
     sw      v1, 4(a2)
     li      v0, 1                       # signal return to caller.
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if > 0, we may have some counts to report.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    lw      a0, OFF_FP_METHOD(rFP)
+    addu    a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    JAL(MterpAddHotnessBatch)           # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
 /* Restore from the stack and return. Frame size = STACK_SIZE */
     STACK_LOAD_FULL()
     jalr    zero, ra
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index a17252b..29a12bf 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -58,16 +58,18 @@
   s3  rINST     first 16-bit code unit of current instruction
   s4  rIBASE    interpreted instruction base pointer, used for computed goto
   s5  rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  s6  rPROFILE  jit profile hotness countdown
 */
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC     s0
-#define rFP     s1
-#define rSELF   s2
-#define rINST   s3
-#define rIBASE  s4
-#define rREFS   s5
+#define rPC      s0
+#define rFP      s1
+#define rSELF    s2
+#define rINST    s3
+#define rIBASE   s4
+#define rREFS    s5
+#define rPROFILE s6
 
 /*
  * This is a #include, not a %include, because we want the C pre-processor
@@ -87,7 +89,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_SHADOWFRAME OFF_FP(0)
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -128,6 +130,17 @@
 .endm
 
 /*
+ * Fetch the next instruction from an offset specified by _reg and advance xPC.
+ * xPC to point to the next instruction.  "_reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.  Must not set flags.
+ *
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+    daddu   rPC, rPC, \reg
+    FETCH_INST
+.endm
+
+/*
  * Fetch the next instruction from the specified offset.  Advances rPC
  * to point to the next instruction.
  *
@@ -274,7 +287,8 @@
 #define STACK_OFFSET_S3 40
 #define STACK_OFFSET_S4 48
 #define STACK_OFFSET_S5 56
-#define STACK_SIZE      64
+#define STACK_OFFSET_S6 64
+#define STACK_SIZE      80    /* needs 16 byte alignment */
 
 /* Constants for float/double_to_int/long conversions */
 #define INT_MIN             0x80000000
@@ -344,6 +358,8 @@
     .cfi_rel_offset 20, STACK_OFFSET_S4
     sd      s5, STACK_OFFSET_S5(sp)
     .cfi_rel_offset 21, STACK_OFFSET_S5
+    sd      s6, STACK_OFFSET_S6(sp)
+    .cfi_rel_offset 22, STACK_OFFSET_S6
 
     /* Remember the return register */
     sd      a3, SHADOWFRAME_RESULT_REGISTER_OFFSET(a2)
@@ -364,6 +380,12 @@
     /* Starting ibase */
     REFRESH_IBASE
 
+    /* Set up for backwards branches & osr profiling */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    jal     MterpSetUpHotnessCountdown
+    move    rPROFILE, v0                # Starting hotness countdown to rPROFILE
+
     /* start executing the instruction at rPC */
     FETCH_INST
     GET_INST_OPCODE v0
@@ -1100,24 +1122,9 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    .extern MterpProfileBranch
     srl     rINST, rINST, 8
     seb     rINST, rINST                # rINST <- offset (sign-extended AA)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1130,23 +1137,8 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
-    .extern MterpProfileBranch
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended AAAA)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1162,25 +1154,10 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    .extern MterpProfileBranch
     lh      rINST, 2(rPC)               # rINST <- aaaa (low)
     lh      a1, 4(rPC)                  # a1 <- AAAA (high)
     ins     rINST, a1, 16, 16           # rINST <- offset (sign-extended AAAAaaaa)
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1206,21 +1183,7 @@
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     MterpDoPackedSwitch                       # v0 <- code-unit branch offset
     move    rINST, v0
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
     .balign 128
@@ -1247,21 +1210,7 @@
     dlsa    a0, a0, rPC, 1              # a0 <- PC + BBBBbbbb*2
     jal     MterpDoSparseSwitch                       # v0 <- code-unit branch offset
     move    rINST, v0
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    blez    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    GOTO_OPCODE v0                      # jump to next instruction
+    b       MterpCommonTakenBranchNoFlags
 
 
 /* ------------------------------ */
@@ -1453,22 +1402,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    beqc a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    beqc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1492,22 +1429,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    bnec a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bnec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1531,22 +1456,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    bltc a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bltc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1570,22 +1483,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    bgec a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bgec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1609,22 +1510,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    bgtc a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bgtc a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1648,22 +1537,10 @@
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended CCCC)
     GET_VREG a0, a2                     # a0 <- vA
     GET_VREG a1, a3                     # a1 <- vB
-    blec a0, a1, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    blec a0, a1, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1681,26 +1558,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    beqzc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    beqzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1718,26 +1582,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    bnezc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bnezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1755,26 +1606,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    bltzc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bltzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1792,26 +1630,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    bgezc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bgezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1829,26 +1654,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    bgtzc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    bgtzc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -1866,26 +1678,13 @@
      * For: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
-    .extern MterpProfileBranch
     srl     a2, rINST, 8                # a2 <- AA
     lh      rINST, 2(rPC)               # rINST <- offset (sign-extended BBBB)
     GET_VREG a0, a2                     # a0 <- vAA
-    blezc a0, 1f
-    li      rINST, 2                    # offset if branch not taken
-1:
-#if MTERP_PROFILE_BRANCHES
-    EXPORT_PC
-    move    a0, rSELF
-    daddu   a1, rFP, OFF_FP_SHADOWFRAME
-    move    a2, rINST
-    jal     MterpProfileBranch          # (self, shadow_frame, offset)
-    bnezc   v0, MterpOnStackReplacement # Note: offset must be in rINST
-#endif
-    dlsa    rPC, rINST, rPC, 1          # rPC <- rPC + offset * 2
-    lw      ra, THREAD_FLAGS_OFFSET(rSELF)  # Preload flags for MterpCheckSuspendAndContinue
-    move    a0, rINST                   # a0 <- offset
-    FETCH_INST                          # load rINST
-    bltz    a0, MterpCheckSuspendAndContinue  # suspend check if backwards branch
+    blezc a0, MterpCommonTakenBranchNoFlags
+    li      v0, JIT_CHECK_OSR           # possible OSR re-entry?
+    beqc    rPROFILE, v0, .L_check_not_taken_osr
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
 
@@ -12323,23 +12122,110 @@
     /* NOTE: no fallthrough */
 
 /*
- * Check for suspend check request.  Assumes rINST already loaded, rPC advanced and
- * still needs to get the opcode and branch to it, and flags are in ra.
+ * Common handling for branches with support for Jit profiling.
+ * On entry:
+ *    rINST          <= signed offset
+ *    rPROFILE       <= signed hotness countdown (expanded to 64 bits)
+ *
+ * We have quite a few different cases for branch profiling, OSR detection and
+ * suspend check support here.
+ *
+ * Taken backward branches:
+ *    If profiling active, do hotness countdown and report if we hit zero.
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *    Is there a pending suspend request?  If so, suspend.
+ *
+ * Taken forward branches and not-taken backward branches:
+ *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
+ *
+ * Our most common case is expected to be a taken backward branch with active jit profiling,
+ * but no full OSR check and no pending suspend request.
+ * Next most common case is not-taken branch with no full OSR check.
+ *
  */
-    .extern MterpSuspendCheck
-MterpCheckSuspendAndContinue:
+MterpCommonTakenBranchNoFlags:
+    bgtzc   rINST, .L_forward_branch    # don't add forward branches to hotness
+/*
+ * We need to subtract 1 from positive values and we should not see 0 here,
+ * so we may use the result of the comparison with -1.
+ */
+    li      v0, JIT_CHECK_OSR
+    beqc    rPROFILE, v0, .L_osr_check
+    bltc    rPROFILE, v0, .L_resume_backward_branch
+    dsubu   rPROFILE, 1
+    beqzc   rPROFILE, .L_add_batch      # counted down to zero - report
+.L_resume_backward_branch:
+    lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     REFRESH_IBASE
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bnez    ra, check1
-    GET_INST_OPCODE v0                              # extract opcode from rINST
-    GOTO_OPCODE v0                                  # jump to next instruction
-check1:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    bnezc   ra, .L_suspend_request_pending
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_suspend_request_pending:
     EXPORT_PC
     move    a0, rSELF
-    jal     MterpSuspendCheck                       # (self)
-    bnezc   v0, MterpFallback                       # Something in the environment changed, switch interpreters
-    GET_INST_OPCODE v0                              # extract opcode from rINST
-    GOTO_OPCODE v0                                  # jump to next instruction
+    jal     MterpSuspendCheck           # (self)
+    bnezc   v0, MterpFallback
+    REFRESH_IBASE                       # might have changed during suspend
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_no_count_backwards:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    bnec    rPROFILE, v0, .L_resume_backward_branch
+.L_osr_check:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_backward_branch
+
+.L_forward_branch:
+    li      v0, JIT_CHECK_OSR           # check for possible OSR re-entry
+    beqc    rPROFILE, v0, .L_check_osr_forward
+.L_resume_forward_branch:
+    daddu   a2, rINST, rINST            # a2<- byte offset
+    FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
+
+.L_check_osr_forward:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rINST
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    b       .L_resume_forward_branch
+
+.L_add_batch:
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    ld      a0, OFF_FP_METHOD(rFP)
+    move    a2, rSELF
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    rPROFILE, v0                # restore new hotness countdown to rPROFILE
+    b       .L_no_count_backwards
+
+/*
+ * Entered from the conditional branch handlers when OSR check request active on
+ * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
+ */
+.L_check_not_taken_osr:
+    move    a0, rSELF
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    li      a2, 2
+    EXPORT_PC
+    jal     MterpMaybeDoOnStackReplacement # (self, shadow_frame, offset)
+    bnezc   v0, MterpOnStackReplacement
+    FETCH_ADVANCE_INST 2                # advance rPC, load rINST
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    GOTO_OPCODE v0                      # jump to next instruction
 
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
@@ -12395,6 +12281,28 @@
 check2:
     li      v0, 1                                   # signal return to caller.
 MterpDone:
+/*
+ * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
+ * checking for OSR.  If greater than zero, we might have unreported hotness to register
+ * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
+ * should only reach zero immediately after a hotness decrement, and is then reset to either
+ * a negative special state or the new non-zero countdown value.
+ */
+    blez    rPROFILE, .L_pop_and_return # if > 0, we may have some counts to report.
+
+MterpProfileActive:
+    move    rINST, v0                   # stash return value
+    /* Report cached hotness counts */
+    ld      a0, OFF_FP_METHOD(rFP)
+    daddu   a1, rFP, OFF_FP_SHADOWFRAME
+    move    a2, rSELF
+    sh      rPROFILE, SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET(a1)
+    jal     MterpAddHotnessBatch        # (method, shadow_frame, self)
+    move    v0, rINST                   # restore return value
+
+.L_pop_and_return:
+    ld      s6, STACK_OFFSET_S6(sp)
+    .cfi_restore 22
     ld      s5, STACK_OFFSET_S5(sp)
     .cfi_restore 21
     ld      s4, STACK_OFFSET_S4(sp)
@@ -12421,5 +12329,6 @@
     .cfi_adjust_cfa_offset -STACK_SIZE
 
     .cfi_endproc
+    .set    reorder
     .size ExecuteMterpImpl, .-ExecuteMterpImpl
 
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index b0a786e..9822f6e 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -35,6 +35,8 @@
 // with all profile savers running at the same time.
 static constexpr const uint64_t kMinSavePeriodNs = MsToNs(20 * 1000);  // 20 seconds
 static constexpr const uint64_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
+// Minimum number of JIT samples during launch to include a method into the profile.
+static constexpr const size_t kStartupMethodSamples = 1;
 
 static constexpr const uint32_t kMinimumNumberOfMethodsToSave = 10;
 static constexpr const uint32_t kMinimumNumberOfClassesToSave = 10;
@@ -108,7 +110,7 @@
     }
     total_ms_of_sleep_ += kSaveResolvedClassesDelayMs;
   }
-  FetchAndCacheResolvedClasses();
+  FetchAndCacheResolvedClassesAndMethods();
 
   // Loop for the profiled methods.
   while (!ShuttingDown(self)) {
@@ -118,7 +120,7 @@
       {
         MutexLock mu(self, wait_lock_);
         period_condition_.Wait(self);
-        sleep_time = NanoTime() - last_time_ns_saver_woke_up_;
+        sleep_time = NanoTime() - sleep_start;
       }
       // Check if the thread was woken up for shutdown.
       if (ShuttingDown(self)) {
@@ -128,11 +130,11 @@
       // We might have been woken up by a huge number of notifications to guarantee saving.
       // If we didn't meet the minimum saving period go back to sleep (only if missed by
       // a reasonable margin).
-      while (kMinSavePeriodNs - sleep_time > (kMinSavePeriodNs / 10)) {
+      while (kMinSavePeriodNs * 0.9 > sleep_time) {
         {
           MutexLock mu(self, wait_lock_);
           period_condition_.TimedWait(self, NsToMs(kMinSavePeriodNs - sleep_time), 0);
-          sleep_time = NanoTime() - last_time_ns_saver_woke_up_;
+          sleep_time = NanoTime() - sleep_start;
         }
         // Check if the thread was woken up for shutdown.
         if (ShuttingDown(self)) {
@@ -204,11 +206,48 @@
   return &info_it->second;
 }
 
-void ProfileSaver::FetchAndCacheResolvedClasses() {
+// Get resolved methods that have a profile info or at least kStartupMethodSamples samples.
+// Excludes native methods and classes in the boot image.
+class GetMethodsVisitor : public ClassVisitor {
+ public:
+  explicit GetMethodsVisitor(std::vector<MethodReference>* methods) : methods_(methods) {}
+
+  virtual bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+      return true;
+    }
+    for (ArtMethod& method : klass->GetMethods(sizeof(void*))) {
+      if (!method.IsNative()) {
+        if (method.GetCounter() >= kStartupMethodSamples ||
+            method.GetProfilingInfo(sizeof(void*)) != nullptr) {
+          // Have samples, add to profile.
+          const DexFile* dex_file = method.GetInterfaceMethodIfProxy(sizeof(void*))->GetDexFile();
+          methods_->push_back(MethodReference(dex_file, method.GetDexMethodIndex()));
+        }
+      }
+    }
+    return true;
+  }
+
+ private:
+  std::vector<MethodReference>* const methods_;
+};
+
+void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   std::set<DexCacheResolvedClasses> resolved_classes =
       class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
+
+  std::vector<MethodReference> methods;
+  {
+    ScopedTrace trace2("Get hot methods");
+    GetMethodsVisitor visitor(&methods);
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker->VisitClasses(&visitor);
+    VLOG(profiler) << "Methods with samples greater than "
+                   << kStartupMethodSamples << " = " << methods.size();
+  }
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   uint64_t total_number_of_profile_entries_cached = 0;
 
@@ -216,11 +255,16 @@
     std::set<DexCacheResolvedClasses> resolved_classes_for_location;
     const std::string& filename = it.first;
     const std::set<std::string>& locations = it.second;
-
+    std::vector<MethodReference> methods_for_location;
+    for (const MethodReference& ref : methods) {
+      if (locations.find(ref.dex_file->GetBaseLocation()) != locations.end()) {
+        methods_for_location.push_back(ref);
+      }
+    }
     for (const DexCacheResolvedClasses& classes : resolved_classes) {
       if (locations.find(classes.GetBaseLocation()) != locations.end()) {
-        VLOG(profiler) << "Added classes for location " << classes.GetBaseLocation()
-                       << " (" << classes.GetDexLocation() << ")";
+        VLOG(profiler) << "Added " << classes.GetClasses().size() << " classes for location "
+                       << classes.GetBaseLocation() << " (" << classes.GetDexLocation() << ")";
         resolved_classes_for_location.insert(classes);
       } else {
         VLOG(profiler) << "Location not found " << classes.GetBaseLocation()
@@ -228,7 +272,7 @@
       }
     }
     ProfileCompilationInfo* info = GetCachedProfiledInfo(filename);
-    info->AddMethodsAndClasses(std::vector<MethodReference>(), resolved_classes_for_location);
+    info->AddMethodsAndClasses(methods_for_location, resolved_classes_for_location);
     total_number_of_profile_entries_cached += resolved_classes_for_location.size();
   }
   max_number_of_profile_entries_cached_ = std::max(
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index c6da959..9c6d0fa 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -95,9 +95,9 @@
   // If no entry exists, a new empty one will be created, added to the cache and
   // then returned.
   ProfileCompilationInfo* GetCachedProfiledInfo(const std::string& filename);
-  // Fetches the current resolved classes from the ClassLinker and stores them
-  // in the profile_cache_ for later save.
-  void FetchAndCacheResolvedClasses();
+  // Fetches the current resolved classes and methods from the ClassLinker and stores them in the
+  // profile_cache_ for later save.
+  void FetchAndCacheResolvedClassesAndMethods();
 
   static bool MaybeRecordDexUseInternal(
       const std::string& dex_location,
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 7bd85ec..8cdf96d 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -301,13 +301,13 @@
     CHECK_NON_NULL_ARGUMENT_FN_NAME(__FUNCTION__, value, return_val)
 
 #define CHECK_NON_NULL_ARGUMENT_FN_NAME(name, value, return_val) \
-  if (UNLIKELY(value == nullptr)) { \
+  if (UNLIKELY((value) == nullptr)) { \
     JavaVmExtFromEnv(env)->JniAbortF(name, #value " == null"); \
     return return_val; \
   }
 
 #define CHECK_NON_NULL_MEMCPY_ARGUMENT(length, value) \
-  if (UNLIKELY(length != 0 && value == nullptr)) { \
+  if (UNLIKELY((length) != 0 && (value) == nullptr)) { \
     JavaVmExtFromEnv(env)->JniAbortF(__FUNCTION__, #value " == null"); \
     return; \
   }
diff --git a/runtime/lambda/shorty_field_type.h b/runtime/lambda/shorty_field_type.h
index 46ddaa9..c314fd2 100644
--- a/runtime/lambda/shorty_field_type.h
+++ b/runtime/lambda/shorty_field_type.h
@@ -391,7 +391,7 @@
 
  private:
 #define IS_VALID_TYPE_SPECIALIZATION(type, name) \
-  static inline constexpr bool Is ## name ## TypeImpl(type* const  = 0) { \
+  static inline constexpr bool Is ## name ## TypeImpl(type* const  = 0) { /*NOLINT*/ \
     return true; \
   } \
   \
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index dfb728f..fcdfc88 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -419,8 +419,6 @@
       }
       return false;
     }
-    DCHECK_EQ(this->CanAccessMember(access_to, method->GetAccessFlags()),
-              this->CanAccessMember(dex_access_to, method->GetAccessFlags()));
   }
   if (LIKELY(this->CanAccessMember(access_to, method->GetAccessFlags()))) {
     return true;
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 2da3d84..2894b68 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -148,9 +148,7 @@
   for (size_t i = 0, count = NumStrings(); i < count; ++i) {
     mirror::String* source = src[i].Read<kReadBarrierOption>();
     mirror::String* new_source = visitor(source);
-    if (source != new_source) {
-      dest[i] = GcRoot<mirror::String>(new_source);
-    }
+    dest[i] = GcRoot<mirror::String>(new_source);
   }
 }
 
@@ -160,9 +158,7 @@
   for (size_t i = 0, count = NumResolvedTypes(); i < count; ++i) {
     mirror::Class* source = src[i].Read<kReadBarrierOption>();
     mirror::Class* new_source = visitor(source);
-    if (source != new_source) {
-      dest[i] = GcRoot<mirror::Class>(new_source);
-    }
+    dest[i] = GcRoot<mirror::Class>(new_source);
   }
 }
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 0126b4d..8c7c966 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -278,9 +278,7 @@
       StackHandleScope<1> hs(soa.Self());
       Handle<mirror::ClassLoader> class_loader(
           hs.NewHandle(soa.Decode<mirror::ClassLoader*>(javaLoader)));
-      class_linker->RegisterDexFile(
-          *dex_file,
-          class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()));
+      class_linker->RegisterDexFile(*dex_file, class_loader.Get());
       mirror::Class* result = class_linker->DefineClass(soa.Self(),
                                                         descriptor.c_str(),
                                                         hash,
@@ -475,15 +473,22 @@
 
 // public API
 static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
-  const char* instruction_set = GetInstructionSetString(kRuntimeISA);
-  ScopedUtfChars filename(env, javaFilename);
-  jint status = GetDexOptNeeded(
-      env,
-      filename.c_str(),
-      instruction_set,
-      "speed-profile",
-      /*profile_changed*/false);
-  return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE;
+  ScopedUtfChars filename_utf(env, javaFilename);
+  if (env->ExceptionCheck()) {
+    return JNI_FALSE;
+  }
+
+  const char* filename = filename_utf.c_str();
+  if ((filename == nullptr) || !OS::FileExists(filename)) {
+    LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist";
+    ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
+    const char* message = (filename == nullptr) ? "<empty file name>" : filename;
+    env->ThrowNew(fnfe.get(), message);
+    return JNI_FALSE;
+  }
+
+  OatFileAssistant oat_file_assistant(filename, kRuntimeISA, false, false);
+  return oat_file_assistant.IsUpToDate() ? JNI_FALSE : JNI_TRUE;
 }
 
 static jboolean DexFile_isValidCompilerFilter(JNIEnv* env,
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 6c943dc..79b18aa 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -504,8 +504,7 @@
     const DexFile* dex_file = boot_class_path[i];
     CHECK(dex_file != nullptr);
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(
-        hs.NewHandle(linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc())));
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->RegisterDexFile(*dex_file, nullptr)));
 
     if (kPreloadDexCachesStrings) {
       for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index 1515630..6f735aa 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -41,6 +41,23 @@
   if (c != nullptr && c->IsResolved()) {
     return soa.AddLocalReference<jclass>(c);
   }
+  // If class is erroneous, throw the earlier failure, wrapped in certain cases. See b/28787733.
+  if (c != nullptr && c->IsErroneous()) {
+    cl->ThrowEarlierClassFailure(c);
+    Thread* self = soa.Self();
+    mirror::Class* eiie_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_ExceptionInInitializerError)->AsClass();
+    mirror::Class* iae_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_IllegalAccessError)->AsClass();
+    mirror::Class* ncdfe_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_NoClassDefFoundError)->AsClass();
+    mirror::Class* exception = self->GetException()->GetClass();
+    if (exception == eiie_class || exception == iae_class || exception == ncdfe_class) {
+      self->ThrowNewWrappedException("Ljava/lang/ClassNotFoundException;",
+                                     PrettyDescriptor(c).c_str());
+    }
+    return nullptr;
+  }
   if (loader != nullptr) {
     // Try the common case.
     StackHandleScope<1> hs(soa.Self());
diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h
index 2295cb4..e9b0d3c 100644
--- a/runtime/nth_caller_visitor.h
+++ b/runtime/nth_caller_visitor.h
@@ -46,6 +46,7 @@
       DCHECK(caller == nullptr);
       if (count == n) {
         caller = m;
+        caller_pc = GetCurrentQuickFramePc();
         return false;
       }
       count++;
@@ -57,6 +58,7 @@
   const bool include_runtime_and_upcalls_;
   size_t count;
   ArtMethod* caller;
+  uintptr_t caller_pc;
 };
 
 }  // namespace art
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 80231f3..aab0e81 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -182,8 +182,12 @@
 
 void OatHeader::UpdateChecksum(const void* data, size_t length) {
   DCHECK(IsValid());
-  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
-  adler32_checksum_ = adler32(adler32_checksum_, bytes, length);
+  if (data != nullptr) {
+    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
+    adler32_checksum_ = adler32(adler32_checksum_, bytes, length);
+  } else {
+    DCHECK_EQ(0U, length);
+  }
 }
 
 InstructionSet OatHeader::GetInstructionSet() const {
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index ec28685..62c723e 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -490,40 +490,24 @@
 // OatFile via dlopen //
 ////////////////////////
 
-static bool RegisterOatFileLocation(const std::string& location) {
-  if (!kIsTargetBuild) {
-    Runtime* const runtime = Runtime::Current();
-    if (runtime != nullptr && !runtime->IsAotCompiler()) {
-      return runtime->GetOatFileManager().RegisterOatFileLocation(location);
-    }
-    return false;
-  }
-  return true;
-}
-
-static void UnregisterOatFileLocation(const std::string& location) {
-  if (!kIsTargetBuild) {
-    Runtime* const runtime = Runtime::Current();
-    if (runtime != nullptr && !runtime->IsAotCompiler()) {
-      runtime->GetOatFileManager().UnRegisterOatFileLocation(location);
-    }
-  }
-}
-
 class DlOpenOatFile FINAL : public OatFileBase {
  public:
   DlOpenOatFile(const std::string& filename, bool executable)
       : OatFileBase(filename, executable),
         dlopen_handle_(nullptr),
-        shared_objects_before_(0),
-        first_oat_(RegisterOatFileLocation(filename)) {
+        shared_objects_before_(0) {
   }
 
   ~DlOpenOatFile() {
     if (dlopen_handle_ != nullptr) {
-      dlclose(dlopen_handle_);
+      if (!kIsTargetBuild) {
+        MutexLock mu(Thread::Current(), *Locks::host_dlopen_handles_lock_);
+        host_dlopen_handles_.erase(dlopen_handle_);
+        dlclose(dlopen_handle_);
+      } else {
+        dlclose(dlopen_handle_);
+      }
     }
-    UnregisterOatFileLocation(GetLocation());
   }
 
  protected:
@@ -554,6 +538,17 @@
               uint8_t* oat_file_begin,
               std::string* error_msg);
 
+  // On the host, if the same library is loaded again with dlopen the same
+  // file handle is returned. This differs from the behavior of dlopen on the
+  // target, where dlopen reloads the library at a different address every
+  // time you load it. The runtime relies on the target behavior to ensure
+  // each instance of the loaded library has a unique dex cache. To avoid
+  // problems, we fall back to our own linker in the case when the same
+  // library is opened multiple times on host. host_dlopen_handles_ is used
+  // to detect that case.
+  // Guarded by host_dlopen_handles_lock_.
+  static std::unordered_set<void*> host_dlopen_handles_;
+
   // dlopen handle during runtime.
   void* dlopen_handle_;  // TODO: Unique_ptr with custom deleter.
 
@@ -564,12 +559,11 @@
   // (optimistically) optimize the PreSetup stage (see comment there).
   size_t shared_objects_before_;
 
-  // Track the registration status (= was this the first oat file) for the location.
-  const bool first_oat_;
-
   DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
 };
 
+std::unordered_set<void*> DlOpenOatFile::host_dlopen_handles_;
+
 void DlOpenOatFile::PreLoad() {
 #ifdef __APPLE__
   UNUSED(shared_objects_before_);
@@ -628,12 +622,6 @@
       *error_msg = "DlOpen disabled for host.";
       return false;
     }
-    // For RAII, tracking multiple loads is done in the constructor and destructor. The result is
-    // stored in the first_oat_ flag.
-    if (!first_oat_) {
-      *error_msg = "Loading oat files multiple times with dlopen not supported on host.";
-      return false;
-    }
   }
 
   bool success = Dlopen(elf_filename, oat_file_begin, error_msg);
@@ -671,8 +659,18 @@
     }                                                           //   (pic boot image).
     dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
 #else
-    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
     UNUSED(oat_file_begin);
+    static_assert(!kIsTargetBuild, "host_dlopen_handles_ will leak handles");
+    MutexLock mu(Thread::Current(), *Locks::host_dlopen_handles_lock_);
+    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
+    if (dlopen_handle_ != nullptr) {
+      if (!host_dlopen_handles_.insert(dlopen_handle_).second) {
+        dlclose(dlopen_handle_);
+        dlopen_handle_ = nullptr;
+        *error_msg = StringPrintf("host dlopen re-opened '%s'", elf_filename.c_str());
+        return false;
+      }
+    }
 #endif  // ART_TARGET_ANDROID
   }
   if (dlopen_handle_ == nullptr) {
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index fba10ca..218c490 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -153,7 +153,7 @@
 }
 
 OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded(CompilerFilter::Filter target) {
-  bool compilation_desired = CompilerFilter::IsCompilationEnabled(target);
+  bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target);
 
   // See if the oat file is in good shape as is.
   bool oat_okay = OatFileCompilerFilterIsOkay(target);
@@ -220,6 +220,10 @@
   return true;
 }
 
+bool OatFileAssistant::IsUpToDate() {
+  return OatFileIsUpToDate() || OdexFileIsUpToDate();
+}
+
 OatFileAssistant::ResultOfAttemptToUpdate
 OatFileAssistant::MakeUpToDate(std::string* error_msg) {
   CompilerFilter::Filter target;
@@ -600,7 +604,7 @@
 
   CompilerFilter::Filter current_compiler_filter = file.GetCompilerFilter();
 
-  if (CompilerFilter::IsCompilationEnabled(current_compiler_filter)) {
+  if (CompilerFilter::IsBytecodeCompilationEnabled(current_compiler_filter)) {
     if (!file.IsPic()) {
       const ImageInfo* image_info = GetImageInfo();
       if (image_info == nullptr) {
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index f48cdf3..bb7b408 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -149,6 +149,10 @@
   // given compiler filter.
   DexOptNeeded GetDexOptNeeded(CompilerFilter::Filter target_compiler_filter);
 
+  // Returns true if there is up-to-date code for this dex location,
+  // irrespective of the compiler filter of the up-to-date code.
+  bool IsUpToDate();
+
   // Return code used when attempting to generate updated code.
   enum ResultOfAttemptToUpdate {
     kUpdateFailed,        // We tried making the code up to date, but
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 15a1aa4..c79a9a6 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -233,7 +233,7 @@
     EXPECT_TRUE(odex_file->HasPatchInfo());
     EXPECT_EQ(filter, odex_file->GetCompilerFilter());
 
-    if (CompilerFilter::IsCompilationEnabled(filter)) {
+    if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) {
       const std::vector<gc::space::ImageSpace*> image_spaces =
         runtime->GetHeap()->GetBootImageSpaces();
       ASSERT_TRUE(!image_spaces.empty() && image_spaces[0] != nullptr);
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 0af6716..fbae1da 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -183,11 +183,6 @@
     return dex_file_;
   }
 
-  void DeleteDexFile() {
-    delete dex_file_;
-    dex_file_ = nullptr;
-  }
-
  private:
   static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
     DCHECK(IsUint<16>(index));
@@ -206,36 +201,25 @@
 
 static void AddDexFilesFromOat(const OatFile* oat_file,
                                bool already_loaded,
-                               /*out*/std::priority_queue<DexFileAndClassPair>* heap) {
+                               /*out*/std::priority_queue<DexFileAndClassPair>* heap,
+                               std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
   for (const OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
     std::string error;
     std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
     if (dex_file == nullptr) {
       LOG(WARNING) << "Could not create dex file from oat file: " << error;
     } else if (dex_file->NumClassDefs() > 0U) {
-      heap->emplace(dex_file.release(), /*current_class_index*/0U, already_loaded);
+      heap->emplace(dex_file.get(), /*current_class_index*/0U, already_loaded);
+      opened_dex_files->push_back(std::move(dex_file));
     }
   }
 }
 
 static void AddNext(/*inout*/DexFileAndClassPair* original,
-                    /*inout*/std::priority_queue<DexFileAndClassPair>* heap,
-                    bool owning_dex_files) {
+                    /*inout*/std::priority_queue<DexFileAndClassPair>* heap) {
   if (original->DexFileHasMoreClasses()) {
     original->Next();
     heap->push(std::move(*original));
-  } else if (owning_dex_files) {
-    original->DeleteDexFile();
-  }
-}
-
-static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap,
-                               bool owning_dex_files) {
-  if (owning_dex_files) {
-    while (!heap->empty()) {
-      delete heap->top().GetDexFile();
-      heap->pop();
-    }
   }
 }
 
@@ -449,7 +433,6 @@
   DCHECK(error_msg != nullptr);
 
   std::priority_queue<DexFileAndClassPair> queue;
-  bool owning_dex_files = false;
 
   // Try to get dex files from the given class loader. If the class loader is null, or we do
   // not support one of the class loaders in the chain, conservatively compare against all
@@ -479,6 +462,9 @@
   // against the open oat files. Take the oat_file_manager_lock_ that protects oat_files_ accesses.
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
 
+  // Vector that holds the newly opened dex files live, this is done to prevent leaks.
+  std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+
   if (!class_loader_ok) {
     // Add dex files from already loaded oat files, but skip boot.
 
@@ -487,9 +473,6 @@
       queue.pop();
     }
 
-    // Anything we load now is something we own and must be released later.
-    owning_dex_files = true;
-
     std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
     // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
     // need to check both against each other since they would have resolved the same way at compile
@@ -502,7 +485,10 @@
           boot_oat_files.end() && location != oat_file->GetLocation() &&
           unique_locations.find(location) == unique_locations.end()) {
         unique_locations.insert(location);
-        AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue);
+        AddDexFilesFromOat(loaded_oat_file.get(),
+                           /*already_loaded*/true,
+                           &queue,
+                           /*out*/&opened_dex_files);
       }
     }
   }
@@ -511,12 +497,13 @@
   const std::string
       shared_libraries(oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kClassPathKey));
   if (AreSharedLibrariesOk(shared_libraries, queue)) {
-    FreeDexFilesInHeap(&queue, owning_dex_files);
     return false;
   }
 
+  ScopedTrace st("Collision check");
+
   // Add dex files from the oat file to check.
-  AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue);
+  AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue, &opened_dex_files);
 
   // Now drain the queue.
   while (!queue.empty()) {
@@ -536,17 +523,16 @@
                            compare_pop.GetCachedDescriptor(),
                            compare_pop.GetDexFile()->GetLocation().c_str(),
                            top.GetDexFile()->GetLocation().c_str());
-          FreeDexFilesInHeap(&queue, owning_dex_files);
           return true;
         }
         queue.pop();
-        AddNext(&top, &queue, owning_dex_files);
+        AddNext(&top, &queue);
       } else {
         // Something else. Done here.
         break;
       }
     }
-    AddNext(&compare_pop, &queue, owning_dex_files);
+    AddNext(&compare_pop, &queue);
   }
 
   return false;
@@ -586,23 +572,25 @@
 
   const OatFile* source_oat_file = nullptr;
 
-  // Update the oat file on disk if we can, based on the --compiler-filter
-  // option derived from the current runtime options.
-  // This may fail, but that's okay. Best effort is all that matters here.
-  switch (oat_file_assistant.MakeUpToDate(/*out*/ &error_msg)) {
-    case OatFileAssistant::kUpdateFailed:
-      LOG(WARNING) << error_msg;
-      break;
+  if (!oat_file_assistant.IsUpToDate()) {
+    // Update the oat file on disk if we can, based on the --compiler-filter
+    // option derived from the current runtime options.
+    // This may fail, but that's okay. Best effort is all that matters here.
+    switch (oat_file_assistant.MakeUpToDate(/*out*/ &error_msg)) {
+      case OatFileAssistant::kUpdateFailed:
+        LOG(WARNING) << error_msg;
+        break;
 
-    case OatFileAssistant::kUpdateNotAttempted:
-      // Avoid spamming the logs if we decided not to attempt making the oat
-      // file up to date.
-      VLOG(oat) << error_msg;
-      break;
+      case OatFileAssistant::kUpdateNotAttempted:
+        // Avoid spamming the logs if we decided not to attempt making the oat
+        // file up to date.
+        VLOG(oat) << error_msg;
+        break;
 
-    case OatFileAssistant::kUpdateSucceeded:
-      // Nothing to do.
-      break;
+      case OatFileAssistant::kUpdateSucceeded:
+        // Nothing to do.
+        break;
+    }
   }
 
   // Get the oat file on disk.
@@ -728,28 +716,6 @@
   return dex_files;
 }
 
-bool OatFileManager::RegisterOatFileLocation(const std::string& oat_location) {
-  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_count_lock_);
-  auto it = oat_file_count_.find(oat_location);
-  if (it != oat_file_count_.end()) {
-    ++it->second;
-    return false;
-  }
-  oat_file_count_.insert(std::pair<std::string, size_t>(oat_location, 1u));
-  return true;
-}
-
-void OatFileManager::UnRegisterOatFileLocation(const std::string& oat_location) {
-  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_count_lock_);
-  auto it = oat_file_count_.find(oat_location);
-  if (it != oat_file_count_.end()) {
-    --it->second;
-    if (it->second == 0) {
-      oat_file_count_.erase(it);
-    }
-  }
-}
-
 void OatFileManager::DumpForSigQuit(std::ostream& os) {
   ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
   std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
index a1d1275..45ac4b7 100644
--- a/runtime/oat_file_manager.h
+++ b/runtime/oat_file_manager.h
@@ -64,16 +64,6 @@
   const OatFile* FindOpenedOatFileFromDexLocation(const std::string& dex_base_location) const
       REQUIRES(!Locks::oat_file_manager_lock_);
 
-  // Attempt to reserve a location, returns false if it is already reserved or already in used by
-  // an oat file.
-  bool RegisterOatFileLocation(const std::string& oat_location)
-      REQUIRES(!Locks::oat_file_count_lock_);
-
-  // Unreserve oat file location, should only be used for error cases since RegisterOatFile will
-  // remove the reserved location.
-  void UnRegisterOatFileLocation(const std::string& oat_location)
-      REQUIRES(!Locks::oat_file_count_lock_);
-
   // Returns true if we have a non pic oat file.
   bool HaveNonPicOatFile() const {
     return have_non_pic_oat_file_;
@@ -132,7 +122,6 @@
       REQUIRES(Locks::oat_file_manager_lock_);
 
   std::set<std::unique_ptr<const OatFile>> oat_files_ GUARDED_BY(Locks::oat_file_manager_lock_);
-  std::unordered_map<std::string, size_t> oat_file_count_ GUARDED_BY(Locks::oat_file_count_lock_);
   bool have_non_pic_oat_file_;
 
   DISALLOW_COPY_AND_ASSIGN(OatFileManager);
diff --git a/runtime/openjdkjvm/OpenjdkJvm.cc b/runtime/openjdkjvm/OpenjdkJvm.cc
index ca5efe5..54ec5d3 100644
--- a/runtime/openjdkjvm/OpenjdkJvm.cc
+++ b/runtime/openjdkjvm/OpenjdkJvm.cc
@@ -58,11 +58,6 @@
 #include <sys/socket.h>
 #include <sys/ioctl.h>
 
-#ifdef ART_TARGET_ANDROID
-// This function is provided by android linker.
-extern "C" void android_update_LD_LIBRARY_PATH(const char* ld_library_path);
-#endif  // ART_TARGET_ANDROID
-
 #undef LOG_TAG
 #define LOG_TAG "artopenjdk"
 
@@ -74,11 +69,15 @@
 /* posix open() with extensions; used by e.g. ZipFile */
 JNIEXPORT jint JVM_Open(const char* fname, jint flags, jint mode) {
     /*
-     * The call is expected to handle JVM_O_DELETE, which causes the file
-     * to be removed after it is opened.  Also, some code seems to
-     * want the special return value JVM_EEXIST if the file open fails
-     * due to O_EXCL.
+     * Some code seems to want the special return value JVM_EEXIST if the
+     * file open fails due to O_EXCL.
      */
+    // Don't use JVM_O_DELETE, it's problematic with FUSE, see b/28901232.
+    if (flags & JVM_O_DELETE) {
+        LOG(FATAL) << "JVM_O_DELETE option is not supported (while opening: '"
+                   << fname << "')";
+    }
+
     int fd = TEMP_FAILURE_RETRY(open(fname, flags & ~JVM_O_DELETE, mode));
     if (fd < 0) {
         int err = errno;
@@ -89,12 +88,6 @@
         }
     }
 
-    if (flags & JVM_O_DELETE) {
-        if (unlink(fname) != 0) {
-            LOG(WARNING) << "Post-open deletion of '" << fname << "' failed: " << strerror(errno);
-        }
-    }
-
     return fd;
 }
 
@@ -324,22 +317,6 @@
   exit(status);
 }
 
-static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPath) {
-#ifdef ART_TARGET_ANDROID
-  if (javaLdLibraryPath != nullptr) {
-    ScopedUtfChars ldLibraryPath(env, javaLdLibraryPath);
-    if (ldLibraryPath.c_str() != nullptr) {
-      android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str());
-    }
-  }
-
-#else
-  LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!";
-  UNUSED(javaLdLibraryPath, env);
-#endif
-}
-
-
 JNIEXPORT jstring JVM_NativeLoad(JNIEnv* env,
                                  jstring javaFilename,
                                  jobject javaLoader,
@@ -349,17 +326,6 @@
     return NULL;
   }
 
-  int32_t target_sdk_version = art::Runtime::Current()->GetTargetSdkVersion();
-
-  // Starting with N nativeLoad uses classloader local
-  // linker namespace instead of global LD_LIBRARY_PATH
-  // (23 is Marshmallow). This call is here to preserve
-  // backwards compatibility for the apps targeting sdk
-  // version <= 23
-  if (target_sdk_version == 0) {
-    SetLdLibraryPath(env, javaLibrarySearchPath);
-  }
-
   std::string error_msg;
   {
     art::JavaVMExt* vm = art::Runtime::Current()->GetJavaVM();
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index a3e1f00..e9dd7aa 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -50,7 +50,8 @@
       handler_method_(nullptr),
       handler_dex_pc_(0),
       clear_exception_(false),
-      handler_frame_depth_(kInvalidFrameDepth) {}
+      handler_frame_depth_(kInvalidFrameDepth),
+      full_fragment_done_(false) {}
 
 // Finds catch handler.
 class CatchBlockStackVisitor FINAL : public StackVisitor {
@@ -290,7 +291,8 @@
         single_frame_deopt_(single_frame),
         single_frame_done_(false),
         single_frame_deopt_method_(nullptr),
-        single_frame_deopt_quick_method_header_(nullptr) {
+        single_frame_deopt_quick_method_header_(nullptr),
+        callee_method_(nullptr) {
   }
 
   ArtMethod* GetSingleFrameDeoptMethod() const {
@@ -301,23 +303,34 @@
     return single_frame_deopt_quick_method_header_;
   }
 
+  void FinishStackWalk() SHARED_REQUIRES(Locks::mutator_lock_) {
+    // This is the upcall, or the next full frame in single-frame deopt, or the
+    // code isn't deoptimizeable. We remember the frame and last pc so that we
+    // may long jump to them.
+    exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
+    exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
+    exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
+    if (!stacked_shadow_frame_pushed_) {
+      // In case there is no deoptimized shadow frame for this upcall, we still
+      // need to push a nullptr to the stack since there is always a matching pop after
+      // the long jump.
+      GetThread()->PushStackedShadowFrame(nullptr,
+                                          StackedShadowFrameType::kDeoptimizationShadowFrame);
+      stacked_shadow_frame_pushed_ = true;
+    }
+    if (GetMethod() == nullptr) {
+      exception_handler_->SetFullFragmentDone(true);
+    } else {
+      CHECK(callee_method_ != nullptr) << art::PrettyMethod(GetMethod(), false);
+      exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(callee_method_));
+    }
+  }
+
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     exception_handler_->SetHandlerFrameDepth(GetFrameDepth());
     ArtMethod* method = GetMethod();
     if (method == nullptr || single_frame_done_) {
-      // This is the upcall (or the next full frame in single-frame deopt), we remember the frame
-      // and last pc so that we may long jump to them.
-      exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc());
-      exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame());
-      exception_handler_->SetHandlerMethodHeader(GetCurrentOatQuickMethodHeader());
-      if (!stacked_shadow_frame_pushed_) {
-        // In case there is no deoptimized shadow frame for this upcall, we still
-        // need to push a nullptr to the stack since there is always a matching pop after
-        // the long jump.
-        GetThread()->PushStackedShadowFrame(nullptr,
-                                            StackedShadowFrameType::kDeoptimizationShadowFrame);
-        stacked_shadow_frame_pushed_ = true;
-      }
+      FinishStackWalk();
       return false;  // End stack walk.
     } else if (method->IsRuntimeMethod()) {
       // Ignore callee save method.
@@ -328,7 +341,14 @@
       // the native method.
       // The top method is a runtime method, the native method comes next.
       CHECK_EQ(GetFrameDepth(), 1U);
+      callee_method_ = method;
       return true;
+    } else if (!single_frame_deopt_ &&
+               !Runtime::Current()->IsDeoptimizeable(GetCurrentQuickFramePc())) {
+      // We hit some code that's not deoptimizeable. However, single-frame deoptimization triggered
+      // from compiled code is always allowed since HDeoptimize always saves the full environment.
+      FinishStackWalk();
+      return false;  // End stack walk.
     } else {
       // Check if a shadow frame already exists for debugger's set-local-value purpose.
       const size_t frame_id = GetFrameId();
@@ -356,20 +376,17 @@
         // right before interpreter::EnterInterpreterFromDeoptimize().
         stacked_shadow_frame_pushed_ = true;
         GetThread()->PushStackedShadowFrame(
-            new_frame,
-            single_frame_deopt_
-                ? StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame
-                : StackedShadowFrameType::kDeoptimizationShadowFrame);
+            new_frame, StackedShadowFrameType::kDeoptimizationShadowFrame);
       }
       prev_shadow_frame_ = new_frame;
 
       if (single_frame_deopt_ && !IsInInlinedFrame()) {
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
-        exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
         single_frame_deopt_method_ = method;
         single_frame_deopt_quick_method_header_ = GetCurrentOatQuickMethodHeader();
       }
+      callee_method_ = method;
       return true;
     }
   }
@@ -478,10 +495,30 @@
   bool single_frame_done_;
   ArtMethod* single_frame_deopt_method_;
   const OatQuickMethodHeader* single_frame_deopt_quick_method_header_;
+  ArtMethod* callee_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
 
+void QuickExceptionHandler::PrepareForLongJumpToInvokeStubOrInterpreterBridge() {
+  if (full_fragment_done_) {
+    // Restore deoptimization exception. When returning from the invoke stub,
+    // ArtMethod::Invoke() will see the special exception to know deoptimization
+    // is needed.
+    self_->SetException(Thread::GetDeoptimizationException());
+  } else {
+    // PC needs to be of the quick-to-interpreter bridge.
+    int32_t offset;
+    #ifdef __LP64__
+        offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
+    #else
+        offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
+    #endif
+    handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
+        reinterpret_cast<uint8_t*>(self_) + offset);
+  }
+}
+
 void QuickExceptionHandler::DeoptimizeStack() {
   DCHECK(is_deoptimization_);
   if (kDebugExceptionDelivery) {
@@ -490,9 +527,7 @@
 
   DeoptimizeStackVisitor visitor(self_, context_, this, false);
   visitor.WalkStack(true);
-
-  // Restore deoptimization exception
-  self_->SetException(Thread::GetDeoptimizationException());
+  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
 void QuickExceptionHandler::DeoptimizeSingleFrame() {
@@ -518,20 +553,21 @@
         deopt_method, GetQuickToInterpreterBridge());
   }
 
-  // PC needs to be of the quick-to-interpreter bridge.
-  int32_t offset;
-  #ifdef __LP64__
-      offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value();
-  #else
-      offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value();
-  #endif
-  handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(self_) + offset);
+  PrepareForLongJumpToInvokeStubOrInterpreterBridge();
 }
 
-void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() {
-  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
+void QuickExceptionHandler::DeoptimizePartialFragmentFixup(uintptr_t return_pc) {
+  // At this point, the instrumentation stack has been updated. We need to install
+  // the real return pc on stack, in case instrumentation stub is stored there,
+  // so that the interpreter bridge code can return to the right place.
+  if (return_pc != 0) {
+    uintptr_t* pc_addr = reinterpret_cast<uintptr_t*>(handler_quick_frame_);
+    CHECK(pc_addr != nullptr);
+    pc_addr--;
+    *reinterpret_cast<uintptr_t*>(pc_addr) = return_pc;
+  }
 
+  // Architecture-dependent work. This is to get the LR right for x86 and x86-64.
   if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) {
     // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to
     // change how longjump works.
@@ -581,7 +617,8 @@
   DISALLOW_COPY_AND_ASSIGN(InstrumentationStackVisitor);
 };
 
-void QuickExceptionHandler::UpdateInstrumentationStack() {
+uintptr_t QuickExceptionHandler::UpdateInstrumentationStack() {
+  uintptr_t return_pc = 0;
   if (method_tracing_active_) {
     InstrumentationStackVisitor visitor(self_, handler_frame_depth_);
     visitor.WalkStack(true);
@@ -589,9 +626,10 @@
     size_t instrumentation_frames_to_pop = visitor.GetInstrumentationFramesToPop();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     for (size_t i = 0; i < instrumentation_frames_to_pop; ++i) {
-      instrumentation->PopMethodForUnwind(self_, is_deoptimization_);
+      return_pc = instrumentation->PopMethodForUnwind(self_, is_deoptimization_);
     }
   }
+  return return_pc;
 }
 
 void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) {
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index eedf83f..74b7d0d 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -46,15 +46,29 @@
   // Find the catch handler for the given exception.
   void FindCatch(mirror::Throwable* exception) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy"
-  // shadow frame that will be executed with the interpreter.
+  // Deoptimize the stack to the upcall/some code that's not deoptimizeable. For
+  // every compiled frame, we create a "copy" shadow frame that will be executed
+  // with the interpreter.
   void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Deoptimize a single frame. It's directly triggered from compiled code. It
+  // has the following properties:
+  // - It deoptimizes a single frame, which can include multiple inlined frames.
+  // - It doesn't have return result or pending exception at the deoptimization point.
+  // - It always deoptimizes, even if IsDeoptimizeable() returns false for the
+  //   code, since HDeoptimize always saves the full environment. So it overrides
+  //   the result of IsDeoptimizeable().
+  // - It can be either full-fragment, or partial-fragment deoptimization, depending
+  //   on whether that single frame covers full or partial fragment.
   void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_);
-  void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void DeoptimizePartialFragmentFixup(uintptr_t return_pc)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Update the instrumentation stack by removing all methods that will be unwound
   // by the exception being thrown.
-  void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  // Return the return pc of the last frame that's unwound.
+  uintptr_t UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Set up environment before delivering an exception to optimized code.
   void SetCatchEnvironmentForOptimizedHandler(StackVisitor* stack_visitor)
@@ -103,8 +117,16 @@
     handler_frame_depth_ = frame_depth;
   }
 
+  bool IsFullFragmentDone() const {
+    return full_fragment_done_;
+  }
+
+  void SetFullFragmentDone(bool full_fragment_done) {
+    full_fragment_done_ = full_fragment_done;
+  }
+
   // Walk the stack frames of the given thread, printing out non-runtime methods with their types
-  // of frames. Helps to verify that single-frame deopt really only deopted one frame.
+  // of frames. Helps to verify that partial-fragment deopt really works as expected.
   static void DumpFramesWithType(Thread* self, bool details = false)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -131,6 +153,13 @@
   bool clear_exception_;
   // Frame depth of the catch handler or the upcall.
   size_t handler_frame_depth_;
+  // Does the handler successfully walk the full fragment (not stopped
+  // by some code that's not deoptimizeable)? Even single-frame deoptimization
+  // can set this to true if the fragment contains only one quick frame.
+  bool full_fragment_done_;
+
+  void PrepareForLongJumpToInvokeStubOrInterpreterBridge()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(QuickExceptionHandler);
 };
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 63976d0..caf5545 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1972,6 +1972,11 @@
   return verify_ == verifier::VerifyMode::kSoftFail;
 }
 
+bool Runtime::IsDeoptimizeable(uintptr_t code) const
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return !heap_->IsInBootImageOatFile(reinterpret_cast<void *>(code));
+}
+
 LinearAlloc* Runtime::CreateLinearAlloc() {
   // For 64 bit compilers, it needs to be in low 4GB in the case where we are cross compiling for a
   // 32 bit target. In this case, we have 32 bit pointers in the dex cache arrays which can't hold
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 1394462..b7f377d 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -648,6 +648,10 @@
     return zygote_no_threads_;
   }
 
+  // Returns whether the code can be deoptimized. Code may be compiled with some
+  // optimization that makes it impossible to deoptimize.
+  bool IsDeoptimizeable(uintptr_t code) const SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   static void InitPlatformSignalHandlers();
 
diff --git a/runtime/runtime_options.h b/runtime/runtime_options.h
index 4610f6f..ab69d4f 100644
--- a/runtime/runtime_options.h
+++ b/runtime/runtime_options.h
@@ -73,7 +73,7 @@
     using Key = RuntimeArgumentMapKey<TValue>;
 
     // List of key declarations, shorthand for 'static const Key<T> Name'
-#define RUNTIME_OPTIONS_KEY(Type, Name, ...) static const Key<Type> Name;
+#define RUNTIME_OPTIONS_KEY(Type, Name, ...) static const Key<Type> (Name);
 #include "runtime_options.def"
   };
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 4248944..f1f4a12 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -55,6 +55,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "monitor.h"
+#include "nth_caller_visitor.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
 #include "quick_exception_handler.h"
@@ -84,6 +85,8 @@
 
 namespace art {
 
+extern "C" NO_RETURN void artDeoptimize(Thread* self);
+
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
@@ -270,7 +273,6 @@
   StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record;
   if (must_be_present) {
     DCHECK(record != nullptr);
-    DCHECK_EQ(record->GetType(), type);
   } else {
     if (record == nullptr || record->GetType() != type) {
       return nullptr;
@@ -2411,8 +2413,8 @@
 template<size_t ptr_size>
 void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) {
 #define DO_THREAD_OFFSET(x, y) \
-    if (offset == x.Uint32Value()) { \
-      os << y; \
+    if (offset == (x).Uint32Value()) { \
+      os << (y); \
       return; \
     }
   DO_THREAD_OFFSET(ThreadFlagsOffset<ptr_size>(), "state_and_flags")
@@ -2583,38 +2585,42 @@
   // Get exception from thread.
   mirror::Throwable* exception = GetException();
   CHECK(exception != nullptr);
-  bool is_deoptimization = (exception == GetDeoptimizationException());
-  if (!is_deoptimization) {
-    // This is a real exception: let the instrumentation know about it.
-    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-    if (instrumentation->HasExceptionCaughtListeners() &&
-        IsExceptionThrownByCurrentMethod(exception)) {
-      // Instrumentation may cause GC so keep the exception object safe.
-      StackHandleScope<1> hs(this);
-      HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
-      instrumentation->ExceptionCaughtEvent(this, exception);
-    }
-    // Does instrumentation need to deoptimize the stack?
-    // Note: we do this *after* reporting the exception to instrumentation in case it
-    // now requires deoptimization. It may happen if a debugger is attached and requests
-    // new events (single-step, breakpoint, ...) when the exception is reported.
-    is_deoptimization = Dbg::IsForcedInterpreterNeededForException(this);
-    if (is_deoptimization) {
+  if (exception == GetDeoptimizationException()) {
+    artDeoptimize(this);
+    UNREACHABLE();
+  }
+
+  // This is a real exception: let the instrumentation know about it.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->HasExceptionCaughtListeners() &&
+      IsExceptionThrownByCurrentMethod(exception)) {
+    // Instrumentation may cause GC so keep the exception object safe.
+    StackHandleScope<1> hs(this);
+    HandleWrapper<mirror::Throwable> h_exception(hs.NewHandleWrapper(&exception));
+    instrumentation->ExceptionCaughtEvent(this, exception);
+  }
+  // Does instrumentation need to deoptimize the stack?
+  // Note: we do this *after* reporting the exception to instrumentation in case it
+  // now requires deoptimization. It may happen if a debugger is attached and requests
+  // new events (single-step, breakpoint, ...) when the exception is reported.
+  if (Dbg::IsForcedInterpreterNeededForException(this)) {
+    NthCallerVisitor visitor(this, 0, false);
+    visitor.WalkStack();
+    if (Runtime::Current()->IsDeoptimizeable(visitor.caller_pc)) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
       PushDeoptimizationContext(
           JValue(), /*is_reference */ false, /* from_code */ false, exception);
+      artDeoptimize(this);
+      UNREACHABLE();
     }
   }
+
   // Don't leave exception visible while we try to find the handler, which may cause class
   // resolution.
   ClearException();
-  QuickExceptionHandler exception_handler(this, is_deoptimization);
-  if (is_deoptimization) {
-    exception_handler.DeoptimizeStack();
-  } else {
-    exception_handler.FindCatch(exception);
-  }
+  QuickExceptionHandler exception_handler(this, false);
+  exception_handler.FindCatch(exception);
   exception_handler.UpdateInstrumentationStack();
   exception_handler.DoLongJump();
 }
@@ -3024,7 +3030,6 @@
   mirror::Throwable* pending_exception = nullptr;
   bool from_code = false;
   PopDeoptimizationContext(result, &pending_exception, &from_code);
-  CHECK(!from_code) << "Deoptimizing from code should be done with single frame deoptimization";
   SetTopOfStack(nullptr);
   SetTopOfShadowStack(shadow_frame);
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 582a0cd..3c367ee 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -110,7 +110,6 @@
 enum class StackedShadowFrameType {
   kShadowFrameUnderConstruction,
   kDeoptimizationShadowFrame,
-  kSingleFrameDeoptimizationShadowFrame
 };
 
 // This should match RosAlloc::kNumThreadLocalSizeBrackets.
@@ -1119,7 +1118,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Avoid use, callers should use SetState. Used only by SignalCatcher::HandleSigQuit, ~Thread and
-  // Dbg::Disconnected.
+  // Dbg::ManageDeoptimization.
   ThreadState SetStateUnsafe(ThreadState new_state) {
     ThreadState old_state = GetState();
     if (old_state == kRunnable && new_state != kRunnable) {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 2b96328..f2ae85a 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -4528,7 +4528,7 @@
 
 ArtField* MethodVerifier::GetInstanceField(const RegType& obj_type, int field_idx) {
   const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
-  // Check access to class
+  // Check access to class.
   const RegType& klass_type = ResolveClassAndCheckAccess(field_id.class_idx_);
   if (klass_type.IsConflict()) {
     AppendToLastFailMessage(StringPrintf(" in attempt to access instance field %d (%s) in %s",
@@ -4549,20 +4549,11 @@
     DCHECK(self_->IsExceptionPending());
     self_->ClearException();
     return nullptr;
-  } else if (!GetDeclaringClass().CanAccessMember(field->GetDeclaringClass(),
-                                                  field->GetAccessFlags())) {
-    Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access instance field " << PrettyField(field)
-                                    << " from " << GetDeclaringClass();
-    return nullptr;
-  } else if (field->IsStatic()) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field)
-                                    << " to not be static";
-    return nullptr;
   } else if (obj_type.IsZero()) {
-    // Cannot infer and check type, however, access will cause null pointer exception
-    return field;
+    // Cannot infer and check type, however, access will cause null pointer exception.
+    // Fall through into a few last soft failure checks below.
   } else if (!obj_type.IsReferenceTypes()) {
-    // Trying to read a field from something that isn't a reference
+    // Trying to read a field from something that isn't a reference.
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "instance field access on object that has "
                                       << "non-reference type " << obj_type;
     return nullptr;
@@ -4584,7 +4575,6 @@
                                           << " of " << PrettyMethod(dex_method_idx_, *dex_file_);
         return nullptr;
       }
-      return field;
     } else if (!field_klass.IsAssignableFrom(obj_type)) {
       // Trying to access C1.field1 using reference of type C2, which is neither C1 or a sub-class
       // of C1. For resolution to occur the declared class of the field must be compatible with
@@ -4602,10 +4592,22 @@
       Fail(type) << "cannot access instance field " << PrettyField(field)
                  << " from object of type " << obj_type;
       return nullptr;
-    } else {
-      return field;
     }
   }
+
+  // Few last soft failure checks.
+  if (!GetDeclaringClass().CanAccessMember(field->GetDeclaringClass(),
+                                           field->GetAccessFlags())) {
+    Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access instance field " << PrettyField(field)
+                                    << " from " << GetDeclaringClass();
+    return nullptr;
+  } else if (field->IsStatic()) {
+    Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field)
+                                    << " to not be static";
+    return nullptr;
+  }
+
+  return field;
 }
 
 template <MethodVerifier::FieldAccessType kAccType>
@@ -4652,7 +4654,7 @@
       if (field->IsFinal() && field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
         Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot modify final field " << PrettyField(field)
                                         << " from other class " << GetDeclaringClass();
-        return;
+        // Keep hunting for possible hard fails.
       }
     }
 
@@ -4928,6 +4930,10 @@
       // Initialize them as conflicts so they don't add to GC and deoptimization information.
       const Instruction* ret_inst = Instruction::At(code_item_->insns_ + next_insn);
       AdjustReturnLine(this, ret_inst, target_line);
+      // Directly bail if a hard failure was found.
+      if (have_pending_hard_failure_) {
+        return false;
+      }
     }
   } else {
     RegisterLineArenaUniquePtr copy;
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index d288943..355d552 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -41,6 +41,9 @@
 jclass WellKnownClasses::java_lang_ClassNotFoundException;
 jclass WellKnownClasses::java_lang_Daemons;
 jclass WellKnownClasses::java_lang_Error;
+jclass WellKnownClasses::java_lang_ExceptionInInitializerError;
+jclass WellKnownClasses::java_lang_IllegalAccessError;
+jclass WellKnownClasses::java_lang_NoClassDefFoundError;
 jclass WellKnownClasses::java_lang_Object;
 jclass WellKnownClasses::java_lang_OutOfMemoryError;
 jclass WellKnownClasses::java_lang_reflect_AbstractMethod;
@@ -228,6 +231,9 @@
   java_lang_Object = CacheClass(env, "java/lang/Object");
   java_lang_OutOfMemoryError = CacheClass(env, "java/lang/OutOfMemoryError");
   java_lang_Error = CacheClass(env, "java/lang/Error");
+  java_lang_ExceptionInInitializerError = CacheClass(env, "java/lang/ExceptionInInitializerError");
+  java_lang_IllegalAccessError = CacheClass(env, "java/lang/IllegalAccessError");
+  java_lang_NoClassDefFoundError = CacheClass(env, "java/lang/NoClassDefFoundError");
   java_lang_reflect_AbstractMethod = CacheClass(env, "java/lang/reflect/AbstractMethod");
   java_lang_reflect_Constructor = CacheClass(env, "java/lang/reflect/Constructor");
   java_lang_reflect_Field = CacheClass(env, "java/lang/reflect/Field");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index 482ff0a..cc60b4d 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -52,6 +52,9 @@
   static jclass java_lang_ClassNotFoundException;
   static jclass java_lang_Daemons;
   static jclass java_lang_Error;
+  static jclass java_lang_ExceptionInInitializerError;
+  static jclass java_lang_IllegalAccessError;
+  static jclass java_lang_NoClassDefFoundError;
   static jclass java_lang_Object;
   static jclass java_lang_OutOfMemoryError;
   static jclass java_lang_reflect_AbstractMethod;
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index d36d43e..51254b4 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -159,7 +159,23 @@
         System.out.println("");
     }
 
-
+    public static void testVisibilityCompatibility() throws Exception {
+        if (!VMRuntime.isAndroid()) {
+            return;
+        }
+        Object runtime = VMRuntime.getRuntime();
+        int currentSdkVersion = VMRuntime.getTargetSdkVersion(runtime);
+        // SDK version 23 is M.
+        int oldSdkVersion = 23;
+        VMRuntime.setTargetSdkVersion(runtime, oldSdkVersion);
+        // This annotation has CLASS retention, but is visible to the runtime in M and earlier.
+        Annotation anno = SimplyNoted.class.getAnnotation(AnnoSimpleTypeInvis.class);
+        if (anno == null) {
+            System.out.println("testVisibilityCompatibility failed: " +
+                    "SimplyNoted.get(AnnoSimpleTypeInvis) should not be null");
+        }
+        VMRuntime.setTargetSdkVersion(runtime, currentSdkVersion);
+    }
 
     public static void main(String[] args) {
         System.out.println("TestAnnotations...");
@@ -229,5 +245,55 @@
         } catch (NoSuchFieldError expected) {
             System.out.println("Got expected NoSuchFieldError");
         }
+
+        // Test if annotations marked VISIBILITY_BUILD are visible to runtime in M and earlier.
+        try {
+            testVisibilityCompatibility();
+        } catch (Exception e) {
+            System.out.println("testVisibilityCompatibility failed: " + e);
+        }
+    }
+
+    private static class VMRuntime {
+        private static Class vmRuntimeClass;
+        private static Method getRuntimeMethod;
+        private static Method getTargetSdkVersionMethod;
+        private static Method setTargetSdkVersionMethod;
+        static {
+            init();
+        }
+
+        private static void init() {
+            try {
+                vmRuntimeClass = Class.forName("dalvik.system.VMRuntime");
+            } catch (Exception e) {
+                return;
+            }
+            try {
+                getRuntimeMethod = vmRuntimeClass.getDeclaredMethod("getRuntime");
+                getTargetSdkVersionMethod =
+                        vmRuntimeClass.getDeclaredMethod("getTargetSdkVersion");
+                setTargetSdkVersionMethod =
+                        vmRuntimeClass.getDeclaredMethod("setTargetSdkVersion", Integer.TYPE);
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        public static boolean isAndroid() {
+            return vmRuntimeClass != null;
+        }
+
+        public static Object getRuntime() throws Exception {
+            return getRuntimeMethod.invoke(null);
+        }
+
+        public static int getTargetSdkVersion(Object runtime) throws Exception {
+            return (int) getTargetSdkVersionMethod.invoke(runtime);
+        }
+
+        public static void setTargetSdkVersion(Object runtime, int version) throws Exception {
+            setTargetSdkVersionMethod.invoke(runtime, version);
+        }
     }
 }
diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc
index 0dab400..c6a2e9a 100644
--- a/test/117-nopatchoat/nopatchoat.cc
+++ b/test/117-nopatchoat/nopatchoat.cc
@@ -55,7 +55,7 @@
 
     const OatFile* oat_file = oat_dex_file->GetOatFile();
     return !oat_file->IsPic()
-        && CompilerFilter::IsCompilationEnabled(oat_file->GetCompilerFilter());
+        && CompilerFilter::IsBytecodeCompilationEnabled(oat_file->GetCompilerFilter());
   }
 };
 
diff --git a/test/142-classloader2/smali/B.smali b/test/142-classloader2/smali/B.smali
new file mode 100644
index 0000000..01bd593
--- /dev/null
+++ b/test/142-classloader2/smali/B.smali
@@ -0,0 +1,10 @@
+.class public LB;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+  .registers 1
+  invoke-direct {p1}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
diff --git a/test/142-classloader2/src/Main.java b/test/142-classloader2/src/Main.java
index 86c61eb..89dadce 100644
--- a/test/142-classloader2/src/Main.java
+++ b/test/142-classloader2/src/Main.java
@@ -71,6 +71,21 @@
             throw new IllegalStateException("Expected Ex-A, found " + exValue);
         }
 
+        // Try to load a dex file with bad dex code. Use new instance to force verification.
+        try {
+          Class<?> badClass = Main.class.getClassLoader().loadClass("B");
+          badClass.newInstance();
+          System.out.println("Should not be able to load class from bad dex file.");
+        } catch (VerifyError e) {
+        }
+
+        // Make sure the same error is rethrown when reloading the bad class.
+        try {
+          Class<?> badClass = Main.class.getClassLoader().loadClass("B");
+          System.out.println("Should not be able to load class from bad dex file.");
+        } catch (VerifyError e) {
+        }
+
         System.out.println("Everything OK.");
     }
 }
diff --git a/test/149-suspend-all-stress/expected.txt b/test/149-suspend-all-stress/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/149-suspend-all-stress/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/149-suspend-all-stress/info.txt b/test/149-suspend-all-stress/info.txt
new file mode 100644
index 0000000..29b414c
--- /dev/null
+++ b/test/149-suspend-all-stress/info.txt
@@ -0,0 +1 @@
+Stress test for multiple threads calling SuspendAll
diff --git a/test/149-suspend-all-stress/src/Main.java b/test/149-suspend-all-stress/src/Main.java
new file mode 100644
index 0000000..6a27c4b
--- /dev/null
+++ b/test/149-suspend-all-stress/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+public class Main implements Runnable {
+    static final int numberOfThreads = 8;
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+        final Thread[] threads = new Thread[numberOfThreads];
+        for (int t = 0; t < threads.length; t++) {
+            threads[t] = new Thread(new Main());
+            threads[t].start();
+        }
+        for (Thread t : threads) {
+            t.join();
+        }
+        System.out.println("Finishing");
+    }
+
+    public void run() {
+        suspendAndResume();
+    }
+
+    private static native void suspendAndResume();
+}
diff --git a/test/149-suspend-all-stress/suspend_all.cc b/test/149-suspend-all-stress/suspend_all.cc
new file mode 100644
index 0000000..c22ddad
--- /dev/null
+++ b/test/149-suspend-all-stress/suspend_all.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+#include "runtime.h"
+#include "thread_list.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_suspendAndResume(JNIEnv*, jclass) {
+  usleep(100 * 1000);  // Leave some time for threads to get in here before we start suspending.
+  for (size_t i = 0; i < 500; ++i) {
+    Runtime::Current()->GetThreadList()->SuspendAll(__FUNCTION__);
+    usleep(500);
+    Runtime::Current()->GetThreadList()->ResumeAll();
+  }
+}
+
+}  // namespace art
diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java
index 872fa6d..5a36ba5 100644
--- a/test/529-checker-unresolved/src/Main.java
+++ b/test/529-checker-unresolved/src/Main.java
@@ -114,6 +114,33 @@
     expectEquals(o, c.instanceObject);
   }
 
+  /// CHECK-START: void Main.callUnresolvedNull(UnresolvedClass) register (before)
+  /// CHECK-NOT: NullCheck
+  static public void callUnresolvedNull(UnresolvedClass c) {
+    int x = 0;
+    try {
+      x = c.instanceInt;
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-1, x);
+    try {
+      c.instanceInt = -1;
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-2, x);
+    try {
+      c.virtualMethod();
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      x -= 1;
+    }
+    expectEquals(-3, x);
+  }
+
   static public void testInstanceOf(Object o) {
     if (o instanceof UnresolvedSuperClass) {
       System.out.println("instanceof ok");
@@ -136,6 +163,7 @@
     callInvokeUnresolvedSuper(m);
     callUnresolvedStaticFieldAccess();
     callUnresolvedInstanceFieldAccess(c);
+    callUnresolvedNull(null);
     testInstanceOf(m);
     testCheckCast(m);
     testLicm(2);
@@ -185,7 +213,7 @@
     }
   }
 
-    public static void expectEquals(float expected, float result) {
+  public static void expectEquals(float expected, float result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 15a9504..24ed2fe 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -107,8 +107,28 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeVirtual {{.*\.equals.*}}
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
   /// CHECK-NOT:      test
+
+  /// CHECK-START-X86_64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      test
+
+  /// CHECK-START-ARM: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  // CompareAndBranchIfZero() may emit either CBZ or CMP+BEQ.
+  /// CHECK-NOT:      cbz
+  /// CHECK-NOT:      cmp {{r\d+}}, #0
+  // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
+  // whichever comes first.
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-ARM64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      cbz
+  // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
+  // whichever comes first.
+  /// CHECK:          cmp {{w.*,}} {{w.*}}
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
     return "foo".equals(obj);
@@ -116,12 +136,53 @@
 
   // Test is very brittle as it depends on the order we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeVirtual
-  /// CHECK:      test
-  /// CHECK:      jz/eq
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          test
+  /// CHECK:          jz/eq
   // Check that we don't try to compare the classes.
-  /// CHECK-NOT:  mov
-  /// CHECK:      cmp
+  /// CHECK-NOT:      mov
+  /// CHECK:          cmp
+
+  // Test is very brittle as it depends on the order we emit instructions.
+  /// CHECK-START-X86_64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          test
+  /// CHECK:          jz/eq
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:      mov
+  /// CHECK:          cmp
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-ARM: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          {{cbz|cmp}}
+  // Check that we don't try to compare the classes.
+  // The disassembler currently emits the offset 0 explicitly, but don't rely on it.
+  // We want to terminate the CHECK-NOT search after two CMPs, one for reference
+  // equality and one for length comparison but these may be emitted in different order,
+  // so repeat the check twice.
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}]
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}, #0]
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}]
+  /// CHECK-NOT:      ldr{{(|.w)}} {{r\d+}}, [{{r\d+}}, #0]
+  /// CHECK:          cmp {{r\d+}}, {{r\d+}}
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-ARM64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          cbz
+  // Check that we don't try to compare the classes.
+  // The disassembler currently does not emit the offset 0 explicitly, but don't rely on it.
+  // We want to terminate the CHECK-NOT search after two CMPs, one for reference
+  // equality and one for length comparison but these may be emitted in different order,
+  // so repeat the check twice.
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
+  /// CHECK:          cmp {{w\d+}}, {{w\d+}}
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
+  /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
+  /// CHECK:          cmp {{w\d+}}, {{w\d+}}
   public static boolean stringArgumentIsString() {
     return "foo".equals(myString);
   }
diff --git a/test/562-bce-preheader/src/Main.java b/test/562-bce-preheader/src/Main.java
index 8b527b4..4397f67 100644
--- a/test/562-bce-preheader/src/Main.java
+++ b/test/562-bce-preheader/src/Main.java
@@ -90,6 +90,25 @@
     return a;
   }
 
+  /**
+   * Example shows that we can hoist ArrayGet to pre-header only if
+   * its execution is guaranteed.
+   */
+  public static int hoistcheck(int[] c) {
+    int i = 0, i2 = 0, i3 = 0, k = 0;
+    int n = c.length;
+    for (i = -100000000; i < 20; i += 10000000) {
+      i3 = i;
+      i2 = 0;
+      while (i2++ < 1) {
+        if (i3 >= 0 && i3 < n) {
+          k += c[i3];
+        }
+      }
+    }
+    return k;
+  }
+
   public static void main(String args[]) {
     int[][] x = new int[2][2];
     int y;
@@ -119,6 +138,9 @@
     int[] z = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     expectEquals(10, doit(z));
 
+    int c[] = { 1, 2, 3, 5 };
+    expectEquals(1, hoistcheck(c));
+
     System.out.println("passed");
   }
 
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index 2fa5800..cf413ba 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -136,7 +136,7 @@
     if (m_name.compare(method_name_) == 0) {
       while (jit->GetCodeCache()->LookupOsrMethodHeader(m) == nullptr) {
         // Sleep to yield to the compiler thread.
-        sleep(0);
+        usleep(1000);
         // Will either ensure it's compiled or do the compilation itself.
         jit->CompileMethod(m, Thread::Current(), /* osr */ true);
       }
diff --git a/test/600-verifier-fails/expected.txt b/test/600-verifier-fails/expected.txt
new file mode 100644
index 0000000..8399969
--- /dev/null
+++ b/test/600-verifier-fails/expected.txt
@@ -0,0 +1,4 @@
+passed A
+passed B
+passed C
+passed D
diff --git a/test/600-verifier-fails/info.txt b/test/600-verifier-fails/info.txt
new file mode 100644
index 0000000..df2396e
--- /dev/null
+++ b/test/600-verifier-fails/info.txt
@@ -0,0 +1,20 @@
+The situations in these tests were discovered by running the mutating
+dexfuzz on the DEX files of randomly generated Java tests.
+
+(A) b/28908555:
+    soft verification failure (on the final field modification) should
+    not hide the hard verification failure (on the type mismatch) to
+    avoid compiler crash later on
+(B) b/29070461:
+    hard verification failure (not calling super in constructor) should
+    bail immediately and not allow soft verification failures to pile up
+    behind it to avoid fatal message later on
+(C) b/29068831:
+    access validation on field should occur prior to null reference check
+(D) b/29126870:
+    soft verification failure (cannot access) should not hide the hard
+    verification failure (non-reference type) to avoid a compiler crash
+    later on
+(E) b/29068831:
+    access validation on method should occur prior to null reference check
+
diff --git a/test/600-verifier-fails/smali/construct.smali b/test/600-verifier-fails/smali/construct.smali
new file mode 100644
index 0000000..417ced9
--- /dev/null
+++ b/test/600-verifier-fails/smali/construct.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    if-eqz v0, :bail
+    invoke-direct {v0}, LB;->append(Ljava/lang/String;)V
+:bail
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/iget.smali b/test/600-verifier-fails/smali/iget.smali
new file mode 100644
index 0000000..5c045e6
--- /dev/null
+++ b/test/600-verifier-fails/smali/iget.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LD;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 2
+    iget v1, v0, LMain;->privateField:I
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/invoke.smali b/test/600-verifier-fails/smali/invoke.smali
new file mode 100644
index 0000000..616d63c
--- /dev/null
+++ b/test/600-verifier-fails/smali/invoke.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LE;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 0
+    invoke-virtual {v0}, LMain;->privateMethod()V
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/iput.smali b/test/600-verifier-fails/smali/iput.smali
new file mode 100644
index 0000000..bd8b928
--- /dev/null
+++ b/test/600-verifier-fails/smali/iput.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LC;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 0
+    iput-object v0, v0, LMain;->staticPrivateField:Ljava/lang/String;
+    return-void
+.end method
diff --git a/test/600-verifier-fails/smali/sput.smali b/test/600-verifier-fails/smali/sput.smali
new file mode 100644
index 0000000..e8e56ac
--- /dev/null
+++ b/test/600-verifier-fails/smali/sput.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LA;
+.super Ljava/lang/Object;
+
+.method public foo(I)V
+.registers 2
+    sput v1, LMain;->staticFinalField:Ljava/lang/String;
+    return-void
+.end method
diff --git a/test/600-verifier-fails/src/Main.java b/test/600-verifier-fails/src/Main.java
new file mode 100644
index 0000000..64c3d5c
--- /dev/null
+++ b/test/600-verifier-fails/src/Main.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static final String staticFinalField = null;
+
+  private static String staticPrivateField = null;
+
+  private int privateField = 0;
+
+  private void privateMethod() { }
+
+  private static void test(String name) throws Exception {
+    try {
+      Class<?> a = Class.forName(name);
+      a.newInstance();
+    } catch (java.lang.LinkageError e) {
+      System.out.println("passed " + name);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    test("A");
+    test("B");
+    test("C");
+    test("D");
+    // TODO: enable again
+    // test("E");
+  }
+}
diff --git a/test/601-method-access/expected.txt b/test/601-method-access/expected.txt
new file mode 100644
index 0000000..90fbab8
--- /dev/null
+++ b/test/601-method-access/expected.txt
@@ -0,0 +1 @@
+Got expected failure
diff --git a/test/601-method-access/info.txt b/test/601-method-access/info.txt
new file mode 100644
index 0000000..e38a336
--- /dev/null
+++ b/test/601-method-access/info.txt
@@ -0,0 +1 @@
+Regression test for method access checks.
diff --git a/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali b/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali
new file mode 100644
index 0000000..7a896a2
--- /dev/null
+++ b/test/601-method-access/smali/SubClassUsingInaccessibleMethod.smali
@@ -0,0 +1,33 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LSubClassUsingInaccessibleMethod;
+
+.super Lother/PublicClass;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Lother/PublicClass;-><init>()V
+    return-void
+.end method
+
+# Regression test for compiler DCHECK() failure (bogus check) when referencing
+# a package-private method from an indirectly inherited package-private class,
+# using this very class as the declaring class in the MethodId, bug: 28771056.
+.method public test()I
+    .registers 2
+    invoke-virtual {p0}, LSubClassUsingInaccessibleMethod;->otherProtectedClassPackageIntInstanceMethod()I
+    move-result v0
+    return v0
+.end method
diff --git a/test/601-method-access/src/Main.java b/test/601-method-access/src/Main.java
new file mode 100644
index 0000000..838080a
--- /dev/null
+++ b/test/601-method-access/src/Main.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+
+/*
+ * Test method access through reflection.
+ */
+public class Main {
+  public static void main(String[] args) {
+    try {
+      Class c = Class.forName("SubClassUsingInaccessibleMethod");
+      Object o = c.newInstance();
+      c.getMethod("test").invoke(o, null);
+    } catch (InvocationTargetException ite) {
+      if (ite.getCause() instanceof IllegalAccessError) {
+        System.out.println("Got expected failure");
+      } else {
+        System.out.println("Got unexpected failure " + ite.getCause());
+      }
+    } catch (Exception e) {
+      System.out.println("Got unexpected failure " + e);
+    }
+  }
+}
diff --git a/test/601-method-access/src/other/ProtectedClass.java b/test/601-method-access/src/other/ProtectedClass.java
new file mode 100644
index 0000000..9426884
--- /dev/null
+++ b/test/601-method-access/src/other/ProtectedClass.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+// Class that cannot be accessed outside of this package.
+class ProtectedClass {
+ /* package */ int otherProtectedClassPackageIntInstanceMethod() {
+   return 28;
+ }
+}
diff --git a/test/601-method-access/src/other/PublicClass.java b/test/601-method-access/src/other/PublicClass.java
new file mode 100644
index 0000000..d9f7961
--- /dev/null
+++ b/test/601-method-access/src/other/PublicClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+// Class that makes the ProtectedClass sub-classable by classes outside of package other.
+public class PublicClass extends ProtectedClass {
+}
diff --git a/test/601-verifier-fails/expected.txt b/test/601-verifier-fails/expected.txt
new file mode 100644
index 0000000..8399969
--- /dev/null
+++ b/test/601-verifier-fails/expected.txt
@@ -0,0 +1,4 @@
+passed A
+passed B
+passed C
+passed D
diff --git a/test/601-verifier-fails/info.txt b/test/601-verifier-fails/info.txt
new file mode 100644
index 0000000..f77de05
--- /dev/null
+++ b/test/601-verifier-fails/info.txt
@@ -0,0 +1,18 @@
+The situations in these tests were discovered by running the mutating
+dexfuzz on the DEX files of randomly generated Java tests.
+
+(A) b/28908555:
+    soft verification failure (on the final field modification) should
+    not hide the hard verification failure (on the type mismatch) to
+    avoid compiler crash later on
+(B) b/29070461:
+    hard verification failure (not calling super in constructor) should
+    bail immediately and not allow soft verification failures to pile up
+    behind it to avoid fatal message later on
+(C) b/29068831:
+    access validation should occur prior to null reference check
+(D) b/29126870:
+    soft verification failure (cannot access) should not hide the hard
+    verification failure (non-reference type) to avoid a compiler crash
+    later on
+
diff --git a/test/601-verifier-fails/smali/construct.smali b/test/601-verifier-fails/smali/construct.smali
new file mode 100644
index 0000000..417ced9
--- /dev/null
+++ b/test/601-verifier-fails/smali/construct.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    if-eqz v0, :bail
+    invoke-direct {v0}, LB;->append(Ljava/lang/String;)V
+:bail
+    return-void
+.end method
diff --git a/test/601-verifier-fails/smali/iget.smali b/test/601-verifier-fails/smali/iget.smali
new file mode 100644
index 0000000..5c045e6
--- /dev/null
+++ b/test/601-verifier-fails/smali/iget.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LD;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 2
+    iget v1, v0, LMain;->privateField:I
+    return-void
+.end method
diff --git a/test/601-verifier-fails/smali/iput.smali b/test/601-verifier-fails/smali/iput.smali
new file mode 100644
index 0000000..bd8b928
--- /dev/null
+++ b/test/601-verifier-fails/smali/iput.smali
@@ -0,0 +1,25 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LC;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {v1}, Ljava/lang/Object;-><init>()V
+    const v0, 0
+    iput-object v0, v0, LMain;->staticPrivateField:Ljava/lang/String;
+    return-void
+.end method
diff --git a/test/601-verifier-fails/smali/sput.smali b/test/601-verifier-fails/smali/sput.smali
new file mode 100644
index 0000000..e8e56ac
--- /dev/null
+++ b/test/601-verifier-fails/smali/sput.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LA;
+.super Ljava/lang/Object;
+
+.method public foo(I)V
+.registers 2
+    sput v1, LMain;->staticFinalField:Ljava/lang/String;
+    return-void
+.end method
diff --git a/test/601-verifier-fails/src/Main.java b/test/601-verifier-fails/src/Main.java
new file mode 100644
index 0000000..a6a07fd
--- /dev/null
+++ b/test/601-verifier-fails/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static final String staticFinalField = null;
+
+  private static String staticPrivateField = null;
+
+  private int privateField = 0;
+
+  private static void test(String name) throws Exception {
+    try {
+      Class<?> a = Class.forName(name);
+      a.newInstance();
+    } catch (java.lang.LinkageError e) {
+      System.out.println("passed " + name);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    test("A");
+    test("B");
+    test("C");
+    test("D");
+  }
+}
diff --git a/test/602-deoptimizeable/expected.txt b/test/602-deoptimizeable/expected.txt
new file mode 100644
index 0000000..f993efc
--- /dev/null
+++ b/test/602-deoptimizeable/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Finishing
diff --git a/test/602-deoptimizeable/info.txt b/test/602-deoptimizeable/info.txt
new file mode 100644
index 0000000..d0952f9
--- /dev/null
+++ b/test/602-deoptimizeable/info.txt
@@ -0,0 +1 @@
+Test various cases for full/partial-fragment deoptimization.
diff --git a/test/602-deoptimizeable/src/Main.java b/test/602-deoptimizeable/src/Main.java
new file mode 100644
index 0000000..743a579
--- /dev/null
+++ b/test/602-deoptimizeable/src/Main.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+
+class DummyObject {
+    public static boolean sHashCodeInvoked = false;
+    private int i;
+
+    public DummyObject(int i) {
+        this.i = i;
+    }
+
+    public boolean equals(Object obj) {
+        return (obj instanceof DummyObject) && (i == ((DummyObject)obj).i);
+    }
+
+    public int hashCode() {
+        sHashCodeInvoked = true;
+        Main.assertIsManaged();
+        Main.deoptimizeAll();
+        Main.assertIsInterpreted();
+        Main.assertCallerIsManaged();  // Caller is from framework code HashMap.
+        return i % 64;
+    }
+}
+
+public class Main {
+    static boolean sFlag = false;
+
+    public static native void deoptimizeAll();
+    public static native void undeoptimizeAll();
+    public static native void assertIsInterpreted();
+    public static native void assertIsManaged();
+    public static native void assertCallerIsInterpreted();
+    public static native void assertCallerIsManaged();
+    public static native void disableStackFrameAsserts();
+    public static native boolean hasOatFile();
+    public static native boolean isInterpreted();
+
+    public static void execute(Runnable runnable) throws Exception {
+      Thread t = new Thread(runnable);
+      t.start();
+      t.join();
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);
+        // Only test stack frames in compiled mode.
+        if (!hasOatFile() || isInterpreted()) {
+          disableStackFrameAsserts();
+        }
+        final HashMap<DummyObject, Long> map = new HashMap<DummyObject, Long>();
+
+        // Single-frame deoptimization that covers partial fragment.
+        execute(new Runnable() {
+            public void run() {
+                int[] arr = new int[3];
+                assertIsManaged();
+                int res = $noinline$run1(arr);
+                assertIsManaged();  // Only single frame is deoptimized.
+                if (res != 79) {
+                    System.out.println("Failure 1!");
+                    System.exit(0);
+                }
+            }
+        });
+
+        // Single-frame deoptimization that covers a full fragment.
+        execute(new Runnable() {
+            public void run() {
+                try {
+                    int[] arr = new int[3];
+                    assertIsManaged();
+                    // Use reflection to call $noinline$run2 so that it does
+                    // full-fragment deoptimization since that is an upcall.
+                    Class<?> cls = Class.forName("Main");
+                    Method method = cls.getDeclaredMethod("$noinline$run2", int[].class);
+                    double res = (double)method.invoke(Main.class, arr);
+                    assertIsManaged();  // Only single frame is deoptimized.
+                    if (res != 79.3d) {
+                        System.out.println("Failure 2!");
+                        System.exit(0);
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        // Full-fragment deoptimization.
+        execute(new Runnable() {
+            public void run() {
+                assertIsManaged();
+                float res = $noinline$run3B();
+                assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
+                if (res != 0.034f) {
+                    System.out.println("Failure 3!");
+                    System.exit(0);
+                }
+            }
+        });
+
+        undeoptimizeAll();  // Make compiled code usable again.
+
+        // Partial-fragment deoptimization.
+        execute(new Runnable() {
+            public void run() {
+                try {
+                    assertIsManaged();
+                    map.put(new DummyObject(10), Long.valueOf(100));
+                    assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+            }
+        });
+
+        undeoptimizeAll();  // Make compiled code usable again.
+
+        if (!DummyObject.sHashCodeInvoked) {
+            System.out.println("hashCode() method not invoked!");
+        }
+        if (map.get(new DummyObject(10)) != 100) {
+            System.out.println("Wrong hashmap value!");
+        }
+        System.out.println("Finishing");
+    }
+
+    public static int $noinline$run1(int[] arr) {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        boolean caught = false;
+        // BCE will use deoptimization for the code below.
+        try {
+            arr[0] = 1;
+            arr[1] = 1;
+            arr[2] = 1;
+            // This causes AIOOBE and triggers deoptimization from compiled code.
+            arr[3] = 1;
+        } catch (ArrayIndexOutOfBoundsException e) {
+            assertIsInterpreted(); // Single-frame deoptimization triggered.
+            caught = true;
+        }
+        if (!caught) {
+            System.out.println("Expected exception");
+        }
+        assertIsInterpreted();
+        return 79;
+    }
+
+    public static double $noinline$run2(int[] arr) {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        boolean caught = false;
+        // BCE will use deoptimization for the code below.
+        try {
+            arr[0] = 1;
+            arr[1] = 1;
+            arr[2] = 1;
+            // This causes AIOOBE and triggers deoptimization from compiled code.
+            arr[3] = 1;
+        } catch (ArrayIndexOutOfBoundsException e) {
+            assertIsInterpreted();  // Single-frame deoptimization triggered.
+            caught = true;
+        }
+        if (!caught) {
+            System.out.println("Expected exception");
+        }
+        assertIsInterpreted();
+        return 79.3d;
+    }
+
+    public static float $noinline$run3A() {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        // Deoptimize callers.
+        deoptimizeAll();
+        assertIsInterpreted();
+        assertCallerIsInterpreted();  // $noinline$run3B is deoptimizeable.
+        return 0.034f;
+    }
+
+    public static float $noinline$run3B() {
+        assertIsManaged();
+        // Prevent inlining.
+        if (sFlag) {
+            throw new Error();
+        }
+        float res = $noinline$run3A();
+        assertIsInterpreted();
+        return res;
+    }
+}
diff --git a/test/603-checker-instanceof/expected.txt b/test/603-checker-instanceof/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/603-checker-instanceof/expected.txt
diff --git a/test/603-checker-instanceof/info.txt b/test/603-checker-instanceof/info.txt
new file mode 100644
index 0000000..5907abc
--- /dev/null
+++ b/test/603-checker-instanceof/info.txt
@@ -0,0 +1,2 @@
+Regression test for the compiler that used to wrongly optimize
+an instanceof.
diff --git a/test/603-checker-instanceof/src/Main.java b/test/603-checker-instanceof/src/Main.java
new file mode 100644
index 0000000..ddf4b92
--- /dev/null
+++ b/test/603-checker-instanceof/src/Main.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class SuperClass {
+}
+
+class ChildClass extends SuperClass {
+}
+
+public class Main {
+
+  /// CHECK-START:    void Main.main(java.lang.String[]) builder (after)
+  /// CHECK:          BoundType  klass:SuperClass can_be_null:false exact:false
+
+  /// CHECK-START:    void Main.main(java.lang.String[]) builder (after)
+  /// CHECK-NOT:      BoundType  klass:SuperClass can_be_null:false exact:true
+  public static void main(String[] args) {
+    Object obj = new ChildClass();
+
+    // We need a fixed point iteration to hit the bogus type update
+    // of 'obj' below, so create a loop that updates the type of 'obj'.
+    for (int i = 1; i < 1; i++) {
+      obj = new Object();
+    }
+
+    if (obj instanceof SuperClass) {
+      // We used to wrongly type obj as an exact SuperClass from this point,
+      // meaning we were statically determining that the following instanceof
+      // would always fail.
+      if (!(obj instanceof ChildClass)) {
+        throw new Error("Expected a ChildClass, got " + obj.getClass());
+      }
+    }
+  }
+}
diff --git a/test/604-hot-static-interface/build b/test/604-hot-static-interface/build
new file mode 100755
index 0000000..1ca2daf
--- /dev/null
+++ b/test/604-hot-static-interface/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental default-methods
diff --git a/test/604-hot-static-interface/expected.txt b/test/604-hot-static-interface/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/604-hot-static-interface/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/604-hot-static-interface/hot_static_interface.cc b/test/604-hot-static-interface/hot_static_interface.cc
new file mode 100644
index 0000000..475a11d
--- /dev/null
+++ b/test/604-hot-static-interface/hot_static_interface.cc
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_method.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "jit/profiling_info.h"
+#include "oat_quick_method_header.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedUtfChars.h"
+#include "stack_map.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_waitUntilJitted(JNIEnv* env,
+                                                            jclass,
+                                                            jclass itf,
+                                                            jstring method_name) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return;
+  }
+
+  ScopedObjectAccess soa(Thread::Current());
+
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+
+  mirror::Class* klass = soa.Decode<mirror::Class*>(itf);
+  ArtMethod* method = klass->FindDeclaredDirectMethodByName(chars.c_str(), sizeof(void*));
+
+  jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  OatQuickMethodHeader* header = nullptr;
+  // Make sure there is a profiling info, required by the compiler.
+  ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
+  while (true) {
+    header = OatQuickMethodHeader::FromEntryPoint(method->GetEntryPointFromQuickCompiledCode());
+    if (code_cache->ContainsPc(header->GetCode())) {
+      break;
+    } else {
+      // Sleep to yield to the compiler thread.
+      usleep(1000);
+      // Will either ensure it's compiled or do the compilation itself.
+      jit->CompileMethod(method, soa.Self(), /* osr */ false);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/test/604-hot-static-interface/info.txt b/test/604-hot-static-interface/info.txt
new file mode 100644
index 0000000..bc00bda
--- /dev/null
+++ b/test/604-hot-static-interface/info.txt
@@ -0,0 +1,2 @@
+Regression test for the JIT that used to crash when compiling
+a static method of an interface.
diff --git a/test/604-hot-static-interface/src/Main.java b/test/604-hot-static-interface/src/Main.java
new file mode 100644
index 0000000..559f15d
--- /dev/null
+++ b/test/604-hot-static-interface/src/Main.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    // Loop enough to get Itf.foo JITed.
+    for (int i = 0; i < 100000; i++) {
+      Itf.foo(new Object());
+    }
+
+    waitUntilJitted(Itf.class, "foo");
+
+    if (!Itf.foo(new Object())) {
+      throw new Error("Unexpected result");
+    }
+  }
+
+  private static native void waitUntilJitted(Class itf, String method_name);
+}
+
+interface Itf {
+  public static boolean foo(Object o) {
+    return o.equals(o);
+  }
+}
diff --git a/test/605-new-string-from-bytes/expected.txt b/test/605-new-string-from-bytes/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/605-new-string-from-bytes/expected.txt
diff --git a/test/605-new-string-from-bytes/info.txt b/test/605-new-string-from-bytes/info.txt
new file mode 100644
index 0000000..be02c43
--- /dev/null
+++ b/test/605-new-string-from-bytes/info.txt
@@ -0,0 +1,2 @@
+Regression test for the newStringFromBytes entrypoint,
+which used to wrongly set up the stack.
diff --git a/test/605-new-string-from-bytes/src/Main.java b/test/605-new-string-from-bytes/src/Main.java
new file mode 100644
index 0000000..7dc0c15
--- /dev/null
+++ b/test/605-new-string-from-bytes/src/Main.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) throws Exception {
+    Class c = Class.forName("java.lang.StringFactory");
+    Method m = c.getDeclaredMethod("newStringFromBytes", byte[].class, int.class);
+
+    // Loop over allocations to get more chances of doing GC while in the
+    // newStringFromBytes intrinsic.
+    for (int i = 0; i < 10; i++) {
+      try {
+        byte[] f = new byte[100000000];
+        f[0] = (byte)i;
+        f[1] = (byte)i;
+        m.invoke(null, f, 0);
+      } catch (InvocationTargetException e) {
+        if (e.getCause() instanceof OutOfMemoryError) {
+          // Ignore, this is a stress test.
+        } else {
+          throw e;
+        }
+      } catch (OutOfMemoryError e) {
+        // Ignore, this is a stress test.
+      }
+    }
+  }
+}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index 97204d3..feee7c2 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -35,6 +35,7 @@
   139-register-natives/regnative.cc \
   141-class-unload/jni_unload.cc \
   148-multithread-gc-annotations/gc_coverage.cc \
+  149-suspend-all-stress/suspend_all.cc \
   454-get-vreg/get_vreg_jni.cc \
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
@@ -45,7 +46,8 @@
   570-checker-osr/osr.cc \
   595-profile-saving/profile-saving.cc \
   596-app-images/app_images.cc \
-  597-deopt-new-string/deopt.cc
+  597-deopt-new-string/deopt.cc \
+  604-hot-static-interface/hot_static_interface.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index 922eae6..85ea1c8 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -37,17 +37,20 @@
   asserts_enabled = false;
 }
 
-
-// public static native boolean isInterpreted();
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass) {
+static jboolean IsInterpreted(JNIEnv* env, jclass, size_t level) {
   ScopedObjectAccess soa(env);
-  NthCallerVisitor caller(soa.Self(), 1, false);
+  NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_TRUE : JNI_FALSE;
 }
 
+// public static native boolean isInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass klass) {
+  return IsInterpreted(env, klass, 1);
+}
+
 // public static native void assertIsInterpreted();
 
 extern "C" JNIEXPORT void JNICALL Java_Main_assertIsInterpreted(JNIEnv* env, jclass klass) {
@@ -56,10 +59,7 @@
   }
 }
 
-
-// public static native boolean isManaged();
-
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
+static jboolean IsManaged(JNIEnv* env, jclass cls, size_t level) {
   ScopedObjectAccess soa(env);
 
   mirror::Class* klass = soa.Decode<mirror::Class*>(cls);
@@ -71,13 +71,19 @@
     return JNI_FALSE;
   }
 
-  NthCallerVisitor caller(soa.Self(), 1, false);
+  NthCallerVisitor caller(soa.Self(), level, false);
   caller.WalkStack();
   CHECK(caller.caller != nullptr);
 
   return caller.GetCurrentShadowFrame() != nullptr ? JNI_FALSE : JNI_TRUE;
 }
 
+// public static native boolean isManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) {
+  return IsManaged(env, cls, 1);
+}
+
 // public static native void assertIsManaged();
 
 extern "C" JNIEXPORT void JNICALL Java_Main_assertIsManaged(JNIEnv* env, jclass cls) {
@@ -86,4 +92,32 @@
   }
 }
 
+// public static native boolean isCallerInterpreted();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerInterpreted(JNIEnv* env, jclass klass) {
+  return IsInterpreted(env, klass, 2);
+}
+
+// public static native void assertCallerIsInterpreted();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsInterpreted(JNIEnv* env, jclass klass) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isCallerInterpreted(env, klass));
+  }
+}
+
+// public static native boolean isCallerManaged();
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isCallerManaged(JNIEnv* env, jclass cls) {
+  return IsManaged(env, cls, 2);
+}
+
+// public static native void assertCallerIsManaged();
+
+extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, jclass cls) {
+  if (asserts_enabled) {
+    CHECK(Java_Main_isCallerManaged(env, cls));
+  }
+}
+
 }  // namespace art
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index aa45d40..64bf4f3 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -42,9 +42,9 @@
 TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
 # Value in seconds
 if [ "$ART_USE_READ_BARRIER" = "true" ]; then
-  TIME_OUT_VALUE=900  # 15 minutes.
+  TIME_OUT_VALUE=1800  # 30 minutes.
 else
-  TIME_OUT_VALUE=600  # 10 minutes.
+  TIME_OUT_VALUE=1200  # 20 minutes.
 fi
 USE_GDB="n"
 USE_JVM="n"
@@ -199,6 +199,10 @@
         shift
         INSTRUCTION_SET_FEATURES="$1"
         shift
+    elif [ "x$1" = "x--timeout" ]; then
+        shift
+        TIME_OUT_VALUE="$1"
+        shift
     elif [ "x$1" = "x--" ]; then
         shift
         break
@@ -380,14 +384,14 @@
 fi
 
 dex2oat_cmdline="true"
-mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
+mkdir_locations="${DEX_LOCATION}/dalvik-cache/$ISA"
 strip_cmdline="true"
 
 # Pick a base that will force the app image to get relocated.
 app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art"
 
 if [ "$PREBUILD" = "y" ]; then
-  mkdir_cmdline="${mkdir_cmdline} && mkdir -p ${DEX_LOCATION}/oat/$ISA"
+  mkdir_locations="${mkdir_locations} ${DEX_LOCATION}/oat/$ISA"
   dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \
                       $COMPILE_FLAGS \
                       --boot-image=${BOOT_IMAGE} \
@@ -483,7 +487,7 @@
              export ANDROID_ADDITIONAL_PUBLIC_LIBRARIES=$PUBLIC_LIBS && \
              export DEX_LOCATION=$DEX_LOCATION && \
              export ANDROID_ROOT=$ANDROID_ROOT && \
-             $mkdir_cmdline && \
+             mkdir -p ${mkdir_locations} && \
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
              export PATH=$ANDROID_ROOT/bin:$PATH && \
              $dex2oat_cmdline && \
@@ -558,13 +562,13 @@
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
-      echo "$mkdir_cmdline && $dex2oat_cmdline && $strip_cmdline && $cmdline"
+      echo "mkdir -p ${mkdir_locations} && $dex2oat_cmdline && $strip_cmdline && $cmdline"
     fi
 
     cd $ANDROID_BUILD_TOP
 
     rm -rf ${DEX_LOCATION}/dalvik-cache/
-    $mkdir_cmdline || exit 1
+    mkdir -p ${mkdir_locations} || exit 1
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
     $strip_cmdline || { echo "Strip failed." >&2 ; exit 3; }
 
diff --git a/test/run-test b/test/run-test
index 2710ea3..3ae063a 100755
--- a/test/run-test
+++ b/test/run-test
@@ -241,7 +241,7 @@
         shift
     elif [ "x$1" = "x--strace" ]; then
         strace="yes"
-        run_args="${run_args} --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
+        run_args="${run_args} --timeout 1800 --invoke-with strace --invoke-with -o --invoke-with $tmp_dir/$strace_output"
         shift
     elif [ "x$1" = "x--zygote" ]; then
         run_args="${run_args} --zygote"
@@ -908,4 +908,8 @@
 
 ) 2>&${real_stderr} 1>&2
 
-exit 1
+if [ "$never_clean" = "yes" ] && [ "$good" = "yes" ]; then
+  exit 0
+else
+  exit 1
+fi
diff --git a/test/valgrind-target-suppressions.txt b/test/valgrind-target-suppressions.txt
new file mode 100644
index 0000000..16bb8fd
--- /dev/null
+++ b/test/valgrind-target-suppressions.txt
@@ -0,0 +1,31 @@
+# Valgrind does not recognize the ashmem ioctl() calls on ARM64, so it assumes that a size
+# parameter is a pointer.
+{
+   ashmem ioctl
+   Memcheck:Param
+   ioctl(generic)
+   ...
+   fun:ioctl
+   fun:ashmem_create_region
+}
+
+# It seems that on ARM64 Valgrind considers the canary value used by the Clang stack protector to
+# be an uninitialized value.
+{
+   jemalloc chunk_alloc_cache
+   Memcheck:Cond
+   fun:je_chunk_alloc_cache
+}
+
+# The VectorImpl class does not hold a pointer to the allocated SharedBuffer structure, but to the
+# beginning of the data, which is effectively an interior pointer. Valgrind has limitations when
+# dealing with interior pointers.
+{
+   VectorImpl
+   Memcheck:Leak
+   match-leak-kinds:possible
+   fun:malloc
+   # The wildcards make this rule work both for 32-bit and 64-bit environments.
+   fun:_ZN7android12SharedBuffer5allocE?
+   fun:_ZN7android10VectorImpl5_growE??
+}
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Device.java b/tools/dexfuzz/src/dexfuzz/executors/Device.java
index 4a53957..45538fe 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Device.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Device.java
@@ -68,7 +68,13 @@
     return envVars.get(key);
   }
 
-  private String getHostCoreImagePath() {
+  private String getHostCoreImagePathWithArch() {
+    // TODO: Using host currently implies x86 (see Options.java), change this when generalized.
+    assert(Options.useArchX86);
+    return androidHostOut + "/framework/x86/core.art";
+  }
+
+  private String getHostCoreImagePathNoArch() {
     return androidHostOut + "/framework/core.art";
   }
 
@@ -80,7 +86,7 @@
     androidHostOut = checkForEnvVar(envVars, "ANDROID_HOST_OUT");
 
     if (Options.executeOnHost) {
-      File coreImage = new File(getHostCoreImagePath());
+      File coreImage = new File(getHostCoreImagePathWithArch());
       if (!coreImage.exists()) {
         Log.errorAndQuit("Host core image not found at " + coreImage.getPath()
             + ". Did you forget to build it?");
@@ -156,7 +162,7 @@
    * Get any extra flags required to execute ART on the host.
    */
   public String getHostExecutionFlags() {
-    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePath());
+    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePathNoArch());
   }
 
   public String getAndroidHostOut() {
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index bdcf86d..f25fb98 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -248,11 +248,6 @@
   names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
 },
 {
-  description: "Investigate whether the test is making a wrong assumption with the newly enforced classpath.",
-  result: EXEC_FAILED,
-  names: ["dalvik.system.DexClassLoaderTest#testDexThenPathClassLoader"]
-},
-{
   description: "Made for extending, shouldn't be run",
   result: EXEC_FAILED,
   names: ["jsr166.CollectionTest#testEmptyMeansEmpty",