Merge "ahat: Target Java 1.7."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 7ada749..3b7b1e6 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -81,6 +81,7 @@
 	optimizing/x86_memory_gen.cc \
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
+	utils/jni_macro_assembler.cc \
 	utils/swap_space.cc \
 	compiler.cc \
 	elf_writer.cc \
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index f20dba3..a522e0c 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -44,7 +44,8 @@
       init_failure_output_(nullptr),
       dump_cfg_file_name_(""),
       dump_cfg_append_(false),
-      force_determinism_(false) {
+      force_determinism_(false),
+      register_allocation_strategy_(RegisterAllocator::kRegisterAllocatorDefault) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -74,7 +75,8 @@
                                  bool abort_on_hard_verifier_failure,
                                  const std::string& dump_cfg_file_name,
                                  bool dump_cfg_append,
-                                 bool force_determinism
+                                 bool force_determinism,
+                                 RegisterAllocator::Strategy regalloc_strategy
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -99,7 +101,8 @@
     init_failure_output_(init_failure_output),
     dump_cfg_file_name_(dump_cfg_file_name),
     dump_cfg_append_(dump_cfg_append),
-    force_determinism_(force_determinism) {
+    force_determinism_(force_determinism),
+    register_allocation_strategy_(regalloc_strategy) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -144,6 +147,19 @@
   }
 }
 
+void CompilerOptions::ParseRegisterAllocationStrategy(const StringPiece& option,
+                                                      UsageFn Usage) {
+  DCHECK(option.starts_with("--register-allocation-strategy="));
+  StringPiece choice = option.substr(strlen("--register-allocation-strategy=")).data();
+  if (choice == "linear-scan") {
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan;
+  } else if (choice == "graph-color") {
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorGraphColor;
+  } else {
+    Usage("Unrecognized register allocation strategy. Try linear-scan, or graph-color.");
+  }
+}
+
 bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usage) {
   if (option.starts_with("--compiler-filter=")) {
     const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data();
@@ -190,6 +206,8 @@
     dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
   } else if (option.starts_with("--dump-cfg-append")) {
     dump_cfg_append_ = true;
+  } else if (option.starts_with("--register-allocation-strategy=")) {
+    ParseRegisterAllocationStrategy(option, Usage);
   } else {
     // Option not recognized.
     return false;
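
For reference, a minimal, self-contained sketch of the option-parsing pattern used by the new ParseRegisterAllocationStrategy above; Strategy, ParseStrategy and the std::string handling are simplified stand-ins for ART's RegisterAllocator::Strategy, StringPiece and UsageFn, not the actual types:

    #include <iostream>
    #include <string>

    // Simplified stand-in for RegisterAllocator::Strategy.
    enum class Strategy { kLinearScan, kGraphColor };

    // Same shape as CompilerOptions::ParseRegisterAllocationStrategy: strip the
    // "--register-allocation-strategy=" prefix, then map the remaining value
    // onto an enum and reject anything else.
    bool ParseStrategy(const std::string& option, Strategy* out) {
      static const std::string kPrefix = "--register-allocation-strategy=";
      if (option.compare(0, kPrefix.size(), kPrefix) != 0) {
        return false;  // Not this option; let other parsers handle it.
      }
      const std::string choice = option.substr(kPrefix.size());
      if (choice == "linear-scan") {
        *out = Strategy::kLinearScan;
      } else if (choice == "graph-color") {
        *out = Strategy::kGraphColor;
      } else {
        std::cerr << "Unrecognized register allocation strategy: " << choice << "\n";
        return false;
      }
      return true;
    }

    int main() {
      Strategy strategy = Strategy::kLinearScan;
      if (ParseStrategy("--register-allocation-strategy=graph-color", &strategy)) {
        std::cout << (strategy == Strategy::kGraphColor ? "graph-color" : "linear-scan") << "\n";
      }
      return 0;
    }
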
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 60b700a..cc66d7a 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -24,6 +24,7 @@
 #include "base/macros.h"
 #include "compiler_filter.h"
 #include "globals.h"
+#include "optimizing/register_allocator.h"
 #include "utils.h"
 
 namespace art {
@@ -74,7 +75,8 @@
                   bool abort_on_hard_verifier_failure,
                   const std::string& dump_cfg_file_name,
                   bool dump_cfg_append,
-                  bool force_determinism);
+                  bool force_determinism,
+                  RegisterAllocator::Strategy regalloc_strategy);
 
   CompilerFilter::Filter GetCompilerFilter() const {
     return compiler_filter_;
@@ -244,6 +246,10 @@
     return force_determinism_;
   }
 
+  RegisterAllocator::Strategy GetRegisterAllocationStrategy() const {
+    return register_allocation_strategy_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
@@ -254,6 +260,7 @@
   void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage);
   void ParseLargeMethodMax(const StringPiece& option, UsageFn Usage);
   void ParseHugeMethodMax(const StringPiece& option, UsageFn Usage);
+  void ParseRegisterAllocationStrategy(const StringPiece& option, UsageFn Usage);
 
   CompilerFilter::Filter compiler_filter_;
   size_t huge_method_threshold_;
@@ -297,6 +304,8 @@
   // outcomes.
   bool force_determinism_;
 
+  RegisterAllocator::Strategy register_allocation_strategy_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 1785338..2dd87a8 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -32,6 +32,7 @@
 #include "oat_file-inl.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
+#include "optimizing/register_allocator.h"
 #include "thread_list.h"
 
 namespace art {
@@ -110,7 +111,8 @@
       /* abort_on_hard_verifier_failure */ false,
       /* dump_cfg_file_name */ "",
       /* dump_cfg_append */ false,
-      /* force_determinism */ false));
+      /* force_determinism */ false,
+      RegisterAllocator::kRegisterAllocatorDefault));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 3526802..524ce4d 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -19,10 +19,12 @@
 
 #include "arch/instruction_set.h"
 #include "base/arena_allocator.h"
+#include "base/enums.h"
 #include "cfi_test.h"
 #include "gtest/gtest.h"
 #include "jni/quick/calling_convention.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 #include "jni/jni_cfi_test_expected.inc"
 
@@ -36,9 +38,23 @@
   // Enable this flag to generate the expected outputs.
   static constexpr bool kGenerateExpected = false;
 
-  void TestImpl(InstructionSet isa, const char* isa_str,
+  void TestImpl(InstructionSet isa,
+                const char* isa_str,
                 const std::vector<uint8_t>& expected_asm,
                 const std::vector<uint8_t>& expected_cfi) {
+    if (Is64BitInstructionSet(isa)) {
+      TestImplSized<PointerSize::k64>(isa, isa_str, expected_asm, expected_cfi);
+    } else {
+      TestImplSized<PointerSize::k32>(isa, isa_str, expected_asm, expected_cfi);
+    }
+  }
+
+ private:
+  template <PointerSize kPointerSize>
+  void TestImplSized(InstructionSet isa,
+                     const char* isa_str,
+                     const std::vector<uint8_t>& expected_asm,
+                     const std::vector<uint8_t>& expected_cfi) {
     // Description of simple method.
     const bool is_static = true;
     const bool is_synchronized = false;
@@ -55,7 +71,8 @@
     ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
 
     // Assemble the method.
-    std::unique_ptr<Assembler> jni_asm(Assembler::Create(&arena, isa));
+    std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm(
+        JNIMacroAssembler<kPointerSize>::Create(&arena, isa));
     jni_asm->cfi().SetEnabled(true);
     jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
                         callee_save_regs, mr_conv->EntrySpills());
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 277b794..f99f6a8 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -26,6 +26,7 @@
 #include "base/enums.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "memory_region.h"
 #include "calling_convention.h"
 #include "class_linker.h"
 #include "compiled_method.h"
@@ -34,7 +35,9 @@
 #include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "jni_env_ext.h"
+#include "debug/dwarf/debug_frame_opcode_writer.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/managed_register.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/arm64/managed_register_arm64.h"
@@ -47,22 +50,32 @@
 
 namespace art {
 
-static void CopyParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
                           size_t frame_size, size_t out_arg_size);
-static void SetNativeParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                                JniCallingConvention* jni_conv,
                                ManagedRegister in_reg);
 
+template <PointerSize kPointerSize>
+static std::unique_ptr<JNIMacroAssembler<kPointerSize>> GetMacroAssembler(
+    ArenaAllocator* arena, InstructionSet isa, const InstructionSetFeatures* features) {
+  return JNIMacroAssembler<kPointerSize>::Create(arena, isa, features);
+}
+
 // Generate the JNI bridge for the given method, general contract:
 // - Arguments are in the managed runtime format, either on stack or in
 //   registers, a reference to the method object is supplied as part of this
 //   convention.
 //
-CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
-                                            uint32_t access_flags, uint32_t method_idx,
-                                            const DexFile& dex_file) {
+template <PointerSize kPointerSize>
+static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
+                                                   uint32_t access_flags,
+                                                   uint32_t method_idx,
+                                                   const DexFile& dex_file) {
   const bool is_native = (access_flags & kAccNative) != 0;
   CHECK(is_native);
   const bool is_static = (access_flags & kAccStatic) != 0;
@@ -70,7 +83,6 @@
   const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
   InstructionSet instruction_set = driver->GetInstructionSet();
   const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
-  const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
 
   ArenaPool pool;
   ArenaAllocator arena(&pool);
@@ -101,8 +113,8 @@
       &arena, is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  std::unique_ptr<Assembler> jni_asm(
-      Assembler::Create(&arena, instruction_set, instruction_set_features));
+  std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
+      GetMacroAssembler<kPointerSize>(&arena, instruction_set, instruction_set_features);
   jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   // Offsets into data structures
@@ -124,21 +136,12 @@
                            main_jni_conv->ReferenceCount(),
                            mr_conv->InterproceduralScratchRegister());
 
-  if (is_64_bit_target) {
-    __ CopyRawPtrFromThread64(main_jni_conv->HandleScopeLinkOffset(),
-                              Thread::TopHandleScopeOffset<PointerSize::k64>(),
+  __ CopyRawPtrFromThread(main_jni_conv->HandleScopeLinkOffset(),
+                          Thread::TopHandleScopeOffset<kPointerSize>(),
+                          mr_conv->InterproceduralScratchRegister());
+  __ StoreStackOffsetToThread(Thread::TopHandleScopeOffset<kPointerSize>(),
+                              main_jni_conv->HandleScopeOffset(),
                               mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread64(Thread::TopHandleScopeOffset<PointerSize::k64>(),
-                                  main_jni_conv->HandleScopeOffset(),
-                                  mr_conv->InterproceduralScratchRegister());
-  } else {
-    __ CopyRawPtrFromThread32(main_jni_conv->HandleScopeLinkOffset(),
-                              Thread::TopHandleScopeOffset<PointerSize::k32>(),
-                              mr_conv->InterproceduralScratchRegister());
-    __ StoreStackOffsetToThread32(Thread::TopHandleScopeOffset<PointerSize::k32>(),
-                                  main_jni_conv->HandleScopeOffset(),
-                                  mr_conv->InterproceduralScratchRegister());
-  }
 
   // 3. Place incoming reference arguments into handle scope
   main_jni_conv->Next();  // Skip JNIEnv*
@@ -188,11 +191,7 @@
   }
 
   // 4. Write out the end of the quick frames.
-  if (is_64_bit_target) {
-    __ StoreStackPointerToThread64(Thread::TopOfManagedStackOffset<PointerSize::k64>());
-  } else {
-    __ StoreStackPointerToThread32(Thread::TopOfManagedStackOffset<PointerSize::k32>());
-  }
+  __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
 
   // 5. Move frame down to allow space for out going args.
   const size_t main_out_arg_size = main_jni_conv->OutArgSize();
@@ -202,10 +201,8 @@
   // Call the read barrier for the declaring class loaded from the method for a static call.
   // Note that we always have outgoing param space available for at least two params.
   if (kUseReadBarrier && is_static) {
-    ThreadOffset32 read_barrier32 =
-        QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pReadBarrierJni);
-    ThreadOffset64 read_barrier64 =
-        QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pReadBarrierJni);
+    ThreadOffset<kPointerSize> read_barrier = QUICK_ENTRYPOINT_OFFSET(kPointerSize,
+                                                                      pReadBarrierJni);
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     main_jni_conv->Next();  // Skip JNIEnv.
     FrameOffset class_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
@@ -225,21 +222,13 @@
     // Pass the current thread as the second argument and call.
     if (main_jni_conv->IsCurrentParamInRegister()) {
       __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-      if (is_64_bit_target) {
-        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier64),
-                main_jni_conv->InterproceduralScratchRegister());
-      } else {
-        __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier32),
-                main_jni_conv->InterproceduralScratchRegister());
-      }
+      __ Call(main_jni_conv->CurrentParamRegister(),
+              Offset(read_barrier),
+              main_jni_conv->InterproceduralScratchRegister());
     } else {
       __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                           main_jni_conv->InterproceduralScratchRegister());
-      if (is_64_bit_target) {
-        __ CallFromThread64(read_barrier64, main_jni_conv->InterproceduralScratchRegister());
-      } else {
-        __ CallFromThread32(read_barrier32, main_jni_conv->InterproceduralScratchRegister());
-      }
+      __ CallFromThread(read_barrier, main_jni_conv->InterproceduralScratchRegister());
     }
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));  // Reset.
   }
@@ -248,14 +237,10 @@
   //    can occur. The result is the saved JNI local state that is restored by the exit call. We
   //    abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
   //    arguments.
-  ThreadOffset32 jni_start32 =
+  ThreadOffset<kPointerSize> jni_start =
       is_synchronized
-          ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodStartSynchronized)
-          : QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodStart);
-  ThreadOffset64 jni_start64 =
-      is_synchronized
-          ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodStartSynchronized)
-          : QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodStart);
+          ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
+          : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart);
   main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
   FrameOffset locked_object_handle_scope_offset(0);
   if (is_synchronized) {
@@ -276,21 +261,13 @@
   }
   if (main_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-    if (is_64_bit_target) {
-      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start64),
-              main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start32),
-              main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ Call(main_jni_conv->CurrentParamRegister(),
+            Offset(jni_start),
+            main_jni_conv->InterproceduralScratchRegister());
   } else {
     __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
                         main_jni_conv->InterproceduralScratchRegister());
-    if (is_64_bit_target) {
-      __ CallFromThread64(jni_start64, main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CallFromThread32(jni_start32, main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CallFromThread(jni_start, main_jni_conv->InterproceduralScratchRegister());
   }
   if (is_synchronized) {  // Check for exceptions from monitor enter.
     __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
@@ -352,20 +329,12 @@
   if (main_jni_conv->IsCurrentParamInRegister()) {
     ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
     DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
-    if (is_64_bit_target) {
-      __ LoadRawPtrFromThread64(jni_env, Thread::JniEnvOffset<PointerSize::k64>());
-    } else {
-      __ LoadRawPtrFromThread32(jni_env, Thread::JniEnvOffset<PointerSize::k32>());
-    }
+    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset<kPointerSize>());
   } else {
     FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
-    if (is_64_bit_target) {
-      __ CopyRawPtrFromThread64(jni_env, Thread::JniEnvOffset<PointerSize::k64>(),
-                                main_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CopyRawPtrFromThread32(jni_env, Thread::JniEnvOffset<PointerSize::k32>(),
-                                main_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CopyRawPtrFromThread(jni_env,
+                            Thread::JniEnvOffset<kPointerSize>(),
+                            main_jni_conv->InterproceduralScratchRegister());
   }
 
   // 9. Plant call to native code associated with method.
@@ -398,7 +367,9 @@
                                              + static_cast<size_t>(kMipsPointerSize));
     }
     CHECK_LT(return_save_location.Uint32Value(), frame_size + main_out_arg_size);
-    __ Store(return_save_location, main_jni_conv->ReturnRegister(), main_jni_conv->SizeOfReturnValue());
+    __ Store(return_save_location,
+             main_jni_conv->ReturnRegister(),
+             main_jni_conv->SizeOfReturnValue());
   }
 
   // Increase frame size for out args if needed by the end_jni_conv.
@@ -414,27 +385,18 @@
   }
   //     thread.
   end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
-  ThreadOffset32 jni_end32(-1);
-  ThreadOffset64 jni_end64(-1);
+  ThreadOffset<kPointerSize> jni_end(-1);
   if (reference_return) {
     // Pass result.
-    jni_end32 = is_synchronized
-        ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k32,
-                                  pJniMethodEndWithReferenceSynchronized)
-        : QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodEndWithReference);
-    jni_end64 = is_synchronized
-        ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k64,
-                                  pJniMethodEndWithReferenceSynchronized)
-        : QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodEndWithReference);
+    jni_end = is_synchronized
+                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
+                  : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
     SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
     end_jni_conv->Next();
   } else {
-    jni_end32 = is_synchronized
-        ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodEndSynchronized)
-        : QUICK_ENTRYPOINT_OFFSET(PointerSize::k32, pJniMethodEnd);
-    jni_end64 = is_synchronized
-        ? QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodEndSynchronized)
-        : QUICK_ENTRYPOINT_OFFSET(PointerSize::k64, pJniMethodEnd);
+    jni_end = is_synchronized
+                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
+                  : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd);
   }
   // Pass saved local reference state.
   if (end_jni_conv->IsCurrentParamOnStack()) {
@@ -461,23 +423,13 @@
   }
   if (end_jni_conv->IsCurrentParamInRegister()) {
     __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-    if (is_64_bit_target) {
-      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end64),
-              end_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end32),
-              end_jni_conv->InterproceduralScratchRegister());
-    }
+    __ Call(end_jni_conv->CurrentParamRegister(),
+            Offset(jni_end),
+            end_jni_conv->InterproceduralScratchRegister());
   } else {
     __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
                         end_jni_conv->InterproceduralScratchRegister());
-    if (is_64_bit_target) {
-      __ CallFromThread64(ThreadOffset64(jni_end64),
-                          end_jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ CallFromThread32(ThreadOffset32(jni_end32),
-                          end_jni_conv->InterproceduralScratchRegister());
-    }
+    __ CallFromThread(jni_end, end_jni_conv->InterproceduralScratchRegister());
   }
 
   // 13. Reload return value
@@ -517,7 +469,8 @@
 }
 
 // Copy a single parameter from the managed to the JNI calling convention.
-static void CopyParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
                           size_t frame_size, size_t out_arg_size) {
@@ -606,7 +559,8 @@
   }
 }
 
-static void SetNativeParameter(Assembler* jni_asm,
+template <PointerSize kPointerSize>
+static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                                JniCallingConvention* jni_conv,
                                ManagedRegister in_reg) {
   if (jni_conv->IsCurrentParamOnStack()) {
@@ -621,7 +575,13 @@
 
 CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler, uint32_t access_flags,
                                          uint32_t method_idx, const DexFile& dex_file) {
-  return ArtJniCompileMethodInternal(compiler, access_flags, method_idx, dex_file);
+  if (Is64BitInstructionSet(compiler->GetInstructionSet())) {
+    return ArtJniCompileMethodInternal<PointerSize::k64>(
+        compiler, access_flags, method_idx, dex_file);
+  } else {
+    return ArtJniCompileMethodInternal<PointerSize::k32>(
+        compiler, access_flags, method_idx, dex_file);
+  }
 }
 
 }  // namespace art
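
The core refactoring in jni_compiler.cc is that the runtime is_64_bit_target branches collapse into a compile-time PointerSize template parameter chosen once in ArtQuickJniCompileMethod. A stand-alone sketch of that dispatch shape, with PointerSize, RunSized and Run as illustrative names rather than the ART APIs:

    #include <cstddef>
    #include <iostream>

    enum class PointerSize : std::size_t { k32 = 4, k64 = 8 };

    // Everything below the dispatch point is templated on the pointer size, so
    // thread offsets, entrypoint offsets, etc. can be selected at compile time
    // instead of branching on the target word size at every call site.
    template <PointerSize kPointerSize>
    void RunSized(const char* isa) {
      std::cout << isa << ": " << static_cast<std::size_t>(kPointerSize)
                << "-byte pointers\n";
    }

    // The one runtime decision, made once per compiled method.
    void Run(bool is_64_bit, const char* isa) {
      if (is_64_bit) {
        RunSized<PointerSize::k64>(isa);
      } else {
        RunSized<PointerSize::k32>(isa);
      }
    }

    int main() {
      Run(/* is_64_bit= */ true, "arm64");
      Run(/* is_64_bit= */ false, "arm");
      return 0;
    }

The same pattern drives jni_cfi_test.cc above: TestImpl picks PointerSize::k32 or k64 once and forwards to the templated TestImplSized.
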
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c105940..5eaf11e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -432,6 +432,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
@@ -512,6 +517,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -4467,8 +4477,6 @@
   Primitive::Type type = instruction->GetType();
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -4503,6 +4511,11 @@
     }
 
     case Primitive::kPrimNot: {
+      // The read barrier instrumentation of object ArrayGet
+      // instructions does not support the HIntermediateAddress
+      // instruction.
+      DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
@@ -4645,8 +4658,6 @@
   Location value_loc = locations->InAt(2);
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
@@ -4911,8 +4922,6 @@
 }
 
 void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
 
@@ -4927,9 +4936,6 @@
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
 
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
-
   if (second.IsRegister()) {
     __ add(out.AsRegister<Register>(),
            first.AsRegister<Register>(),
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 54c9efc..9ceb310 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -598,6 +598,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
@@ -680,7 +685,9 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
     DCHECK(!(instruction_->IsArrayGet() &&
              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
@@ -1983,8 +1990,6 @@
 }
 
 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1992,10 +1997,7 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
-    HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
          Operand(InputOperandAt(instruction, 1)));
@@ -2091,11 +2093,15 @@
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
+  // The read barrier instrumentation of object ArrayGet instructions
+  // does not support the HIntermediateAddress instruction.
+  DCHECK(!((type == Primitive::kPrimNot) &&
+           instruction->GetArray()->IsIntermediateAddress() &&
+           kEmitCompilerReadBarrier));
+
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
     Register temp = temps.AcquireW();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2109,9 +2115,6 @@
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -2201,9 +2204,6 @@
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -2223,7 +2223,6 @@
     codegen_->Store(value_type, value, destination);
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   } else {
-    DCHECK(needs_write_barrier);
     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     vixl::aarch64::Label done;
     SlowPathCodeARM64* slow_path = nullptr;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 8f7778f..6632cd9 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -231,15 +231,6 @@
                                   HInstruction* array,
                                   HInstruction* index,
                                   size_t data_offset) {
-  if (kEmitCompilerReadBarrier) {
-    // The read barrier instrumentation does not support the
-    // HIntermediateAddress instruction yet.
-    //
-    // TODO: Handle this case properly in the ARM64 and ARM code generator and
-    // re-enable this optimization; otherwise, remove this TODO.
-    // b/26601270
-    return false;
-  }
   if (index->IsConstant() ||
       (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
     // When the index is a constant all the addressing can be fitted in the
@@ -251,6 +242,13 @@
     // The access may require a runtime call or the original array pointer.
     return false;
   }
+  if (kEmitCompilerReadBarrier &&
+      access->IsArrayGet() &&
+      access->AsArrayGet()->GetType() == Primitive::kPrimNot) {
+    // For object arrays, the read barrier instrumentation requires
+    // the original array pointer.
+    return false;
+  }
 
   // Proceed to extract the base address computation.
   HGraph* graph = access->GetBlock()->GetGraph();
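
The reordered bail-outs above can be read as a pure predicate; a hedged, simplified sketch (the function and parameter names are invented for illustration, and the real TryExtractArrayAccessAddress also checks whether the access needs a runtime call or the original array pointer for other reasons):

    #include <iostream>

    // Constant indices never benefit from an extracted intermediate address, and
    // with read barriers enabled only object ArrayGets must keep the original
    // array pointer.
    bool CanExtractIntermediateAddress(bool index_is_constant,
                                       bool is_object_array_get,
                                       bool emit_read_barrier) {
      if (index_is_constant) {
        return false;  // Addressing already fits in the memory operand.
      }
      if (emit_read_barrier && is_object_array_get) {
        return false;  // The read barrier needs the original array pointer.
      }
      return true;  // Safe to split off the base-address computation.
    }

    int main() {
      std::cout << CanExtractIntermediateAddress(false, true, true) << "\n";   // 0
      std::cout << CanExtractIntermediateAddress(false, false, true) << "\n";  // 1
      return 0;
    }
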
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 2e94ad0..5fdfb9b 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -653,8 +653,7 @@
   // Whether these are locations for an intrinsified call.
   bool intrinsified_;
 
-  ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
-  ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
+  friend class RegisterAllocatorTest;
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d5b0d77..30da69f 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -546,7 +546,8 @@
 NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
-                              PassObserver* pass_observer) {
+                              PassObserver* pass_observer,
+                              RegisterAllocator::Strategy strategy) {
   {
     PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
                     pass_observer);
@@ -559,7 +560,7 @@
   }
   {
     PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
-    RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
+    RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters();
   }
 }
 
@@ -626,7 +627,9 @@
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
   RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
-  AllocateRegisters(graph, codegen, pass_observer);
+  RegisterAllocator::Strategy regalloc_strategy =
+      driver->GetCompilerOptions().GetRegisterAllocationStrategy();
+  AllocateRegisters(graph, codegen, pass_observer, regalloc_strategy);
 }
 
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
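
The chosen strategy now flows CompilerOptions -> AllocateRegisters -> RegisterAllocator::Create, so only the factory knows about concrete allocators. A minimal sketch of that factory shape; LinearScanAllocator, GraphColorAllocator and the reduced Create signature are illustrative (the real Create also takes the arena, codegen and liveness):

    #include <iostream>
    #include <memory>

    struct RegisterAllocator {
      virtual ~RegisterAllocator() = default;
      virtual void AllocateRegisters() = 0;

      enum class Strategy { kLinearScan, kGraphColor };

      // Single creation point: callers pass the strategy through and never
      // branch on it themselves.
      static std::unique_ptr<RegisterAllocator> Create(Strategy strategy);
    };

    struct LinearScanAllocator : RegisterAllocator {
      void AllocateRegisters() override { std::cout << "linear scan\n"; }
    };

    struct GraphColorAllocator : RegisterAllocator {
      void AllocateRegisters() override { std::cout << "graph coloring\n"; }
    };

    std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(Strategy strategy) {
      if (strategy == Strategy::kGraphColor) {
        return std::make_unique<GraphColorAllocator>();
      }
      return std::make_unique<LinearScanAllocator>();
    }

    int main() {
      RegisterAllocator::Create(RegisterAllocator::Strategy::kGraphColor)->AllocateRegisters();
      return 0;
    }
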
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index e3c44c6..55ea99e 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -31,12 +31,29 @@
 
 namespace art {
 
+using Strategy = RegisterAllocator::Strategy;
+
 // Note: the register allocator tests rely on the fact that constants have live
 // intervals and registers get allocated to them.
 
-class RegisterAllocatorTest : public CommonCompilerTest {};
+class RegisterAllocatorTest : public CommonCompilerTest {
+ protected:
+  // These functions need to access private members of LocationSummary, so we declare them as
+  // members of RegisterAllocatorTest, which we make a friend class of LocationSummary.
+  static void SameAsFirstInputHint(Strategy strategy);
+  static void ExpectedInRegisterHint(Strategy strategy);
+};
 
-static bool Check(const uint16_t* data) {
+// This macro should include all register allocation strategies that should be tested.
+#define TEST_ALL_STRATEGIES(test_name)\
+TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\
+  test_name(Strategy::kRegisterAllocatorLinearScan);\
+}\
+TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
+  test_name(Strategy::kRegisterAllocatorGraphColor);\
+}
+
+static bool Check(const uint16_t* data, Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HGraph* graph = CreateCFG(&allocator, data);
@@ -45,7 +62,8 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
   register_allocator->AllocateRegisters();
   return register_allocator->Validate(false);
 }
@@ -143,7 +161,7 @@
   }
 }
 
-TEST_F(RegisterAllocatorTest, CFG1) {
+static void CFG1(Strategy strategy) {
   /*
    * Test the following snippet:
    *  return 0;
@@ -160,10 +178,12 @@
     Instruction::CONST_4 | 0 | 0,
     Instruction::RETURN);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop1) {
+TEST_ALL_STRATEGIES(CFG1);
+
+static void Loop1(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -199,10 +219,12 @@
     Instruction::CONST_4 | 5 << 12 | 1 << 8,
     Instruction::RETURN | 1 << 8);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop2) {
+TEST_ALL_STRATEGIES(Loop1);
+
+static void Loop2(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0;
@@ -248,10 +270,12 @@
     Instruction::ADD_INT, 1 << 8 | 0,
     Instruction::RETURN | 1 << 8);
 
-  ASSERT_TRUE(Check(data));
+  ASSERT_TRUE(Check(data, strategy));
 }
 
-TEST_F(RegisterAllocatorTest, Loop3) {
+TEST_ALL_STRATEGIES(Loop2);
+
+static void Loop3(Strategy strategy) {
   /*
    * Test the following snippet:
    *  int a = 0
@@ -296,7 +320,8 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
   register_allocator->AllocateRegisters();
   ASSERT_TRUE(register_allocator->Validate(false));
 
@@ -314,6 +339,8 @@
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
 
+TEST_ALL_STRATEGIES(Loop3);
+
 TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
   const uint16_t data[] = THREE_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -354,7 +381,7 @@
   ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition());
 }
 
-TEST_F(RegisterAllocatorTest, DeadPhi) {
+static void DeadPhi(Strategy strategy) {
   /* Test for a dead loop phi taking as back-edge input a phi that also has
    * this loop phi as input. Walking backwards in SsaDeadPhiElimination
    * does not solve the problem because the loop phi will be visited last.
@@ -385,11 +412,14 @@
   x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
-  RegisterAllocator* register_allocator = RegisterAllocator::Create(&allocator, &codegen, liveness);
+  RegisterAllocator* register_allocator =
+      RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
   register_allocator->AllocateRegisters();
   ASSERT_TRUE(register_allocator->Validate(false));
 }
 
+TEST_ALL_STRATEGIES(DeadPhi);
+
 /**
  * Test that the TryAllocateFreeReg method works in the presence of inactive intervals
  * that share the same register. It should split the interval it is currently
@@ -508,15 +538,15 @@
                                               graph->GetDexFile(),
                                               dex_cache,
                                               0);
-*input2 = new (allocator) HInstanceFieldGet(parameter,
-                                            Primitive::kPrimInt,
-                                            MemberOffset(42),
-                                            false,
-                                            kUnknownFieldIndex,
-                                            kUnknownClassDefIndex,
-                                            graph->GetDexFile(),
-                                            dex_cache,
-                                            0);
+  *input2 = new (allocator) HInstanceFieldGet(parameter,
+                                              Primitive::kPrimInt,
+                                              MemberOffset(42),
+                                              false,
+                                              kUnknownFieldIndex,
+                                              kUnknownClassDefIndex,
+                                              graph->GetDexFile(),
+                                              dex_cache,
+                                              0);
   then->AddInstruction(*input1);
   else_->AddInstruction(*input2);
   join->AddInstruction(new (allocator) HExit());
@@ -528,7 +558,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, PhiHint) {
+static void PhiHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HPhi *phi;
@@ -544,7 +574,7 @@
 
     // Check that the register allocator is deterministic.
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
@@ -564,7 +594,7 @@
     // the same register.
     phi->GetLocations()->UpdateOut(Location::RegisterLocation(2));
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -584,7 +614,7 @@
     // the same register.
     input1->GetLocations()->UpdateOut(Location::RegisterLocation(2));
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -604,7 +634,7 @@
     // the same register.
     input2->GetLocations()->UpdateOut(Location::RegisterLocation(2));
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
@@ -613,6 +643,12 @@
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, PhiHint_LinearScan) {
+  PhiHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
                                 HInstruction** field,
                                 HInstruction** ret) {
@@ -651,7 +687,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) {
+void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *field, *ret;
@@ -665,7 +701,7 @@
     liveness.Analyze();
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
@@ -685,13 +721,19 @@
     ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2);
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint_LinearScan) {
+  ExpectedInRegisterHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
                             HInstruction** first_sub,
                             HInstruction** second_sub) {
@@ -721,7 +763,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) {
+void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *first_sub, *second_sub;
@@ -735,7 +777,7 @@
     liveness.Analyze();
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     // Sanity check that in normal conditions, the registers are the same.
@@ -758,7 +800,7 @@
     ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2);
@@ -766,6 +808,12 @@
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, SameAsFirstInputHint_LinearScan) {
+  SameAsFirstInputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 static HGraph* BuildDiv(ArenaAllocator* allocator,
                         HInstruction** div) {
   HGraph* graph = CreateGraph(allocator);
@@ -792,7 +840,7 @@
   return graph;
 }
 
-TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
+static void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
   HInstruction *div;
@@ -806,7 +854,7 @@
     liveness.Analyze();
 
     RegisterAllocator* register_allocator =
-        RegisterAllocator::Create(&allocator, &codegen, liveness);
+        RegisterAllocator::Create(&allocator, &codegen, liveness, strategy);
     register_allocator->AllocateRegisters();
 
     // div on x86 requires its first input in eax and the output be the same as the first input.
@@ -814,6 +862,12 @@
   }
 }
 
+// TODO: Enable this test for graph coloring register allocation when iterative move
+//       coalescing is merged.
+TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint_LinearScan) {
+  ExpectedExactInRegisterAndSameOutputHint(Strategy::kRegisterAllocatorLinearScan);
+}
+
 // Test a bug in the register allocator, where allocating a blocked
 // register would lead to spilling an inactive interval at the wrong
 // position.
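
For readability, the TEST_ALL_STRATEGIES macro defined above expands, for a test body such as CFG1, into exactly one gtest case per strategy:

    TEST_F(RegisterAllocatorTest, CFG1_LinearScan) {
      CFG1(Strategy::kRegisterAllocatorLinearScan);
    }
    TEST_F(RegisterAllocatorTest, CFG1_GraphColor) {
      CFG1(Strategy::kRegisterAllocatorGraphColor);
    }

The hint-based tests (PhiHint, ExpectedInRegisterHint, SameAsFirstInputHint, ExpectedExactInRegisterAndSameOutputHint) deliberately skip the macro and keep only the _LinearScan variant until iterative move coalescing is merged for the graph-coloring allocator, per the TODOs above.
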
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 1796b39..aadc43f 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -568,15 +568,6 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void ArmAssembler::StoreImmediateToThread32(ThreadOffset32 dest,
-                                            uint32_t imm,
-                                            ManagedRegister mscratch) {
-  ArmManagedRegister scratch = mscratch.AsArm();
-  CHECK(scratch.IsCoreRegister()) << scratch;
-  LoadImmediate(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), TR, dest.Int32Value());
-}
-
 static void EmitLoad(ArmAssembler* assembler, ManagedRegister m_dst,
                      Register src_register, int32_t src_offset, size_t size) {
   ArmManagedRegister dst = m_dst.AsArm();
@@ -601,19 +592,19 @@
   return EmitLoad(this, m_dst, SP, src.Int32Value(), size);
 }
 
-void ArmAssembler::LoadFromThread32(ManagedRegister m_dst, ThreadOffset32 src, size_t size) {
+void ArmAssembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset32 src, size_t size) {
   return EmitLoad(this, m_dst, TR, src.Int32Value(), size);
 }
 
-void ArmAssembler::LoadRawPtrFromThread32(ManagedRegister m_dst, ThreadOffset32 offs) {
+void ArmAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) {
   ArmManagedRegister dst = m_dst.AsArm();
   CHECK(dst.IsCoreRegister()) << dst;
   LoadFromOffset(kLoadWord, dst.AsCoreRegister(), TR, offs.Int32Value());
 }
 
-void ArmAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                          ThreadOffset32 thr_offs,
-                                          ManagedRegister mscratch) {
+void ArmAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                        ThreadOffset32 thr_offs,
+                                        ManagedRegister mscratch) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -622,9 +613,9 @@
                 SP, fr_offs.Int32Value());
 }
 
-void ArmAssembler::CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                                        FrameOffset fr_offs,
-                                        ManagedRegister mscratch) {
+void ArmAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                      FrameOffset fr_offs,
+                                      ManagedRegister mscratch) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -633,9 +624,9 @@
                 TR, thr_offs.Int32Value());
 }
 
-void ArmAssembler::StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                              FrameOffset fr_offs,
-                                              ManagedRegister mscratch) {
+void ArmAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                            FrameOffset fr_offs,
+                                            ManagedRegister mscratch) {
   ArmManagedRegister scratch = mscratch.AsArm();
   CHECK(scratch.IsCoreRegister()) << scratch;
   AddConstant(scratch.AsCoreRegister(), SP, fr_offs.Int32Value(), AL);
@@ -643,7 +634,7 @@
                 TR, thr_offs.Int32Value());
 }
 
-void ArmAssembler::StoreStackPointerToThread32(ThreadOffset32 thr_offs) {
+void ArmAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
   StoreToOffset(kStoreWord, SP, TR, thr_offs.Int32Value());
 }
 
@@ -832,8 +823,8 @@
   // TODO: place reference map on call
 }
 
-void ArmAssembler::CallFromThread32(ThreadOffset32 offset ATTRIBUTE_UNUSED,
-                                    ManagedRegister scratch ATTRIBUTE_UNUSED) {
+void ArmAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
+                                  ManagedRegister scratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
 }
 
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 2b7414d..bb88e6f 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -23,12 +23,14 @@
 #include "base/arena_allocator.h"
 #include "base/arena_containers.h"
 #include "base/bit_utils.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/value_object.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "offsets.h"
 
 namespace art {
@@ -433,10 +435,19 @@
 // This is an abstract ARM assembler.  Subclasses provide assemblers for the individual
 // instruction sets (ARM32, Thumb2, etc.)
 //
-class ArmAssembler : public Assembler {
+class ArmAssembler : public Assembler, public JNIMacroAssembler<PointerSize::k32> {
  public:
   virtual ~ArmAssembler() {}
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+  void FinalizeCode() OVERRIDE {
+    Assembler::FinalizeCode();
+  }
+  void FinalizeInstructions(const MemoryRegion& region) {
+    Assembler::FinalizeInstructions(region);
+  }
+
   // Is this assembler for the thumb instruction set?
   virtual bool IsThumb() const = 0;
 
@@ -904,13 +915,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
 
-  void StoreImmediateToThread32(ThreadOffset32 dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
 
-  void StoreStackOffsetToThread32(ThreadOffset32 thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset32 thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
                      ManagedRegister scratch) OVERRIDE;
@@ -918,7 +927,7 @@
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -927,15 +936,16 @@
 
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
 
   // Copying routines
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset32 thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
 
-  void CopyRawPtrToThread32(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
@@ -990,7 +1000,7 @@
   // Call to address held at [base+offset]
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index dc1f24a..53685bf 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -164,25 +164,16 @@
                  offs.Int32Value());
 }
 
-void Arm64Assembler::StoreImmediateToThread64(ThreadOffset64 offs,
-                                              uint32_t imm,
+void Arm64Assembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs,
+                                              FrameOffset fr_offs,
                                               ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreToOffset(scratch.AsXRegister(), TR, offs.Int32Value());
-}
-
-void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset64 tr_offs,
-                                                FrameOffset fr_offs,
-                                                ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
   AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
 }
 
-void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset64 tr_offs) {
+void Arm64Assembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) {
   UseScratchRegisterScope temps(&vixl_masm_);
   Register temp = temps.AcquireX();
   ___ Mov(temp, reg_x(SP));
@@ -286,7 +277,7 @@
   return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
 }
 
-void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset64 src, size_t size) {
+void Arm64Assembler::LoadFromThread(ManagedRegister m_dst, ThreadOffset64 src, size_t size) {
   return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
 }
 
@@ -319,7 +310,7 @@
   ___ Ldr(reg_x(dst.AsXRegister()), MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
 }
 
-void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset64 offs) {
+void Arm64Assembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset64 offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   CHECK(dst.IsXRegister()) << dst;
   LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
@@ -355,18 +346,18 @@
   }
 }
 
-void Arm64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                            ThreadOffset64 tr_offs,
-                                            ManagedRegister m_scratch) {
+void Arm64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                          ThreadOffset64 tr_offs,
+                                          ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
   LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
   StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
 }
 
-void Arm64Assembler::CopyRawPtrToThread64(ThreadOffset64 tr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister m_scratch) {
+void Arm64Assembler::CopyRawPtrToThread(ThreadOffset64 tr_offs,
+                                        FrameOffset fr_offs,
+                                        ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
   LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
@@ -543,8 +534,8 @@
   ___ Blr(reg_x(scratch.AsXRegister()));
 }
 
-void Arm64Assembler::CallFromThread64(ThreadOffset64 offset ATTRIBUTE_UNUSED,
-                                      ManagedRegister scratch ATTRIBUTE_UNUSED) {
+void Arm64Assembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
+                                    ManagedRegister scratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
 }
 
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index b8434b9..d7084da 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -22,9 +22,11 @@
 #include <vector>
 
 #include "base/arena_containers.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "utils/arm64/managed_register_arm64.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "offsets.h"
 
 // TODO: make vixl clean wrt -Wshadow, -Wunknown-pragmas, -Wmissing-noreturn
@@ -81,7 +83,7 @@
   DISALLOW_COPY_AND_ASSIGN(Arm64Exception);
 };
 
-class Arm64Assembler FINAL : public Assembler {
+class Arm64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> {
  public:
   explicit Arm64Assembler(ArenaAllocator* arena)
       : Assembler(arena),
@@ -91,6 +93,8 @@
 
   vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; }
 
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+
   // Finalize the code.
   void FinalizeCode() OVERRIDE;
 
@@ -122,28 +126,28 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE;
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
-  void StoreImmediateToThread64(ThreadOffset64 dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
-  void StoreStackOffsetToThread64(ThreadOffset64 thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-  void StoreStackPointerToThread64(ThreadOffset64 thr_offs) OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
                      ManagedRegister scratch) OVERRIDE;
 
   // Load routines.
   void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
-  void LoadFromThread64(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
   void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
                bool unpoison_reference) OVERRIDE;
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
-  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset64 thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
-  void CopyRawPtrToThread64(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       OVERRIDE;
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE;
@@ -196,7 +200,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
 
   // Jump to address (not setting link register)
   void JumpTo(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch);
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 0a1b733..81159e6 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -121,137 +121,4 @@
   }
 }
 
-std::unique_ptr<Assembler> Assembler::Create(
-    ArenaAllocator* arena,
-    InstructionSet instruction_set,
-    const InstructionSetFeatures* instruction_set_features) {
-  switch (instruction_set) {
-#ifdef ART_ENABLE_CODEGEN_arm
-    case kArm:
-      return std::unique_ptr<Assembler>(new (arena) arm::Arm32Assembler(arena));
-    case kThumb2:
-      return std::unique_ptr<Assembler>(new (arena) arm::Thumb2Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_arm64
-    case kArm64:
-      return std::unique_ptr<Assembler>(new (arena) arm64::Arm64Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_mips
-    case kMips:
-      return std::unique_ptr<Assembler>(new (arena) mips::MipsAssembler(
-          arena,
-          instruction_set_features != nullptr
-              ? instruction_set_features->AsMipsInstructionSetFeatures()
-              : nullptr));
-#endif
-#ifdef ART_ENABLE_CODEGEN_mips64
-    case kMips64:
-      return std::unique_ptr<Assembler>(new (arena) mips64::Mips64Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_x86
-    case kX86:
-      return std::unique_ptr<Assembler>(new (arena) x86::X86Assembler(arena));
-#endif
-#ifdef ART_ENABLE_CODEGEN_x86_64
-    case kX86_64:
-      return std::unique_ptr<Assembler>(new (arena) x86_64::X86_64Assembler(arena));
-#endif
-    default:
-      LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
-  }
-}
-
-void Assembler::StoreImmediateToThread32(ThreadOffset32 dest ATTRIBUTE_UNUSED,
-                                         uint32_t imm ATTRIBUTE_UNUSED,
-                                         ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreImmediateToThread64(ThreadOffset64 dest ATTRIBUTE_UNUSED,
-                                         uint32_t imm ATTRIBUTE_UNUSED,
-                                         ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackOffsetToThread32(
-    ThreadOffset32 thr_offs ATTRIBUTE_UNUSED,
-    FrameOffset fr_offs ATTRIBUTE_UNUSED,
-    ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackOffsetToThread64(
-    ThreadOffset64 thr_offs ATTRIBUTE_UNUSED,
-    FrameOffset fr_offs ATTRIBUTE_UNUSED,
-    ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackPointerToThread32(
-    ThreadOffset32 thr_offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::StoreStackPointerToThread64(
-    ThreadOffset64 thr_offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                 ThreadOffset32 src ATTRIBUTE_UNUSED,
-                                 size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                 ThreadOffset64 src ATTRIBUTE_UNUSED,
-                                 size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadRawPtrFromThread32(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                       ThreadOffset32 offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::LoadRawPtrFromThread64(ManagedRegister dest ATTRIBUTE_UNUSED,
-                                       ThreadOffset64 offs ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                       ThreadOffset32 thr_offs ATTRIBUTE_UNUSED,
-                                       ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                       ThreadOffset64 thr_offs ATTRIBUTE_UNUSED,
-                                       ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrToThread32(ThreadOffset32 thr_offs ATTRIBUTE_UNUSED,
-                                     FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                     ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CopyRawPtrToThread64(ThreadOffset64 thr_offs ATTRIBUTE_UNUSED,
-                                     FrameOffset fr_offs ATTRIBUTE_UNUSED,
-                                     ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CallFromThread32(ThreadOffset32 offset ATTRIBUTE_UNUSED,
-                                 ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
-void Assembler::CallFromThread64(ThreadOffset64 offset ATTRIBUTE_UNUSED,
-                                 ManagedRegister scratch ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
-}
-
 }  // namespace art
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 89f7947..8981776 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -356,11 +356,6 @@
 
 class Assembler : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
-  static std::unique_ptr<Assembler> Create(
-      ArenaAllocator* arena,
-      InstructionSet instruction_set,
-      const InstructionSetFeatures* instruction_set_features = nullptr);
-
   // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
   virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
 
@@ -376,144 +371,6 @@
   // TODO: Implement with disassembler.
   virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {}
 
-  // Emit code that will create an activation on the stack
-  virtual void BuildFrame(size_t frame_size,
-                          ManagedRegister method_reg,
-                          ArrayRef<const ManagedRegister> callee_save_regs,
-                          const ManagedRegisterEntrySpills& entry_spills) = 0;
-
-  // Emit code that will remove an activation from the stack
-  virtual void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) = 0;
-
-  virtual void IncreaseFrameSize(size_t adjust) = 0;
-  virtual void DecreaseFrameSize(size_t adjust) = 0;
-
-  // Store routines
-  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
-  virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
-  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
-
-  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0;
-
-  virtual void StoreImmediateToThread32(ThreadOffset32 dest,
-                                        uint32_t imm,
-                                        ManagedRegister scratch);
-  virtual void StoreImmediateToThread64(ThreadOffset64 dest,
-                                        uint32_t imm,
-                                        ManagedRegister scratch);
-
-  virtual void StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister scratch);
-  virtual void StoreStackOffsetToThread64(ThreadOffset64 thr_offs,
-                                          FrameOffset fr_offs,
-                                          ManagedRegister scratch);
-
-  virtual void StoreStackPointerToThread32(ThreadOffset32 thr_offs);
-  virtual void StoreStackPointerToThread64(ThreadOffset64 thr_offs);
-
-  virtual void StoreSpanning(FrameOffset dest, ManagedRegister src,
-                             FrameOffset in_off, ManagedRegister scratch) = 0;
-
-  // Load routines
-  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
-
-  virtual void LoadFromThread32(ManagedRegister dest, ThreadOffset32 src, size_t size);
-  virtual void LoadFromThread64(ManagedRegister dest, ThreadOffset64 src, size_t size);
-
-  virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
-  // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
-  virtual void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-                       bool unpoison_reference) = 0;
-
-  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
-
-  virtual void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset32 offs);
-  virtual void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset64 offs);
-
-  // Copying routines
-  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
-
-  virtual void CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                      ThreadOffset32 thr_offs,
-                                      ManagedRegister scratch);
-  virtual void CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                      ThreadOffset64 thr_offs,
-                                      ManagedRegister scratch);
-
-  virtual void CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                                    FrameOffset fr_offs,
-                                    ManagedRegister scratch);
-  virtual void CopyRawPtrToThread64(ThreadOffset64 thr_offs,
-                                    FrameOffset fr_offs,
-                                    ManagedRegister scratch);
-
-  virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0;
-
-  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(ManagedRegister dest, Offset dest_offset,
-                    ManagedRegister src, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
-                    ManagedRegister scratch, size_t size) = 0;
-
-  virtual void MemoryBarrier(ManagedRegister scratch) = 0;
-
-  // Sign extension
-  virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
-
-  // Zero extension
-  virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
-
-  // Exploit fast access in managed code to Thread::Current()
-  virtual void GetCurrentThread(ManagedRegister tr) = 0;
-  virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0;
-
-  // Set up out_reg to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed. in_reg holds a possibly stale reference
-  // that can be used to avoid loading the handle scope entry to see if the value is
-  // null.
-  virtual void CreateHandleScopeEntry(ManagedRegister out_reg,
-                                      FrameOffset handlescope_offset,
-                                      ManagedRegister in_reg,
-                                      bool null_allowed) = 0;
-
-  // Set up out_off to hold a Object** into the handle scope, or to be null if the
-  // value is null and null_allowed.
-  virtual void CreateHandleScopeEntry(FrameOffset out_off,
-                                      FrameOffset handlescope_offset,
-                                      ManagedRegister scratch,
-                                      bool null_allowed) = 0;
-
-  // src holds a handle scope entry (Object**) load this into dst
-  virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) = 0;
-
-  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
-  // know that src may not be null.
-  virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
-  virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
-
-  // Call to address held at [base+offset]
-  virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
-  virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
-  virtual void CallFromThread32(ThreadOffset32 offset, ManagedRegister scratch);
-  virtual void CallFromThread64(ThreadOffset64 offset, ManagedRegister scratch);
-
-  // Generate code to check if Thread::Current()->exception_ is non-null
-  // and branch to a ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
-
   virtual void Bind(Label* label) = 0;
   virtual void Jump(Label* label) = 0;
 
@@ -525,13 +382,17 @@
    */
   DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; }
 
- protected:
-  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
-
   ArenaAllocator* GetArena() {
     return buffer_.GetArena();
   }
 
+  AssemblerBuffer* GetBuffer() {
+    return &buffer_;
+  }
+
+ protected:
+  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
+
   AssemblerBuffer buffer_;
 
   DebugFrameOpCodeWriterForAssembler cfi_;
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
new file mode 100644
index 0000000..6c14888
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_macro_assembler.h"
+
+#include <algorithm>
+#include <vector>
+
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "arm/assembler_arm32.h"
+#include "arm/assembler_thumb2.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "arm64/assembler_arm64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "mips/assembler_mips.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "mips64/assembler_mips64.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "x86/assembler_x86.h"
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "x86_64/assembler_x86_64.h"
+#endif
+#include "base/casts.h"
+#include "globals.h"
+#include "memory_region.h"
+
+namespace art {
+
+using MacroAsm32UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k32>>;
+
+template <>
+MacroAsm32UniquePtr JNIMacroAssembler<PointerSize::k32>::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features) {
+#ifndef ART_ENABLE_CODEGEN_mips
+  UNUSED(instruction_set_features);
+#endif
+
+  switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kArm:
+      return MacroAsm32UniquePtr(new (arena) arm::Arm32Assembler(arena));
+    case kThumb2:
+      return MacroAsm32UniquePtr(new (arena) arm::Thumb2Assembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips:
+      return MacroAsm32UniquePtr(new (arena) mips::MipsAssembler(
+          arena,
+          instruction_set_features != nullptr
+              ? instruction_set_features->AsMipsInstructionSetFeatures()
+              : nullptr));
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+    case kX86:
+      return MacroAsm32UniquePtr(new (arena) x86::X86Assembler(arena));
+#endif
+    default:
+      LOG(FATAL) << "Unknown/unsupported 4B InstructionSet: " << instruction_set;
+      UNREACHABLE();
+  }
+}
+
+using MacroAsm64UniquePtr = std::unique_ptr<JNIMacroAssembler<PointerSize::k64>>;
+
+template <>
+MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features ATTRIBUTE_UNUSED) {
+  switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm64
+    case kArm64:
+      return MacroAsm64UniquePtr(new (arena) arm64::Arm64Assembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+    case kMips64:
+      return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(arena));
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+    case kX86_64:
+      return MacroAsm64UniquePtr(new (arena) x86_64::X86_64Assembler(arena));
+#endif
+    default:
+      LOG(FATAL) << "Unknown/unsupported 8B InstructionSet: " << instruction_set;
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
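For reference, a caller that previously went through the removed Assembler::Create now drives one of these width-specific factories; a minimal sketch of the emit/finalize sequence (the kThumb2 target and the surrounding setup mirror the test harness added below and are illustrative only):

    ArenaPool pool;
    ArenaAllocator arena(&pool);
    std::unique_ptr<JNIMacroAssembler<PointerSize::k32>> jni_asm =
        JNIMacroAssembler<PointerSize::k32>::Create(&arena, kThumb2);
    // ... emit the JNI stub through the JNIMacroAssembler interface ...
    jni_asm->FinalizeCode();
    std::vector<uint8_t> code(jni_asm->CodeSize());
    MemoryRegion region(code.data(), code.size());
    jni_asm->FinalizeInstructions(region);  // copy the generated instructions out of the buffer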
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
new file mode 100644
index 0000000..6f45bd6
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
+#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
+
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "base/arena_allocator.h"
+#include "base/arena_object.h"
+#include "base/enums.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "managed_register.h"
+#include "offsets.h"
+#include "utils/array_ref.h"
+
+namespace art {
+
+class ArenaAllocator;
+class DebugFrameOpCodeWriterForAssembler;
+class InstructionSetFeatures;
+class MemoryRegion;
+
+template <PointerSize kPointerSize>
+class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
+ public:
+  static std::unique_ptr<JNIMacroAssembler<kPointerSize>> Create(
+      ArenaAllocator* arena,
+      InstructionSet instruction_set,
+      const InstructionSetFeatures* instruction_set_features = nullptr);
+
+  // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
+  virtual void FinalizeCode() = 0;
+
+  // Size of generated code
+  virtual size_t CodeSize() const = 0;
+
+  // Copy instructions out of the assembly buffer into the given region of memory
+  virtual void FinalizeInstructions(const MemoryRegion& region) = 0;
+
+  // Emit code that will create an activation on the stack
+  virtual void BuildFrame(size_t frame_size,
+                          ManagedRegister method_reg,
+                          ArrayRef<const ManagedRegister> callee_save_regs,
+                          const ManagedRegisterEntrySpills& entry_spills) = 0;
+
+  // Emit code that will remove an activation from the stack
+  virtual void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs) = 0;
+
+  virtual void IncreaseFrameSize(size_t adjust) = 0;
+  virtual void DecreaseFrameSize(size_t adjust) = 0;
+
+  // Store routines
+  virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
+  virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
+  virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
+
+  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0;
+
+  virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs,
+                                        FrameOffset fr_offs,
+                                        ManagedRegister scratch) = 0;
+
+  virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0;
+
+  virtual void StoreSpanning(FrameOffset dest,
+                             ManagedRegister src,
+                             FrameOffset in_off,
+                             ManagedRegister scratch) = 0;
+
+  // Load routines
+  virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
+
+  virtual void LoadFromThread(ManagedRegister dest,
+                              ThreadOffset<kPointerSize> src,
+                              size_t size) = 0;
+
+  virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
+  // If unpoison_reference is true and kPoisonReference is true, then we negate the read reference.
+  virtual void LoadRef(ManagedRegister dest,
+                       ManagedRegister base,
+                       MemberOffset offs,
+                       bool unpoison_reference) = 0;
+
+  virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
+
+  virtual void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset<kPointerSize> offs) = 0;
+
+  // Copying routines
+  virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
+
+  virtual void CopyRawPtrFromThread(FrameOffset fr_offs,
+                                    ThreadOffset<kPointerSize> thr_offs,
+                                    ManagedRegister scratch) = 0;
+
+  virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs,
+                                  FrameOffset fr_offs,
+                                  ManagedRegister scratch) = 0;
+
+  virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0;
+
+  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    ManagedRegister src_base,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(ManagedRegister dest_base,
+                    Offset dest_offset,
+                    FrameOffset src,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    FrameOffset src_base,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(ManagedRegister dest,
+                    Offset dest_offset,
+                    ManagedRegister src,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void Copy(FrameOffset dest,
+                    Offset dest_offset,
+                    FrameOffset src,
+                    Offset src_offset,
+                    ManagedRegister scratch,
+                    size_t size) = 0;
+
+  virtual void MemoryBarrier(ManagedRegister scratch) = 0;
+
+  // Sign extension
+  virtual void SignExtend(ManagedRegister mreg, size_t size) = 0;
+
+  // Zero extension
+  virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
+
+  // Exploit fast access in managed code to Thread::Current()
+  virtual void GetCurrentThread(ManagedRegister tr) = 0;
+  virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0;
+
+  // Set up out_reg to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed. in_reg holds a possibly stale reference
+  // that can be used to avoid loading the handle scope entry to see if the value is
+  // null.
+  virtual void CreateHandleScopeEntry(ManagedRegister out_reg,
+                                      FrameOffset handlescope_offset,
+                                      ManagedRegister in_reg,
+                                      bool null_allowed) = 0;
+
+  // Set up out_off to hold an Object** into the handle scope, or to be null if the
+  // value is null and null_allowed.
+  virtual void CreateHandleScopeEntry(FrameOffset out_off,
+                                      FrameOffset handlescope_offset,
+                                      ManagedRegister scratch,
+                                      bool null_allowed) = 0;
+
+  // src holds a handle scope entry (Object**); load this into dst.
+  virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) = 0;
+
+  // Heap::VerifyObject on src. In some cases (such as a reference to this) we
+  // know that src may not be null.
+  virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
+  virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
+
+  // Call to address held at [base+offset]
+  virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void CallFromThread(ThreadOffset<kPointerSize> offset, ManagedRegister scratch) = 0;
+
+  // Generate code to check if Thread::Current()->exception_ is non-null
+  // and branch to an ExceptionSlowPath if it is.
+  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
+
+  virtual ~JNIMacroAssembler() {}
+
+  /**
+   * @brief Buffer of DWARF's Call Frame Information opcodes.
+   * @details It is used by debuggers and other tools to unwind the call stack.
+   */
+  virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0;
+
+ protected:
+  explicit JNIMacroAssembler() {}
+};
+
+template <typename T, PointerSize kPointerSize>
+class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> {
+ public:
+  void FinalizeCode() OVERRIDE {
+    asm_.FinalizeCode();
+  }
+
+  size_t CodeSize() const OVERRIDE {
+    return asm_.CodeSize();
+  }
+
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
+    asm_.FinalizeInstructions(region);
+  }
+
+  DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE {
+    return asm_.cfi();
+  }
+
+ protected:
+  explicit JNIMacroAssemblerFwd(ArenaAllocator* arena) : asm_(arena) {}
+
+  T asm_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
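The practical effect of dropping the 32/64 suffixes is that the ThreadOffset width now travels with the template parameter, so thread-access sequences can be written once per pointer size; a hypothetical helper, purely for illustration:

    template <PointerSize kPointerSize>
    void SpillStackPointer(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ThreadOffset<kPointerSize> thr_offs) {
      // Same call on 32- and 64-bit targets; previously this required choosing
      // between StoreStackPointerToThread32() and StoreStackPointerToThread64().
      jni_asm->StoreStackPointerToThread(thr_offs);
    }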
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
new file mode 100644
index 0000000..829f34b
--- /dev/null
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
+#define ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
+
+#include "jni_macro_assembler.h"
+
+#include "assembler_test_base.h"
+#include "common_runtime_test.h"  // For ScratchFile
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iterator>
+#include <sys/stat.h>
+
+namespace art {
+
+template<typename Ass>
+class JNIMacroAssemblerTest : public testing::Test {
+ public:
+  Ass* GetAssembler() {
+    return assembler_.get();
+  }
+
+  typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler);
+
+  void DriverFn(TestFn f, std::string test_name) {
+    DriverWrapper(f(this, assembler_.get()), test_name);
+  }
+
+  // This driver assumes the assembler has already been called.
+  void DriverStr(std::string assembly_string, std::string test_name) {
+    DriverWrapper(assembly_string, test_name);
+  }
+
+  // This is intended to be run as a test.
+  bool CheckTools() {
+    return test_helper_->CheckTools();
+  }
+
+ protected:
+  explicit JNIMacroAssemblerTest() {}
+
+  void SetUp() OVERRIDE {
+    arena_.reset(new ArenaAllocator(&pool_));
+    assembler_.reset(CreateAssembler(arena_.get()));
+    test_helper_.reset(
+        new AssemblerTestInfrastructure(GetArchitectureString(),
+                                        GetAssemblerCmdName(),
+                                        GetAssemblerParameters(),
+                                        GetObjdumpCmdName(),
+                                        GetObjdumpParameters(),
+                                        GetDisassembleCmdName(),
+                                        GetDisassembleParameters(),
+                                        GetAssemblyHeader()));
+
+    SetUpHelpers();
+  }
+
+  void TearDown() OVERRIDE {
+    test_helper_.reset();  // Clean up the helper.
+    assembler_.reset();
+    arena_.reset();
+  }
+
+  // Override this to set up any architecture-specific things, e.g., CPU revision.
+  virtual Ass* CreateAssembler(ArenaAllocator* arena) {
+    return new (arena) Ass(arena);
+  }
+
+  // Override this to set up any architecture-specific things, e.g., register vectors.
+  virtual void SetUpHelpers() {}
+
+  // Get the typically used name for this architecture, e.g., aarch64, x86_64, ...
+  virtual std::string GetArchitectureString() = 0;
+
+  // Get the name of the assembler, e.g., "as" by default.
+  virtual std::string GetAssemblerCmdName() {
+    return "as";
+  }
+
+  // Switches passed to the assembler command. Default is none.
+  virtual std::string GetAssemblerParameters() {
+    return "";
+  }
+
+  // Get the name of the objdump command, e.g., "objdump" by default.
+  virtual std::string GetObjdumpCmdName() {
+    return "objdump";
+  }
+
+  // Switches passed to the objdump command. Default is " -h".
+  virtual std::string GetObjdumpParameters() {
+    return " -h";
+  }
+
+  // Get the name of the disassemble command, e.g., "objdump" by default.
+  virtual std::string GetDisassembleCmdName() {
+    return "objdump";
+  }
+
+  // Switches passed to the disassemble command. Since the input is a raw binary, the target
+  // architecture must be passed to objdump, so this is architecture-specific and has no default.
+  virtual std::string GetDisassembleParameters() = 0;
+
+  // If the assembly file needs a header, return it in a sub-class.
+  virtual const char* GetAssemblyHeader() {
+    return nullptr;
+  }
+
+ private:
+  // Override this to pad the code with NOPs to a certain size if needed.
+  virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
+  }
+
+  void DriverWrapper(std::string assembly_text, std::string test_name) {
+    assembler_->FinalizeCode();
+    size_t cs = assembler_->CodeSize();
+    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
+    MemoryRegion code(&(*data)[0], data->size());
+    assembler_->FinalizeInstructions(code);
+    Pad(*data);
+    test_helper_->Driver(*data, assembly_text, test_name);
+  }
+
+  ArenaPool pool_;
+  std::unique_ptr<ArenaAllocator> arena_;
+  std::unique_ptr<Ass> assembler_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
+
+  DISALLOW_COPY_AND_ASSIGN(JNIMacroAssemblerTest);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_TEST_H_
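A concrete fixture only has to fill in the two pure virtuals above; a hypothetical x86 specialization could look like the following (the class name and objdump switches are assumptions for illustration, not taken from an existing test):

    class ExampleX86JNIMacroAssemblerTest : public JNIMacroAssemblerTest<x86::X86Assembler> {
     protected:
      // Architecture string handed to the assemble/disassemble tooling.
      std::string GetArchitectureString() OVERRIDE { return "x86"; }
      // objdump is fed a raw binary, so the target architecture must be spelled out.
      std::string GetDisassembleParameters() OVERRIDE { return " -D -bbinary -mi386"; }
    };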
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index e6b32de..8b7da3f 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2799,27 +2799,17 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::StoreImmediateToThread32(ThreadOffset32 dest,
-                                             uint32_t imm,
+void MipsAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                             FrameOffset fr_offs,
                                              ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
-  // Is this function even referenced anywhere else in the code?
-  LoadConst32(scratch.AsCoreRegister(), imm);
-  StoreToOffset(kStoreWord, scratch.AsCoreRegister(), S1, dest.Int32Value());
-}
-
-void MipsAssembler::StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                               FrameOffset fr_offs,
-                                               ManagedRegister mscratch) {
-  MipsManagedRegister scratch = mscratch.AsMips();
-  CHECK(scratch.IsCoreRegister()) << scratch;
   Addiu32(scratch.AsCoreRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(),
                 S1, thr_offs.Int32Value());
 }
 
-void MipsAssembler::StoreStackPointerToThread32(ThreadOffset32 thr_offs) {
+void MipsAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
   StoreToOffset(kStoreWord, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2836,7 +2826,7 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void MipsAssembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
+void MipsAssembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -2864,7 +2854,7 @@
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
 }
 
-void MipsAssembler::LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset32 offs) {
+void MipsAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
   MipsManagedRegister dest = mdest.AsMips();
   CHECK(dest.IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(), S1, offs.Int32Value());
@@ -2918,9 +2908,9 @@
   StoreToOffset(kStoreWord, scratch.AsCoreRegister(), SP, dest.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                           ThreadOffset32 thr_offs,
-                                           ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                         ThreadOffset32 thr_offs,
+                                         ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -2929,9 +2919,9 @@
                 SP, fr_offs.Int32Value());
 }
 
-void MipsAssembler::CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                                         FrameOffset fr_offs,
-                                         ManagedRegister mscratch) {
+void MipsAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                       FrameOffset fr_offs,
+                                       ManagedRegister mscratch) {
   MipsManagedRegister scratch = mscratch.AsMips();
   CHECK(scratch.IsCoreRegister()) << scratch;
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
@@ -3103,8 +3093,8 @@
   // TODO: place reference map on call.
 }
 
-void MipsAssembler::CallFromThread32(ThreadOffset32 offset ATTRIBUTE_UNUSED,
-                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+void MipsAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
+                                   ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "no mips implementation";
 }
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 852ced6..41b6c6b 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -23,12 +23,14 @@
 
 #include "arch/mips/instruction_set_features_mips.h"
 #include "base/arena_containers.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_mips.h"
 #include "globals.h"
 #include "managed_register_mips.h"
 #include "offsets.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/label.h"
 
 namespace art {
@@ -145,7 +147,7 @@
   DISALLOW_COPY_AND_ASSIGN(MipsExceptionSlowPath);
 };
 
-class MipsAssembler FINAL : public Assembler {
+class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k32> {
  public:
   explicit MipsAssembler(ArenaAllocator* arena,
                          const MipsInstructionSetFeatures* instruction_set_features = nullptr)
@@ -160,6 +162,9 @@
     cfi().DelayEmittingAdvancePCs();
   }
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+
   virtual ~MipsAssembler() {
     for (auto& branch : branches_) {
       CHECK(branch.IsResolved());
@@ -500,15 +505,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread32(ThreadOffset32 dest,
-                                uint32_t imm,
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
                                 ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                  FrameOffset fr_offs,
-                                  ManagedRegister mscratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset32 thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest,
                      ManagedRegister msrc,
@@ -518,7 +519,7 @@
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread32(ManagedRegister mdest, ThreadOffset32 src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -529,19 +530,19 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread32(ManagedRegister mdest, ThreadOffset32 offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread32(FrameOffset fr_offs,
-                              ThreadOffset32 thr_offs,
-                              ManagedRegister mscratch) OVERRIDE;
-
-  void CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                            FrameOffset fr_offs,
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                          FrameOffset fr_offs,
+                          ManagedRegister mscratch) OVERRIDE;
+
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
@@ -617,7 +618,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset32 offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 3fd77a0..a2621cb 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -2115,16 +2115,16 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset64 thr_offs,
-                                                 FrameOffset fr_offs,
-                                                 ManagedRegister mscratch) {
+void Mips64Assembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                               FrameOffset fr_offs,
+                                               ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   Daddiu64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset64 thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -2141,7 +2141,7 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
+void Mips64Assembler::LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -2174,7 +2174,7 @@
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
 }
 
-void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset64 offs) {
+void Mips64Assembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
@@ -2218,18 +2218,18 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset64 thr_offs,
-                                             ManagedRegister mscratch) {
+void Mips64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                           ThreadOffset64 thr_offs,
+                                           ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset64 thr_offs,
-                                           FrameOffset fr_offs,
-                                           ManagedRegister mscratch) {
+void Mips64Assembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                                         FrameOffset fr_offs,
+                                         ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
@@ -2431,8 +2431,8 @@
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset64 offset ATTRIBUTE_UNUSED,
-                                       ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+void Mips64Assembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
+                                     ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 1ad05b0..a7d350c 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -20,12 +20,14 @@
 #include <utility>
 #include <vector>
 
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_mips64.h"
 #include "globals.h"
 #include "managed_register_mips64.h"
 #include "offsets.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 #include "utils/label.h"
 
 namespace art {
@@ -100,7 +102,7 @@
   DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
 };
 
-class Mips64Assembler FINAL : public Assembler {
+class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> {
  public:
   explicit Mips64Assembler(ArenaAllocator* arena)
       : Assembler(arena),
@@ -118,6 +120,9 @@
     }
   }
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+
   // Emit Machine Instructions.
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -383,11 +388,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackOffsetToThread64(ThreadOffset64 thr_offs,
-                                  FrameOffset fr_offs,
-                                  ManagedRegister mscratch) OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread64(ThreadOffset64 thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
@@ -395,7 +400,7 @@
   // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread64(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -404,19 +409,19 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE;
 
   // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs,
-                              ThreadOffset64 thr_offs,
-                              ManagedRegister mscratch) OVERRIDE;
-
-  void CopyRawPtrToThread64(ThreadOffset64 thr_offs,
-                            FrameOffset fr_offs,
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs,
+                          FrameOffset fr_offs,
+                          ManagedRegister mscratch) OVERRIDE;
+
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
 
   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
@@ -471,7 +476,7 @@
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index bd5fc40..89b3c3f 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -2068,20 +2068,16 @@
   movl(Address(ESP, dest), Immediate(imm));
 }
 
-void X86Assembler::StoreImmediateToThread32(ThreadOffset32 dest, uint32_t imm, ManagedRegister) {
-  fs()->movl(Address::Absolute(dest), Immediate(imm));
-}
-
-void X86Assembler::StoreStackOffsetToThread32(ThreadOffset32 thr_offs,
-                                              FrameOffset fr_offs,
-                                              ManagedRegister mscratch) {
+void X86Assembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                            FrameOffset fr_offs,
+                                            ManagedRegister mscratch) {
   X86ManagedRegister scratch = mscratch.AsX86();
   CHECK(scratch.IsCpuRegister());
   leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
   fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
 }
 
-void X86Assembler::StoreStackPointerToThread32(ThreadOffset32 thr_offs) {
+void X86Assembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
   fs()->movl(Address::Absolute(thr_offs), ESP);
 }
 
@@ -2117,7 +2113,7 @@
   }
 }
 
-void X86Assembler::LoadFromThread32(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
+void X86Assembler::LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) {
   X86ManagedRegister dest = mdest.AsX86();
   if (dest.IsNoRegister()) {
     CHECK_EQ(0u, size);
@@ -2167,8 +2163,7 @@
   movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
 }
 
-void X86Assembler::LoadRawPtrFromThread32(ManagedRegister mdest,
-                                          ThreadOffset32 offs) {
+void X86Assembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) {
   X86ManagedRegister dest = mdest.AsX86();
   CHECK(dest.IsCpuRegister());
   fs()->movl(dest.AsCpuRegister(), Address::Absolute(offs));
@@ -2230,18 +2225,18 @@
   movl(Address(ESP, dest), scratch.AsCpuRegister());
 }
 
-void X86Assembler::CopyRawPtrFromThread32(FrameOffset fr_offs,
-                                          ThreadOffset32 thr_offs,
-                                          ManagedRegister mscratch) {
+void X86Assembler::CopyRawPtrFromThread(FrameOffset fr_offs,
+                                        ThreadOffset32 thr_offs,
+                                        ManagedRegister mscratch) {
   X86ManagedRegister scratch = mscratch.AsX86();
   CHECK(scratch.IsCpuRegister());
   fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
   Store(fr_offs, scratch, 4);
 }
 
-void X86Assembler::CopyRawPtrToThread32(ThreadOffset32 thr_offs,
-                                        FrameOffset fr_offs,
-                                        ManagedRegister mscratch) {
+void X86Assembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
+                                      FrameOffset fr_offs,
+                                      ManagedRegister mscratch) {
   X86ManagedRegister scratch = mscratch.AsX86();
   CHECK(scratch.IsCpuRegister());
   Load(scratch, fr_offs, 4);
@@ -2387,7 +2382,7 @@
   call(Address(scratch, offset));
 }
 
-void X86Assembler::CallFromThread32(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
+void X86Assembler::CallFromThread(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
   fs()->call(Address::Absolute(offset));
 }
 
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 6d519e4..b6442fe 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -21,6 +21,7 @@
 
 #include "base/arena_containers.h"
 #include "base/bit_utils.h"
+#include "base/enums.h"
 #include "base/macros.h"
 #include "constants_x86.h"
 #include "globals.h"
@@ -28,6 +29,7 @@
 #include "offsets.h"
 #include "utils/array_ref.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 namespace art {
 namespace x86 {
@@ -302,11 +304,18 @@
   ArenaVector<int32_t> buffer_;
 };
 
-class X86Assembler FINAL : public Assembler {
+class X86Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k32> {
  public:
   explicit X86Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
   virtual ~X86Assembler() {}
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+  void FinalizeCode() { Assembler::FinalizeCode(); }
+  void FinalizeInstructions(const MemoryRegion& region) {
+    Assembler::FinalizeInstructions(region);
+  }
+
   /*
    * Emit Machine Instructions.
    */
@@ -654,13 +663,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
 
-  void StoreImmediateToThread32(ThreadOffset32 dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
 
-  void StoreStackOffsetToThread32(ThreadOffset32 thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread32(ThreadOffset32 thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
                      ManagedRegister scratch) OVERRIDE;
@@ -668,7 +675,7 @@
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread32(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -677,15 +684,16 @@
 
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread32(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE;
 
   // Copying routines
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread32(FrameOffset fr_offs, ThreadOffset32 thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset32 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
 
-  void CopyRawPtrToThread32(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+  void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
@@ -742,7 +750,7 @@
   // Call to address held at [base+offset]
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread32(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
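Note on the renames above: X86Assembler now also derives from JNIMacroAssembler<PointerSize::k32> (see the new utils/jni_macro_assembler.h include), so the explicit width suffixes on the thread-offset methods become redundant — the ThreadOffset32/ThreadOffset64 parameter types already carry the word size, and the CodeSize/cfi/FinalizeCode/FinalizeInstructions shims satisfy the new interface by forwarding to the existing Assembler implementations. A minimal, self-contained sketch of that pattern follows; PointerSize, ThreadOffsetFor, JNIMacroAssemblerSketch, and X86Sketch are illustrative stand-ins, not the actual ART classes.

// Minimal sketch, assuming a pointer-size-templated JNI assembler interface;
// all names below are illustrative stand-ins for the real ART types.
#include <cstddef>
#include <cstdint>

enum class PointerSize : size_t { k32 = 4, k64 = 8 };

struct ThreadOffset32 { uint32_t value; };
struct ThreadOffset64 { uint64_t value; };

// Select the thread-offset type from the pointer size, so one method name
// ("StoreStackPointerToThread") serves both the 32- and 64-bit back ends.
template <PointerSize kPointerSize> struct ThreadOffsetFor;
template <> struct ThreadOffsetFor<PointerSize::k32> { using type = ThreadOffset32; };
template <> struct ThreadOffsetFor<PointerSize::k64> { using type = ThreadOffset64; };

template <PointerSize kPointerSize>
class JNIMacroAssemblerSketch {
 public:
  using ThreadOffset = typename ThreadOffsetFor<kPointerSize>::type;
  virtual ~JNIMacroAssemblerSketch() = default;
  virtual size_t CodeSize() const = 0;  // Forwarded to the base assembler in the diff.
  virtual void StoreStackPointerToThread(ThreadOffset thr_offs) = 0;
};

class X86Sketch final : public JNIMacroAssemblerSketch<PointerSize::k32> {
 public:
  size_t CodeSize() const override { return 0; }  // Real code forwards to Assembler::CodeSize().
  void StoreStackPointerToThread(ThreadOffset thr_offs) override {
    last_offset_ = thr_offs.value;  // Stand-in for fs()->movl(Address::Absolute(thr_offs), ESP).
  }
 private:
  uint32_t last_offset_ = 0;
};

int main() {
  X86Sketch assembler;
  assembler.StoreStackPointerToThread(ThreadOffset32{128});
  return static_cast<int>(assembler.CodeSize());
}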
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 977ce9d..ce4ea1d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2804,11 +2804,7 @@
   movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
 }
 
-void X86_64Assembler::StoreImmediateToThread64(ThreadOffset64 dest, uint32_t imm, ManagedRegister) {
-  gs()->movl(Address::Absolute(dest, true), Immediate(imm));  // TODO(64) movq?
-}
-
-void X86_64Assembler::StoreStackOffsetToThread64(ThreadOffset64 thr_offs,
+void X86_64Assembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
                                                  FrameOffset fr_offs,
                                                  ManagedRegister mscratch) {
   X86_64ManagedRegister scratch = mscratch.AsX86_64();
@@ -2817,7 +2813,7 @@
   gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
 }
 
-void X86_64Assembler::StoreStackPointerToThread64(ThreadOffset64 thr_offs) {
+void X86_64Assembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
   gs()->movq(Address::Absolute(thr_offs, true), CpuRegister(RSP));
 }
 
@@ -2858,7 +2854,7 @@
   }
 }
 
-void X86_64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
+void X86_64Assembler::LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) {
   X86_64ManagedRegister dest = mdest.AsX86_64();
   if (dest.IsNoRegister()) {
     CHECK_EQ(0u, size);
@@ -2907,7 +2903,7 @@
   movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
 }
 
-void X86_64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset64 offs) {
+void X86_64Assembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) {
   X86_64ManagedRegister dest = mdest.AsX86_64();
   CHECK(dest.IsCpuRegister());
   gs()->movq(dest.AsCpuRegister(), Address::Absolute(offs, true));
@@ -2968,7 +2964,7 @@
   movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
 }
 
-void X86_64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
+void X86_64Assembler::CopyRawPtrFromThread(FrameOffset fr_offs,
                                              ThreadOffset64 thr_offs,
                                              ManagedRegister mscratch) {
   X86_64ManagedRegister scratch = mscratch.AsX86_64();
@@ -2977,7 +2973,7 @@
   Store(fr_offs, scratch, 8);
 }
 
-void X86_64Assembler::CopyRawPtrToThread64(ThreadOffset64 thr_offs,
+void X86_64Assembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
                                            FrameOffset fr_offs,
                                            ManagedRegister mscratch) {
   X86_64ManagedRegister scratch = mscratch.AsX86_64();
@@ -3130,7 +3126,7 @@
   call(Address(scratch, offset));
 }
 
-void X86_64Assembler::CallFromThread64(ThreadOffset64 offset, ManagedRegister /*mscratch*/) {
+void X86_64Assembler::CallFromThread(ThreadOffset64 offset, ManagedRegister /*mscratch*/) {
   gs()->call(Address::Absolute(offset, true));
 }
 
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 52e39cf..d298da2 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -28,6 +28,7 @@
 #include "offsets.h"
 #include "utils/array_ref.h"
 #include "utils/assembler.h"
+#include "utils/jni_macro_assembler.h"
 
 namespace art {
 namespace x86_64 {
@@ -332,11 +333,20 @@
 };
 
 
-class X86_64Assembler FINAL : public Assembler {
+class X86_64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> {
  public:
   explicit X86_64Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
   virtual ~X86_64Assembler() {}
 
+  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
+  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }
+  void FinalizeCode() OVERRIDE {
+    Assembler::FinalizeCode();
+  }
+  void FinalizeInstructions(const MemoryRegion& region) {
+    Assembler::FinalizeInstructions(region);
+  }
+
   /*
    * Emit Machine Instructions.
    */
@@ -723,13 +733,11 @@
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE;
 
-  void StoreImmediateToThread64(ThreadOffset64 dest, uint32_t imm, ManagedRegister scratch)
-      OVERRIDE;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
+                                FrameOffset fr_offs,
+                                ManagedRegister scratch) OVERRIDE;
 
-  void StoreStackOffsetToThread64(ThreadOffset64 thr_offs, FrameOffset fr_offs,
-                                  ManagedRegister scratch) OVERRIDE;
-
-  void StoreStackPointerToThread64(ThreadOffset64 thr_offs) OVERRIDE;
+  void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
                      ManagedRegister scratch) OVERRIDE;
@@ -737,7 +745,7 @@
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread64(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
+  void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset  src) OVERRIDE;
 
@@ -746,15 +754,16 @@
 
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
+  void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE;
 
   // Copying routines
   void Move(ManagedRegister dest, ManagedRegister src, size_t size);
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset64 thr_offs,
-                              ManagedRegister scratch) OVERRIDE;
+  void CopyRawPtrFromThread(FrameOffset fr_offs,
+                            ThreadOffset64 thr_offs,
+                            ManagedRegister scratch) OVERRIDE;
 
-  void CopyRawPtrToThread64(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
+  void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE;
@@ -812,7 +821,7 @@
   // Call to address held at [base+offset]
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
+  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 9c813e2..7a37f60 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -132,6 +132,7 @@
   mirror/throwable.cc \
   monitor.cc \
   native_bridge_art_interface.cc \
+  native_stack_dump.cc \
   native/dalvik_system_DexFile.cc \
   native/dalvik_system_VMDebug.cc \
   native/dalvik_system_VMRuntime.cc \
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index c8181ba..415bb71 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1788,20 +1788,7 @@
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_ALLOCATORS
-// Comment out allocators that have arm64 specific asm.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) implemented in asm
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
@@ -1908,63 +1895,6 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
-
-// The common fast path code for art_quick_alloc_array_region_tlab.
-.macro ALLOC_ARRAY_TLAB_FAST_PATH slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
-    // Check null class
-    cbz    \wClass, \slowPathLabel
-    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED \slowPathLabel, \xClass, \wClass, \xCount, \wCount, \xTemp0, \wTemp0, \xTemp1, \wTemp1, \xTemp2, \wTemp2
-.endm
-
-// The common fast path code for art_quick_alloc_array_region_tlab.
-.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
-    // Arrays are never finalizable, no need to check.
-    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
-    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
-    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
-                                                              // bits.
-    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
-    // Add array data offset and alignment.
-    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
-    add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
-                                                              // component size shift is 3
-                                                              // (for 64 bit alignment).
-    and    \xTemp0, \xTemp0, #4
-    add    \xTemp1, \xTemp1, \xTemp0
-    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
-                                                              // we use (end - begin) to handle
-                                                              // negative size arrays.
-    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
-    sub    \xTemp2, \xTemp2, \xTemp0
-    cmp    \xTemp1, \xTemp2
-    bhi    \slowPathLabel
-
-    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
-                                                              // Round up the object size by the
-                                                              // object alignment. (addr + 7) & ~7.
-    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED
-                                                              // Move old thread_local_pos to x0
-                                                              // for the return value.
-    mov    x0, \xTemp0
-    add    \xTemp0, \xTemp0, \xTemp1
-    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
-    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
-    add    \xTemp0, \xTemp0, #1
-    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
-    POISON_HEAP_REF \wClass
-    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
-    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
-                                                              // Fence. This is "ish" not "ishst" so
-                                                              // that the code after this allocation
-                                                              // site will see the right values in
-                                                              // the fields of the class.
-                                                              // Alternatively we could use "ishst"
-                                                              // if we use load-acquire for the
-                                                              // class status load.)
-    dmb    ish
-    ret
-.endm
-
 // The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 //
 // x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
@@ -1972,11 +1902,8 @@
 // Need to preserve x0 and x1 to the slow path.
 .macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
     cbz    x2, \slowPathLabel                                 // Check null class
-    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
-.endm
-
-.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
-    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]              // Check class status.
+                                                              // Check class status.
+    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
     cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
     bne    \slowPathLabel
                                                               // Add a fake dependence from the
@@ -1989,10 +1916,6 @@
                                                               // a load-acquire for the status).
     eor    x3, x3, x3
     add    x2, x2, x3
-    ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
-.endm
-
-.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
                                                               // Check access flags has
                                                               // kAccClassIsFinalizable.
     ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
@@ -2055,8 +1978,7 @@
 END art_quick_alloc_object_tlab
 
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
-ENTRY \name
+ENTRY art_quick_alloc_object_region_tlab
     // Fast path region tlab allocation.
     // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
     // x2-x7: free.
@@ -2064,26 +1986,23 @@
     mvn    x0, xzr                                            // Read barrier must be enabled here.
     ret                                                       // Return -1.
 #endif
-.if \is_resolved
-    mov    x2, x0 // class is actually stored in x0 already
-.else
     ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
                                                               // Load the class (x2)
     ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-.endif
+
     // Most common case: GC is not marking.
     ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x3, .Lmarking\name
-.Ldo_allocation\name:
-    \fast_path .Lslow_path\name
-.Lmarking\name:
+    cbnz   x3, .Lart_quick_alloc_object_region_tlab_marking
+.Lart_quick_alloc_object_region_tlab_do_allocation:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_marking:
     // GC is marking, check the lock word of the class for the mark bit.
     // If the class is null, go slow path. The check is required to read the lock word.
-    cbz    w2, .Lslow_path\name
+    cbz    w2, .Lart_quick_alloc_object_region_tlab_slow_path
     // Class is not null, check mark bit in lock word.
     ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
     // If the bit is not zero, do the allocation.
-    tbnz    w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+    tbnz    w3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_alloc_object_region_tlab_do_allocation
                                                               // The read barrier slow path. Mark
                                                               // the class.
     stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, lr).
@@ -2094,82 +2013,14 @@
     ldp    x0, x1, [sp, #0]                                   // Restore registers.
     ldr    xLR, [sp, #16]
     add    sp, sp, #32
-    b      .Ldo_allocation\name
-.Lslow_path\name:
+    b      .Lart_quick_alloc_object_region_tlab_do_allocation
+.Lart_quick_alloc_object_region_tlab_slow_path:
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME          // Save callee saves in case of GC.
     mov    x2, xSELF                           // Pass Thread::Current.
-    bl     \entrypoint    // (uint32_t type_idx, Method* method, Thread*)
+    bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END \name
-.endm
-
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
-GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY*(_region_tlab, RegionTLAB)
-.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
-ENTRY \name
-    // Fast path array allocation for region tlab allocation.
-    // x0: uint32_t type_idx
-    // x1: int32_t component_count
-    // x2: ArtMethod* method
-    // x3: Thread* self
-    // x2-x7: free.
-#if !defined(USE_READ_BARRIER)
-    mvn    x0, xzr                                            // Read barrier must be enabled here.
-    ret                                                       // Return -1.
-#endif
-.if \is_resolved
-    mov    x3, x0
-    // If already resolved, class is stored in x0
-.else
-    ldr    x3, [x2, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
-                                                              // Load the class (x2)
-    ldr    w3, [x3, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-.endif
-    // Most common case: GC is not marking.
-    ldr    w4, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x4, .Lmarking\name
-.Ldo_allocation\name:
-    \fast_path .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
-.Lmarking\name:
-    // GC is marking, check the lock word of the class for the mark bit.
-    // If the class is null, go slow path. The check is required to read the lock word.
-    cbz    w3, .Lslow_path\name
-    // Class is not null, check mark bit in lock word.
-    ldr    w4, [x3, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    // If the bit is not zero, do the allocation.
-    tbnz   w4, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
-    b .Lslow_path\name
-                                                              // The read barrier slow path. Mark
-                                                              // the class.
-    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, x2, lr).
-    stp    x2, xLR, [sp, #16]
-    mov    x0, x3                                             // Pass the class as the first param.
-    bl     artReadBarrierMark
-    mov    x3, x0                                             // Get the (marked) class back.
-    ldp    x0, x1, [sp, #0]                                   // Restore registers.
-    ldp    x2, xLR, [sp, #16]
-    add    sp, sp, #32
-    b      .Ldo_allocation\name
-.Lslow_path\name:
-    // x0: uint32_t type_idx
-    // x1: int32_t component_count
-    // x2: ArtMethod* method
-    // x3: Thread* self
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC
-    mov    x3, xSELF                  // pass Thread::Current
-    bl     \entrypoint
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END \name
-.endm
-
-GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_region_tlab, artAllocArrayFromCodeRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH, 0
-// TODO: art_quick_alloc_array_resolved_region_tlab seems to not get called. Investigate compiler.
-GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, 1
+END art_quick_alloc_object_region_tlab
 
     /*
      * Called by managed code when the thread has been asked to suspend.
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index 34e74a1..290769b 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -87,27 +87,6 @@
   ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_ALLOCATORS
-GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_ALLOCATOR
-.endm
-
-.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_ALLOCATOR
-// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-.endm
-
-.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_ALLOCATORS
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
@@ -240,6 +219,20 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 87b0bcb..0619af8 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -174,17 +174,10 @@
 #define MIRROR_CLASS_OBJECT_SIZE_OFFSET (100 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
-ADD_TEST_EQ(MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET,
-            art::mirror::Class::PrimitiveTypeOffset().Int32Value())
 #define MIRROR_CLASS_STATUS_OFFSET (112 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
-#define PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT 16
-ADD_TEST_EQ(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT,
-            static_cast<int>(art::mirror::Class::kPrimitiveTypeSizeShiftShift))
-
 // Array offsets.
 #define MIRROR_ARRAY_LENGTH_OFFSET      MIRROR_OBJECT_HEADER_SIZE
 ADD_TEST_EQ(MIRROR_ARRAY_LENGTH_OFFSET, art::mirror::Array::LengthOffset().Int32Value())
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 8ad47eb..8f5419c 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -636,9 +636,8 @@
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  Primitive::Type type = static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask);
-  DCHECK_EQ(static_cast<size_t>(v32 >> kPrimitiveTypeSizeShiftShift),
-            Primitive::ComponentSizeShift(type));
+  Primitive::Type type = static_cast<Primitive::Type>(v32 & 0xFFFF);
+  DCHECK_EQ(static_cast<size_t>(v32 >> 16), Primitive::ComponentSizeShift(type));
   return type;
 }
 
@@ -647,9 +646,8 @@
   static_assert(sizeof(Primitive::Type) == sizeof(int32_t),
                 "art::Primitive::Type and int32_t have different sizes.");
   int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_));
-  size_t size_shift = static_cast<Primitive::Type>(v32 >> kPrimitiveTypeSizeShiftShift);
-  DCHECK_EQ(size_shift,
-            Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & kPrimitiveTypeMask)));
+  size_t size_shift = static_cast<Primitive::Type>(v32 >> 16);
+  DCHECK_EQ(size_shift, Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & 0xFFFF)));
   return size_shift;
 }
 
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 8f6ce44..5c490de 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -64,12 +64,6 @@
   // 2 ref instance fields.]
   static constexpr uint32_t kClassWalkSuper = 0xC0000000;
 
-  // Shift primitive type by kPrimitiveTypeSizeShiftShift to get the component type size shift
-  // Used for computing array size as follows:
-  // array_bytes = header_size + (elements << (primitive_type >> kPrimitiveTypeSizeShiftShift))
-  static constexpr uint32_t kPrimitiveTypeSizeShiftShift = 16;
-  static constexpr uint32_t kPrimitiveTypeMask = (1u << kPrimitiveTypeSizeShiftShift) - 1;
-
   // Class Status
   //
   // kStatusRetired: Class that's temporarily used till class linking time
@@ -377,10 +371,10 @@
 
   void SetPrimitiveType(Primitive::Type new_type) SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
-    uint32_t v32 = static_cast<uint32_t>(new_type);
-    DCHECK_EQ(v32 & kPrimitiveTypeMask, v32) << "upper 16 bits aren't zero";
+    int32_t v32 = static_cast<int32_t>(new_type);
+    DCHECK_EQ(v32 & 0xFFFF, v32) << "upper 16 bits aren't zero";
     // Store the component size shift in the upper 16 bits.
-    v32 |= Primitive::ComponentSizeShift(new_type) << kPrimitiveTypeSizeShiftShift;
+    v32 |= Primitive::ComponentSizeShift(new_type) << 16;
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32);
   }
 
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
new file mode 100644
index 0000000..8c5b386
--- /dev/null
+++ b/runtime/native_stack_dump.cc
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "native_stack_dump.h"
+
+#include <ostream>
+
+#include <stdio.h>
+
+#include "art_method.h"
+
+// For DumpNativeStack.
+#include <backtrace/Backtrace.h>
+#include <backtrace/BacktraceMap.h>
+
+#if defined(__linux__)
+
+#include <memory>
+#include <vector>
+
+#include <linux/unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include "arch/instruction_set.h"
+#include "base/memory_tool.h"
+#include "base/mutex.h"
+#include "base/stringprintf.h"
+#include "oat_quick_method_header.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+#endif
+
+namespace art {
+
+#if defined(__linux__)
+
+static constexpr bool kUseAddr2line = !kIsTargetBuild;
+
+ALWAYS_INLINE
+static inline void WritePrefix(std::ostream* os, const char* prefix, bool odd) {
+  if (prefix != nullptr) {
+    *os << prefix;
+  }
+  *os << "  ";
+  if (!odd) {
+    *os << " ";
+  }
+}
+
+static bool RunCommand(std::string cmd, std::ostream* os, const char* prefix) {
+  FILE* stream = popen(cmd.c_str(), "r");
+  if (stream) {
+    if (os != nullptr) {
+      bool odd_line = true;               // We indent them differently.
+      bool wrote_prefix = false;          // Have we already written a prefix?
+      constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
+                                          // alt stack, but just to be sure...
+      char buffer[kMaxBuffer];
+      while (!feof(stream)) {
+        if (fgets(buffer, kMaxBuffer, stream) != nullptr) {
+          // Split on newlines.
+          char* tmp = buffer;
+          for (;;) {
+            char* new_line = strchr(tmp, '\n');
+            if (new_line == nullptr) {
+              // Print the rest.
+              if (*tmp != 0) {
+                if (!wrote_prefix) {
+                  WritePrefix(os, prefix, odd_line);
+                }
+                wrote_prefix = true;
+                *os << tmp;
+              }
+              break;
+            }
+            if (!wrote_prefix) {
+              WritePrefix(os, prefix, odd_line);
+            }
+            char saved = *(new_line + 1);
+            *(new_line + 1) = 0;
+            *os << tmp;
+            *(new_line + 1) = saved;
+            tmp = new_line + 1;
+            odd_line = !odd_line;
+            wrote_prefix = false;
+          }
+        }
+      }
+    }
+    pclose(stream);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+static void Addr2line(const std::string& map_src, uintptr_t offset, std::ostream& os,
+                      const char* prefix) {
+  std::string cmdline(StringPrintf("addr2line --functions --inlines --demangle -e %s %zx",
+                                   map_src.c_str(), offset));
+  RunCommand(cmdline.c_str(), &os, prefix);
+}
+
+static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
+  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
+      method->GetEntryPointFromQuickCompiledCode()));
+  if (code == 0) {
+    return pc == 0;
+  }
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return code <= pc && pc <= (code + code_size);
+}
+
+void DumpNativeStack(std::ostream& os, pid_t tid, BacktraceMap* existing_map, const char* prefix,
+    ArtMethod* current_method, void* ucontext_ptr) {
+  // b/18119146
+  if (RUNNING_ON_MEMORY_TOOL != 0) {
+    return;
+  }
+
+  BacktraceMap* map = existing_map;
+  std::unique_ptr<BacktraceMap> tmp_map;
+  if (map == nullptr) {
+    tmp_map.reset(BacktraceMap::Create(getpid()));
+    map = tmp_map.get();
+  }
+  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
+  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
+    os << prefix << "(backtrace::Unwind failed for thread " << tid
+       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
+    return;
+  } else if (backtrace->NumFrames() == 0) {
+    os << prefix << "(no native stack frames for thread " << tid << ")\n";
+    return;
+  }
+
+  // Check whether we have and should use addr2line.
+  bool use_addr2line;
+  if (kUseAddr2line) {
+    // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
+    // and print to stderr.
+    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h", nullptr, nullptr);
+  } else {
+    use_addr2line = false;
+  }
+
+  for (Backtrace::const_iterator it = backtrace->begin();
+       it != backtrace->end(); ++it) {
+    // We produce output like this:
+    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
+    // In order for parsing tools to continue to function, the stack dump
+    // format must at least adhere to this format:
+    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
+    // The parsers require a single space before and after pc, and two spaces
+    // after the <RELATIVE_ADDR>. There can be any prefix data before the
+    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
+    os << prefix << StringPrintf("#%02zu pc ", it->num);
+    bool try_addr2line = false;
+    if (!BacktraceMap::IsValid(it->map)) {
+      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
+                                                            : "%08" PRIxPTR "  ???",
+                         it->pc);
+    } else {
+      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
+                                                            : "%08" PRIxPTR "  ",
+                         BacktraceMap::GetRelativePc(it->map, it->pc));
+      os << it->map.name;
+      os << " (";
+      if (!it->func_name.empty()) {
+        os << it->func_name;
+        if (it->func_offset != 0) {
+          os << "+" << it->func_offset;
+        }
+        try_addr2line = true;
+      } else if (current_method != nullptr &&
+          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
+          PcIsWithinQuickCode(current_method, it->pc)) {
+        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
+        os << JniLongName(current_method) << "+"
+           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
+      } else {
+        os << "???";
+      }
+      os << ")";
+    }
+    os << "\n";
+    if (try_addr2line && use_addr2line) {
+      Addr2line(it->map.name, it->pc - it->map.start, os, prefix);
+    }
+  }
+}
+
+void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
+  if (tid == GetTid()) {
+    // There's no point showing that we're reading our stack out of /proc!
+    return;
+  }
+
+  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
+  std::string kernel_stack;
+  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
+    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
+    return;
+  }
+
+  std::vector<std::string> kernel_stack_frames;
+  Split(kernel_stack, '\n', &kernel_stack_frames);
+  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
+  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
+  kernel_stack_frames.pop_back();
+  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
+    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
+    // into "futex_wait_queue_me+0xcd/0x110".
+    const char* text = kernel_stack_frames[i].c_str();
+    const char* close_bracket = strchr(text, ']');
+    if (close_bracket != nullptr) {
+      text = close_bracket + 2;
+    }
+    os << prefix;
+    if (include_count) {
+      os << StringPrintf("#%02zd ", i);
+    }
+    os << text << "\n";
+  }
+}
+
+#elif defined(__APPLE__)
+
+void DumpNativeStack(std::ostream& os ATTRIBUTE_UNUSED,
+                     pid_t tid ATTRIBUTE_UNUSED,
+                     BacktraceMap* existing_map ATTRIBUTE_UNUSED,
+                     const char* prefix ATTRIBUTE_UNUSED,
+                     ArtMethod* current_method ATTRIBUTE_UNUSED,
+                     void* ucontext_ptr ATTRIBUTE_UNUSED) {
+}
+
+void DumpKernelStack(std::ostream& os ATTRIBUTE_UNUSED,
+                     pid_t tid ATTRIBUTE_UNUSED,
+                     const char* prefix ATTRIBUTE_UNUSED,
+                     bool include_count ATTRIBUTE_UNUSED) {
+}
+
+#else
+#error "Unsupported architecture for native stack dumps."
+#endif
+
+}  // namespace art
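The frame loop in the new native_stack_dump.cc documents a strict output contract for downstream parsers: a single space before and after "pc", two spaces after the relative address, and a hex address without a 0x prefix. A standalone sketch of a line formatted to that contract, using made-up frame data and a hypothetical FormatFrame helper (not part of the patch):

// Minimal sketch of the frame-line contract described in the comments above.
// FormatFrame and its inputs are invented for illustration only.
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <string>

std::string FormatFrame(size_t num, uintptr_t rel_pc, const std::string& path,
                        const std::string& symbol, bool is64bit) {
  char buf[256];
  // One space before and after "pc", two spaces after the relative address,
  // hex with no 0x prefix, zero-padded to the pointer width.
  snprintf(buf, sizeof(buf),
           is64bit ? "#%02zu pc %016" PRIxPTR "  %s (%s)"
                   : "#%02zu pc %08" PRIxPTR "  %s (%s)",
           num, rel_pc, path.c_str(), symbol.c_str());
  return buf;
}

int main() {
  // Prints e.g. "#00 pc 0000000000075bb8  /system/lib64/libc.so (unwind_backtrace_thread+536)".
  printf("%s\n", FormatFrame(0, 0x75bb8, "/system/lib64/libc.so",
                             "unwind_backtrace_thread+536", true).c_str());
  return 0;
}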
diff --git a/runtime/native_stack_dump.h b/runtime/native_stack_dump.h
new file mode 100644
index 0000000..d64bc82
--- /dev/null
+++ b/runtime/native_stack_dump.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_STACK_DUMP_H_
+#define ART_RUNTIME_NATIVE_STACK_DUMP_H_
+
+#include <unistd.h>
+
+#include <iosfwd>
+
+#include "base/macros.h"
+
+class BacktraceMap;
+
+namespace art {
+
+class ArtMethod;
+
+// Dumps the native stack for thread 'tid' to 'os'.
+void DumpNativeStack(std::ostream& os,
+                     pid_t tid,
+                     BacktraceMap* map = nullptr,
+                     const char* prefix = "",
+                     ArtMethod* current_method = nullptr,
+                     void* ucontext = nullptr)
+    NO_THREAD_SAFETY_ANALYSIS;
+
+// Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
+void DumpKernelStack(std::ostream& os,
+                     pid_t tid,
+                     const char* prefix = "",
+                     bool include_count = true);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_STACK_DUMP_H_
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 9f0ef7c..50bea65 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -115,6 +115,7 @@
 #include "native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.h"
 #include "native/sun_misc_Unsafe.h"
 #include "native_bridge_art_interface.h"
+#include "native_stack_dump.h"
 #include "oat_file.h"
 #include "oat_file_manager.h"
 #include "os.h"
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index bc963c5..60ebabc 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -28,6 +28,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/stringprintf.h"
+#include "native_stack_dump.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 76f3161..7482d93 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -55,6 +55,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/stack_trace_element.h"
 #include "monitor.h"
+#include "native_stack_dump.h"
 #include "nth_caller_visitor.h"
 #include "oat_quick_method_header.h"
 #include "object_lock.h"
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 16ef0ff..419ecec 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -35,6 +35,7 @@
 #include "jni_internal.h"
 #include "lock_word.h"
 #include "monitor.h"
+#include "native_stack_dump.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "trace.h"
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 3f779df..515ba9f 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -46,20 +46,12 @@
 #include <sys/syscall.h>
 #endif
 
-// For DumpNativeStack.
-#include <backtrace/Backtrace.h>
-#include <backtrace/BacktraceMap.h>
-
 #if defined(__linux__)
 #include <linux/unistd.h>
 #endif
 
 namespace art {
 
-#if defined(__linux__)
-static constexpr bool kUseAddr2line = !kIsTargetBuild;
-#endif
-
 pid_t GetTid() {
 #if defined(__APPLE__)
   uint64_t owner;
@@ -1026,210 +1018,6 @@
   return "";
 }
 
-#if defined(__linux__)
-
-ALWAYS_INLINE
-static inline void WritePrefix(std::ostream* os, const char* prefix, bool odd) {
-  if (prefix != nullptr) {
-    *os << prefix;
-  }
-  *os << "  ";
-  if (!odd) {
-    *os << " ";
-  }
-}
-
-static bool RunCommand(std::string cmd, std::ostream* os, const char* prefix) {
-  FILE* stream = popen(cmd.c_str(), "r");
-  if (stream) {
-    if (os != nullptr) {
-      bool odd_line = true;               // We indent them differently.
-      bool wrote_prefix = false;          // Have we already written a prefix?
-      constexpr size_t kMaxBuffer = 128;  // Relatively small buffer. Should be OK as we're on an
-                                          // alt stack, but just to be sure...
-      char buffer[kMaxBuffer];
-      while (!feof(stream)) {
-        if (fgets(buffer, kMaxBuffer, stream) != nullptr) {
-          // Split on newlines.
-          char* tmp = buffer;
-          for (;;) {
-            char* new_line = strchr(tmp, '\n');
-            if (new_line == nullptr) {
-              // Print the rest.
-              if (*tmp != 0) {
-                if (!wrote_prefix) {
-                  WritePrefix(os, prefix, odd_line);
-                }
-                wrote_prefix = true;
-                *os << tmp;
-              }
-              break;
-            }
-            if (!wrote_prefix) {
-              WritePrefix(os, prefix, odd_line);
-            }
-            char saved = *(new_line + 1);
-            *(new_line + 1) = 0;
-            *os << tmp;
-            *(new_line + 1) = saved;
-            tmp = new_line + 1;
-            odd_line = !odd_line;
-            wrote_prefix = false;
-          }
-        }
-      }
-    }
-    pclose(stream);
-    return true;
-  } else {
-    return false;
-  }
-}
-
-static void Addr2line(const std::string& map_src, uintptr_t offset, std::ostream& os,
-                      const char* prefix) {
-  std::string cmdline(StringPrintf("addr2line --functions --inlines --demangle -e %s %zx",
-                                   map_src.c_str(), offset));
-  RunCommand(cmdline.c_str(), &os, prefix);
-}
-
-static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFETY_ANALYSIS {
-  uintptr_t code = reinterpret_cast<uintptr_t>(EntryPointToCodePointer(
-      method->GetEntryPointFromQuickCompiledCode()));
-  if (code == 0) {
-    return pc == 0;
-  }
-  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
-  return code <= pc && pc <= (code + code_size);
-}
-#endif
-
-void DumpNativeStack(std::ostream& os, pid_t tid, BacktraceMap* existing_map, const char* prefix,
-    ArtMethod* current_method, void* ucontext_ptr) {
-#if __linux__
-  // b/18119146
-  if (RUNNING_ON_MEMORY_TOOL != 0) {
-    return;
-  }
-
-  BacktraceMap* map = existing_map;
-  std::unique_ptr<BacktraceMap> tmp_map;
-  if (map == nullptr) {
-    tmp_map.reset(BacktraceMap::Create(getpid()));
-    map = tmp_map.get();
-  }
-  std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
-  if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
-    os << prefix << "(backtrace::Unwind failed for thread " << tid
-       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
-    return;
-  } else if (backtrace->NumFrames() == 0) {
-    os << prefix << "(no native stack frames for thread " << tid << ")\n";
-    return;
-  }
-
-  // Check whether we have and should use addr2line.
-  bool use_addr2line;
-  if (kUseAddr2line) {
-    // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
-    // and print to stderr.
-    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h", nullptr, nullptr);
-  } else {
-    use_addr2line = false;
-  }
-
-  for (Backtrace::const_iterator it = backtrace->begin();
-       it != backtrace->end(); ++it) {
-    // We produce output like this:
-    // ]    #00 pc 000075bb8  /system/lib/libc.so (unwind_backtrace_thread+536)
-    // In order for parsing tools to continue to function, the stack dump
-    // format must at least adhere to this format:
-    //  #XX pc <RELATIVE_ADDR>  <FULL_PATH_TO_SHARED_LIBRARY> ...
-    // The parsers require a single space before and after pc, and two spaces
-    // after the <RELATIVE_ADDR>. There can be any prefix data before the
-    // #XX. <RELATIVE_ADDR> has to be a hex number but with no 0x prefix.
-    os << prefix << StringPrintf("#%02zu pc ", it->num);
-    bool try_addr2line = false;
-    if (!BacktraceMap::IsValid(it->map)) {
-      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  ???"
-                                                            : "%08" PRIxPTR "  ???",
-                         it->pc);
-    } else {
-      os << StringPrintf(Is64BitInstructionSet(kRuntimeISA) ? "%016" PRIxPTR "  "
-                                                            : "%08" PRIxPTR "  ",
-                         BacktraceMap::GetRelativePc(it->map, it->pc));
-      os << it->map.name;
-      os << " (";
-      if (!it->func_name.empty()) {
-        os << it->func_name;
-        if (it->func_offset != 0) {
-          os << "+" << it->func_offset;
-        }
-        try_addr2line = true;
-      } else if (current_method != nullptr &&
-          Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
-          PcIsWithinQuickCode(current_method, it->pc)) {
-        const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
-        os << JniLongName(current_method) << "+"
-           << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
-      } else {
-        os << "???";
-      }
-      os << ")";
-    }
-    os << "\n";
-    if (try_addr2line && use_addr2line) {
-      Addr2line(it->map.name, it->pc - it->map.start, os, prefix);
-    }
-  }
-#else
-  UNUSED(os, tid, existing_map, prefix, current_method, ucontext_ptr);
-#endif
-}
-
-#if defined(__APPLE__)
-
-// TODO: is there any way to get the kernel stack on Mac OS?
-void DumpKernelStack(std::ostream&, pid_t, const char*, bool) {}
-
-#else
-
-void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix, bool include_count) {
-  if (tid == GetTid()) {
-    // There's no point showing that we're reading our stack out of /proc!
-    return;
-  }
-
-  std::string kernel_stack_filename(StringPrintf("/proc/self/task/%d/stack", tid));
-  std::string kernel_stack;
-  if (!ReadFileToString(kernel_stack_filename, &kernel_stack)) {
-    os << prefix << "(couldn't read " << kernel_stack_filename << ")\n";
-    return;
-  }
-
-  std::vector<std::string> kernel_stack_frames;
-  Split(kernel_stack, '\n', &kernel_stack_frames);
-  // We skip the last stack frame because it's always equivalent to "[<ffffffff>] 0xffffffff",
-  // which looking at the source appears to be the kernel's way of saying "that's all, folks!".
-  kernel_stack_frames.pop_back();
-  for (size_t i = 0; i < kernel_stack_frames.size(); ++i) {
-    // Turn "[<ffffffff8109156d>] futex_wait_queue_me+0xcd/0x110"
-    // into "futex_wait_queue_me+0xcd/0x110".
-    const char* text = kernel_stack_frames[i].c_str();
-    const char* close_bracket = strchr(text, ']');
-    if (close_bracket != nullptr) {
-      text = close_bracket + 2;
-    }
-    os << prefix;
-    if (include_count) {
-      os << StringPrintf("#%02zd ", i);
-    }
-    os << text << "\n";
-  }
-}
-
-#endif
-
 const char* GetAndroidRoot() {
   const char* android_root = getenv("ANDROID_ROOT");
   if (android_root == nullptr) {
diff --git a/runtime/utils.h b/runtime/utils.h
index b2746ee..699b732 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -242,21 +242,6 @@
 // implementation-defined limit.
 void SetThreadName(const char* thread_name);
 
-// Dumps the native stack for thread 'tid' to 'os'.
-void DumpNativeStack(std::ostream& os,
-                     pid_t tid,
-                     BacktraceMap* map = nullptr,
-                     const char* prefix = "",
-                     ArtMethod* current_method = nullptr,
-                     void* ucontext = nullptr)
-    NO_THREAD_SAFETY_ANALYSIS;
-
-// Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
-void DumpKernelStack(std::ostream& os,
-                     pid_t tid,
-                     const char* prefix = "",
-                     bool include_count = true);
-
 // Find $ANDROID_ROOT, /system, or abort.
 const char* GetAndroidRoot();
 
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 8d7d70d..2d77722 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -535,13 +535,10 @@
 # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
-#      instruction yet (b/26601270).
 # 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
 #      not yet handled in the read barrier configuration.
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
   484-checker-register-hints \
-  527-checker-array-access-split \
   537-checker-arraycopy
 
 # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index d88a4a0..12e0338 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -21,7 +21,7 @@
 
 out_dir=${OUT_DIR-out}
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
-common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests ${out_dir}/host/linux-x86/bin/jack"
+common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests mockito-target ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 8d87e4f..e5d7597 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -180,12 +180,6 @@
   bug: 25437292
 },
 {
-  description: "Failing tests after OpenJDK move.",
-  result: EXEC_FAILED,
-  bug: 26326992,
-  names: ["libcore.java.lang.OldSystemTest#test_load"]
-},
-{
   description: "Missing resource in classpath",
   result: EXEC_FAILED,
   modes: [device],
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 3605aa0..2a6e172 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -25,17 +25,22 @@
   JAVA_LIBRARIES=${ANDROID_PRODUCT_OUT}/../../common/obj/JAVA_LIBRARIES
 fi
 
-# Jar containing jsr166 tests.
-jsr166_test_jack=${JAVA_LIBRARIES}/jsr166-tests_intermediates/classes.jack
+function cparg {
+  for var
+  do
+    printf -- "--classpath ${JAVA_LIBRARIES}/${var}_intermediates/classes.jack ";
+  done
+}
 
-# Jar containing all the other tests.
-test_jack=${JAVA_LIBRARIES}/core-tests_intermediates/classes.jack
+DEPS="core-tests jsr166-tests mockito-target"
 
-if [ ! -f $test_jack ]; then
-  echo "Before running, you must build core-tests, jsr166-tests and vogar: \
-        make core-tests jsr166-tests vogar"
-  exit 1
-fi
+for lib in $DEPS
+do
+  if [ ! -f "${JAVA_LIBRARIES}/${lib}_intermediates/classes.jack" ]; then
+    echo "${lib} is missing. Before running, you must run art/tools/buildbot-build.sh"
+    exit 1
+  fi
+done
 
 expectations="--expectations art/tools/libcore_failures.txt"
 if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
@@ -133,4 +138,4 @@
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $vogar_args $expectations --classpath $jsr166_test_jack --classpath $test_jack ${working_packages[@]}
+vogar $vogar_args $expectations $(cparg $DEPS) ${working_packages[@]}