Merge changes I5dd60978,I0c2b804c

* changes:
  Remove code replacing core.jar -> core-libart.jar.
  Remove obsolete code related to patchoat.
diff --git a/dt_fd_forward/Android.bp b/dt_fd_forward/Android.bp
index 1ba2323..2a2aa18 100644
--- a/dt_fd_forward/Android.bp
+++ b/dt_fd_forward/Android.bp
@@ -41,6 +41,7 @@
     header_libs: [
         "javavm_headers",
         "dt_fd_forward_export",
+        "art_libartbase_headers",  // For strlcpy emulation.
     ],
     multilib: {
         lib32: {
diff --git a/dt_fd_forward/dt_fd_forward.cc b/dt_fd_forward/dt_fd_forward.cc
index a99f785..d5b6de5 100644
--- a/dt_fd_forward/dt_fd_forward.cc
+++ b/dt_fd_forward/dt_fd_forward.cc
@@ -50,6 +50,8 @@
 #include <jni.h>
 #include <jdwpTransport.h>
 
+#include <base/strlcpy.h>
+
 namespace dt_fd_forward {
 
 // Helper that puts line-number in error message.
@@ -651,7 +653,7 @@
 jdwpTransportError FdForwardTransport::GetLastError(/*out*/char** err) {
   std::string data = global_last_error_;
   *err = reinterpret_cast<char*>(Alloc(data.size() + 1));
-  strcpy(*err, data.c_str());
+  strlcpy(*err, data.c_str(), data.size() + 1);
   return OK;
 }
 
diff --git a/openjdkjvmti/ti_logging.cc b/openjdkjvmti/ti_logging.cc
index 1d24d3b..60f4340 100644
--- a/openjdkjvmti/ti_logging.cc
+++ b/openjdkjvmti/ti_logging.cc
@@ -34,6 +34,7 @@
 #include "art_jvmti.h"
 
 #include "base/mutex.h"
+#include "base/strlcpy.h"
 #include "thread-current-inl.h"
 
 namespace openjdkjvmti {
@@ -47,13 +48,13 @@
   if (tienv->last_error_.empty()) {
     return ERR(ABSENT_INFORMATION);
   }
+  const size_t size = tienv->last_error_.size() + 1;
   char* out;
-  jvmtiError err = tienv->Allocate(tienv->last_error_.size() + 1,
-                                   reinterpret_cast<unsigned char**>(&out));
+  jvmtiError err = tienv->Allocate(size, reinterpret_cast<unsigned char**>(&out));
   if (err != OK) {
     return err;
   }
-  strcpy(out, tienv->last_error_.c_str());
+  strlcpy(out, tienv->last_error_.c_str(), size);
   *data = out;
   return OK;
 }
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index c5fb7d5..53e4c11 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -399,6 +399,10 @@
   return FindFieldWithOffset<kExactOffset>(klass->GetSFields(), field_offset);
 }
 
+inline mirror::ClassLoader* ArtField::GetClassLoader() {
+  return GetDeclaringClass()->GetClassLoader();
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_FIELD_INL_H_
diff --git a/runtime/art_field.h b/runtime/art_field.h
index 0a4aa7e..99f2a1c 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -34,6 +34,7 @@
 
 namespace mirror {
 class Class;
+class ClassLoader;
 class DexCache;
 class Object;
 class String;
@@ -44,6 +45,8 @@
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ObjPtr<mirror::Class> GetDeclaringClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  mirror::ClassLoader* GetClassLoader() REQUIRES_SHARED(Locks::mutator_lock_);
+
   void SetDeclaringClass(ObjPtr<mirror::Class> new_declaring_class)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index f254116..c240017 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -31,6 +31,7 @@
 #include "dex/invoke_type.h"
 #include "dex/primitive.h"
 #include "gc_root-inl.h"
+#include "imtable-inl.h"
 #include "intrinsics_enum.h"
 #include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
@@ -421,6 +422,31 @@
   return CodeItemDebugInfoAccessor(*GetDexFile(), GetCodeItem(), GetDexMethodIndex());
 }
 
+inline void ArtMethod::SetCounter(int16_t hotness_count) {
+  DCHECK(!IsAbstract()) << PrettyMethod();
+  hotness_count_ = hotness_count;
+}
+
+inline uint16_t ArtMethod::GetCounter() {
+  DCHECK(!IsAbstract()) << PrettyMethod();
+  return hotness_count_;
+}
+
+inline uint32_t ArtMethod::GetImtIndex() {
+  if (LIKELY(IsAbstract() && imt_index_ != 0)) {
+    uint16_t imt_index = ~imt_index_;
+    DCHECK_EQ(imt_index, ImTable::GetImtIndex(this)) << PrettyMethod();
+    return imt_index;
+  } else {
+    return ImTable::GetImtIndex(this);
+  }
+}
+
+inline void ArtMethod::CalculateAndSetImtIndex() {
+  DCHECK(IsAbstract()) << PrettyMethod();
+  imt_index_ = ~ImTable::GetImtIndex(this);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_METHOD_INL_H_
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 5bbee92..cc214f7 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -650,24 +650,13 @@
   void CopyFrom(ArtMethod* src, PointerSize image_pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Note, hotness_counter_ updates are non-atomic but it doesn't need to be precise.  Also,
-  // given that the counter is only 16 bits wide we can expect wrap-around in some
-  // situations.  Consumers of hotness_count_ must be able to deal with that.
-  uint16_t IncrementCounter() {
-    return ++hotness_count_;
-  }
+  ALWAYS_INLINE void SetCounter(int16_t hotness_count) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void ClearCounter() {
-    hotness_count_ = 0;
-  }
+  ALWAYS_INLINE uint16_t GetCounter() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetCounter(int16_t hotness_count) {
-    hotness_count_ = hotness_count;
-  }
+  ALWAYS_INLINE uint32_t GetImtIndex() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  uint16_t GetCounter() const {
-    return hotness_count_;
-  }
+  void CalculateAndSetImtIndex() REQUIRES_SHARED(Locks::mutator_lock_);
 
   static constexpr MemberOffset HotnessCountOffset() {
     return MemberOffset(OFFSETOF_MEMBER(ArtMethod, hotness_count_));
@@ -772,9 +761,14 @@
   // ifTable.
   uint16_t method_index_;
 
-  // The hotness we measure for this method. Not atomic, as we allow
-  // missing increments: if the method is hot, we will see it eventually.
-  uint16_t hotness_count_;
+  union {
+    // Non-abstract methods: The hotness we measure for this method. Not atomic,
+    // as we allow missing increments: if the method is hot, we will see it eventually.
+    uint16_t hotness_count_;
+    // Abstract methods: IMT index (bitwise negated) or zero if it was not cached.
+    // The negation is needed to distinguish zero index and missing cached entry.
+    uint16_t imt_index_;
+  };
 
   // Fake padding field gets inserted here.
 
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 0c8fe58..e391a15 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -73,13 +73,17 @@
   // level lock, it is permitted to acquire a second one - with internal safeguards to ensure that
   // the second lock acquisition does not result in deadlock. This is implemented in the lock
   // order by treating the second acquisition of a kThreadWaitLock as a kThreadWaitWakeLock
-  // acquisition. Thus, acquiring kThreadWaitWakeLock requires holding kThreadWaitLock.
+  // acquisition. Thus, acquiring kThreadWaitWakeLock requires holding kThreadWaitLock. This entry
+  // is here near the bottom of the hierarchy because other locks should not be
+  // acquired while it is held. kThreadWaitLock cannot be moved here because GC
+  // activity acquires locks while holding the wait lock.
   kThreadWaitWakeLock,
-  kThreadWaitLock,
   kJdwpAdbStateLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
   kMarkSweepMarkStackLock,
+  // Can be held while GC related work is done, and thus must be above kMarkSweepMarkStackLock
+  kThreadWaitLock,
   kCHALock,
   kJitCodeCacheLock,
   kRosAllocGlobalLock,
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 7e01137..6703205 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -135,7 +135,7 @@
   ObjPtr<mirror::Class> resolved_type =
       referrer->GetDexCache<kWithoutReadBarrier>()->GetResolvedType(type_idx);
   if (UNLIKELY(resolved_type == nullptr)) {
-    resolved_type = DoResolveType(type_idx, referrer->GetDeclaringClass());
+    resolved_type = DoResolveType(type_idx, referrer);
   }
   return resolved_type;
 }
@@ -149,7 +149,7 @@
   ObjPtr<mirror::Class> resolved_type =
       referrer->GetDexCache<kWithoutReadBarrier>()->GetResolvedType(type_idx);
   if (UNLIKELY(resolved_type == nullptr)) {
-    resolved_type = DoResolveType(type_idx, referrer->GetDeclaringClass());
+    resolved_type = DoResolveType(type_idx, referrer);
   }
   return resolved_type;
 }
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 03cb95c..991faa2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3634,6 +3634,10 @@
         dex_file, dst->GetClassDef(), dex_method_idx);
   }
   dst->SetAccessFlags(access_flags);
+  // Must be done after SetAccessFlags since IsAbstract depends on it.
+  if (klass->IsInterface() && dst->IsAbstract()) {
+    dst->CalculateAndSetImtIndex();
+  }
 }
 
 void ClassLinker::AppendToBootClassPath(Thread* self, const DexFile& dex_file) {
@@ -6723,7 +6727,7 @@
       // or interface methods in the IMT here they will not create extra conflicts since we compare
       // names and signatures in SetIMTRef.
       ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-      const uint32_t imt_index = ImTable::GetImtIndex(interface_method);
+      const uint32_t imt_index = interface_method->GetImtIndex();
 
       // There is only any conflicts if all of the interface methods for an IMT slot don't have
       // the same implementation method, keep track of this to avoid creating a conflict table in
@@ -6777,7 +6781,7 @@
         }
         DCHECK(implementation_method != nullptr);
         ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size_);
-        const uint32_t imt_index = ImTable::GetImtIndex(interface_method);
+        const uint32_t imt_index = interface_method->GetImtIndex();
         if (!imt[imt_index]->IsRuntimeMethod() ||
             imt[imt_index] == unimplemented_method ||
             imt[imt_index] == imt_conflict_method) {
@@ -7703,7 +7707,7 @@
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
         MethodNameAndSignatureComparator interface_name_comparator(
             interface_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-        uint32_t imt_index = ImTable::GetImtIndex(interface_method);
+        uint32_t imt_index = interface_method->GetImtIndex();
         ArtMethod** imt_ptr = &out_imt[imt_index];
         // For each method listed in the interface's method list, find the
         // matching method in our class's method list.  We want to favor the
@@ -8180,14 +8184,22 @@
   return type;
 }
 
-ObjPtr<mirror::Class> ClassLinker::DoResolveType(dex::TypeIndex type_idx,
-                                                 ObjPtr<mirror::Class> referrer) {
+template <typename T>
+ObjPtr<mirror::Class> ClassLinker::DoResolveType(dex::TypeIndex type_idx, T referrer) {
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referrer->GetClassLoader()));
   return DoResolveType(type_idx, dex_cache, class_loader);
 }
 
+// Instantiate the above.
+template ObjPtr<mirror::Class> ClassLinker::DoResolveType(dex::TypeIndex type_idx,
+                                                          ArtField* referrer);
+template ObjPtr<mirror::Class> ClassLinker::DoResolveType(dex::TypeIndex type_idx,
+                                                          ArtMethod* referrer);
+template ObjPtr<mirror::Class> ClassLinker::DoResolveType(dex::TypeIndex type_idx,
+                                                          ObjPtr<mirror::Class> referrer);
+
 ObjPtr<mirror::Class> ClassLinker::DoResolveType(dex::TypeIndex type_idx,
                                                  Handle<mirror::DexCache> dex_cache,
                                                  Handle<mirror::ClassLoader> class_loader) {
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 47931fe..15a7204 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -931,8 +931,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Implementation of ResolveType() called when the type was not found in the dex cache.
-  ObjPtr<mirror::Class> DoResolveType(dex::TypeIndex type_idx,
-                                      ObjPtr<mirror::Class> referrer)
+  template <typename T>
+  ObjPtr<mirror::Class> DoResolveType(dex::TypeIndex type_idx, T referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
   ObjPtr<mirror::Class> DoResolveType(dex::TypeIndex type_idx,
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 2236e61..a18cca4 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -533,13 +533,7 @@
       UNREACHABLE();
     }
     case kInterface: {
-      size_t imt_index;
-      InterpreterCache* tls_cache = self->GetInterpreterCache();
-      if (UNLIKELY(!tls_cache->Get(resolved_method, &imt_index))) {
-        imt_index = ImTable::GetImtIndex(resolved_method);
-        tls_cache->Set(resolved_method, imt_index);
-      }
-      DCHECK_EQ(imt_index, ImTable::GetImtIndex(resolved_method));
+      size_t imt_index = resolved_method->GetImtIndex();
       PointerSize pointer_size = class_linker->GetImagePointerSize();
       ObjPtr<mirror::Class> klass = (*this_object)->GetClass();
       ArtMethod* imt_method = klass->GetImt(pointer_size)->Get(imt_index, pointer_size);
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1472490..b6adcf0 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2671,7 +2671,7 @@
 
   DCHECK(!interface_method->IsRuntimeMethod());
   // Look whether we have a match in the ImtConflictTable.
-  uint32_t imt_index = ImTable::GetImtIndex(interface_method);
+  uint32_t imt_index = interface_method->GetImtIndex();
   ArtMethod* conflict_method = imt->Get(imt_index, kRuntimePointerSize);
   if (LIKELY(conflict_method->IsRuntimeMethod())) {
     ImtConflictTable* current_table = conflict_method->GetImtConflictTable(kRuntimePointerSize);
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f451978..040a8c5 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include <setjmp.h>
-
 #include <memory>
 
 #include "base/macros.h"
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 1ab0b0e..5c2830d 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -16,7 +16,6 @@
 
 #include "fault_handler.h"
 
-#include <setjmp.h>
 #include <string.h>
 #include <sys/mman.h>
 #include <sys/ucontext.h>
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 02eeefe..24ef0b1 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -18,7 +18,6 @@
 #ifndef ART_RUNTIME_FAULT_HANDLER_H_
 #define ART_RUNTIME_FAULT_HANDLER_H_
 
-#include <setjmp.h>
 #include <signal.h>
 #include <stdint.h>
 
diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc
index b39628b..e66a174 100644
--- a/runtime/gc/accounting/mod_union_table_test.cc
+++ b/runtime/gc/accounting/mod_union_table_test.cc
@@ -162,9 +162,9 @@
     }
     default: {
       UNIMPLEMENTED(FATAL) << "Invalid type " << type;
+      UNREACHABLE();
     }
   }
-  return nullptr;
 }
 
 TEST_F(ModUnionTableTest, TestCardCache) {
diff --git a/runtime/image.cc b/runtime/image.cc
index 3023cef..f50c39c 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -26,7 +26,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '6', '9', '\0' };  // Remove boot oat extents.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '7', '0', '\0' };  // Store ImtIndex.
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h
index 355058f..003ea6c 100644
--- a/runtime/interpreter/interpreter_cache.h
+++ b/runtime/interpreter/interpreter_cache.h
@@ -38,7 +38,6 @@
 //   iget/iput: The field offset. The field must be non-volatile.
 //   sget/sput: The ArtField* pointer. The field must be non-volitile.
 //   invoke: The ArtMethod* pointer (before vtable indirection, etc).
-//   ArtMethod*: The ImtIndex of the method.
 //
 // We ensure consistency of the cache by clearing it
 // whenever any dex file is unloaded.
diff --git a/runtime/interpreter/mterp/arm/arithmetic.S b/runtime/interpreter/mterp/arm/arithmetic.S
index 7a373c7..a6ba454 100644
--- a/runtime/interpreter/mterp/arm/arithmetic.S
+++ b/runtime/interpreter/mterp/arm/arithmetic.S
@@ -157,8 +157,8 @@
     VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
-    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
-    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
     .if $chkzero
     orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
     beq     common_errDivideByZero
@@ -168,7 +168,7 @@
     $preinstr                           @ optional op; may set condition codes
     $instr                              @ result<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    SET_VREG_WIDE_BY_ADDR $result0,$result1,r9  @ vAA/vAA+1<- $result0/$result1
     GOTO_OPCODE ip                      @ jump to next instruction
     /* 14-17 instructions */
 
@@ -192,8 +192,8 @@
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
     VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
-    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
-    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r1    @ r2/r3<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vAA/vAA+1
     .if $chkzero
     orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
     beq     common_errDivideByZero
@@ -203,7 +203,7 @@
     $preinstr                           @ optional op; may set condition codes
     $instr                              @ result<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {$result0,$result1}     @ vAA/vAA+1<- $result0/$result1
+    SET_VREG_WIDE_BY_ADDR $result0,$result1,r9  @ vAA/vAA+1<- $result0/$result1
     GOTO_OPCODE ip                      @ jump to next instruction
     /* 12-15 instructions */
 
@@ -243,7 +243,7 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vB/vB+1
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $preinstr                           @ optional op; may set condition codes
     $instr                              @ r0<- op, r0-r3 changed
@@ -265,13 +265,13 @@
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
     VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vB/vB+1
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $preinstr                           @ optional op; may set condition codes
     $instr                              @ r0/r1<- op, r2-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
     /* 10-11 instructions */
 
@@ -293,7 +293,7 @@
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $instr                              @ r0<- op, r0-r3 changed
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vA/vA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vA/vA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
     /* 9-10 instructions */
 
@@ -345,8 +345,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
-    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
-    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
     cmp     r0, r2
     sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
     mov     ip, #0
@@ -541,8 +541,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
-    ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
-    ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
     mul     ip, r2, r1                  @ ip<- ZxW
     umull   r1, lr, r2, r0              @ r1/lr <- ZxX
     mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
@@ -552,7 +552,7 @@
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r1-r2 }                @ vAA/vAA+1<- r1/r2
+    SET_VREG_WIDE_BY_ADDR r1, r2 , r0   @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_mul_long_2addr():
@@ -569,8 +569,8 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
     VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
-    ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
-    ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r1    @ r2/r3<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, rINST @ r0/r1<- vAA/vAA+1
     mul     ip, r2, r1                  @ ip<- ZxW
     umull   r1, lr, r2, r0              @ r1/lr <- ZxX
     mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
@@ -578,7 +578,7 @@
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     add     r2, r2, lr                  @ r2<- r2 + low(ZxW + (YxX))
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
+    SET_VREG_WIDE_BY_ADDR r1, r2, r0    @ vAA/vAA+1<- r1/r2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_neg_int():
@@ -781,7 +781,7 @@
     mov     r0, r0, lsr #8              @ r0<- CC
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
@@ -793,7 +793,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_shl_long_2addr():
@@ -808,7 +808,7 @@
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vAA/vAA+1
     mov     r1, r1, asl r2              @ r1<- r1 << r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 << (32-r2))
@@ -817,7 +817,7 @@
     movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
     mov     r0, r0, asl r2              @ r0<- r0 << r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_shr_int():
@@ -843,7 +843,7 @@
     mov     r0, r0, lsr #8              @ r0<- CC
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
@@ -855,7 +855,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_shr_long_2addr():
@@ -870,7 +870,7 @@
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
@@ -879,7 +879,7 @@
     movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
     mov     r1, r1, asr r2              @ r1<- r1 >> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_sub_int():
@@ -917,7 +917,7 @@
     mov     r0, r0, lsr #8              @ r0<- CC
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
-    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
@@ -929,7 +929,7 @@
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_ushr_long_2addr():
@@ -944,7 +944,7 @@
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r9    @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
@@ -953,7 +953,7 @@
     movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<-r1 >>> (r2-32)
     mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA/vAA+1<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_xor_int():
diff --git a/runtime/interpreter/mterp/arm/array.S b/runtime/interpreter/mterp/arm/array.S
index 88d89c5..7b3db61 100644
--- a/runtime/interpreter/mterp/arm/array.S
+++ b/runtime/interpreter/mterp/arm/array.S
@@ -87,7 +87,7 @@
     ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
+    SET_VREG_WIDE_BY_ADDR r2, r3, r9    @ vAA/vAA+1<- r2/r3
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_aput(store="str", shift="2", data_offset="MIRROR_INT_ARRAY_DATA_OFFSET"):
@@ -169,7 +169,7 @@
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r9    @ r2/r3<- vAA/vAA+1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     strd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/control_flow.S b/runtime/interpreter/mterp/arm/control_flow.S
index 51832e1..2299ef9 100644
--- a/runtime/interpreter/mterp/arm/control_flow.S
+++ b/runtime/interpreter/mterp/arm/control_flow.S
@@ -189,7 +189,7 @@
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
-    ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
+    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1 <- vAA/vAA+1
     b       MterpReturn
 
 %def op_sparse_switch():
diff --git a/runtime/interpreter/mterp/arm/floating_point.S b/runtime/interpreter/mterp/arm/floating_point.S
index 21c386e..9e4d00c 100644
--- a/runtime/interpreter/mterp/arm/floating_point.S
+++ b/runtime/interpreter/mterp/arm/floating_point.S
@@ -13,8 +13,8 @@
     and     r2, r0, #255                @ r2<- BB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
-    flds    s1, [r3]                    @ s1<- vCC
-    flds    s0, [r2]                    @ s0<- vBB
+    GET_VREG_FLOAT_BY_ADDR s1, r3       @ s1<- vCC
+    GET_VREG_FLOAT_BY_ADDR s0, r2       @ s0<- vBB
 
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     $instr                              @ s2<- op
@@ -35,12 +35,12 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
-    flds    s1, [r3]                    @ s1<- vB
+    GET_VREG_FLOAT_BY_ADDR s1, r3       @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    flds    s0, [r9]                    @ s0<- vA
+    GET_VREG_FLOAT_BY_ADDR s0, r9       @ s0<- vA
     $instr                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    fsts    s2, [r9]                    @ vAA<- s2 No need to clear as it's 2addr
+    SET_VREG_FLOAT_BY_ADDR s2, r9       @ vAA<- s2 No need to clear as it's 2addr
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def fbinopWide(instr=""):
@@ -58,14 +58,14 @@
     and     r2, r0, #255                @ r2<- BB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
-    fldd    d1, [r3]                    @ d1<- vCC
-    fldd    d0, [r2]                    @ d0<- vBB
+    GET_VREG_DOUBLE_BY_ADDR d1, r3      @ d1<- vCC
+    GET_VREG_DOUBLE_BY_ADDR d0, r2      @ d0<- vBB
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     $instr                              @ s2<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vAA
-    fstd    d2, [r9]                    @ vAA<- d2
+    SET_VREG_DOUBLE_BY_ADDR d2, r9      @ vAA<- d2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def fbinopWide2addr(instr=""):
@@ -82,13 +82,13 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
-    fldd    d1, [r3]                    @ d1<- vB
+    GET_VREG_DOUBLE_BY_ADDR d1, r3      @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    fldd    d0, [r9]                    @ d0<- vA
+    GET_VREG_DOUBLE_BY_ADDR d0, r9      @ d0<- vA
     $instr                              @ d2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    fstd    d2, [r9]                    @ vAA<- d2
+    SET_VREG_DOUBLE_BY_ADDR d2, r9      @ vAA<- d2
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def funop(instr=""):
@@ -101,7 +101,7 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    flds    s0, [r3]                    @ s0<- vB
+    GET_VREG_FLOAT_BY_ADDR s0, r3       @ s0<- vB
     ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $instr                              @ s1<- op
@@ -119,7 +119,7 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    fldd    d0, [r3]                    @ d0<- vB
+    GET_VREG_DOUBLE_BY_ADDR d0, r3      @ d0<- vB
     ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $instr                              @ s0<- op
@@ -137,14 +137,14 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    flds    s0, [r3]                    @ s0<- vB
+    GET_VREG_FLOAT_BY_ADDR s0, r3       @ s0<- vB
     ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $instr                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
-    fstd    d0, [r9]                    @ vA<- d0
+    SET_VREG_DOUBLE_BY_ADDR d0, r9      @ vA<- d0
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_add_double():
@@ -183,8 +183,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
-    fldd    d0, [r2]                    @ d0<- vBB
-    fldd    d1, [r3]                    @ d1<- vCC
+    GET_VREG_DOUBLE_BY_ADDR d0, r2      @ d0<- vBB
+    GET_VREG_DOUBLE_BY_ADDR d1, r3      @ d1<- vCC
     vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, #1                      @ r0<- 1 (default)
@@ -219,8 +219,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
-    flds    s0, [r2]                    @ s0<- vBB
-    flds    s1, [r3]                    @ s1<- vCC
+    GET_VREG_FLOAT_BY_ADDR s0, r2       @ s0<- vBB
+    GET_VREG_FLOAT_BY_ADDR s1, r3       @ s1<- vCC
     vcmpe.f32 s0, s1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mov     r0, #1                      @ r0<- 1 (default)
@@ -255,8 +255,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
-    fldd    d0, [r2]                    @ d0<- vBB
-    fldd    d1, [r3]                    @ d1<- vCC
+    GET_VREG_DOUBLE_BY_ADDR d0, r2      @ d0<- vBB
+    GET_VREG_DOUBLE_BY_ADDR d1, r3      @ d1<- vCC
     vcmpe.f64 d0, d1                    @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mvn     r0, #0                      @ r0<- -1 (default)
@@ -291,8 +291,8 @@
     mov     r3, r0, lsr #8              @ r3<- CC
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &vBB
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vCC
-    flds    s0, [r2]                    @ s0<- vBB
-    flds    s1, [r3]                    @ s1<- vCC
+    GET_VREG_FLOAT_BY_ADDR s0, r2       @ s0<- vBB
+    GET_VREG_FLOAT_BY_ADDR s1, r3       @ s1<- vCC
     vcmpe.f32  s0, s1                   @ compare (vBB, vCC)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     mvn     r0, #0                      @ r0<- -1 (default)
diff --git a/runtime/interpreter/mterp/arm/main.S b/runtime/interpreter/mterp/arm/main.S
index 6d6b190..4cf65d1 100644
--- a/runtime/interpreter/mterp/arm/main.S
+++ b/runtime/interpreter/mterp/arm/main.S
@@ -298,6 +298,25 @@
     add     \reg, rFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
 .endm
 
+.macro GET_VREG_WIDE_BY_ADDR reg0, reg1, addr
+    ldmia \addr, {\reg0, \reg1}             /* reg0/reg1<- the two words at addr, addr+4; NOTE(review): ldmia fills by register number, so reg0 is presumably the lower-numbered register - confirm */
+.endm
+.macro SET_VREG_WIDE_BY_ADDR reg0, reg1, addr
+    stmia \addr, {\reg0, \reg1}             /* words at addr, addr+4 <- reg0/reg1; shadow (reference) slots are not touched here */
+.endm
+.macro GET_VREG_FLOAT_BY_ADDR reg, addr
+    flds \reg, [\addr]                      /* reg (VFP single register)<- 32-bit value at addr */
+.endm
+.macro SET_VREG_FLOAT_BY_ADDR reg, addr
+    fsts \reg, [\addr]                      /* 32-bit slot at addr <- reg (VFP single register) */
+.endm
+.macro GET_VREG_DOUBLE_BY_ADDR reg, addr
+    fldd \reg, [\addr]                      /* reg (VFP double register)<- 64-bit value at addr */
+.endm
+.macro SET_VREG_DOUBLE_BY_ADDR reg, addr
+    fstd \reg, [\addr]                      /* 64-bit slot at addr <- reg (VFP double register) */
+.endm
+
 /*
  * Refresh handler table.
  */
diff --git a/runtime/interpreter/mterp/arm/object.S b/runtime/interpreter/mterp/arm/object.S
index 092aa9e..7736383 100644
--- a/runtime/interpreter/mterp/arm/object.S
+++ b/runtime/interpreter/mterp/arm/object.S
@@ -160,7 +160,7 @@
     VREG_INDEX_TO_ADDR r3, r2           @ r3<- &fp[A]
     CLEAR_SHADOW_PAIR r2, ip, lr        @ Zero out the shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r3    @ fp[A]<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_instance_of():
@@ -257,7 +257,7 @@
     cmp     r2, #0                      @ check object for null
     beq     common_errNullObject        @ object was null
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[A]
-    ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
+    GET_VREG_WIDE_BY_ADDR r0, r1, r0    @ r0/r1<- fp[A]/fp[A+1]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     strd    r0, [r2, r3]                @ obj.field<- r0/r1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/other.S b/runtime/interpreter/mterp/arm/other.S
index fcdde1e..31b9354 100644
--- a/runtime/interpreter/mterp/arm/other.S
+++ b/runtime/interpreter/mterp/arm/other.S
@@ -104,7 +104,7 @@
     FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r9, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r9    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_const_wide_16():
@@ -116,7 +116,7 @@
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r3    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_const_wide_32():
@@ -130,7 +130,7 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     mov     r1, r0, asr #31             @ r1<- ssssssss
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r3    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_const_wide_high16():
@@ -143,7 +143,7 @@
     CLEAR_SHADOW_PAIR r3, r0, r2        @ Zero shadow regs
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r3, {r0-r1}                 @ vAA<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r3    @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_monitor_enter():
@@ -279,7 +279,7 @@
     ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r2    @ fp[AA]<- r0/r1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -290,11 +290,11 @@
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
     VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[A]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- fp[B]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r2, {r0-r1}                 @ fp[A]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r2    @ fp[A]<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_move_wide_16():
@@ -304,10 +304,10 @@
     FETCH r2, 1                         @ r2<- AAAA
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
     VREG_INDEX_TO_ADDR lr, r2           @ r2<- &fp[AAAA]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- fp[BBBB]
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r2, r3, ip        @ Zero out the shadow regs
-    stmia   lr, {r0-r1}                 @ fp[AAAA]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, lr    @ fp[AAAA]<- r0/r1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -318,11 +318,11 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
     VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
-    ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
+    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- fp[BBBB]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r2, {r0-r1}                 @ fp[AA]<- r0/r1
+    SET_VREG_WIDE_BY_ADDR r0, r1, r2    @ fp[AA]<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
 
 %def op_nop():
diff --git a/runtime/interpreter/mterp/arm64/floating_point.S b/runtime/interpreter/mterp/arm64/floating_point.S
index 04ca694..ad42db3 100644
--- a/runtime/interpreter/mterp/arm64/floating_point.S
+++ b/runtime/interpreter/mterp/arm64/floating_point.S
@@ -1,5 +1,5 @@
 %def fbinop(instr=""):
-    /*:
+    /*
      * Generic 32-bit floating-point operation.
      *
      * For: add-float, sub-float, mul-float, div-float
@@ -15,7 +15,24 @@
     lsr     w1, wINST, #8               // r1<- AA
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
-    SET_VREG  s0, w1
+    SET_VREG_FLOAT s0, w1
+    GOTO_OPCODE ip                      // jump to next instruction
+
+%def fbinopWide(instr="fadd d0, d1, d2", result="d0", r1="d1", r2="d2"):
+    /*
+     * Generic 64-bit floating-point binary operation: vAA <- vBB op vCC.
+     */
+    /* binop vAA, vBB, vCC */
+    FETCH w0, 1                         // w0<- CCBB
+    lsr     w4, wINST, #8               // w4<- AA
+    lsr     w2, w0, #8                  // w2<- CC
+    and     w1, w0, #255                // w1<- BB
+    GET_VREG_DOUBLE $r2, w2             // $r2<- vCC
+    GET_VREG_DOUBLE $r1, w1             // $r1<- vBB
+    FETCH_ADVANCE_INST 2                // advance rPC, load rINST
+    $instr                              // $result<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_DOUBLE $result, w4         // vAA<- $result
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def fbinop2addr(instr=""):
@@ -34,7 +51,22 @@
     $instr                              // s2<- op
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
-    SET_VREG s2, w9
+    SET_VREG_FLOAT s2, w9
+    GOTO_OPCODE ip                      // jump to next instruction
+
+%def fbinopWide2addr(instr="fadd d0, d0, d1", r0="d0", r1="d1"):
+    /*
+     * Generic 64-bit floating point "/2addr" binary operation: vA <- vA op vB.
+     */
+    /* binop/2addr vA, vB */
+    lsr     w1, wINST, #12              // w1<- B
+    ubfx    w2, wINST, #8, #4           // w2<- A
+    GET_VREG_DOUBLE $r1, w1             // $r1<- vB
+    GET_VREG_DOUBLE $r0, w2             // $r0<- vA
+    FETCH_ADVANCE_INST 1                // advance rPC, load rINST
+    $instr                              // $r0<- op
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    SET_VREG_DOUBLE $r0, w2             // vA<- $r0
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def fcmp(wide="", r1="s1", r2="s2", cond="lt"):
@@ -47,8 +79,13 @@
     lsr     w4, wINST, #8               // w4<- AA
     and     w2, w0, #255                // w2<- BB
     lsr     w3, w0, #8                  // w3<- CC
-    GET_VREG$wide $r1, w2
-    GET_VREG$wide $r2, w3
+%  if r1.startswith("d"):
+    GET_VREG_DOUBLE $r1, w2
+    GET_VREG_DOUBLE $r2, w3
+%  else:
+    GET_VREG $r1, w2
+    GET_VREG $r2, w3
+%  #endif
     fcmp $r1, $r2
     cset w0, ne
     cneg w0, w0, $cond
@@ -72,7 +109,7 @@
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG $tgtreg, w4                // vA<- d0
+    SET_VREG_FLOAT $tgtreg, w4          // vA<- d0
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def funopNarrower(srcreg="s0", tgtreg="d0", instr=""):
@@ -85,11 +122,15 @@
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
     ubfx    w4, wINST, #8, #4           // w4<- A
+%  if srcreg.startswith("d"):
+    GET_VREG_DOUBLE $srcreg, w3
+%  else:
     GET_VREG_WIDE $srcreg, w3
+%  #endif
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
-    SET_VREG $tgtreg, w4                // vA<- d0
+    SET_VREG_FLOAT $tgtreg, w4          // vA<- d0
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def funopWide(srcreg="s0", tgtreg="d0", instr=""):
@@ -102,11 +143,19 @@
     /* unop vA, vB */
     lsr     w3, wINST, #12              // w3<- B
     ubfx    w4, wINST, #8, #4           // w4<- A
+%  if srcreg.startswith("d"):
+    GET_VREG_DOUBLE $srcreg, w3
+%  else:
     GET_VREG_WIDE $srcreg, w3
+%  #endif
     FETCH_ADVANCE_INST 1                // advance rPC, load wINST
     $instr                              // d0<- op
     GET_INST_OPCODE ip                  // extract opcode from wINST
+%  if tgtreg.startswith("d"):
+    SET_VREG_DOUBLE $tgtreg, w4         // vA<- d0
+%  else:
     SET_VREG_WIDE $tgtreg, w4           // vA<- d0
+%  #endif
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def funopWider(srcreg="s0", tgtreg="d0", instr=""):
@@ -127,10 +176,10 @@
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def op_add_double():
-%  binopWide(instr="fadd d0, d1, d2", result="d0", r1="d1", r2="d2")
+%  fbinopWide(instr="fadd d0, d1, d2", result="d0", r1="d1", r2="d2")
 
 %def op_add_double_2addr():
-%  binopWide2addr(instr="fadd     d0, d0, d1", r0="d0", r1="d1")
+%  fbinopWide2addr(instr="fadd     d0, d0, d1", r0="d0", r1="d1")
 
 %def op_add_float():
 %  fbinop(instr="fadd   s0, s0, s1")
@@ -151,10 +200,10 @@
 %  fcmp(wide="", r1="s1", r2="s2", cond="lt")
 
 %def op_div_double():
-%  binopWide(instr="fdiv d0, d1, d2", result="d0", r1="d1", r2="d2")
+%  fbinopWide(instr="fdiv d0, d1, d2", result="d0", r1="d1", r2="d2")
 
 %def op_div_double_2addr():
-%  binopWide2addr(instr="fdiv     d0, d0, d1", r0="d0", r1="d1")
+%  fbinopWide2addr(instr="fdiv     d0, d0, d1", r0="d0", r1="d1")
 
 %def op_div_float():
 %  fbinop(instr="fdiv   s0, s0, s1")
@@ -193,10 +242,10 @@
 %  funopNarrower(instr="scvtf s0, x0", srcreg="x0", tgtreg="s0")
 
 %def op_mul_double():
-%  binopWide(instr="fmul d0, d1, d2", result="d0", r1="d1", r2="d2")
+%  fbinopWide(instr="fmul d0, d1, d2", result="d0", r1="d1", r2="d2")
 
 %def op_mul_double_2addr():
-%  binopWide2addr(instr="fmul     d0, d0, d1", r0="d0", r1="d1")
+%  fbinopWide2addr(instr="fmul     d0, d0, d1", r0="d0", r1="d1")
 
 %def op_mul_float():
 %  fbinop(instr="fmul   s0, s0, s1")
@@ -215,8 +264,8 @@
     FETCH w0, 1                         // w0<- CCBB
     lsr     w2, w0, #8                  // w2<- CC
     and     w1, w0, #255                // w1<- BB
-    GET_VREG_WIDE d1, w2                // d1<- vCC
-    GET_VREG_WIDE d0, w1                // d0<- vBB
+    GET_VREG_DOUBLE d1, w2              // d1<- vCC
+    GET_VREG_DOUBLE d0, w1              // d0<- vBB
     bl  fmod
     lsr     w4, wINST, #8               // w4<- AA
     FETCH_ADVANCE_INST 2                // advance rPC, load rINST
@@ -229,8 +278,8 @@
     /* rem vA, vB */
     lsr     w1, wINST, #12              // w1<- B
     ubfx    w2, wINST, #8, #4           // w2<- A
-    GET_VREG_WIDE d1, w1                // d1<- vB
-    GET_VREG_WIDE d0, w2                // d0<- vA
+    GET_VREG_DOUBLE d1, w1              // d1<- vB
+    GET_VREG_DOUBLE d0, w2              // d0<- vA
     bl fmod
     ubfx    w2, wINST, #8, #4           // w2<- A (need to reload - killed across call)
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
@@ -253,14 +302,14 @@
     ubfx    w9, wINST, #8, #4           // w9<- A
     FETCH_ADVANCE_INST 1                // advance rPC, load rINST
     GET_INST_OPCODE ip                  // extract opcode from rINST
-    SET_VREG s0, w9
+    SET_VREG_FLOAT s0, w9
     GOTO_OPCODE ip                      // jump to next instruction
 
 %def op_sub_double():
-%  binopWide(instr="fsub d0, d1, d2", result="d0", r1="d1", r2="d2")
+%  fbinopWide(instr="fsub d0, d1, d2", result="d0", r1="d1", r2="d2")
 
 %def op_sub_double_2addr():
-%  binopWide2addr(instr="fsub     d0, d0, d1", r0="d0", r1="d1")
+%  fbinopWide2addr(instr="fsub     d0, d0, d1", r0="d0", r1="d1")
 
 %def op_sub_float():
 %  fbinop(instr="fsub   s0, s0, s1")
diff --git a/runtime/interpreter/mterp/arm64/main.S b/runtime/interpreter/mterp/arm64/main.S
index 858cb38..0cfbbff 100644
--- a/runtime/interpreter/mterp/arm64/main.S
+++ b/runtime/interpreter/mterp/arm64/main.S
@@ -259,10 +259,13 @@
     str     \reg, [xFP, \vreg, uxtw #2]
     str     \reg, [xREFS, \vreg, uxtw #2]
 .endm
+.macro SET_VREG_FLOAT reg, vreg
+    str     \reg, [xFP, \vreg, uxtw #2]     /* slot at xFP + vreg*4 <- reg */
+    str     wzr, [xREFS, \vreg, uxtw #2]    /* zero the slot at xREFS + vreg*4 instead of mirroring reg into it */
+.endm
 
 /*
  * Get/set the 64-bit value from a Dalvik register.
- * TUNING: can we do better here?
  */
 .macro GET_VREG_WIDE reg, vreg
     add     ip2, xFP, \vreg, lsl #2
@@ -274,6 +277,16 @@
     add     ip2, xREFS, \vreg, lsl #2
     str     xzr, [ip2]
 .endm
+.macro GET_VREG_DOUBLE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2         /* ip2<- xFP + vreg*4 (ip2 is clobbered) */
+    ldr     \reg, [ip2]                     /* reg<- value stored at that slot */
+.endm
+.macro SET_VREG_DOUBLE reg, vreg
+    add     ip2, xFP, \vreg, lsl #2         /* ip2<- xFP + vreg*4 (ip2 is clobbered) */
+    str     \reg, [ip2]                     /* slot at ip2 <- reg */
+    add     ip2, xREFS, \vreg, lsl #2       /* ip2<- xREFS + vreg*4 */
+    str     xzr, [ip2]                      /* zero 64 bits there, i.e. two adjacent 32-bit slots */
+.endm
 
 /*
  * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit.
diff --git a/runtime/interpreter/mterp/x86/arithmetic.S b/runtime/interpreter/mterp/x86/arithmetic.S
index 3b5f0be..973e5b8 100644
--- a/runtime/interpreter/mterp/x86/arithmetic.S
+++ b/runtime/interpreter/mterp/x86/arithmetic.S
@@ -153,7 +153,7 @@
     movzbl  2(rPC), %eax                    # eax <- BB
     movzbl  3(rPC), %ecx                    # ecx <- CC
     GET_VREG %eax, %eax                     # eax <- vBB
-    $instr                                  # ex: addl    VREG_ADDRESS(%ecx),%eax
+    $instr VREG_ADDRESS(%ecx), %eax
     SET_VREG $result, rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -187,7 +187,7 @@
     sarl    $$4, rINST                      # rINST <- B
     GET_VREG %eax, rINST                    # eax <- vB
     andb    $$0xf, %cl                      # ecx <- A
-    $instr                                  # for ex: addl   %eax,VREG_ADDRESS(%ecx)
+    $instr %eax, VREG_ADDRESS(%ecx)
     CLEAR_REF %ecx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -240,8 +240,8 @@
     movl    rIBASE, LOCAL0(%esp)            # save rIBASE
     GET_VREG rIBASE, %eax                   # rIBASE <- v[BB+0]
     GET_VREG_HIGH %eax, %eax                # eax <- v[BB+1]
-    $instr1                                 # ex: addl   VREG_ADDRESS(%ecx),rIBASE
-    $instr2                                 # ex: adcl   VREG_HIGH_ADDRESS(%ecx),%eax
+    $instr1 VREG_ADDRESS(%ecx), rIBASE
+    $instr2 VREG_HIGH_ADDRESS(%ecx), %eax
     SET_VREG rIBASE, rINST                  # v[AA+0] <- rIBASE
     movl    LOCAL0(%esp), rIBASE            # restore rIBASE
     SET_VREG_HIGH %eax, rINST               # v[AA+1] <- eax
@@ -257,8 +257,8 @@
     GET_VREG %eax, %ecx                     # eax<- v[B+0]
     GET_VREG_HIGH %ecx, %ecx                # eax<- v[B+1]
     andb    $$0xF, rINSTbl                  # rINST<- A
-    $instr1                                 # ex: addl   %eax,(rFP,rINST,4)
-    $instr2                                 # ex: adcl   %ecx,4(rFP,rINST,4)
+    $instr1 %eax, VREG_ADDRESS(rINST)
+    $instr2 %ecx, VREG_HIGH_ADDRESS(rINST)
     CLEAR_WIDE_REF rINST
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -355,10 +355,10 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_add_int():
-%  binop(instr="addl    VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="addl")
 
 %def op_add_int_2addr():
-%  binop2addr(instr="addl    %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="addl")
 
 %def op_add_int_lit16():
 %  binopLit16(instr="addl    %ecx, %eax")
@@ -367,16 +367,16 @@
 %  binopLit8(instr="addl    %ecx, %eax")
 
 %def op_add_long():
-%  binopWide(instr1="addl    VREG_ADDRESS(%ecx), rIBASE", instr2="adcl    VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="addl", instr2="adcl")
 
 %def op_add_long_2addr():
-%  binopWide2addr(instr1="addl    %eax, (rFP,rINST,4)", instr2="adcl    %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="addl", instr2="adcl")
 
 %def op_and_int():
-%  binop(instr="andl    VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="andl")
 
 %def op_and_int_2addr():
-%  binop2addr(instr="andl    %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="andl")
 
 %def op_and_int_lit16():
 %  binopLit16(instr="andl    %ecx, %eax")
@@ -385,10 +385,10 @@
 %  binopLit8(instr="andl    %ecx, %eax")
 
 %def op_and_long():
-%  binopWide(instr1="andl    VREG_ADDRESS(%ecx), rIBASE", instr2="andl    VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="andl", instr2="andl")
 
 %def op_and_long_2addr():
-%  binopWide2addr(instr1="andl    %eax, (rFP,rINST,4)", instr2="andl    %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="andl", instr2="andl")
 
 %def op_cmp_long():
 /*
@@ -666,10 +666,10 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_or_int():
-%  binop(instr="orl     VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="orl")
 
 %def op_or_int_2addr():
-%  binop2addr(instr="orl     %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="orl")
 
 %def op_or_int_lit16():
 %  binopLit16(instr="orl     %ecx, %eax")
@@ -678,10 +678,10 @@
 %  binopLit8(instr="orl     %ecx, %eax")
 
 %def op_or_long():
-%  binopWide(instr1="orl     VREG_ADDRESS(%ecx), rIBASE", instr2="orl     VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="orl", instr2="orl")
 
 %def op_or_long_2addr():
-%  binopWide2addr(instr1="orl     %eax, (rFP,rINST,4)", instr2="orl     %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="orl", instr2="orl")
 
 %def op_rem_int():
 %  bindiv(result="rIBASE", special="$0", rem="1")
@@ -845,16 +845,16 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_sub_int():
-%  binop(instr="subl    VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="subl")
 
 %def op_sub_int_2addr():
-%  binop2addr(instr="subl    %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="subl")
 
 %def op_sub_long():
-%  binopWide(instr1="subl    VREG_ADDRESS(%ecx), rIBASE", instr2="sbbl    VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="subl", instr2="sbbl")
 
 %def op_sub_long_2addr():
-%  binopWide2addr(instr1="subl    %eax, (rFP,rINST,4)", instr2="sbbl    %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="subl", instr2="sbbl")
 
 %def op_ushr_int():
 %  binop1(instr="shrl    %cl, %eax")
@@ -925,10 +925,10 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_xor_int():
-%  binop(instr="xorl    VREG_ADDRESS(%ecx), %eax")
+%  binop(instr="xorl")
 
 %def op_xor_int_2addr():
-%  binop2addr(instr="xorl    %eax, VREG_ADDRESS(%ecx)")
+%  binop2addr(instr="xorl")
 
 %def op_xor_int_lit16():
 %  binopLit16(instr="xorl    %ecx, %eax")
@@ -937,7 +937,7 @@
 %  binopLit8(instr="xorl    %ecx, %eax")
 
 %def op_xor_long():
-%  binopWide(instr1="xorl    VREG_ADDRESS(%ecx), rIBASE", instr2="xorl    VREG_HIGH_ADDRESS(%ecx), %eax")
+%  binopWide(instr1="xorl", instr2="xorl")
 
 %def op_xor_long_2addr():
-%  binopWide2addr(instr1="xorl    %eax, (rFP,rINST,4)", instr2="xorl    %ecx, 4(rFP,rINST,4)")
+%  binopWide2addr(instr1="xorl", instr2="xorl")
diff --git a/runtime/interpreter/mterp/x86/floating_point.S b/runtime/interpreter/mterp/x86/floating_point.S
index 3de1fc8..bc7c59d 100644
--- a/runtime/interpreter/mterp/x86/floating_point.S
+++ b/runtime/interpreter/mterp/x86/floating_point.S
@@ -18,7 +18,7 @@
     /* op vAA, vBB, vCC */
     movzbl  3(rPC), %ecx                    # ecx<- CC
     movzbl  2(rPC), %eax                    # eax<- BB
-    movs${suff} VREG_ADDRESS(%eax), %xmm0
+    GET_VREG_XMM${suff} %xmm0, %eax
     xor     %eax, %eax
     ucomis${suff} VREG_ADDRESS(%ecx), %xmm0
     jp      .L${opcode}_nan_is_${nanval}
@@ -55,9 +55,9 @@
 %def sseBinop(instr="", suff=""):
     movzbl  2(rPC), %ecx                    # ecx <- BB
     movzbl  3(rPC), %eax                    # eax <- CC
-    movs${suff}   VREG_ADDRESS(%ecx), %xmm0  # %xmm0 <- 1st src
+    GET_VREG_XMM${suff} %xmm0, %ecx         # %xmm0 <- 1st src
     ${instr}${suff} VREG_ADDRESS(%eax), %xmm0
-    movs${suff}   %xmm0, VREG_ADDRESS(rINST) # vAA <- %xmm0
+    SET_VREG_XMM${suff} %xmm0, rINST        # vAA <- %xmm0
     pxor    %xmm0, %xmm0
     movs${suff}   %xmm0, VREG_REF_ADDRESS(rINST) # clear ref
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -65,10 +65,10 @@
 %def sseBinop2Addr(instr="", suff=""):
     movzx   rINSTbl, %ecx                   # ecx <- A+
     andl    $$0xf, %ecx                     # ecx <- A
-    movs${suff} VREG_ADDRESS(%ecx), %xmm0      # %xmm0 <- 1st src
+    GET_VREG_XMM${suff} %xmm0, %ecx         # %xmm0 <- 1st src
     sarl    $$4, rINST                      # rINST<- B
     ${instr}${suff} VREG_ADDRESS(rINST), %xmm0
-    movs${suff} %xmm0, VREG_ADDRESS(%ecx)   # vAA<- %xmm0
+    SET_VREG_XMM${suff} %xmm0, %ecx         # vAA<- %xmm0
     pxor    %xmm0, %xmm0
     movs${suff} %xmm0, VREG_REF_ADDRESS(rINST)  # clear ref
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86/main.S b/runtime/interpreter/mterp/x86/main.S
index 0621fb4..6eaea6f 100644
--- a/runtime/interpreter/mterp/x86/main.S
+++ b/runtime/interpreter/mterp/x86/main.S
@@ -318,6 +318,19 @@
     movl    MACRO_LITERAL(0), VREG_REF_HIGH_ADDRESS(\_vreg)
 .endm
 
+.macro GET_VREG_XMMs _xmmreg _vreg
+    movss VREG_ADDRESS(\_vreg), \_xmmreg    /* xmmreg<- scalar single at VREG_ADDRESS(vreg) */
+.endm
+.macro GET_VREG_XMMd _xmmreg _vreg
+    movsd VREG_ADDRESS(\_vreg), \_xmmreg    /* xmmreg<- scalar double at VREG_ADDRESS(vreg) */
+.endm
+.macro SET_VREG_XMMs _xmmreg _vreg
+    movss \_xmmreg, VREG_ADDRESS(\_vreg)    /* VREG_ADDRESS(vreg)<- scalar single; the reference slot is not cleared here */
+.endm
+.macro SET_VREG_XMMd _xmmreg _vreg
+    movsd \_xmmreg, VREG_ADDRESS(\_vreg)    /* VREG_ADDRESS(vreg)<- scalar double; the reference slot is not cleared here */
+.endm
+
 /*
  * function support macros.
  */
diff --git a/runtime/interpreter/mterp/x86_64/arithmetic.S b/runtime/interpreter/mterp/x86_64/arithmetic.S
index 263f82b..ff64b53 100644
--- a/runtime/interpreter/mterp/x86_64/arithmetic.S
+++ b/runtime/interpreter/mterp/x86_64/arithmetic.S
@@ -137,7 +137,7 @@
     movzbq  2(rPC), %rax                    # rax <- BB
     movzbq  3(rPC), %rcx                    # rcx <- CC
     GET_VREG %eax, %rax                     # eax <- vBB
-    $instr                                  # ex: addl    VREG_ADDRESS(%rcx),%eax
+    $instr VREG_ADDRESS(%rcx),%eax
     SET_VREG $result, rINSTq
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -177,7 +177,7 @@
     sarl    $$4, rINST                      # rINST <- B
     andb    $$0xf, %cl                      # ecx <- A
     GET_VREG %eax, rINSTq                   # eax <- vB
-    $instr                                  # for ex: addl   %eax,(rFP,%ecx,4)
+    $instr %eax, VREG_ADDRESS(%rcx)
     CLEAR_REF %rcx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -228,7 +228,7 @@
     movzbq  2(rPC), %rax                    # eax <- BB
     movzbq  3(rPC), %rcx                    # ecx <- CC
     GET_WIDE_VREG %rax, %rax                # rax <- v[BB]
-    $instr                                  # ex: addq   VREG_ADDRESS(%rcx),%rax
+    $instr VREG_ADDRESS(%rcx),%rax
     SET_WIDE_VREG %rax, rINSTq              # v[AA] <- rax
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
 
@@ -241,7 +241,7 @@
     sarl    $$4, rINST                      # rINST <- B
     andb    $$0xf, %cl                      # ecx <- A
     GET_WIDE_VREG %rax, rINSTq              # rax <- vB
-    $instr                                  # for ex: addq   %rax,VREG_ADDRESS(%rcx)
+    $instr %rax,VREG_ADDRESS(%rcx)
     CLEAR_WIDE_REF %rcx
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
@@ -255,7 +255,7 @@
     movl    rINST, %ecx                     # rcx <- A+
     sarl    $$4, rINST                      # rINST <- B
     andb    $$0xf, %cl                      # ecx <- A
-    movs${fp_suffix}   VREG_ADDRESS(rINSTq), %xmm0
+    GET_VREG_XMM${fp_suffix} %xmm0, rINSTq
     mov${i_suffix}  ${max_const}, ${result_reg}
     cvtsi2s${fp_suffix}${i_suffix} ${result_reg}, %xmm1
     comis${fp_suffix}    %xmm1, %xmm0
@@ -317,10 +317,10 @@
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 %def op_add_int():
-%  binop(instr="addl    VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="addl")
 
 %def op_add_int_2addr():
-%  binop2addr(instr="addl    %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="addl")
 
 %def op_add_int_lit16():
 %  binopLit16(instr="addl    %ecx, %eax")
@@ -329,16 +329,16 @@
 %  binopLit8(instr="addl    %ecx, %eax")
 
 %def op_add_long():
-%  binopWide(instr="addq    VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="addq")
 
 %def op_add_long_2addr():
-%  binopWide2addr(instr="addq    %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="addq")
 
 %def op_and_int():
-%  binop(instr="andl    VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="andl")
 
 %def op_and_int_2addr():
-%  binop2addr(instr="andl    %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="andl")
 
 %def op_and_int_lit16():
 %  binopLit16(instr="andl    %ecx, %eax")
@@ -347,10 +347,10 @@
 %  binopLit8(instr="andl    %ecx, %eax")
 
 %def op_and_long():
-%  binopWide(instr="andq    VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="andq")
 
 %def op_and_long_2addr():
-%  binopWide2addr(instr="andq    %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="andq")
 
 %def op_cmp_long():
 /*
@@ -413,7 +413,7 @@
 %  op_move()
 
 %def op_mul_int():
-%  binop(instr="imull   VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="imull")
 
 %def op_mul_int_2addr():
     /* mul vA, vB */
@@ -432,7 +432,7 @@
 %  binopLit8(instr="imull   %ecx, %eax")
 
 %def op_mul_long():
-%  binopWide(instr="imulq   VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="imulq")
 
 %def op_mul_long_2addr():
     /* mul vA, vB */
@@ -457,10 +457,10 @@
 %  unop(instr="    notq    %rax", wide="1")
 
 %def op_or_int():
-%  binop(instr="orl     VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="orl")
 
 %def op_or_int_2addr():
-%  binop2addr(instr="orl     %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="orl")
 
 %def op_or_int_lit16():
 %  binopLit16(instr="orl     %ecx, %eax")
@@ -469,10 +469,10 @@
 %  binopLit8(instr="orl     %ecx, %eax")
 
 %def op_or_long():
-%  binopWide(instr="orq     VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="orq")
 
 %def op_or_long_2addr():
-%  binopWide2addr(instr="orq     %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="orq")
 
 %def op_rem_int():
 %  bindiv(result="%edx", second="%ecx", wide="0", suffix="l", rem="1")
@@ -530,16 +530,16 @@
 %  shop2addr(instr="sarq    %cl, %rax", wide="1")
 
 %def op_sub_int():
-%  binop(instr="subl    VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="subl")
 
 %def op_sub_int_2addr():
-%  binop2addr(instr="subl    %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="subl")
 
 %def op_sub_long():
-%  binopWide(instr="subq    VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="subq")
 
 %def op_sub_long_2addr():
-%  binopWide2addr(instr="subq    %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="subq")
 
 %def op_ushr_int():
 %  binop1(instr="shrl    %cl, %eax")
@@ -557,10 +557,10 @@
 %  shop2addr(instr="shrq    %cl, %rax", wide="1")
 
 %def op_xor_int():
-%  binop(instr="xorl    VREG_ADDRESS(%rcx), %eax")
+%  binop(instr="xorl")
 
 %def op_xor_int_2addr():
-%  binop2addr(instr="xorl    %eax, VREG_ADDRESS(%rcx)")
+%  binop2addr(instr="xorl")
 
 %def op_xor_int_lit16():
 %  binopLit16(instr="xorl    %ecx, %eax")
@@ -569,7 +569,7 @@
 %  binopLit8(instr="xorl    %ecx, %eax")
 
 %def op_xor_long():
-%  binopWide(instr="xorq    VREG_ADDRESS(%rcx), %rax")
+%  binopWide(instr="xorq")
 
 %def op_xor_long_2addr():
-%  binopWide2addr(instr="xorq    %rax, VREG_ADDRESS(%rcx)")
+%  binopWide2addr(instr="xorq")
diff --git a/runtime/interpreter/mterp/x86_64/floating_point.S b/runtime/interpreter/mterp/x86_64/floating_point.S
index b40c0e6..7fcb742 100644
--- a/runtime/interpreter/mterp/x86_64/floating_point.S
+++ b/runtime/interpreter/mterp/x86_64/floating_point.S
@@ -18,7 +18,7 @@
     /* op vAA, vBB, vCC */
     movzbq  3(rPC), %rcx                    # ecx<- CC
     movzbq  2(rPC), %rax                    # eax<- BB
-    movs${suff} VREG_ADDRESS(%rax), %xmm0
+    GET_VREG_XMM${suff} %xmm0, %rax
     xor     %eax, %eax
     ucomis${suff} VREG_ADDRESS(%rcx), %xmm0
     jp      .L${opcode}_nan_is_${nanval}
@@ -44,10 +44,10 @@
     andb    $$0xf, %cl                      # ecx <- A
     cvts${source_suffix}2s${dest_suffix}    VREG_ADDRESS(rINSTq), %xmm0
     .if $wide
-    movsd   %xmm0, VREG_ADDRESS(%rcx)
+    SET_VREG_XMMd %xmm0, %rcx
     CLEAR_WIDE_REF %rcx
     .else
-    movss   %xmm0, VREG_ADDRESS(%rcx)
+    SET_VREG_XMMs %xmm0, %rcx
     CLEAR_REF %rcx
     .endif
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
@@ -55,9 +55,9 @@
 %def sseBinop(instr="", suff=""):
     movzbq  2(rPC), %rcx                    # ecx <- BB
     movzbq  3(rPC), %rax                    # eax <- CC
-    movs${suff}   VREG_ADDRESS(%rcx), %xmm0       # %xmm0 <- 1st src
+    GET_VREG_XMM${suff} %xmm0, %rcx         # %xmm0 <- 1st src
     ${instr}${suff} VREG_ADDRESS(%rax), %xmm0
-    movs${suff}   %xmm0, VREG_ADDRESS(rINSTq)     # vAA <- %xmm0
+    SET_VREG_XMM${suff} %xmm0, rINSTq       # vAA <- %xmm0
     pxor    %xmm0, %xmm0
     movs${suff}   %xmm0, VREG_REF_ADDRESS(rINSTq) # clear ref
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -65,10 +65,10 @@
 %def sseBinop2Addr(instr="", suff=""):
     movl    rINST, %ecx                     # ecx <- A+
     andl    $$0xf, %ecx                     # ecx <- A
-    movs${suff} VREG_ADDRESS(%rcx), %xmm0        # %xmm0 <- 1st src
+    GET_VREG_XMM${suff} %xmm0, %rcx         # %xmm0 <- 1st src
     sarl    $$4, rINST                      # rINST<- B
     ${instr}${suff} VREG_ADDRESS(rINSTq), %xmm0
-    movs${suff} %xmm0, VREG_ADDRESS(%rcx)   # vAA<- %xmm0
+    SET_VREG_XMM${suff} %xmm0, %rcx         # vA <- %xmm0
     pxor    %xmm0, %xmm0
     movs${suff} %xmm0, VREG_REF_ADDRESS(rINSTq)  # clear ref
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/main.S b/runtime/interpreter/mterp/x86_64/main.S
index 4609067..5900220 100644
--- a/runtime/interpreter/mterp/x86_64/main.S
+++ b/runtime/interpreter/mterp/x86_64/main.S
@@ -306,6 +306,19 @@
     movl    MACRO_LITERAL(0), VREG_REF_HIGH_ADDRESS(\_vreg)
 .endm
 
+.macro GET_VREG_XMMs _xmmreg _vreg
+    movss VREG_ADDRESS(\_vreg), \_xmmreg
+.endm
+.macro GET_VREG_XMMd _xmmreg _vreg
+    movsd VREG_ADDRESS(\_vreg), \_xmmreg
+.endm
+.macro SET_VREG_XMMs _xmmreg _vreg
+    movss \_xmmreg, VREG_ADDRESS(\_vreg)
+.endm
+.macro SET_VREG_XMMd _xmmreg _vreg
+    movsd \_xmmreg, VREG_ADDRESS(\_vreg)
+.endm
+
 /*
  * function support macros.
  */
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 877e030..4a3ef07 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -630,8 +630,11 @@
 
 void Jit::AddSamples(Thread* self, ArtMethod* method, uint16_t count, bool with_backedges) {
   if (thread_pool_ == nullptr) {
-    // Should only see this when shutting down or starting up.
-    DCHECK(Runtime::Current()->IsShuttingDown(self) || !Runtime::Current()->IsFinishedStarting());
+    // Should only see this when shutting down, starting up, or in zygote, which doesn't
+    // have a thread pool.
+    DCHECK(Runtime::Current()->IsShuttingDown(self) ||
+           !Runtime::Current()->IsFinishedStarting() ||
+           Runtime::Current()->IsZygote());
     return;
   }
   if (IgnoreSamplesForMethod(method)) {
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index ff39a66..28978c5 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -892,7 +892,8 @@
   }
 }
 
-static void ClearMethodCounter(ArtMethod* method, bool was_warm) {
+static void ClearMethodCounter(ArtMethod* method, bool was_warm)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   if (was_warm) {
     method->SetPreviouslyWarm();
   }
@@ -1128,7 +1129,7 @@
     return false;
   }
 
-  method->ClearCounter();
+  method->SetCounter(0);
   Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
       method, GetQuickToInterpreterBridge());
   VLOG(jit)
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index e3248ea..c8d4728 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -362,7 +362,7 @@
       }
       // Visit all of the methods in the class to see which ones were executed.
       for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
-        if (!method.IsNative()) {
+        if (!method.IsNative() && !method.IsAbstract()) {
           DCHECK(!method.IsProxyMethod());
           const uint16_t counter = method.GetCounter();
           // Mark startup methods as hot if they have more than hot_method_sample_threshold
diff --git a/runtime/thread.h b/runtime/thread.h
index b304cef..ccde236 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -17,8 +17,6 @@
 #ifndef ART_RUNTIME_THREAD_H_
 #define ART_RUNTIME_THREAD_H_
 
-#include <setjmp.h>
-
 #include <atomic>
 #include <bitset>
 #include <deque>
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index c713aa4..141a089 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -15,14 +15,13 @@
  */
 
 import junit.framework.Assert;
-import java.lang.reflect.Method;
 import java.util.Locale;
 
 /**
  * more string tests
  */
 public class Main {
-    public static void main(String args[]) throws Exception {
+    public static void main(String args[]) {
         String test = "0123456789";
         String test1 = new String("0123456789");    // different object
         String test2 = new String("0123456780");    // different value
@@ -86,9 +85,7 @@
         Assert.assertEquals("this is a path", test.replaceAll("/", " "));
         Assert.assertEquals("this is a path", test.replace("/", " "));
 
-        Class<?> Strings = Class.forName("com.android.org.bouncycastle.util.Strings");
-        Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
-        String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
+        String result = new String(new char[] { 'O', 'K' });
         System.out.println(result);
 
         testCompareToAndEquals();