Merge "ART: Use StackReference in Quick Stack Frame"
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 9927fe1..8f4eddb 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -60,7 +60,7 @@
     } else {
       method = c->FindVirtualMethod(method_name, method_sig);
     }
-    ASSERT_TRUE(method != NULL) << method_name << " " << method_sig;
+    ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig;
     if (method->GetEntryPointFromQuickCompiledCode() == nullptr) {
       ASSERT_TRUE(method->GetEntryPointFromPortableCompiledCode() == nullptr);
       CompileMethod(method);
@@ -88,16 +88,16 @@
     // JNI operations after runtime start.
     env_ = Thread::Current()->GetJniEnv();
     jklass_ = env_->FindClass("MyClassNatives");
-    ASSERT_TRUE(jklass_ != NULL) << method_name << " " << method_sig;
+    ASSERT_TRUE(jklass_ != nullptr) << method_name << " " << method_sig;
 
     if (direct) {
       jmethod_ = env_->GetStaticMethodID(jklass_, method_name, method_sig);
     } else {
       jmethod_ = env_->GetMethodID(jklass_, method_name, method_sig);
     }
-    ASSERT_TRUE(jmethod_ != NULL) << method_name << " " << method_sig;
+    ASSERT_TRUE(jmethod_ != nullptr) << method_name << " " << method_sig;
 
-    if (native_fnptr != NULL) {
+    if (native_fnptr != nullptr) {
       JNINativeMethod methods[] = { { method_name, method_sig, native_fnptr } };
       ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1))
               << method_name << " " << method_sig;
@@ -107,7 +107,7 @@
 
     jmethodID constructor = env_->GetMethodID(jklass_, "<init>", "()V");
     jobj_ = env_->NewObject(jklass_, constructor);
-    ASSERT_TRUE(jobj_ != NULL) << method_name << " " << method_sig;
+    ASSERT_TRUE(jobj_ != nullptr) << method_name << " " << method_sig;
   }
 
  public:
@@ -125,13 +125,14 @@
 jobject JniCompilerTest::jobj_;
 jobject JniCompilerTest::class_loader_;
 
+
 int gJava_MyClassNatives_foo_calls = 0;
 void Java_MyClassNatives_foo(JNIEnv* env, jobject thisObj) {
   // 1 = thisObj
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   Locks::mutator_lock_->AssertNotHeld(Thread::Current());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_foo_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -151,8 +152,8 @@
 
 TEST_F(JniCompilerTest, CompileAndRunIntMethodThroughStub) {
   TEST_DISABLED_FOR_PORTABLE();
-  SetUpForTest(false, "bar", "(I)I",
-               NULL /* calling through stub will link with &Java_MyClassNatives_bar */);
+  SetUpForTest(false, "bar", "(I)I", nullptr);
+  // calling through stub will link with &Java_MyClassNatives_bar
 
   ScopedObjectAccess soa(Thread::Current());
   std::string reason;
@@ -168,8 +169,8 @@
 
 TEST_F(JniCompilerTest, CompileAndRunStaticIntMethodThroughStub) {
   TEST_DISABLED_FOR_PORTABLE();
-  SetUpForTest(true, "sbar", "(I)I",
-               NULL /* calling through stub will link with &Java_MyClassNatives_sbar */);
+  SetUpForTest(true, "sbar", "(I)I", nullptr);
+  // calling through stub will link with &Java_MyClassNatives_sbar
 
   ScopedObjectAccess soa(Thread::Current());
   std::string reason;
@@ -188,7 +189,7 @@
   // 1 = thisObj
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooI_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -215,7 +216,7 @@
   // 1 = thisObj
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooII_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -243,7 +244,7 @@
   // 1 = thisObj
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooJJ_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -272,7 +273,7 @@
   // 1 = thisObj
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooDD_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -302,7 +303,7 @@
   // 1 = thisObj
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooJJ_synchronized_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -329,7 +330,7 @@
   // 3 = this + y + z
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooIOO_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -353,28 +354,28 @@
                reinterpret_cast<void*>(&Java_MyClassNatives_fooIOO));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooIOO_calls);
-  jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, NULL, NULL);
+  jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, nullptr, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
   EXPECT_EQ(1, gJava_MyClassNatives_fooIOO_calls);
 
-  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, NULL, jklass_);
+  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, nullptr, jklass_);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
   EXPECT_EQ(2, gJava_MyClassNatives_fooIOO_calls);
-  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, NULL, jklass_);
-  EXPECT_TRUE(env_->IsSameObject(NULL, result));
+  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, nullptr, jklass_);
+  EXPECT_TRUE(env_->IsSameObject(nullptr, result));
   EXPECT_EQ(3, gJava_MyClassNatives_fooIOO_calls);
-  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, NULL, jklass_);
+  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, nullptr, jklass_);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
   EXPECT_EQ(4, gJava_MyClassNatives_fooIOO_calls);
 
-  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, jklass_, NULL);
+  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, jklass_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
   EXPECT_EQ(5, gJava_MyClassNatives_fooIOO_calls);
-  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, jklass_, NULL);
+  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, jklass_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
   EXPECT_EQ(6, gJava_MyClassNatives_fooIOO_calls);
-  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, jklass_, NULL);
-  EXPECT_TRUE(env_->IsSameObject(NULL, result));
+  result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, jklass_, nullptr);
+  EXPECT_TRUE(env_->IsSameObject(nullptr, result));
   EXPECT_EQ(7, gJava_MyClassNatives_fooIOO_calls);
 }
 
@@ -383,7 +384,7 @@
   // 1 = klass
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(klass != NULL);
+  EXPECT_TRUE(klass != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSII_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -407,7 +408,7 @@
   // 1 = klass
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(klass != NULL);
+  EXPECT_TRUE(klass != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSDD_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -437,7 +438,7 @@
   // 3 = klass + y + z
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(klass != NULL);
+  EXPECT_TRUE(klass != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSIOO_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -462,28 +463,28 @@
                reinterpret_cast<void*>(&Java_MyClassNatives_fooSIOO));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooSIOO_calls);
-  jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, NULL);
+  jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
   EXPECT_EQ(1, gJava_MyClassNatives_fooSIOO_calls);
 
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, jobj_);
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
   EXPECT_EQ(2, gJava_MyClassNatives_fooSIOO_calls);
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, NULL, jobj_);
-  EXPECT_TRUE(env_->IsSameObject(NULL, result));
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, nullptr, jobj_);
+  EXPECT_TRUE(env_->IsSameObject(nullptr, result));
   EXPECT_EQ(3, gJava_MyClassNatives_fooSIOO_calls);
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, NULL, jobj_);
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
   EXPECT_EQ(4, gJava_MyClassNatives_fooSIOO_calls);
 
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, NULL);
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
   EXPECT_EQ(5, gJava_MyClassNatives_fooSIOO_calls);
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, NULL);
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
   EXPECT_EQ(6, gJava_MyClassNatives_fooSIOO_calls);
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, NULL);
-  EXPECT_TRUE(env_->IsSameObject(NULL, result));
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, nullptr);
+  EXPECT_TRUE(env_->IsSameObject(nullptr, result));
   EXPECT_EQ(7, gJava_MyClassNatives_fooSIOO_calls);
 }
 
@@ -492,7 +493,7 @@
   // 3 = klass + y + z
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(klass != NULL);
+  EXPECT_TRUE(klass != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSSIOO_calls++;
   ScopedObjectAccess soa(Thread::Current());
@@ -516,28 +517,28 @@
                reinterpret_cast<void*>(&Java_MyClassNatives_fooSSIOO));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooSSIOO_calls);
-  jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, NULL);
+  jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
   EXPECT_EQ(1, gJava_MyClassNatives_fooSSIOO_calls);
 
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, jobj_);
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
   EXPECT_EQ(2, gJava_MyClassNatives_fooSSIOO_calls);
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, NULL, jobj_);
-  EXPECT_TRUE(env_->IsSameObject(NULL, result));
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, nullptr, jobj_);
+  EXPECT_TRUE(env_->IsSameObject(nullptr, result));
   EXPECT_EQ(3, gJava_MyClassNatives_fooSSIOO_calls);
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, NULL, jobj_);
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
   EXPECT_EQ(4, gJava_MyClassNatives_fooSSIOO_calls);
 
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, NULL);
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
   EXPECT_EQ(5, gJava_MyClassNatives_fooSSIOO_calls);
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, NULL);
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
   EXPECT_EQ(6, gJava_MyClassNatives_fooSSIOO_calls);
-  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, NULL);
-  EXPECT_TRUE(env_->IsSameObject(NULL, result));
+  result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, nullptr);
+  EXPECT_TRUE(env_->IsSameObject(nullptr, result));
   EXPECT_EQ(7, gJava_MyClassNatives_fooSSIOO_calls);
 }
 
@@ -591,7 +592,7 @@
 
 jint Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) {
   if (i <= 0) {
-    // We want to check raw Object*/Array* below
+    // We want to check raw Object* / Array* below
     ScopedObjectAccess soa(env);
 
     // Build stack trace
@@ -599,7 +600,7 @@
     jobjectArray ste_array = Thread::InternalStackTraceToStackTraceElementArray(soa, internal);
     mirror::ObjectArray<mirror::StackTraceElement>* trace_array =
         soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(ste_array);
-    EXPECT_TRUE(trace_array != NULL);
+    EXPECT_TRUE(trace_array != nullptr);
     EXPECT_EQ(11, trace_array->GetLength());
 
     // Check stack trace entries have expected values
@@ -615,9 +616,9 @@
     return 0;
   } else {
     jclass jklass = env->FindClass("MyClassNatives");
-    EXPECT_TRUE(jklass != NULL);
+    EXPECT_TRUE(jklass != nullptr);
     jmethodID jmethod = env->GetMethodID(jklass, "fooI", "(I)I");
-    EXPECT_TRUE(jmethod != NULL);
+    EXPECT_TRUE(jmethod != nullptr);
 
     // Recurse with i - 1
     jint result = env->CallNonvirtualIntMethod(thisObj, jklass, jmethod, i - 1);
@@ -721,7 +722,7 @@
 
 TEST_F(JniCompilerTest, GetSinkPropertiesNative) {
   TEST_DISABLED_FOR_PORTABLE();
-  SetUpForTest(false, "getSinkPropertiesNative", "(Ljava/lang/String;)[Ljava/lang/Object;", NULL);
+  SetUpForTest(false, "getSinkPropertiesNative", "(Ljava/lang/String;)[Ljava/lang/Object;", nullptr);
   // This space intentionally left blank. Just testing compilation succeeds.
 }
 
@@ -804,7 +805,7 @@
 jfloat Java_MyClassNatives_checkFloats(JNIEnv* env, jobject thisObj, jfloat f1, jfloat f2) {
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   ScopedObjectAccess soa(Thread::Current());
   EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
@@ -826,12 +827,12 @@
 }
 
 void Java_MyClassNatives_checkParameterAlign(JNIEnv* env, jobject thisObj, jint i1, jlong l1) {
-  /*EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());*/
+//  EXPECT_EQ(kNative, Thread::Current()->GetState());
+//  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
+//  EXPECT_TRUE(thisObj != nullptr);
+//  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
+//  ScopedObjectAccess soa(Thread::Current());
+//  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(i1, 1234);
   EXPECT_EQ(l1, INT64_C(0x12345678ABCDEF0));
 }
@@ -879,7 +880,7 @@
     jobject o248, jobject o249, jobject o250, jobject o251, jobject o252, jobject o253) {
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != NULL);
+  EXPECT_TRUE(thisObj != nullptr);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   ScopedObjectAccess soa(Thread::Current());
   EXPECT_GE(255U, Thread::Current()->NumStackReferences());
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 649a80f..f0c0ed7 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -143,9 +143,10 @@
 
 size_t ArmJniCallingConvention::FrameSize() {
   // Method*, LR and callee save area size, local reference segment state
-  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
+  size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) +
+      (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index ffd27ee..0a00d7d 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -95,7 +95,7 @@
   CHECK(IsCurrentParamOnStack());
   FrameOffset result =
       FrameOffset(displacement_.Int32Value() +   // displacement
-                  kFramePointerSize +                 // Method*
+                  sizeof(StackReference<mirror::ArtMethod>) +  // Method ref
                   (itr_slots_ * sizeof(uint32_t)));  // offset into in args
   return result;
 }
@@ -196,9 +196,10 @@
 
 size_t Arm64JniCallingConvention::FrameSize() {
   // Method*, callee save area size, local reference segment state
-  size_t frame_data_size = ((1 + CalleeSaveRegisters().size()) * kFramePointerSize) + sizeof(uint32_t);
+  size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) +
+      CalleeSaveRegisters().size() * kFramePointerSize + sizeof(uint32_t);
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 2a6e7d9..efc0b42 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -319,7 +319,8 @@
 
   // Position of handle scope and interior fields
   FrameOffset HandleScopeOffset() const {
-    return FrameOffset(this->displacement_.Int32Value() + frame_pointer_size_);  // above Method*
+    return FrameOffset(this->displacement_.Int32Value() + sizeof(StackReference<mirror::ArtMethod>));
+    // above Method reference
   }
 
   FrameOffset HandleScopeLinkOffset() const {
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 0402fe6..f7a7be7 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -147,9 +147,10 @@
 
 size_t MipsJniCallingConvention::FrameSize() {
   // Method*, LR and callee save area size, local reference segment state
-  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
+  size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) +
+      (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 97b4cdf..9bf7d0f 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -124,9 +124,10 @@
 
 size_t X86JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
-  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
+  size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) +
+      (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 4871c87..5febed2 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -96,7 +96,7 @@
 
 FrameOffset X86_64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
   return FrameOffset(displacement_.Int32Value() +   // displacement
-                     kFramePointerSize +                 // Method*
+                     sizeof(StackReference<mirror::ArtMethod>) +  // Method ref
                      (itr_slots_ * sizeof(uint32_t)));  // offset into in args
 }
 
@@ -139,9 +139,10 @@
 
 size_t X86_64JniCallingConvention::FrameSize() {
   // Method*, return address and callee save area size, local reference segment state
-  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize;
+  size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) +
+      (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
   // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
-  size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount());
+  size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount());
   // Plus return value spill area size
   return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
 }
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 27188b2..009b227 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -530,7 +530,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsCoreRegister()) << scratch;
   // Call *(*(SP + base) + offset)
-  LoadFromOffset(scratch.AsCoreRegister(), SP, base.Int32Value());
+  LoadWFromOffset(kLoadWord, scratch.AsOverlappingCoreRegisterLow(), SP, base.Int32Value());
   LoadFromOffset(scratch.AsCoreRegister(), scratch.AsCoreRegister(), offs.Int32Value());
   ___ Blr(reg_x(scratch.AsCoreRegister()));
 }
@@ -656,16 +656,17 @@
   // trashed by native code.
   ___ Mov(reg_x(ETR), reg_x(TR));
 
-  // Increate frame to required size - must be at least space to push Method*.
+  // Increase frame to required size - must be at least space to push StackReference<Method>.
   CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
   size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
   IncreaseFrameSize(adjust);
 
-  // Write Method*.
-  StoreToOffset(X0, SP, 0);
+  // Write StackReference<Method>.
+  DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
+  StoreWToOffset(StoreOperandType::kStoreWord, W0, SP, 0);
 
   // Write out entry spills
-  int32_t offset = frame_size + kFramePointerSize;
+  int32_t offset = frame_size + sizeof(StackReference<mirror::ArtMethod>);
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
     if (reg.IsNoRegister()) {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 0791c63..56c6536 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1411,10 +1411,12 @@
   }
   // return address then method on stack
   addl(ESP, Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) +
-                      kFramePointerSize /*method*/ + kFramePointerSize /*return address*/));
+                      sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
+                      kFramePointerSize /*return address*/));
   pushl(method_reg.AsX86().AsCpuRegister());
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    movl(Address(ESP, frame_size + kFramePointerSize + (i * kFramePointerSize)),
+    movl(Address(ESP, frame_size + sizeof(StackReference<mirror::ArtMethod>) +
+                 (i * kFramePointerSize)),
          entry_spills.at(i).AsX86().AsCpuRegister());
   }
 }
@@ -1422,7 +1424,8 @@
 void X86Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - kFramePointerSize));
+  addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) -
+                      sizeof(StackReference<mirror::ArtMethod>)));
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     popl(spill_regs.at(i).AsX86().AsCpuRegister());
   }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 0ede875..a14551c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -59,7 +59,6 @@
   EmitLabel(label, kSize);
 }
 
-
 void X86_64Assembler::pushq(CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg);
@@ -1652,8 +1651,12 @@
   }
   // return address then method on stack
   addq(CpuRegister(RSP), Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) +
-                                   kFramePointerSize /*method*/ + kFramePointerSize /*return address*/));
-  pushq(method_reg.AsX86_64().AsCpuRegister());
+                                   sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
+                                   kFramePointerSize /*return address*/));
+
+  DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
+  subq(CpuRegister(RSP), Immediate(4));
+  movl(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
 
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     ManagedRegisterSpill spill = entry_spills.at(i);
@@ -1732,7 +1735,7 @@
 void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
   X86_64ManagedRegister src = msrc.AsX86_64();
   CHECK(src.IsCpuRegister());
-  movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+  movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
 }
 
 void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
@@ -2070,7 +2073,7 @@
 
 void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
   CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  movq(scratch, Address(CpuRegister(RSP), base));
+  movl(scratch, Address(CpuRegister(RSP), base));
   call(Address(scratch, offset));
 }
 
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index 45ff21f..5220dc3 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -337,30 +337,22 @@
 // The following tests are all for the running architecture. So we get away
 // with just including it and not undefining it every time.
 
-
 #if defined(__arm__)
 #include "arch/arm/asm_support_arm.h"
-#undef ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_H_
 #elif defined(__aarch64__)
 #include "arch/arm64/asm_support_arm64.h"
-#undef ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
 #elif defined(__mips__)
 #include "arch/mips/asm_support_mips.h"
-#undef ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
 #elif defined(__i386__)
 #include "arch/x86/asm_support_x86.h"
-#undef ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
 #elif defined(__x86_64__)
 #include "arch/x86_64/asm_support_x86_64.h"
-#undef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
 #else
   // This happens for the host test.
 #ifdef __LP64__
 #include "arch/x86_64/asm_support_x86_64.h"
-#undef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
 #else
 #include "arch/x86/asm_support_x86.h"
-#undef ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_H_
 #endif
 #endif
 
@@ -436,4 +428,13 @@
 #endif
 }
 
+TEST_F(ArchTest, StackReferenceSize) {
+#if defined(STACK_REFERENCE_SIZE)
+  EXPECT_EQ(sizeof(StackReference<mirror::Object>),
+            static_cast<size_t>(STACK_REFERENCE_SIZE));
+#else
+  LOG(INFO) << "No expected StackReference Size #define found.";
+#endif
+}
+
 }  // namespace art
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index e55885f..422e20cf 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -43,5 +43,7 @@
 
 // Expected size of a heap reference
 #define HEAP_REFERENCE_SIZE 4
+// Expected size of a stack reference
+#define STACK_REFERENCE_SIZE 4
 
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 97caa1f..28bf856 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -559,8 +559,9 @@
 
 .macro INVOKE_STUB_CREATE_FRAME
 
-SAVE_SIZE=5*8   // x4, x5, SP, LR & FP saved.
-SAVE_SIZE_AND_METHOD=SAVE_SIZE+8
+SAVE_SIZE=6*8   // x4, x5, x19(wSUSPEND), SP, LR & FP saved.
+SAVE_SIZE_AND_METHOD=SAVE_SIZE+STACK_REFERENCE_SIZE
+
 
     mov x9, sp                             // Save stack pointer.
     .cfi_register sp,x9
@@ -574,8 +575,9 @@
     .cfi_def_cfa_register x10              // before this.
     .cfi_adjust_cfa_offset SAVE_SIZE
 
-    str x9, [x10, #32]                     // Save old stack pointer.
+    stp x9, x19, [x10, #32]                // Save old stack pointer and x19(wSUSPEND)
     .cfi_rel_offset sp, 32
+    .cfi_rel_offset x19, 40
 
     stp x4, x5, [x10, #16]                 // Save result and shorty addresses.
     .cfi_rel_offset x4, 16
@@ -597,7 +599,7 @@
     // W2 - args length
     // X9 - destination address.
     // W10 - temporary
-    add x9, sp, #8                         // Destination address is bottom of stack + NULL.
+    add x9, sp, #4                         // Destination address is bottom of stack + NULL.
 
     // Use \@ to differentiate between macro invocations.
 .LcopyParams\@:
@@ -611,9 +613,12 @@
 
 .LendCopyParams\@:
 
-    // Store NULL into Method* at bottom of frame.
-    str xzr, [sp]
+    // Store NULL into StackReference<Method>* at bottom of frame.
+    str wzr, [sp]
 
+#if (STACK_REFERENCE_SIZE != 4)
+#error "STACK_REFERENCE_SIZE(ARM64) size not as expected."
+#endif
 .endm
 
 .macro INVOKE_STUB_CALL_AND_RETURN
@@ -651,7 +656,8 @@
     str x0, [x4]
 
 .Lexit_art_quick_invoke_stub\@:
-    ldr x2, [x29, #32]   // Restore stack pointer.
+    ldp x2, x19, [x29, #32]   // Restore stack pointer and x19.
+    .cfi_restore x19
     mov sp, x2
     .cfi_restore sp
 
@@ -687,7 +693,7 @@
  *  | uint32_t out[n-1]    |
  *  |    :      :          |        Outs
  *  | uint32_t out[0]      |
- *  | ArtMethod* NULL      | <- SP
+ *  | StackRef<ArtMethod>  | <- SP  value=null
  *  +----------------------+
  *
  * Outgoing registers:
@@ -1289,7 +1295,7 @@
     .extern \entrypoint
 ENTRY \name
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    ldr    w1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
     mov    x2, xSELF                  // pass Thread::Current
     mov    x3, sp                     // pass SP
     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*, SP)
@@ -1303,7 +1309,7 @@
     .extern \entrypoint
 ENTRY \name
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    ldr    w2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
     mov    x3, xSELF                  // pass Thread::Current
     mov    x4, sp                     // pass SP
     bl     \entrypoint
@@ -1317,7 +1323,7 @@
     .extern \entrypoint
 ENTRY \name
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
-    ldr    x3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    ldr    w3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
     mov    x4, xSELF                  // pass Thread::Current
     mov    x5, sp                     // pass SP
     bl     \entrypoint
@@ -1356,7 +1362,7 @@
 ENTRY art_quick_set64_static
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
     mov    x3, x1                     // Store value
-    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    ldr    w1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
     mov    x2, x3                     // Put value param
     mov    x3, xSELF                  // pass Thread::Current
     mov    x4, sp                     // pass SP
@@ -1420,7 +1426,7 @@
      * dex method index.
      */
 ENTRY art_quick_imt_conflict_trampoline
-    ldr    x0, [sp, #0]                                // load caller Method*
+    ldr    w0, [sp, #0]                                // load caller Method*
     ldr    w0, [x0, #METHOD_DEX_CACHE_METHODS_OFFSET]  // load dex_cache_resolved_methods
     add    x0, x0, #OBJECT_ARRAY_DATA_OFFSET           // get starting address of data
     ldr    w0, [x0, x12, lsl 2]                        // load the target method
@@ -1434,7 +1440,7 @@
     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
     cbz x0, 1f
     mov x9, x0              // Remember returned code pointer in x9.
-    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
+    ldr w0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
     br x9
 1:
@@ -1484,7 +1490,7 @@
  * | D2                |    float arg 3
  * | D1                |    float arg 2
  * | D0                |    float arg 1
- * | RDI/Method*       |  <- X0
+ * | Method*           | <- X0
  * #-------------------#
  * | local ref cookie  | // 4B
  * | handle scope size | // 4B
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index fac9883..44edd4b 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -225,7 +225,8 @@
         "cmp x1, x2\n\t"
         "b.ne 1f\n\t"
 
-        "mov %[fpr_result], #0\n\t"
+        "mov x2, #0\n\t"
+        "str x2, %[fpr_result]\n\t"
 
         // Finish up.
         "2:\n\t"
@@ -247,15 +248,16 @@
 
         // Failed fpr verification.
         "1:\n\t"
-        "mov %[fpr_result], #1\n\t"
+        "mov x2, #1\n\t"
+        "str x2, %[fpr_result]\n\t"
         "b 2b\n\t"                     // Goto finish-up
 
         // End
         "3:\n\t"
-        : [result] "=r" (result), [fpr_result] "=r" (fpr_result)
+        : [result] "=r" (result)
           // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
-          [referrer] "r"(referrer)
+          [referrer] "r"(referrer), [fpr_result] "m" (fpr_result)
         : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index 29633fb..bff8501 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -41,5 +41,7 @@
 
 // Expected size of a heap reference
 #define HEAP_REFERENCE_SIZE 4
+// Expected size of a stack reference
+#define STACK_REFERENCE_SIZE 4
 
 #endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 971688d..48c33d5 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -384,16 +384,24 @@
     PUSH r9                       // Save r9/shorty*.
     movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
     CFI_DEF_CFA_REGISTER(rbp)
+
     movl %edx, %r10d
-    addl LITERAL(64), %edx        // Reserve space for return addr, method*, rbp, r8 and r9 in frame.
+    addl LITERAL(60), %edx        // Reserve space for return addr, StackReference<method>, rbp,
+                                  // r8 and r9 in frame.
     andl LITERAL(0xFFFFFFF0), %edx    // Align frame size to 16 bytes.
     subl LITERAL(32), %edx        // Remove space for return address, rbp, r8 and r9.
     subq %rdx, %rsp               // Reserve stack space for argument array.
-    movq LITERAL(0), (%rsp)       // Store NULL for method*
+
+#if (STACK_REFERENCE_SIZE != 4)
+#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
+#endif
+    movl LITERAL(0), (%rsp)       // Store NULL for method*
+
     movl %r10d, %ecx              // Place size of args in rcx.
     movq %rdi, %rax               // RAX := method to be called
     movq %rsi, %r11               // R11 := arg_array
-    leaq 8(%rsp), %rdi            // Rdi is pointing just above the method* in the stack arguments.
+    leaq 4(%rsp), %rdi            // Rdi is pointing just above the StackReference<method> in the
+                                  // stack arguments.
     // Copy arg array into stack.
     rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
     leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
@@ -455,16 +463,24 @@
     PUSH r9                       // Save r9/shorty*.
     movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
     CFI_DEF_CFA_REGISTER(rbp)
+
     movl %edx, %r10d
-    addl LITERAL(64), %edx        // Reserve space for return addr, method*, rbp, r8 and r9 in frame.
+    addl LITERAL(60), %edx        // Reserve space for return addr, StackReference<method>, rbp,
+                                  // r8 and r9 in frame.
     andl LITERAL(0xFFFFFFF0), %edx    // Align frame size to 16 bytes.
     subl LITERAL(32), %edx        // Remove space for return address, rbp, r8 and r9.
     subq %rdx, %rsp               // Reserve stack space for argument array.
-    movq LITERAL(0), (%rsp)       // Store NULL for method*
+
+#if (STACK_REFERENCE_SIZE != 4)
+#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
+#endif
+    movl LITERAL(0), (%rsp)       // Store NULL for method*
+
     movl %r10d, %ecx              // Place size of args in rcx.
     movq %rdi, %rax               // RAX := method to be called
     movq %rsi, %r11               // R11 := arg_array
-    leaq 8(%rsp), %rdi            // Rdi is pointing just above the method* in the stack arguments.
+    leaq 4(%rsp), %rdi            // Rdi is pointing just above the StackReference<method> in the
+                                  // stack arguments.
     // Copy arg array into stack.
     rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
     leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index 3fd4adc..b582abb 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -26,12 +26,12 @@
 }  // namespace mirror
 
 // Place a special frame at the TOS that will save the callee saves for the given type.
-static inline void FinishCalleeSaveFrameSetup(Thread* self, mirror::ArtMethod** sp,
+static inline void FinishCalleeSaveFrameSetup(Thread* self, StackReference<mirror::ArtMethod>* sp,
                                               Runtime::CalleeSaveType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Be aware the store below may well stomp on an incoming argument.
   Locks::mutator_lock_->AssertSharedHeld(self);
-  *sp = Runtime::Current()->GetCalleeSaveMethod(type);
+  sp->Assign(Runtime::Current()->GetCalleeSaveMethod(type));
   self->SetTopOfStack(sp, 0);
   self->VerifyStack();
 }
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index ccc0f3d..3301254 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -27,32 +27,36 @@
 
 #define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \
 extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \
-    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \
-    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeResolved<instrumented_bool>(klass, method, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \
-    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeInitialized<instrumented_bool>(klass, method, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \
-    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    uint32_t type_idx, mirror::ArtMethod* method, Thread* self, \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCode<true, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
 extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    mirror::ArtMethod** sp) \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocArrayFromCode<false, instrumented_bool>(type_idx, method, component_count, self, \
@@ -60,7 +64,7 @@
 } \
 extern "C" mirror::Array* artAllocArrayFromCodeResolved##suffix##suffix2( \
     mirror::Class* klass, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    mirror::ArtMethod** sp) \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocArrayFromCodeResolved<false, instrumented_bool>(klass, method, component_count, self, \
@@ -68,7 +72,7 @@
 } \
 extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    mirror::ArtMethod** sp) \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocArrayFromCode<true, instrumented_bool>(type_idx, method, component_count, self, \
@@ -76,7 +80,7 @@
 } \
 extern "C" mirror::Array* artCheckAndAllocArrayFromCode##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    mirror::ArtMethod** sp) \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   if (!instrumented_bool) { \
@@ -87,7 +91,7 @@
 } \
 extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
-    mirror::ArtMethod** sp) \
+    StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   if (!instrumented_bool) { \
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index 6448045..47fb9d6 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -28,7 +28,7 @@
 
 namespace art {
 
-extern "C" void artDeoptimize(Thread* self, mirror::ArtMethod** sp)
+extern "C" void artDeoptimize(Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   self->SetException(ThrowLocation(), Thread::GetDeoptimizationException());
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index ab428a5..53c9b97 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -28,7 +28,7 @@
 extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx,
                                                              mirror::ArtMethod* referrer,
                                                              Thread* self,
-                                                             mirror::ArtMethod** sp)
+                                                             StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called to ensure static storage base is initialized for direct static field reads and writes.
   // A class may be accessing another class' fields when it doesn't have access, as access has been
@@ -39,7 +39,8 @@
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx,
                                                     mirror::ArtMethod* referrer,
-                                                    Thread* self, mirror::ArtMethod** sp)
+                                                    Thread* self,
+                                                    StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
@@ -47,10 +48,9 @@
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx,
-                                                                   mirror::ArtMethod* referrer,
-                                                                   Thread* self,
-                                                                   mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* referrer,
+    Thread* self,
+    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
@@ -59,7 +59,8 @@
 
 extern "C" mirror::String* artResolveStringFromCode(mirror::ArtMethod* referrer,
                                                     int32_t string_idx,
-                                                    Thread* self, mirror::ArtMethod** sp)
+                                                    Thread* self,
+                                                    StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return ResolveStringFromCode(referrer, string_idx);
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index c38a595..844367d 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -27,7 +27,7 @@
 
 extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
                                            mirror::ArtMethod* referrer,
-                                           Thread* self, mirror::ArtMethod** sp)
+                                           Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
                                           sizeof(int32_t));
@@ -44,7 +44,7 @@
 
 extern "C" uint64_t artGet64StaticFromCode(uint32_t field_idx,
                                            mirror::ArtMethod* referrer,
-                                           Thread* self, mirror::ArtMethod** sp)
+                                           Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
                                           sizeof(int64_t));
@@ -61,7 +61,8 @@
 
 extern "C" mirror::Object* artGetObjStaticFromCode(uint32_t field_idx,
                                                    mirror::ArtMethod* referrer,
-                                                   Thread* self, mirror::ArtMethod** sp)
+                                                   Thread* self,
+                                                   StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
                                           sizeof(mirror::HeapReference<mirror::Object>));
@@ -79,7 +80,7 @@
 
 extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                              mirror::ArtMethod* referrer, Thread* self,
-                                             mirror::ArtMethod** sp)
+                                             StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
                                           sizeof(int32_t));
@@ -102,7 +103,7 @@
 
 extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                              mirror::ArtMethod* referrer, Thread* self,
-                                             mirror::ArtMethod** sp)
+                                             StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
                                           sizeof(int64_t));
@@ -126,7 +127,7 @@
 extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                                      mirror::ArtMethod* referrer,
                                                      Thread* self,
-                                                     mirror::ArtMethod** sp)
+                                                     StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
                                           sizeof(mirror::HeapReference<mirror::Object>));
@@ -149,7 +150,7 @@
 
 extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
                                       mirror::ArtMethod* referrer, Thread* self,
-                                      mirror::ArtMethod** sp)
+                                      StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
                                           sizeof(int32_t));
@@ -169,7 +170,8 @@
 }
 
 extern "C" int artSet64StaticFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
-                                      uint64_t new_value, Thread* self, mirror::ArtMethod** sp)
+                                      uint64_t new_value, Thread* self,
+                                      StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
                                           sizeof(int64_t));
@@ -190,7 +192,7 @@
 
 extern "C" int artSetObjStaticFromCode(uint32_t field_idx, mirror::Object* new_value,
                                        mirror::ArtMethod* referrer, Thread* self,
-                                       mirror::ArtMethod** sp)
+                                       StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
@@ -214,7 +216,7 @@
 
 extern "C" int artSet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint32_t new_value,
                                         mirror::ArtMethod* referrer, Thread* self,
-                                        mirror::ArtMethod** sp)
+                                        StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int32_t));
@@ -240,13 +242,15 @@
 }
 
 extern "C" int artSet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint64_t new_value,
-                                        Thread* self, mirror::ArtMethod** sp)
+                                        Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   Runtime* runtime = Runtime::Current();
   mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
   uint32_t frame_size =
       runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes();
-  mirror::ArtMethod* referrer = sp[frame_size / sizeof(mirror::ArtMethod*)];
+  mirror::ArtMethod* referrer =
+      reinterpret_cast<StackReference<mirror::ArtMethod>*>(
+          reinterpret_cast<uint8_t*>(sp) + frame_size)->AsMirrorPtr();
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
                                           sizeof(int64_t));
   if (LIKELY(field != NULL  && obj != NULL)) {
@@ -254,7 +258,7 @@
     field->Set64<false>(obj, new_value);
     return 0;  // success
   }
-  *sp = callee_save;
+  sp->Assign(callee_save);
   self->SetTopOfStack(sp, 0);
   field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
                                                           sizeof(int64_t));
@@ -274,7 +278,7 @@
 extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                          mirror::Object* new_value,
                                          mirror::ArtMethod* referrer, Thread* self,
-                                         mirror::ArtMethod** sp)
+                                         StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
diff --git a/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc b/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
index 8dac750..4ec2879 100644
--- a/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
@@ -39,7 +39,7 @@
  */
 extern "C" int artHandleFillArrayDataFromCode(mirror::Array* array,
                                               const Instruction::ArrayDataPayload* payload,
-                                              Thread* self, mirror::ArtMethod** sp)
+                                              Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   DCHECK_EQ(payload->ident, static_cast<uint16_t>(Instruction::kArrayDataSignature));
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 11a4b3b..6ef075d 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -26,7 +26,7 @@
 extern "C" const void* artInstrumentationMethodEntryFromCode(mirror::ArtMethod* method,
                                                              mirror::Object* this_object,
                                                              Thread* self,
-                                                             mirror::ArtMethod** sp,
+                                                             StackReference<mirror::ArtMethod>* sp,
                                                              uintptr_t lr)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
@@ -40,7 +40,8 @@
   return result;
 }
 
-extern "C" uint64_t artInstrumentationMethodExitFromCode(Thread* self, mirror::ArtMethod** sp,
+extern "C" uint64_t artInstrumentationMethodExitFromCode(Thread* self,
+                                                         StackReference<mirror::ArtMethod>* sp,
                                                          uint64_t gpr_result, uint64_t fpr_result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // TODO: use FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly) not the hand inlined below.
@@ -50,7 +51,7 @@
   Locks::mutator_lock_->AssertSharedHeld(self);
   Runtime* runtime = Runtime::Current();
   mirror::ArtMethod* callee_save = runtime->GetCalleeSaveMethod(Runtime::kRefsOnly);
-  *sp = callee_save;
+  sp->Assign(callee_save);
   uint32_t return_pc_offset = callee_save->GetReturnPcOffsetInBytes(
       runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsOnly).FrameSizeInBytes());
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<byte*>(sp) +
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 5d36b4c..140b075 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -34,7 +34,7 @@
   DCHECK(env != nullptr);
   uint32_t saved_local_ref_cookie = env->local_ref_cookie;
   env->local_ref_cookie = env->locals.GetSegmentState();
-  mirror::ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+  mirror::ArtMethod* native_method = self->GetManagedStack()->GetTopQuickFrame()->AsMirrorPtr();
   if (!native_method->IsFastNative()) {
     // When not fast JNI we transition out of runnable.
     self->TransitionFromRunnableToSuspended(kNative);
@@ -49,7 +49,7 @@
 
 // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
 static void GoToRunnable(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
-  mirror::ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+  mirror::ArtMethod* native_method = self->GetManagedStack()->GetTopQuickFrame()->AsMirrorPtr();
   bool is_fast = native_method->IsFastNative();
   if (!is_fast) {
     self->TransitionFromSuspendedToRunnable();
diff --git a/runtime/entrypoints/quick/quick_lock_entrypoints.cc b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
index 817d053..92c0841 100644
--- a/runtime/entrypoints/quick/quick_lock_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_lock_entrypoints.cc
@@ -20,7 +20,8 @@
 
 namespace art {
 
-extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
+extern "C" int artLockObjectFromCode(mirror::Object* obj, Thread* self,
+                                     StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
     NO_THREAD_SAFETY_ANALYSIS /* EXCLUSIVE_LOCK_FUNCTION(Monitor::monitor_lock_) */ {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
@@ -42,7 +43,8 @@
   }
 }
 
-extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self, mirror::ArtMethod** sp)
+extern "C" int artUnlockObjectFromCode(mirror::Object* obj, Thread* self,
+                                       StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
     NO_THREAD_SAFETY_ANALYSIS /* UNLOCK_FUNCTION(Monitor::monitor_lock_) */ {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
index 53e725e..f61c754 100644
--- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
@@ -28,7 +28,7 @@
   CheckSuspend(thread);
 }
 
-extern "C" void artTestSuspendFromCode(Thread* thread, mirror::ArtMethod** sp)
+extern "C" void artTestSuspendFromCode(Thread* thread, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when suspend count check value is 0 and thread->suspend_count_ != 0
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsOnly);
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 31eacac..e6f294a 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -25,7 +25,8 @@
 namespace art {
 
 // Deliver an exception that's pending on thread helping set up a callee save frame on the way.
-extern "C" void artDeliverPendingExceptionFromCode(Thread* thread, mirror::ArtMethod** sp)
+extern "C" void artDeliverPendingExceptionFromCode(Thread* thread,
+                                                   StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kSaveAll);
   thread->QuickDeliverException();
@@ -33,7 +34,7 @@
 
 // Called by generated call to throw an exception.
 extern "C" void artDeliverExceptionFromCode(mirror::Throwable* exception, Thread* self,
-                                            mirror::ArtMethod** sp)
+                                            StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   /*
    * exception may be NULL, in which case this routine should
@@ -55,7 +56,7 @@
 
 // Called by generated call to throw a NPE exception.
 extern "C" void artThrowNullPointerExceptionFromCode(Thread* self,
-                                                     mirror::ArtMethod** sp)
+                                                     StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   ThrowLocation throw_location = self->GetCurrentLocationForThrow();
@@ -64,8 +65,7 @@
 }
 
 // Called by generated call to throw an arithmetic divide by zero exception.
-extern "C" void artThrowDivZeroFromCode(Thread* self,
-                                        mirror::ArtMethod** sp)
+extern "C" void artThrowDivZeroFromCode(Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   ThrowArithmeticExceptionDivideByZero();
@@ -74,14 +74,14 @@
 
 // Called by generated call to throw an array index out of bounds exception.
 extern "C" void artThrowArrayBoundsFromCode(int index, int length, Thread* self,
-                                            mirror::ArtMethod** sp)
+                                            StackReference<mirror::ArtMethod>*sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   ThrowArrayIndexOutOfBoundsException(index, length);
   self->QuickDeliverException();
 }
 
-extern "C" void artThrowStackOverflowFromCode(Thread* self, mirror::ArtMethod** sp)
+extern "C" void artThrowStackOverflowFromCode(Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   ThrowStackOverflowError(self);
@@ -89,7 +89,7 @@
 }
 
 extern "C" void artThrowNoSuchMethodFromCode(int32_t method_idx, Thread* self,
-                                             mirror::ArtMethod** sp)
+                                             StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   ThrowNoSuchMethodError(method_idx);
@@ -97,7 +97,7 @@
 }
 
 extern "C" void artThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type,
-                                           Thread* self, mirror::ArtMethod** sp)
+                                           Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   CHECK(!dest_type->IsAssignableFrom(src_type));
@@ -106,7 +106,7 @@
 }
 
 extern "C" void artThrowArrayStoreException(mirror::Object* array, mirror::Object* value,
-                                            Thread* self, mirror::ArtMethod** sp)
+                                            Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   ThrowArrayStoreException(value->GetClass(), array->GetClass());
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 554bff4..1d524cb 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -195,22 +195,22 @@
 #endif
 
  public:
-  static mirror::ArtMethod* GetCallingMethod(mirror::ArtMethod** sp)
+  static mirror::ArtMethod* GetCallingMethod(StackReference<mirror::ArtMethod>* sp)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK((*sp)->IsCalleeSaveMethod());
+    DCHECK(sp->AsMirrorPtr()->IsCalleeSaveMethod());
     byte* previous_sp = reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize;
-    return *reinterpret_cast<mirror::ArtMethod**>(previous_sp);
+    return reinterpret_cast<StackReference<mirror::ArtMethod>*>(previous_sp)->AsMirrorPtr();
   }
 
   // For the given quick ref and args quick frame, return the caller's PC.
-  static uintptr_t GetCallingPc(mirror::ArtMethod** sp)
+  static uintptr_t GetCallingPc(StackReference<mirror::ArtMethod>* sp)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK((*sp)->IsCalleeSaveMethod());
+    DCHECK(sp->AsMirrorPtr()->IsCalleeSaveMethod());
     byte* lr = reinterpret_cast<byte*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_LrOffset;
     return *reinterpret_cast<uintptr_t*>(lr);
   }
 
-  QuickArgumentVisitor(mirror::ArtMethod** sp, bool is_static,
+  QuickArgumentVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static,
                        const char* shorty, uint32_t shorty_len)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
       is_static_(is_static), shorty_(shorty), shorty_len_(shorty_len),
@@ -383,12 +383,12 @@
     if (kQuickSoftFloatAbi) {
       CHECK_EQ(kNumQuickFprArgs, 0U);
       return (kNumQuickGprArgs * GetBytesPerGprSpillLocation(kRuntimeISA))
-          + GetBytesPerGprSpillLocation(kRuntimeISA) /* ArtMethod* */;
+          + sizeof(StackReference<mirror::ArtMethod>) /* StackReference<ArtMethod> */;
     } else {
       // For now, there is no reg-spill area for the targets with
       // hard float ABI. So, the offset pointing to the first method's
       // parameter ('this' for non-static methods) should be returned.
-      return GetBytesPerGprSpillLocation(kRuntimeISA);  // Skip Method*.
+      return sizeof(StackReference<mirror::ArtMethod>);  // Skip StackReference<ArtMethod>.
     }
   }
 
@@ -410,8 +410,9 @@
 // Visits arguments on the stack placing them into the shadow frame.
 class BuildQuickShadowFrameVisitor FINAL : public QuickArgumentVisitor {
  public:
-  BuildQuickShadowFrameVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
-                               uint32_t shorty_len, ShadowFrame* sf, size_t first_arg_reg) :
+  BuildQuickShadowFrameVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static,
+                               const char* shorty, uint32_t shorty_len, ShadowFrame* sf,
+                               size_t first_arg_reg) :
     QuickArgumentVisitor(sp, is_static, shorty, shorty_len), sf_(sf), cur_reg_(first_arg_reg) {}
 
   void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
@@ -457,7 +458,7 @@
 }
 
 extern "C" uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
-                                                mirror::ArtMethod** sp)
+                                                StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Ensure we don't get thread suspension until the object arguments are safely in the shadow
   // frame.
@@ -510,9 +511,9 @@
 // to jobjects.
 class BuildQuickArgumentVisitor FINAL : public QuickArgumentVisitor {
  public:
-  BuildQuickArgumentVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
-                            uint32_t shorty_len, ScopedObjectAccessUnchecked* soa,
-                            std::vector<jvalue>* args) :
+  BuildQuickArgumentVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static,
+                            const char* shorty, uint32_t shorty_len,
+                            ScopedObjectAccessUnchecked* soa, std::vector<jvalue>* args) :
     QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa), args_(args) {}
 
   void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
@@ -577,7 +578,7 @@
 // field within the proxy object, which will box the primitive arguments and deal with error cases.
 extern "C" uint64_t artQuickProxyInvokeHandler(mirror::ArtMethod* proxy_method,
                                                mirror::Object* receiver,
-                                               Thread* self, mirror::ArtMethod** sp)
+                                               Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(proxy_method->IsProxyMethod()) << PrettyMethod(proxy_method);
   DCHECK(receiver->GetClass()->IsProxyClass()) << PrettyMethod(proxy_method);
@@ -585,7 +586,7 @@
   const char* old_cause =
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
   // Register the top of the managed stack, making stack crawlable.
-  DCHECK_EQ(*sp, proxy_method) << PrettyMethod(proxy_method);
+  DCHECK_EQ(sp->AsMirrorPtr(), proxy_method) << PrettyMethod(proxy_method);
   self->SetTopOfStack(sp, 0);
   DCHECK_EQ(proxy_method->GetFrameSizeInBytes(),
             Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes())
@@ -629,8 +630,9 @@
 // so they don't get garbage collected.
 class RememberForGcArgumentVisitor FINAL : public QuickArgumentVisitor {
  public:
-  RememberForGcArgumentVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty,
-                               uint32_t shorty_len, ScopedObjectAccessUnchecked* soa) :
+  RememberForGcArgumentVisitor(StackReference<mirror::ArtMethod>* sp, bool is_static,
+                               const char* shorty, uint32_t shorty_len,
+                               ScopedObjectAccessUnchecked* soa) :
     QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa) {}
 
   void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
@@ -666,7 +668,8 @@
 // Lazily resolve a method for quick. Called by stub code.
 extern "C" const void* artQuickResolutionTrampoline(mirror::ArtMethod* called,
                                                     mirror::Object* receiver,
-                                                    Thread* self, mirror::ArtMethod** sp)
+                                                    Thread* self,
+                                                    StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
   // Start new JNI local reference state
@@ -821,7 +824,7 @@
   // Fixup any locally saved objects may have moved during a GC.
   visitor.FixupReferences();
   // Place called method in callee-save frame to be placed as first argument to quick method.
-  *sp = called;
+  sp->Assign(called);
   return code;
 }
 
@@ -1171,14 +1174,14 @@
   }
 
   // WARNING: After this, *sp won't be pointing to the method anymore!
-  void ComputeLayout(mirror::ArtMethod*** m, bool is_static, const char* shorty, uint32_t shorty_len,
-                     void* sp, HandleScope** table, uint32_t* handle_scope_entries,
-                     uintptr_t** start_stack, uintptr_t** start_gpr, uint32_t** start_fpr,
-                     void** code_return, size_t* overall_size)
+  void ComputeLayout(StackReference<mirror::ArtMethod>** m, bool is_static, const char* shorty,
+                     uint32_t shorty_len, void* sp, HandleScope** table,
+                     uint32_t* handle_scope_entries, uintptr_t** start_stack, uintptr_t** start_gpr,
+                     uint32_t** start_fpr, void** code_return, size_t* overall_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ComputeAll(is_static, shorty, shorty_len);
 
-    mirror::ArtMethod* method = **m;
+    mirror::ArtMethod* method = (*m)->AsMirrorPtr();
 
     uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp);
 
@@ -1186,20 +1189,30 @@
     // We have to squeeze in the HandleScope, and relocate the method pointer.
 
     // "Free" the slot for the method.
-    sp8 += kPointerSize;
+    sp8 += kPointerSize;  // In the callee-save frame we use a full pointer.
 
-    // Add the HandleScope.
+    // Under the callee saves put handle scope and new method stack reference.
     *handle_scope_entries = num_handle_scope_references_;
-    size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSize(num_handle_scope_references_);
-    sp8 -= handle_scope_size;
-    *table = reinterpret_cast<HandleScope*>(sp8);
+
+    size_t handle_scope_size = HandleScope::SizeOf(num_handle_scope_references_);
+    size_t scope_and_method = handle_scope_size + sizeof(StackReference<mirror::ArtMethod>);
+
+    sp8 -= scope_and_method;
+    // Align by kStackAlignment
+    uintptr_t sp_to_align = reinterpret_cast<uintptr_t>(sp8);
+    sp_to_align = RoundDown(sp_to_align, kStackAlignment);
+    sp8 = reinterpret_cast<uint8_t*>(sp_to_align);
+
+    uint8_t* sp8_table = sp8 + sizeof(StackReference<mirror::ArtMethod>);
+    *table = reinterpret_cast<HandleScope*>(sp8_table);
     (*table)->SetNumberOfReferences(num_handle_scope_references_);
 
     // Add a slot for the method pointer, and fill it. Fix the pointer-pointer given to us.
-    sp8 -= kPointerSize;
     uint8_t* method_pointer = sp8;
-    *(reinterpret_cast<mirror::ArtMethod**>(method_pointer)) = method;
-    *m = reinterpret_cast<mirror::ArtMethod**>(method_pointer);
+    StackReference<mirror::ArtMethod>* new_method_ref =
+        reinterpret_cast<StackReference<mirror::ArtMethod>*>(method_pointer);
+    new_method_ref->Assign(method);
+    *m = new_method_ref;
 
     // Reference cookie and padding
     sp8 -= 8;
@@ -1306,8 +1319,8 @@
 // of transitioning into native code.
 class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor {
  public:
-  BuildGenericJniFrameVisitor(mirror::ArtMethod*** sp, bool is_static, const char* shorty,
-                              uint32_t shorty_len, Thread* self) :
+  BuildGenericJniFrameVisitor(StackReference<mirror::ArtMethod>** sp, bool is_static,
+                              const char* shorty, uint32_t shorty_len, Thread* self) :
       QuickArgumentVisitor(*sp, is_static, shorty, shorty_len), sm_(this) {
     ComputeGenericJniFrameSize fsc;
     fsc.ComputeLayout(sp, is_static, shorty, shorty_len, *sp, &handle_scope_, &handle_scope_expected_refs_,
@@ -1320,7 +1333,7 @@
     sm_.AdvancePointer(self->GetJniEnv());
 
     if (is_static) {
-      sm_.AdvanceHandleScope((**sp)->GetDeclaringClass());
+      sm_.AdvanceHandleScope((*sp)->AsMirrorPtr()->GetDeclaringClass());
     }
   }
 
@@ -1488,9 +1501,9 @@
  * 1) How many bytes of the alloca can be released, if the value is non-negative.
  * 2) An error, if the value is negative.
  */
-extern "C" ssize_t artQuickGenericJniTrampoline(Thread* self, mirror::ArtMethod** sp)
+extern "C" ssize_t artQuickGenericJniTrampoline(Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtMethod* called = *sp;
+  mirror::ArtMethod* called = sp->AsMirrorPtr();
   DCHECK(called->IsNative()) << PrettyMethod(called, true);
 
   // run the visitor
@@ -1562,17 +1575,18 @@
  * Is called after the native JNI code. Responsible for cleanup (handle scope, saved state) and
  * unlocking.
  */
-extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, mirror::ArtMethod** sp,
+extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self,
+                                                    StackReference<mirror::ArtMethod>* sp,
                                                     jvalue result, uint64_t result_f)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp);
-  mirror::ArtMethod* called = *sp;
+  mirror::ArtMethod* called = sp->AsMirrorPtr();
   uint32_t cookie = *(sp32 - 1);
 
   jobject lock = nullptr;
   if (called->IsSynchronized()) {
     HandleScope* table = reinterpret_cast<HandleScope*>(
-        reinterpret_cast<uint8_t*>(sp) + kPointerSize);
+        reinterpret_cast<uint8_t*>(sp) + sizeof(StackReference<mirror::ArtMethod>));
     lock = table->GetHandle(0).ToJObject();
   }
 
@@ -1669,12 +1683,12 @@
 template<InvokeType type, bool access_check>
 static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
                                      mirror::ArtMethod* caller_method,
-                                     Thread* self, mirror::ArtMethod** sp);
+                                     Thread* self, StackReference<mirror::ArtMethod>* sp);
 
 template<InvokeType type, bool access_check>
 static MethodAndCode artInvokeCommon(uint32_t method_idx, mirror::Object* this_object,
                                      mirror::ArtMethod* caller_method,
-                                     Thread* self, mirror::ArtMethod** sp) {
+                                     Thread* self, StackReference<mirror::ArtMethod>* sp) {
   mirror::ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check,
                                              type);
   if (UNLIKELY(method == nullptr)) {
@@ -1714,7 +1728,8 @@
   MethodAndCode artInvokeCommon<type, access_check>(uint32_t method_idx,                        \
                                                     mirror::Object* this_object,                \
                                                     mirror::ArtMethod* caller_method,           \
-                                                    Thread* self, mirror::ArtMethod** sp)       \
+                                                    Thread* self,                               \
+                                                    StackReference<mirror::ArtMethod>* sp)      \
 
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, false);
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, true);
@@ -1731,48 +1746,43 @@
 
 // See comments in runtime_support_asm.S
 extern "C" MethodAndCode artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
-                                                                     mirror::Object* this_object,
-                                                                     mirror::ArtMethod* caller_method,
-                                                                     Thread* self,
-                                                                     mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Object* this_object,
+    mirror::ArtMethod* caller_method,
+    Thread* self,
+    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kInterface, true>(method_idx, this_object, caller_method, self, sp);
 }
 
 
 extern "C" MethodAndCode artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
-                                                                  mirror::Object* this_object,
-                                                                  mirror::ArtMethod* caller_method,
-                                                                  Thread* self,
-                                                                  mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Object* this_object,
+    mirror::ArtMethod* caller_method,
+    Thread* self,
+    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kDirect, true>(method_idx, this_object, caller_method, self, sp);
 }
 
 extern "C" MethodAndCode artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
-                                                                  mirror::Object* this_object,
-                                                                  mirror::ArtMethod* caller_method,
-                                                                  Thread* self,
-                                                                  mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Object* this_object,
+    mirror::ArtMethod* caller_method,
+    Thread* self,
+    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kStatic, true>(method_idx, this_object, caller_method, self, sp);
 }
 
 extern "C" MethodAndCode artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
-                                                                 mirror::Object* this_object,
-                                                                 mirror::ArtMethod* caller_method,
-                                                                 Thread* self,
-                                                                 mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Object* this_object,
+    mirror::ArtMethod* caller_method,
+    Thread* self,
+    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kSuper, true>(method_idx, this_object, caller_method, self, sp);
 }
 
 extern "C" MethodAndCode artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
-                                                                   mirror::Object* this_object,
-                                                                   mirror::ArtMethod* caller_method,
-                                                                   Thread* self,
-                                                                   mirror::ArtMethod** sp)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Object* this_object,
+    mirror::ArtMethod* caller_method,
+    Thread* self,
+    StackReference<mirror::ArtMethod>* sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return artInvokeCommon<kVirtual, true>(method_idx, this_object, caller_method, self, sp);
 }
 
@@ -1780,7 +1790,8 @@
 extern "C" MethodAndCode artInvokeInterfaceTrampoline(mirror::ArtMethod* interface_method,
                                                       mirror::Object* this_object,
                                                       mirror::ArtMethod* caller_method,
-                                                      Thread* self, mirror::ArtMethod** sp)
+                                                      Thread* self,
+                                                      StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtMethod* method;
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 751cdb6..99633a3 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -192,7 +192,9 @@
     fake_stack.push_back(0);
 
     // Set up thread to appear as if we called out of method_g_ at pc dex 3
-    thread->SetTopOfStack(reinterpret_cast<mirror::ArtMethod**>(&fake_stack[0]), method_g_->ToNativePc(dex_pc));  // return pc
+    thread->SetTopOfStack(
+        reinterpret_cast<StackReference<mirror::ArtMethod>*>(&fake_stack[0]),
+        method_g_->ToNativePc(dex_pc));  // return pc
   } else {
     // Create/push fake 20-byte shadow frame for method g
     fake_stack.push_back(0);
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 15c38c1..6b216c7 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -250,7 +250,8 @@
     manager_->GetMethodAndReturnPCAndSP(context, &method, &return_pc, &sp);
     Thread* self = Thread::Current();
     // Inside of generated code, sp[0] is the method, so sp is the frame.
-    mirror::ArtMethod** frame = reinterpret_cast<mirror::ArtMethod**>(sp);
+    StackReference<mirror::ArtMethod>* frame =
+        reinterpret_cast<StackReference<mirror::ArtMethod>*>(sp);
     self->SetTopOfStack(frame, 0);  // Since we don't necessarily have a dex pc, pass in 0.
     self->DumpJavaStack(LOG(ERROR));
   }
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index f2e059d..8ff7086 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -51,20 +51,12 @@
     return header_size + data_size;
   }
 
-  // Get the size of the handle scope for the number of entries, with padding added for potential alignment.
-  static size_t GetAlignedHandleScopeSize(uint32_t num_references) {
-    size_t handle_scope_size = SizeOf(num_references);
-    return RoundUp(handle_scope_size, 8);
-  }
-
-  // Get the size of the handle scope for the number of entries, with padding added for potential alignment.
-  static size_t GetAlignedHandleScopeSizeTarget(size_t pointer_size, uint32_t num_references) {
+  // Returns the size of a HandleScope containing num_references handles.
+  static size_t SizeOf(size_t pointer_size, uint32_t num_references) {
     // Assume that the layout is packed.
     size_t header_size = pointer_size + sizeof(number_of_references_);
-    // This assumes there is no layout change between 32 and 64b.
     size_t data_size = sizeof(StackReference<mirror::Object>) * num_references;
-    size_t handle_scope_size = header_size + data_size;
-    return RoundUp(handle_scope_size, 8);
+    return header_size + data_size;
   }
 
   // Link to previous HandleScope or null.
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 39efa58..5f4619b 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -296,10 +296,16 @@
     // Generic JNI frame.
     DCHECK(IsNative());
     uint32_t handle_refs = MethodHelper(this).GetNumberOfReferenceArgsWithoutReceiver() + 1;
-    size_t scope_size = HandleScope::GetAlignedHandleScopeSize(handle_refs);
+    size_t scope_size = HandleScope::SizeOf(handle_refs);
     QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
-    return QuickMethodFrameInfo(callee_info.FrameSizeInBytes() + scope_size,
-                                callee_info.CoreSpillMask(), callee_info.FpSpillMask());
+
+    // Callee saves + handle scope + method ref + alignment
+    size_t frame_size = RoundUp(callee_info.FrameSizeInBytes() + scope_size
+                                - kPointerSize  // callee-save frame stores a whole method pointer
+                                + sizeof(StackReference<mirror::ArtMethod>),
+                                kStackAlignment);
+
+    return QuickMethodFrameInfo(frame_size, callee_info.CoreSpillMask(), callee_info.FpSpillMask());
   }
 
   const void* code_pointer = EntryPointToCodePointer(entry_point);
diff --git a/runtime/native/scoped_fast_native_object_access.h b/runtime/native/scoped_fast_native_object_access.h
index 822aefa..606d62d 100644
--- a/runtime/native/scoped_fast_native_object_access.h
+++ b/runtime/native/scoped_fast_native_object_access.h
@@ -31,7 +31,7 @@
     SHARED_LOCK_FUNCTION(Locks::mutator_lock_) ALWAYS_INLINE
      : ScopedObjectAccessAlreadyRunnable(env) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
-    DCHECK((*Self()->GetManagedStack()->GetTopQuickFrame())->IsFastNative());
+    DCHECK(Self()->GetManagedStack()->GetTopQuickFrame()->AsMirrorPtr()->IsFastNative());
     // Don't work with raw objects in non-runnable states.
     DCHECK_EQ(Self()->GetState(), kRunnable);
   }
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 8300195..243818c 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -42,7 +42,7 @@
   CatchBlockStackVisitor visitor(self_, context_, &exception_ref, this);
   visitor.WalkStack(true);
 
-  mirror::ArtMethod* catch_method = *handler_quick_frame_;
+  mirror::ArtMethod* catch_method = handler_quick_frame_->AsMirrorPtr();
   if (kDebugExceptionDelivery) {
     if (catch_method == nullptr) {
       LOG(INFO) << "Handler is upcall";
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index ef3766c..2597ebd 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -19,6 +19,7 @@
 
 #include "base/logging.h"
 #include "base/mutex.h"
+#include "stack.h"  // StackReference
 
 namespace art {
 
@@ -50,7 +51,7 @@
   void UpdateInstrumentationStack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DoLongJump() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetHandlerQuickFrame(mirror::ArtMethod** handler_quick_frame) {
+  void SetHandlerQuickFrame(StackReference<mirror::ArtMethod>* handler_quick_frame) {
     handler_quick_frame_ = handler_quick_frame;
   }
 
@@ -77,7 +78,7 @@
   // Is method tracing active?
   const bool method_tracing_active_;
   // Quick frame with found handler or last frame if no handler found.
-  mirror::ArtMethod** handler_quick_frame_;
+  StackReference<mirror::ArtMethod>* handler_quick_frame_;
   // PC to branch to for the handler.
   uintptr_t handler_quick_frame_pc_;
   // Associated dex PC.
diff --git a/runtime/stack.cc b/runtime/stack.cc
index be1fba4..6633159 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -205,16 +205,16 @@
 }
 
 uintptr_t StackVisitor::GetReturnPc() const {
-  mirror::ArtMethod** sp = GetCurrentQuickFrame();
+  byte* sp = reinterpret_cast<byte*>(GetCurrentQuickFrame());
   DCHECK(sp != NULL);
-  byte* pc_addr = reinterpret_cast<byte*>(sp) + GetMethod()->GetReturnPcOffsetInBytes();
+  byte* pc_addr = sp + GetMethod()->GetReturnPcOffsetInBytes();
   return *reinterpret_cast<uintptr_t*>(pc_addr);
 }
 
 void StackVisitor::SetReturnPc(uintptr_t new_ret_pc) {
-  mirror::ArtMethod** sp = GetCurrentQuickFrame();
+  byte* sp = reinterpret_cast<byte*>(GetCurrentQuickFrame());
   CHECK(sp != NULL);
-  byte* pc_addr = reinterpret_cast<byte*>(sp) + GetMethod()->GetReturnPcOffsetInBytes();
+  byte* pc_addr = sp + GetMethod()->GetReturnPcOffsetInBytes();
   *reinterpret_cast<uintptr_t*>(pc_addr) = new_ret_pc;
 }
 
@@ -307,7 +307,7 @@
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == NULL);
-      mirror::ArtMethod* method = *cur_quick_frame_;
+      mirror::ArtMethod* method = cur_quick_frame_->AsMirrorPtr();
       while (method != NULL) {
         SanityCheckFrame();
         bool should_continue = VisitFrame();
@@ -352,9 +352,9 @@
         }
         cur_quick_frame_pc_ = return_pc;
         byte* next_frame = reinterpret_cast<byte*>(cur_quick_frame_) + frame_size;
-        cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
+        cur_quick_frame_ = reinterpret_cast<StackReference<mirror::ArtMethod>*>(next_frame);
         cur_depth_++;
-        method = *cur_quick_frame_;
+        method = cur_quick_frame_->AsMirrorPtr();
       }
     } else if (cur_shadow_frame_ != NULL) {
       do {
diff --git a/runtime/stack.h b/runtime/stack.h
index 2e32f51..e93fcbc 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -429,11 +429,11 @@
     return link_;
   }
 
-  mirror::ArtMethod** GetTopQuickFrame() const {
+  StackReference<mirror::ArtMethod>* GetTopQuickFrame() const {
     return top_quick_frame_;
   }
 
-  void SetTopQuickFrame(mirror::ArtMethod** top) {
+  void SetTopQuickFrame(StackReference<mirror::ArtMethod>* top) {
     DCHECK(top_shadow_frame_ == NULL);
     top_quick_frame_ = top;
   }
@@ -491,7 +491,7 @@
  private:
   ManagedStack* link_;
   ShadowFrame* top_shadow_frame_;
-  mirror::ArtMethod** top_quick_frame_;
+  StackReference<mirror::ArtMethod>* top_quick_frame_;
   uintptr_t top_quick_frame_pc_;
 };
 
@@ -512,17 +512,7 @@
     if (cur_shadow_frame_ != nullptr) {
       return cur_shadow_frame_->GetMethod();
     } else if (cur_quick_frame_ != nullptr) {
-      return *cur_quick_frame_;
-    } else {
-      return nullptr;
-    }
-  }
-
-  mirror::ArtMethod** GetMethodAddress() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (cur_shadow_frame_ != nullptr) {
-      return cur_shadow_frame_->GetMethodAddress();
-    } else if (cur_quick_frame_ != nullptr) {
-      return cur_quick_frame_;
+      return cur_quick_frame_->AsMirrorPtr();
     } else {
       return nullptr;
     }
@@ -578,7 +568,8 @@
   void SetGPR(uint32_t reg, uintptr_t value);
 
   // This is a fast-path for getting/setting values in a quick frame.
-  uint32_t* GetVRegAddr(mirror::ArtMethod** cur_quick_frame, const DexFile::CodeItem* code_item,
+  uint32_t* GetVRegAddr(StackReference<mirror::ArtMethod>* cur_quick_frame,
+                        const DexFile::CodeItem* code_item,
                         uint32_t core_spills, uint32_t fp_spills, size_t frame_size,
                         uint16_t vreg) const {
     int offset = GetVRegOffset(code_item, core_spills, fp_spills, frame_size, vreg, kRuntimeISA);
@@ -679,7 +670,7 @@
     return cur_quick_frame_pc_;
   }
 
-  mirror::ArtMethod** GetCurrentQuickFrame() const {
+  StackReference<mirror::ArtMethod>* GetCurrentQuickFrame() const {
     return cur_quick_frame_;
   }
 
@@ -688,7 +679,7 @@
   }
 
   HandleScope* GetCurrentHandleScope() const {
-    mirror::ArtMethod** sp = GetCurrentQuickFrame();
+    StackReference<mirror::ArtMethod>* sp = GetCurrentQuickFrame();
     ++sp;  // Skip Method*; handle scope comes next;
     return reinterpret_cast<HandleScope*>(sp);
   }
@@ -706,7 +697,7 @@
 
   Thread* const thread_;
   ShadowFrame* cur_shadow_frame_;
-  mirror::ArtMethod** cur_quick_frame_;
+  StackReference<mirror::ArtMethod>* cur_quick_frame_;
   uintptr_t cur_quick_frame_pc_;
   // Lazily computed, number of frames in the stack.
   size_t num_frames_;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 1355aa1..a8135e0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2012,9 +2012,14 @@
 
  private:
   void VisitQuickFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtMethod** method_addr = GetMethodAddress();
-    visitor_(reinterpret_cast<mirror::Object**>(method_addr), 0 /*ignored*/, this);
-    mirror::ArtMethod* m = *method_addr;
+    StackReference<mirror::ArtMethod>* cur_quick_frame = GetCurrentQuickFrame();
+    mirror::ArtMethod* m = cur_quick_frame->AsMirrorPtr();
+    mirror::ArtMethod* old_method = m;
+    visitor_(reinterpret_cast<mirror::Object**>(&m), 0 /*ignored*/, this);
+    if (m != old_method) {
+      cur_quick_frame->Assign(m);
+    }
+
     // Process register map (which native and runtime methods don't have)
     if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
       const uint8_t* native_gc_map = m->GetNativeGcMap();
@@ -2035,7 +2040,7 @@
         const VmapTable vmap_table(m->GetVmapTable(code_pointer));
         QuickMethodFrameInfo frame_info = m->GetQuickFrameInfo(code_pointer);
         // For all dex registers in the bitmap
-        mirror::ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
+        StackReference<mirror::ArtMethod>* cur_quick_frame = GetCurrentQuickFrame();
         DCHECK(cur_quick_frame != nullptr);
         for (size_t reg = 0; reg < num_regs; ++reg) {
           // Does this register hold a reference?
diff --git a/runtime/thread.h b/runtime/thread.h
index 08bbcae..4601248 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -345,7 +345,7 @@
 
   ThrowLocation GetCurrentLocationForThrow() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetTopOfStack(mirror::ArtMethod** top_method, uintptr_t pc) {
+  void SetTopOfStack(StackReference<mirror::ArtMethod>* top_method, uintptr_t pc) {
     tlsPtr_.managed_stack.SetTopQuickFrame(top_method);
     tlsPtr_.managed_stack.SetTopQuickFramePc(pc);
   }