Merge "Enable concurrent sweeping for non-concurrent GC."
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index a89b307..646859c 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -160,7 +160,7 @@
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
     LIR* OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
-    LIR* OpRegCopy(RegStorage r_dest, RegStorage r_src);
+    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index f47e693..a2d6373 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -361,37 +361,40 @@
   return res;
 }
 
-LIR* ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  LIR* res = OpRegCopyNoInsert(r_dest, r_src);
-  AppendLIR(res);
-  return res;
+void ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  if (r_dest != r_src) {
+    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
+    AppendLIR(res);
+  }
 }
 
 void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
-  bool dest_fp = ARM_FPREG(r_dest.GetLowReg());
-  bool src_fp = ARM_FPREG(r_src.GetLowReg());
-  if (dest_fp) {
-    if (src_fp) {
-      // FIXME: handle 64-bit solo's here.
-      OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
-                RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
-    } else {
-      NewLIR3(kThumb2Fmdrr, S2d(r_dest.GetLowReg(), r_dest.GetHighReg()),
-              r_src.GetLowReg(), r_src.GetHighReg());
-    }
-  } else {
-    if (src_fp) {
-      NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), S2d(r_src.GetLowReg(),
-              r_src.GetHighReg()));
-    } else {
-      // Handle overlap
-      if (r_src.GetHighReg() == r_dest.GetLowReg()) {
-        DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
-        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+  if (r_dest != r_src) {
+    bool dest_fp = ARM_FPREG(r_dest.GetLowReg());
+    bool src_fp = ARM_FPREG(r_src.GetLowReg());
+    if (dest_fp) {
+      if (src_fp) {
+        // FIXME: handle 64-bit solos here.
+        OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
+                  RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
       } else {
-        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        NewLIR3(kThumb2Fmdrr, S2d(r_dest.GetLowReg(), r_dest.GetHighReg()),
+                r_src.GetLowReg(), r_src.GetHighReg());
+      }
+    } else {
+      if (src_fp) {
+        NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), S2d(r_src.GetLowReg(),
+                r_src.GetHighReg()));
+      } else {
+        // Handle overlap
+        if (r_src.GetHighReg() == r_dest.GetLowReg()) {
+          DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+        } else {
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        }
       }
     }
   }
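
The wide-copy rewrite above encodes two invariants worth spelling out: a copy
onto the same register pair is now elided entirely, and when the pairs overlap
the two 32-bit moves are ordered so that no source half is clobbered before it
has been read. A standalone C++ sketch of that ordering rule, using an
invented array of registers rather than ART's RegStorage API:

    #include <cassert>
    #include <cstdio>

    struct Pair { int lo, hi; };  // indices of the two 32-bit halves

    void CopyWide(int regs[], Pair dest, Pair src) {
      if (dest.lo == src.lo && dest.hi == src.hi) return;  // self-copy elided
      if (src.hi == dest.lo) {      // writing dest.lo first would destroy src.hi
        assert(src.lo != dest.hi);  // a full swap needs a temp (see the x86 path)
        regs[dest.hi] = regs[src.hi];
        regs[dest.lo] = regs[src.lo];
      } else {
        regs[dest.lo] = regs[src.lo];
        regs[dest.hi] = regs[src.hi];
      }
    }

    int main() {
      int regs[4] = {10, 11, 12, 13};
      CopyWide(regs, /*dest=*/{1, 2}, /*src=*/{0, 1});  // src.hi aliases dest.lo
      std::printf("%d %d\n", regs[1], regs[2]);         // 10 11: value intact
    }
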
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 677ee15..501e4e20 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -274,6 +274,19 @@
   }
 }
 
+void Mir2Lir::UpdateLIROffsets() {
+  // Only used for code listings.
+  size_t offset = 0;
+  for (LIR* lir = first_lir_insn_; lir != nullptr; lir = lir->next) {
+    lir->offset = offset;
+    if (!lir->flags.is_nop && !IsPseudoLirOp(lir->opcode)) {
+      offset += GetInsnSize(lir);
+    } else if (lir->opcode == kPseudoPseudoAlign4) {
+      offset += (offset & 0x2);
+    }
+  }
+}
+
 /* Dump instructions and constant pool contents */
 void Mir2Lir::CodegenDump() {
   LOG(INFO) << "Dumping LIR insns for "
@@ -293,6 +306,7 @@
   LOG(INFO) << "expansion factor: "
             << static_cast<float>(total_size_) / static_cast<float>(insns_size * 2);
   DumpPromotionMap();
+  UpdateLIROffsets();
   for (lir_insn = first_lir_insn_; lir_insn != NULL; lir_insn = lir_insn->next) {
     DumpLIRInsn(lir_insn, 0);
   }
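
UpdateLIROffsets recomputes listing offsets after the fact because nops and
pseudo-ops carry no encoding. The kPseudoPseudoAlign4 case relies on Thumb
instruction sizes being multiples of 2: (offset & 0x2) is 2 exactly when the
offset is 2 mod 4, so adding it rounds up to the next 4-byte boundary. A quick
check of that arithmetic:

    #include <cstdio>

    int main() {
      const unsigned offsets[] = {0, 2, 4, 6};
      for (unsigned offset : offsets) {
        std::printf("%u -> %u\n", offset, offset + (offset & 0x2));
      }
      // prints 0 -> 0, 2 -> 4, 4 -> 4, 6 -> 8
    }
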
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index da65f34..81d6782 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -159,7 +159,7 @@
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
     LIR* OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
-    LIR* OpRegCopy(RegStorage r_dest, RegStorage r_src);
+    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 88d5d2b..7c0becd 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -177,37 +177,40 @@
   return res;
 }
 
-LIR* MipsMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  LIR *res = OpRegCopyNoInsert(r_dest, r_src);
-  AppendLIR(res);
-  return res;
+void MipsMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  if (r_dest != r_src) {
+    LIR *res = OpRegCopyNoInsert(r_dest, r_src);
+    AppendLIR(res);
+  }
 }
 
 void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
-  bool dest_fp = MIPS_FPREG(r_dest.GetLowReg());
-  bool src_fp = MIPS_FPREG(r_src.GetLowReg());
-  if (dest_fp) {
-    if (src_fp) {
-      // FIXME: handle this here - reserve OpRegCopy for 32-bit copies.
-      OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
-                RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
+  if (r_dest != r_src) {
+    bool dest_fp = MIPS_FPREG(r_dest.GetLowReg());
+    bool src_fp = MIPS_FPREG(r_src.GetLowReg());
+    if (dest_fp) {
+      if (src_fp) {
+        // FIXME: handle this here - reserve OpRegCopy for 32-bit copies.
+        OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
+                  RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
+      } else {
+        /* note the operands are swapped for the mtc1 instr */
+        NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
+        NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
+      }
     } else {
-       /* note the operands are swapped for the mtc1 instr */
-      NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
-      NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
-    }
-  } else {
-    if (src_fp) {
-      NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg());
-      NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg());
-    } else {
-      // Handle overlap
-      if (r_src.GetHighReg() == r_dest.GetLowReg()) {
-        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+      if (src_fp) {
+        NewLIR2(kMipsMfc1, r_dest.GetLowReg(), r_src.GetLowReg());
+        NewLIR2(kMipsMfc1, r_dest.GetHighReg(), r_src.GetHighReg());
       } else {
-        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        // Handle overlap
+        if (r_src.GetHighReg() == r_dest.GetLowReg()) {
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+        } else {
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        }
       }
     }
   }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6d38488..b8ab609 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -990,6 +990,9 @@
     ResetRegPool();
     if (cu_->disable_opt & (1 << kTrackLiveTemps)) {
       ClobberAllRegs();
+      // Reset temp allocation to minimize differences when A/B testing.
+      reg_pool_->next_core_reg = 0;
+      reg_pool_->next_fp_reg = 0;
     }
 
     if (cu_->disable_opt & (1 << kSuppressLoads)) {
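
The reset of next_core_reg/next_fp_reg matters because Quick hands out temps
round-robin, so the register chosen for a temp depends on where the cursor was
left by earlier code. A simplified model (the field name follows the patch,
the allocator body is illustrative) of why two otherwise identical blocks can
compile differently without the reset:

    #include <cstdio>

    struct RegPool {
      int next_core_reg = 0;
      static constexpr int kNumCoreRegs = 4;
      int AllocTemp() {                      // round-robin cursor
        int reg = next_core_reg;
        next_core_reg = (next_core_reg + 1) % kNumCoreRegs;
        return reg;
      }
    };

    int main() {
      RegPool pool;
      pool.AllocTemp();                             // earlier code moves the cursor
      std::printf("temp=r%d\n", pool.AllocTemp());  // r1, position dependent
      pool.next_core_reg = 0;                       // the reset from the patch
      std::printf("temp=r%d\n", pool.AllocTemp());  // r0, deterministic
    }
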
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8d593ae..2b6d78b 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -811,6 +811,8 @@
     bool MethodBlockCodeGen(BasicBlock* bb);
     bool SpecialMIR2LIR(const InlineMethod& special);
     void MethodMIR2LIR();
+    // Update LIR offsets for verbose listings.
+    void UpdateLIROffsets();
 
     /*
      * @brief Load the address of the dex method into the register.
@@ -1050,7 +1052,7 @@
     virtual LIR* OpMem(OpKind op, RegStorage r_base, int disp) = 0;
     virtual LIR* OpPcRelLoad(RegStorage reg, LIR* target) = 0;
     virtual LIR* OpReg(OpKind op, RegStorage r_dest_src) = 0;
-    virtual LIR* OpRegCopy(RegStorage r_dest, RegStorage r_src) = 0;
+    virtual void OpRegCopy(RegStorage r_dest, RegStorage r_src) = 0;
     virtual LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) = 0;
     virtual LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value) = 0;
     virtual LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) = 0;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index fb61627..760290c 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -232,7 +232,7 @@
     LIR* OpMem(OpKind op, RegStorage r_base, int disp);
     LIR* OpPcRelLoad(RegStorage reg, LIR* target);
     LIR* OpReg(OpKind op, RegStorage r_dest_src);
-    LIR* OpRegCopy(RegStorage r_dest, RegStorage r_src);
+    void OpRegCopy(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src);
     LIR* OpRegImm(OpKind op, RegStorage r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 5ba9709..3bff497 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -116,52 +116,55 @@
   return res;
 }
 
-LIR* X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
-  LIR *res = OpRegCopyNoInsert(r_dest, r_src);
-  AppendLIR(res);
-  return res;
+void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
+  if (r_dest != r_src) {
+    LIR *res = OpRegCopyNoInsert(r_dest, r_src);
+    AppendLIR(res);
+  }
 }
 
 void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
-  // FIXME: handle k64BitSolo when we start using them.
-  DCHECK(r_dest.IsPair());
-  DCHECK(r_src.IsPair());
-  bool dest_fp = X86_FPREG(r_dest.GetLowReg());
-  bool src_fp = X86_FPREG(r_src.GetLowReg());
-  if (dest_fp) {
-    if (src_fp) {
-      // TODO: we ought to handle this case here - reserve OpRegCopy for 32-bit copies.
-      OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
-                RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
-    } else {
-      // TODO: Prevent this from happening in the code. The result is often
-      // unused or could have been loaded more easily from memory.
-      NewLIR2(kX86MovdxrRR, r_dest.GetLowReg(), r_src.GetLowReg());
-      RegStorage r_tmp = AllocTempDouble();
-      NewLIR2(kX86MovdxrRR, r_tmp.GetLowReg(), r_src.GetHighReg());
-      NewLIR2(kX86PunpckldqRR, r_dest.GetLowReg(), r_tmp.GetLowReg());
-      FreeTemp(r_tmp);
-    }
-  } else {
-    if (src_fp) {
-      NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetLowReg());
-      NewLIR2(kX86PsrlqRI, r_src.GetLowReg(), 32);
-      NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), r_src.GetLowReg());
-    } else {
-      // Handle overlap
-      if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) {
-        // Deal with cycles.
-        RegStorage temp_reg = AllocTemp();
-        OpRegCopy(temp_reg, r_dest.GetHigh());
-        OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
-        OpRegCopy(r_dest.GetLow(), temp_reg);
-        FreeTemp(temp_reg);
-      } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
-        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+  if (r_dest != r_src) {
+    // FIXME: handle k64BitSolo when we start using them.
+    DCHECK(r_dest.IsPair());
+    DCHECK(r_src.IsPair());
+    bool dest_fp = X86_FPREG(r_dest.GetLowReg());
+    bool src_fp = X86_FPREG(r_src.GetLowReg());
+    if (dest_fp) {
+      if (src_fp) {
+        // TODO: we ought to handle this case here - reserve OpRegCopy for 32-bit copies.
+        OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
+                  RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
       } else {
-        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        // TODO: Prevent this from happening in the code. The result is often
+        // unused or could have been loaded more easily from memory.
+        NewLIR2(kX86MovdxrRR, r_dest.GetLowReg(), r_src.GetLowReg());
+        RegStorage r_tmp = AllocTempDouble();
+        NewLIR2(kX86MovdxrRR, r_tmp.GetLowReg(), r_src.GetHighReg());
+        NewLIR2(kX86PunpckldqRR, r_dest.GetLowReg(), r_tmp.GetLowReg());
+        FreeTemp(r_tmp);
+      }
+    } else {
+      if (src_fp) {
+        NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetLowReg());
+        NewLIR2(kX86PsrlqRI, r_src.GetLowReg(), 32);
+        NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), r_src.GetLowReg());
+      } else {
+        // Handle overlap
+        if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) {
+          // Deal with cycles.
+          RegStorage temp_reg = AllocTemp();
+          OpRegCopy(temp_reg, r_dest.GetHigh());
+          OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
+          OpRegCopy(r_dest.GetLow(), temp_reg);
+          FreeTemp(temp_reg);
+        } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+        } else {
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        }
       }
     }
   }
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 00bebd2..4d45055 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -426,7 +426,8 @@
         RegStorage t_reg = AllocTemp();
         OpRegCopy(t_reg, r_src1);
         OpRegReg(op, t_reg, r_src2);
-        LIR* res = OpRegCopy(r_dest, t_reg);
+        LIR* res = OpRegCopyNoInsert(r_dest, t_reg);
+        AppendLIR(res);
         FreeTemp(t_reg);
         return res;
       }
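
Since OpRegCopy now returns void (and may emit nothing at all for a
self-copy), this call site needed the one caller-visible guarantee back: a
non-null LIR* for the copy it just emitted. The pattern in isolation, with
stand-in types since the real Mir2Lir plumbing is much larger:

    #include <vector>

    struct LIR { int dest, src; };
    static std::vector<LIR*> code;

    static LIR* OpRegCopyNoInsert(int r_dest, int r_src) {
      return new LIR{r_dest, r_src};   // always builds an instruction
    }
    static void AppendLIR(LIR* lir) { code.push_back(lir); }

    // What the patched call site does: build unconditionally, insert, and
    // keep the pointer, rather than relying on OpRegCopy's old return value.
    static LIR* CopyAndReturn(int r_dest, int r_src) {
      LIR* res = OpRegCopyNoInsert(r_dest, r_src);
      AppendLIR(res);
      return res;
    }

    int main() { return CopyAndReturn(1, 2) == nullptr; }
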
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index 8769736..4be0f59 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -415,6 +415,7 @@
     loc[i] = fresh_loc;
     loc[i].s_reg_low = i;
     loc[i].is_const = is_constant_v_->IsBitSet(i);
+    loc[i].wide = false;
   }
 
   /* Patch up the locations for the compiler temps */
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index babb1f5..ff316e5 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -55,9 +55,105 @@
   }
 }
 
+size_t CodeGenerator::AllocateFreeRegisterInternal(
+    bool* blocked_registers, size_t number_of_registers) const {
+  for (size_t regno = 0; regno < number_of_registers; regno++) {
+    if (!blocked_registers[regno]) {
+      blocked_registers[regno] = true;
+      return regno;
+    }
+  }
+  LOG(FATAL) << "Unreachable";
+  return -1;
+}
+
+
+void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations == nullptr) return;
+
+  for (size_t i = 0, e = GetNumberOfRegisters(); i < e; ++i) {
+    blocked_registers_[i] = false;
+  }
+
+  // Mark all fixed input, temp and output registers as used.
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+    Location loc = locations->InAt(i);
+    if (loc.IsRegister()) {
+      // Check that a register is not specified twice in the summary.
+      DCHECK(!blocked_registers_[loc.GetEncoding()]);
+      blocked_registers_[loc.GetEncoding()] = true;
+    }
+  }
+
+  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
+    Location loc = locations->GetTemp(i);
+    if (loc.IsRegister()) {
+      // Check that a register is not specified twice in the summary.
+      DCHECK(!blocked_registers_[loc.GetEncoding()]);
+      blocked_registers_[loc.GetEncoding()] = true;
+    }
+  }
+
+  SetupBlockedRegisters(blocked_registers_);
+
+  // Allocate all unallocated input locations.
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+    Location loc = locations->InAt(i);
+    HInstruction* input = instruction->InputAt(i);
+    if (loc.IsUnallocated()) {
+      if (loc.GetPolicy() == Location::kRequiresRegister) {
+        loc = Location::RegisterLocation(
+            AllocateFreeRegister(input->GetType(), blocked_registers_));
+      } else {
+        DCHECK_EQ(loc.GetPolicy(), Location::kAny);
+        HLoadLocal* load = input->AsLoadLocal();
+        if (load != nullptr) {
+          loc = GetStackLocation(load);
+        } else {
+          loc = Location::RegisterLocation(
+              AllocateFreeRegister(input->GetType(), blocked_registers_));
+        }
+      }
+      locations->SetInAt(i, loc);
+    }
+  }
+
+  // Allocate all unallocated temp locations.
+  for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
+    Location loc = locations->GetTemp(i);
+    if (loc.IsUnallocated()) {
+      DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister);
+      // TODO: Adjust handling of temps. We currently consider temps to use
+      // core registers. They may also use floating point registers at some point.
+      loc = Location::RegisterLocation(static_cast<ManagedRegister>(
+          AllocateFreeRegister(Primitive::kPrimInt, blocked_registers_)));
+      locations->SetTempAt(i, loc);
+    }
+  }
+
+  Location result_location = locations->Out();
+  if (result_location.IsUnallocated()) {
+    switch (result_location.GetPolicy()) {
+      case Location::kAny:
+      case Location::kRequiresRegister:
+        result_location = Location::RegisterLocation(
+            AllocateFreeRegister(instruction->GetType(), blocked_registers_));
+        break;
+      case Location::kSameAsFirstInput:
+        result_location = locations->InAt(0);
+        break;
+    }
+    locations->SetOut(result_location);
+  }
+}
+
 void CodeGenerator::InitLocations(HInstruction* instruction) {
-  if (instruction->GetLocations() == nullptr) return;
-  for (size_t i = 0; i < instruction->InputCount(); i++) {
+  if (instruction->GetLocations() == nullptr) {
+    return;
+  }
+  AllocateRegistersLocally(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
     Location location = instruction->GetLocations()->InAt(i);
     if (location.IsValid()) {
       // Move the input to the desired location.
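
AllocateRegistersLocally is a per-instruction (local) allocator: it first
blocks every register named by a fixed input, temp, or backend-reserved slot,
then resolves each remaining location according to its policy. A condensed
model of that flow, with invented names rather than the Location API:

    #include <cassert>
    #include <cstdio>

    enum Policy { kAny, kRequiresRegister, kSameAsFirstInput };

    size_t AllocateFreeRegister(bool* blocked, size_t n) {
      for (size_t r = 0; r < n; ++r) {
        if (!blocked[r]) { blocked[r] = true; return r; }
      }
      assert(false && "out of registers");
      return n;
    }

    int main() {
      bool blocked[4] = {};
      blocked[0] = true;  // a fixed location already claimed r0
      size_t in1 = AllocateFreeRegister(blocked, 4);  // kRequiresRegister
      size_t tmp = AllocateFreeRegister(blocked, 4);  // temp, core register
      size_t out = in1;                               // kSameAsFirstInput
      std::printf("in1=r%zu tmp=r%zu out=r%zu\n", in1, tmp, out);
    }
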
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 54f9e70..74cbccc 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -62,6 +62,12 @@
     // bits are in a stack slot. The kQuickParameter kind is for
     // handling this special case.
     kQuickParameter = 4,
+
+    // An unallocated location represents a location that is not fixed and
+    // can be allocated by a register allocator. Each unallocated location
+    // has a policy that specifies what kind of location is suitable. The
+    // payload contains the register allocation policy.
+    kUnallocated = 5,
   };
 
   Location() : value_(kInvalid) {
@@ -166,10 +172,50 @@
       case kStackSlot: return "S";
       case kDoubleStackSlot: return "DS";
       case kQuickParameter: return "Q";
+      case kUnallocated: return "U";
     }
     return "?";
   }
 
+  // Unallocated locations.
+  enum Policy {
+    kAny,
+    kRequiresRegister,
+    kSameAsFirstInput,
+  };
+
+  bool IsUnallocated() const {
+    return GetKind() == kUnallocated;
+  }
+
+  static Location UnallocatedLocation(Policy policy) {
+    return Location(kUnallocated, PolicyField::Encode(policy));
+  }
+
+  // Any free register is suitable to replace this unallocated location.
+  static Location Any() {
+    return UnallocatedLocation(kAny);
+  }
+
+  static Location RequiresRegister() {
+    return UnallocatedLocation(kRequiresRegister);
+  }
+
+  // The location of the first input to the instruction will be
+  // used to replace this unallocated location.
+  static Location SameAsFirstInput() {
+    return UnallocatedLocation(kSameAsFirstInput);
+  }
+
+  Policy GetPolicy() const {
+    DCHECK(IsUnallocated());
+    return PolicyField::Decode(GetPayload());
+  }
+
+  uword GetEncoding() const {
+    return GetPayload();
+  }
+
  private:
   // Number of bits required to encode Kind value.
   static constexpr uint32_t kBitsForKind = 4;
@@ -187,6 +233,9 @@
   typedef BitField<Kind, 0, kBitsForKind> KindField;
   typedef BitField<uword, kBitsForKind, kBitsForPayload> PayloadField;
 
+  // Layout for kUnallocated locations payload.
+  typedef BitField<Policy, 0, 3> PolicyField;
+
   // Layout for stack slots.
   static const intptr_t kStackIndexBias =
       static_cast<intptr_t>(1) << (kBitsForPayload - 1);
@@ -208,40 +257,52 @@
 class LocationSummary : public ArenaObject {
  public:
   explicit LocationSummary(HInstruction* instruction)
-      : inputs(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
-        temps(instruction->GetBlock()->GetGraph()->GetArena(), 0) {
-    inputs.SetSize(instruction->InputCount());
+      : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
+        temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0) {
+    inputs_.SetSize(instruction->InputCount());
     for (size_t i = 0; i < instruction->InputCount(); i++) {
-      inputs.Put(i, Location());
+      inputs_.Put(i, Location());
     }
   }
 
   void SetInAt(uint32_t at, Location location) {
-    inputs.Put(at, location);
+    inputs_.Put(at, location);
   }
 
   Location InAt(uint32_t at) const {
-    return inputs.Get(at);
+    return inputs_.Get(at);
+  }
+
+  size_t GetInputCount() const {
+    return inputs_.Size();
   }
 
   void SetOut(Location location) {
-    output = Location(location);
+    output_ = Location(location);
   }
 
   void AddTemp(Location location) {
-    temps.Add(location);
+    temps_.Add(location);
   }
 
   Location GetTemp(uint32_t at) const {
-    return temps.Get(at);
+    return temps_.Get(at);
   }
 
-  Location Out() const { return output; }
+  void SetTempAt(uint32_t at, Location location) {
+    temps_.Put(at, location);
+  }
+
+  size_t GetTempCount() const {
+    return temps_.Size();
+  }
+
+  Location Out() const { return output_; }
 
  private:
-  GrowableArray<Location> inputs;
-  GrowableArray<Location> temps;
-  Location output;
+  GrowableArray<Location> inputs_;
+  GrowableArray<Location> temps_;
+  Location output_;
 
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
@@ -286,15 +347,33 @@
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
 
  protected:
-  explicit CodeGenerator(HGraph* graph)
+  CodeGenerator(HGraph* graph, size_t number_of_registers)
       : frame_size_(0),
         graph_(graph),
         block_labels_(graph->GetArena(), 0),
-        pc_infos_(graph->GetArena(), 32) {
+        pc_infos_(graph->GetArena(), 32),
+        blocked_registers_(static_cast<bool*>(
+            graph->GetArena()->Alloc(number_of_registers * sizeof(bool), kArenaAllocData))) {
     block_labels_.SetSize(graph->GetBlocks()->Size());
   }
   ~CodeGenerator() { }
 
+  // Register allocation logic.
+  void AllocateRegistersLocally(HInstruction* instruction) const;
+
+  // Backend-specific implementation for allocating a register.
+  virtual ManagedRegister AllocateFreeRegister(Primitive::Type type,
+                                               bool* blocked_registers) const = 0;
+
+  // Raw implementation of allocating a register: loops over blocked_registers to find
+  // the first available register.
+  size_t AllocateFreeRegisterInternal(bool* blocked_registers, size_t number_of_registers) const;
+
+  virtual void SetupBlockedRegisters(bool* blocked_registers) const = 0;
+  virtual size_t GetNumberOfRegisters() const = 0;
+
+  virtual Location GetStackLocation(HLoadLocal* load) const = 0;
+
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
@@ -309,6 +388,9 @@
   GrowableArray<Label> block_labels_;
   GrowableArray<PcInfo> pc_infos_;
 
+  // Temporary data structure used when doing register allocation.
+  bool* const blocked_registers_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
 
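The new kUnallocated kind reuses the existing kind/payload split: the low
kBitsForKind bits hold the Kind and, for unallocated locations, the payload
holds the Policy via PolicyField. The same packing reduced to plain bit
twiddling (constants mirror the header; the BitField helpers are elided):

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t kBitsForKind = 4;
    enum Kind : uint32_t { kUnallocated = 5 };
    enum Policy : uint32_t { kAny = 0, kRequiresRegister = 1, kSameAsFirstInput = 2 };

    uint32_t Encode(Kind kind, uint32_t payload) {
      return kind | (payload << kBitsForKind);
    }
    Policy GetPolicy(uint32_t value) {
      return static_cast<Policy>(value >> kBitsForKind);
    }

    int main() {
      uint32_t loc = Encode(kUnallocated, kRequiresRegister);
      std::printf("kind=%u policy=%u\n",
                  loc & ((1u << kBitsForKind) - 1), GetPolicy(loc));
      // prints kind=5 policy=1
    }
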
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6e528f9..a446701 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -35,6 +35,81 @@
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
+CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
+    : CodeGenerator(graph, kNumberOfRegIds),
+      location_builder_(graph, this),
+      instruction_visitor_(graph, this) {}
+
+static bool* GetBlockedRegisterPairs(bool* blocked_registers) {
+  return blocked_registers + kNumberOfAllocIds;
+}
+
+ManagedRegister CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type,
+                                                       bool* blocked_registers) const {
+  switch (type) {
+    case Primitive::kPrimLong: {
+      size_t reg = AllocateFreeRegisterInternal(
+          GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs);
+      ArmManagedRegister pair =
+          ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
+      blocked_registers[pair.AsRegisterPairLow()] = true;
+      blocked_registers[pair.AsRegisterPairHigh()] = true;
+      return pair;
+    }
+
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCoreRegisters);
+      return ArmManagedRegister::FromCoreRegister(static_cast<Register>(reg));
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+
+  return ManagedRegister::NoRegister();
+}
+
+void CodeGeneratorARM::SetupBlockedRegisters(bool* blocked_registers) const {
+  bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+
+  // Don't allocate the register pair used for dalvik-style argument passing.
+  blocked_register_pairs[R1_R2] = true;
+
+  // Stack register, LR and PC are always reserved.
+  blocked_registers[SP] = true;
+  blocked_registers[LR] = true;
+  blocked_registers[PC] = true;
+
+  // Reserve R4 for suspend check.
+  blocked_registers[R4] = true;
+  blocked_register_pairs[R4_R5] = true;
+
+  // Reserve thread register.
+  blocked_registers[TR] = true;
+
+  // TODO: We currently don't use Quick's callee saved registers.
+  blocked_registers[R5] = true;
+  blocked_registers[R6] = true;
+  blocked_registers[R7] = true;
+  blocked_registers[R8] = true;
+  blocked_registers[R10] = true;
+  blocked_registers[R11] = true;
+  blocked_register_pairs[R6_R7] = true;
+}
+
+size_t CodeGeneratorARM::GetNumberOfRegisters() const {
+  return kNumberOfRegIds;
+}
+
 static Location ArmCoreLocation(Register reg) {
   return Location::RegisterLocation(ArmManagedRegister::FromCoreRegister(reg));
 }
@@ -85,6 +160,32 @@
   }
 }
 
+Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
+  switch (load->GetType()) {
+    case Primitive::kPrimLong:
+      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
+      break;
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented type " << load->GetType();
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type " << load->GetType();
+  }
+
+  LOG(FATAL) << "Unreachable";
+  return Location();
+}
+
 Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -302,7 +403,7 @@
 
 void LocationsBuilderARM::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, ArmCoreLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
   if_instr->SetLocations(locations);
 }
 
@@ -317,9 +418,9 @@
 
 void LocationsBuilderARM::VisitEqual(HEqual* equal) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
-  locations->SetInAt(0, ArmCoreLocation(R0));
-  locations->SetInAt(1, ArmCoreLocation(R1));
-  locations->SetOut(ArmCoreLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
   equal->SetLocations(locations);
 }
 
@@ -409,7 +510,8 @@
       break;
 
     case Primitive::kPrimLong:
-      locations->SetInAt(0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      locations->SetInAt(
+          0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
       break;
 
     default:
@@ -444,7 +546,7 @@
 
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  locations->AddTemp(ArmCoreLocation(R0));
+  locations->AddTemp(Location::RequiresRegister());
 
   InvokeDexCallingConventionVisitor calling_convention_visitor;
   for (size_t i = 0; i < invoke->InputCount(); i++) {
@@ -512,19 +614,11 @@
 void LocationsBuilderARM::VisitAdd(HAdd* add) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
   switch (add->GetResultType()) {
-    case Primitive::kPrimInt: {
-      locations->SetInAt(0, ArmCoreLocation(R0));
-      locations->SetInAt(1, ArmCoreLocation(R1));
-      locations->SetOut(ArmCoreLocation(R0));
-      break;
-    }
-
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      locations->SetInAt(
-          0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
-      locations->SetInAt(
-          1, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R2_R3)));
-      locations->SetOut(Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister());
       break;
     }
 
@@ -574,19 +668,11 @@
 void LocationsBuilderARM::VisitSub(HSub* sub) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub);
   switch (sub->GetResultType()) {
-    case Primitive::kPrimInt: {
-      locations->SetInAt(0, ArmCoreLocation(R0));
-      locations->SetInAt(1, ArmCoreLocation(R1));
-      locations->SetOut(ArmCoreLocation(R0));
-      break;
-    }
-
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      locations->SetInAt(
-          0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
-      locations->SetInAt(
-          1, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R2_R3)));
-      locations->SetOut(Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister());
       break;
     }
 
@@ -649,6 +735,9 @@
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(ArmCoreLocation(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(ArmCoreLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(ArmCoreLocation(R0));
   instruction->SetLocations(locations);
 }
@@ -683,8 +772,8 @@
 
 void LocationsBuilderARM::VisitNot(HNot* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, ArmCoreLocation(R0));
-  locations->SetOut(ArmCoreLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
   instruction->SetLocations(locations);
 }
 
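A detail that is easy to miss in AllocateFreeRegister above: the blocked array
has a singles section followed by a pairs section (GetBlockedRegisterPairs
just offsets into it), and handing out a pair must also block both halves so a
later int allocation cannot alias half of a live long. In miniature, with an
invented two-pair register file:

    #include <cstdio>

    struct PairDesc { int lo, hi; };
    constexpr PairDesc kPairs[] = {{0, 1}, {2, 3}};  // r0_r1, r2_r3

    int AllocPair(bool* singles, bool* pairs) {
      for (int p = 0; p < 2; ++p) {
        if (!pairs[p]) {
          pairs[p] = true;
          singles[kPairs[p].lo] = true;  // block both halves
          singles[kPairs[p].hi] = true;
          return p;
        }
      }
      return -1;
    }

    int main() {
      bool singles[4] = {};
      bool pairs[2] = {true, false};  // first pair reserved, like R1_R2 above
      int p = AllocPair(singles, pairs);
      std::printf("pair=%d r2_blocked=%d r3_blocked=%d\n",
                  p, singles[2], singles[3]);
    }
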
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index ed35f94..2405d4b 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -101,10 +101,7 @@
 
 class CodeGeneratorARM : public CodeGenerator {
  public:
-  explicit CodeGeneratorARM(HGraph* graph)
-      : CodeGenerator(graph),
-        location_builder_(graph, this),
-        instruction_visitor_(graph, this) { }
+  explicit CodeGeneratorARM(HGraph* graph);
   virtual ~CodeGeneratorARM() { }
 
   virtual void GenerateFrameEntry() OVERRIDE;
@@ -128,7 +125,13 @@
     return &assembler_;
   }
 
+  virtual void SetupBlockedRegisters(bool* blocked_registers) const OVERRIDE;
+  virtual ManagedRegister AllocateFreeRegister(
+      Primitive::Type type, bool* blocked_registers) const OVERRIDE;
+  virtual size_t GetNumberOfRegisters() const OVERRIDE;
+
   int32_t GetStackSlot(HLocal* local) const;
+  virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
  private:
   // Helper method to move a 32bits value between two locations.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index dc10830..fbb054a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -35,6 +35,72 @@
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
+CodeGeneratorX86::CodeGeneratorX86(HGraph* graph)
+    : CodeGenerator(graph, kNumberOfRegIds),
+      location_builder_(graph, this),
+      instruction_visitor_(graph, this) {}
+
+static bool* GetBlockedRegisterPairs(bool* blocked_registers) {
+  return blocked_registers + kNumberOfAllocIds;
+}
+
+ManagedRegister CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type,
+                                                       bool* blocked_registers) const {
+  switch (type) {
+    case Primitive::kPrimLong: {
+      size_t reg = AllocateFreeRegisterInternal(
+          GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs);
+      X86ManagedRegister pair =
+          X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
+      blocked_registers[pair.AsRegisterPairLow()] = true;
+      blocked_registers[pair.AsRegisterPairHigh()] = true;
+      return pair;
+    }
+
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      size_t reg = AllocateFreeRegisterInternal(blocked_registers, kNumberOfCpuRegisters);
+      return X86ManagedRegister::FromCpuRegister(static_cast<Register>(reg));
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << type;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+
+  return ManagedRegister::NoRegister();
+}
+
+void CodeGeneratorX86::SetupBlockedRegisters(bool* blocked_registers) const {
+  bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+
+  // Don't allocate the register pair used for dalvik-style argument passing.
+  blocked_register_pairs[ECX_EDX] = true;
+
+  // Stack register is always reserved.
+  blocked_registers[ESP] = true;
+
+  // TODO: We currently don't use Quick's callee saved registers.
+  blocked_registers[EBP] = true;
+  blocked_registers[ESI] = true;
+  blocked_registers[EDI] = true;
+  blocked_register_pairs[EAX_EDI] = true;
+  blocked_register_pairs[EDX_EDI] = true;
+  blocked_register_pairs[ECX_EDI] = true;
+  blocked_register_pairs[EBX_EDI] = true;
+}
+
+size_t CodeGeneratorX86::GetNumberOfRegisters() const {
+  return kNumberOfRegIds;
+}
+
 static Location X86CpuLocation(Register reg) {
   return Location::RegisterLocation(X86ManagedRegister::FromCpuRegister(reg));
 }
@@ -90,6 +156,33 @@
   }
 }
 
+
+Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
+  switch (load->GetType()) {
+    case Primitive::kPrimLong:
+      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
+      break;
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented type " << load->GetType();
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type " << load->GetType();
+  }
+
+  LOG(FATAL) << "Unreachable";
+  return Location();
+}
+
 static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
@@ -311,13 +404,18 @@
 
 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  locations->SetInAt(0, X86CpuLocation(EAX));
+  locations->SetInAt(0, Location::Any());
   if_instr->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
   // TODO: Generate the input as a condition, instead of materializing in a register.
-  __ cmpl(if_instr->GetLocations()->InAt(0).AsX86().AsCpuRegister(), Immediate(0));
+  Location location = if_instr->GetLocations()->InAt(0);
+  if (location.IsRegister()) {
+    __ cmpl(location.AsX86().AsCpuRegister(), Immediate(0));
+  } else {
+    __ cmpl(Address(ESP, location.GetStackIndex()), Immediate(0));
+  }
   __ j(kEqual, codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
     __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
@@ -367,16 +465,22 @@
 
 void LocationsBuilderX86::VisitEqual(HEqual* equal) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(equal);
-  locations->SetInAt(0, X86CpuLocation(EAX));
-  locations->SetInAt(1, X86CpuLocation(ECX));
-  locations->SetOut(X86CpuLocation(EAX));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  locations->SetOut(Location::SameAsFirstInput());
   equal->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitEqual(HEqual* equal) {
-  __ cmpl(equal->GetLocations()->InAt(0).AsX86().AsCpuRegister(),
-          equal->GetLocations()->InAt(1).AsX86().AsCpuRegister());
-  __ setb(kEqual, equal->GetLocations()->Out().AsX86().AsCpuRegister());
+  LocationSummary* locations = equal->GetLocations();
+  if (locations->InAt(1).IsRegister()) {
+    __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
+            locations->InAt(1).AsX86().AsCpuRegister());
+  } else {
+    __ cmpl(locations->InAt(0).AsX86().AsCpuRegister(),
+            Address(ESP, locations->InAt(1).GetStackIndex()));
+  }
+  __ setb(kEqual, locations->Out().AsX86().AsCpuRegister());
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
@@ -453,7 +557,7 @@
 
 void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  locations->AddTemp(X86CpuLocation(EAX));
+  locations->AddTemp(Location::RequiresRegister());
 
   InvokeDexCallingConventionVisitor calling_convention_visitor;
   for (size_t i = 0; i < invoke->InputCount(); i++) {
@@ -514,18 +618,11 @@
 void LocationsBuilderX86::VisitAdd(HAdd* add) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(add);
   switch (add->GetResultType()) {
-    case Primitive::kPrimInt: {
-      locations->SetInAt(0, X86CpuLocation(EAX));
-      locations->SetInAt(1, X86CpuLocation(ECX));
-      locations->SetOut(X86CpuLocation(EAX));
-      break;
-    }
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      locations->SetInAt(
-          0, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
-      locations->SetInAt(
-          1, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(ECX_EBX)));
-      locations->SetOut(Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
     }
 
@@ -548,18 +645,30 @@
     case Primitive::kPrimInt: {
       DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(),
                 locations->Out().AsX86().AsCpuRegister());
-      __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
-              locations->InAt(1).AsX86().AsCpuRegister());
+      if (locations->InAt(1).IsRegister()) {
+        __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
+                locations->InAt(1).AsX86().AsCpuRegister());
+      } else {
+        __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
+                Address(ESP, locations->InAt(1).GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimLong: {
       DCHECK_EQ(locations->InAt(0).AsX86().AsRegisterPair(),
                 locations->Out().AsX86().AsRegisterPair());
-      __ addl(locations->InAt(0).AsX86().AsRegisterPairLow(),
-              locations->InAt(1).AsX86().AsRegisterPairLow());
-      __ adcl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
-              locations->InAt(1).AsX86().AsRegisterPairHigh());
+      if (locations->InAt(1).IsRegister()) {
+        __ addl(locations->InAt(0).AsX86().AsRegisterPairLow(),
+                locations->InAt(1).AsX86().AsRegisterPairLow());
+        __ adcl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
+                locations->InAt(1).AsX86().AsRegisterPairHigh());
+      } else {
+        __ addl(locations->InAt(0).AsX86().AsRegisterPairLow(),
+                Address(ESP, locations->InAt(1).GetStackIndex()));
+        __ adcl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
+                Address(ESP, locations->InAt(1).GetHighStackIndex(kX86WordSize)));
+      }
       break;
     }
 
@@ -578,19 +687,11 @@
 void LocationsBuilderX86::VisitSub(HSub* sub) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(sub);
   switch (sub->GetResultType()) {
-    case Primitive::kPrimInt: {
-      locations->SetInAt(0, X86CpuLocation(EAX));
-      locations->SetInAt(1, X86CpuLocation(ECX));
-      locations->SetOut(X86CpuLocation(EAX));
-      break;
-    }
-
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      locations->SetInAt(
-          0, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
-      locations->SetInAt(
-          1, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(ECX_EBX)));
-      locations->SetOut(Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
     }
 
@@ -613,18 +714,30 @@
     case Primitive::kPrimInt: {
       DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(),
                 locations->Out().AsX86().AsCpuRegister());
-      __ subl(locations->InAt(0).AsX86().AsCpuRegister(),
-              locations->InAt(1).AsX86().AsCpuRegister());
+      if (locations->InAt(1).IsRegister()) {
+        __ subl(locations->InAt(0).AsX86().AsCpuRegister(),
+                locations->InAt(1).AsX86().AsCpuRegister());
+      } else {
+        __ subl(locations->InAt(0).AsX86().AsCpuRegister(),
+                Address(ESP, locations->InAt(1).GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimLong: {
       DCHECK_EQ(locations->InAt(0).AsX86().AsRegisterPair(),
                 locations->Out().AsX86().AsRegisterPair());
-      __ subl(locations->InAt(0).AsX86().AsRegisterPairLow(),
-              locations->InAt(1).AsX86().AsRegisterPairLow());
-      __ sbbl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
-              locations->InAt(1).AsX86().AsRegisterPairHigh());
+      if (locations->InAt(1).IsRegister()) {
+        __ subl(locations->InAt(0).AsX86().AsRegisterPairLow(),
+                locations->InAt(1).AsX86().AsRegisterPairLow());
+        __ sbbl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
+                locations->InAt(1).AsX86().AsRegisterPairHigh());
+      } else {
+        __ subl(locations->InAt(0).AsX86().AsRegisterPairLow(),
+                Address(ESP, locations->InAt(1).GetStackIndex()));
+        __ sbbl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
+                Address(ESP, locations->InAt(1).GetHighStackIndex(kX86WordSize)));
+      }
       break;
     }
 
@@ -643,14 +756,16 @@
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetOut(X86CpuLocation(EAX));
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(X86CpuLocation(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(X86CpuLocation(calling_convention.GetRegisterAt(1)));
   instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   LoadCurrentMethod(calling_convention.GetRegisterAt(1));
-  __ movl(calling_convention.GetRegisterAt(0),
-          Immediate(instruction->GetTypeIndex()));
+  __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
 
   __ fs()->call(
       Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocObjectWithAccessCheck)));
@@ -676,15 +791,16 @@
 
 void LocationsBuilderX86::VisitNot(HNot* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, X86CpuLocation(EAX));
-  locations->SetOut(X86CpuLocation(EAX));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
   instruction->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(), locations->Out().AsX86().AsCpuRegister());
-  __ xorl(locations->Out().AsX86().AsCpuRegister(), Immediate(1));
+  Location out = locations->Out();
+  DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(), out.AsX86().AsCpuRegister());
+  __ xorl(out.AsX86().AsCpuRegister(), Immediate(1));
 }
 
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
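
The x86 operand policies above (SameAsFirstInput for the output, Any for the
second operand) mirror the two-address addl/adcl forms, and the long add is
the usual carry chain: add the low words first, then add the high words plus
the carry out of the low add, which is what adcl consumes. The same lowering
in scalar C++:

    #include <cstdint>
    #include <cstdio>

    uint64_t AddLong(uint32_t lo1, uint32_t hi1, uint32_t lo2, uint32_t hi2) {
      uint32_t lo = lo1 + lo2;          // addl
      uint32_t carry = lo < lo1;        // 1 iff the low add wrapped
      uint32_t hi = hi1 + hi2 + carry;  // adcl
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }

    int main() {
      // 0x00000000FFFFFFFF + 1: the carry must propagate into the high word.
      std::printf("0x%llx\n",
                  static_cast<unsigned long long>(AddLong(0xFFFFFFFFu, 0, 1, 0)));
      // prints 0x100000000
    }
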
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f22890e..1ee11bf 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -102,10 +102,7 @@
 
 class CodeGeneratorX86 : public CodeGenerator {
  public:
-  explicit CodeGeneratorX86(HGraph* graph)
-      : CodeGenerator(graph),
-        location_builder_(graph, this),
-        instruction_visitor_(graph, this) { }
+  explicit CodeGeneratorX86(HGraph* graph);
   virtual ~CodeGeneratorX86() { }
 
   virtual void GenerateFrameEntry() OVERRIDE;
@@ -129,7 +126,13 @@
     return &assembler_;
   }
 
+  virtual size_t GetNumberOfRegisters() const OVERRIDE;
+  virtual void SetupBlockedRegisters(bool* blocked_registers) const OVERRIDE;
+  virtual ManagedRegister AllocateFreeRegister(
+      Primitive::Type type, bool* blocked_registers) const OVERRIDE;
+
   int32_t GetStackSlot(HLocal* local) const;
+  virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
  private:
   // Helper method to move a 32bits value between two locations.
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
index b33a62e..993492d 100644
--- a/compiler/utils/growable_array.h
+++ b/compiler/utils/growable_array.h
@@ -49,9 +49,14 @@
           : idx_(0),
             g_list_(g_list) {}
 
+        explicit Iterator()
+          : idx_(0),
+            g_list_(nullptr) {}
+
         // NOTE: returns 0/NULL when no next.
         // TODO: redo to make usage consistent with other iterators.
         T Next() {
+          DCHECK(g_list_ != nullptr);
           if (idx_ >= g_list_->Size()) {
             return 0;
           } else {
@@ -63,6 +68,15 @@
           idx_ = 0;
         }
 
+        void Reset(GrowableArray* g_list) {
+          idx_ = 0;
+          g_list_ = g_list;
+        }
+
+        size_t GetIndex() const {
+          return idx_;
+        }
+
       private:
         size_t idx_;
-        GrowableArray* const g_list_;
+        GrowableArray* g_list_;
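
Note the member loses its const qualifier above; Reset() reassigns g_list_,
which a const pointer member would reject. Together, the default constructor
and Reset(GrowableArray*) let a caller declare an Iterator once (for example
as a member) and bind or rewind it later. A usage sketch against a stand-in
container, since GrowableArray itself needs an arena:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    template <typename T>
    class Iter {
     public:
      Iter() : idx_(0), list_(nullptr) {}
      void Reset(const std::vector<T>* list) { idx_ = 0; list_ = list; }
      T Next() {  // returns 0 when exhausted, matching the NOTE above
        if (list_ == nullptr || idx_ >= list_->size()) return 0;
        return (*list_)[idx_++];
      }
     private:
      size_t idx_;
      const std::vector<T>* list_;
    };

    int main() {
      std::vector<int> a = {1, 2}, b = {3};
      Iter<int> it;        // no list yet, like the new explicit constructor
      it.Reset(&a);
      while (int v = it.Next()) std::printf("%d ", v);
      it.Reset(&b);        // retarget without constructing a new iterator
      while (int v = it.Next()) std::printf("%d ", v);  // together: 1 2 3
    }
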
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 71f5bf7..2083051 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -158,7 +158,42 @@
 .endm
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
-    brk 0
+    // FP callee saves
+    ldp d8, d9,   [sp, #8]
+    ldp d10, d11, [sp, #24]
+    ldp d12, d13, [sp, #40]
+    ldp d14, d15, [sp, #56]
+
+    // Callee-saved registers.
+    ldp xSELF, x19, [sp, #72]
+    .cfi_restore x18
+    .cfi_restore x19
+
+    ldp x20, x21, [sp, #88]
+    .cfi_restore x20
+    .cfi_restore x21
+
+    ldp x22, x23, [sp, #104]
+    .cfi_restore x22
+    .cfi_restore x23
+
+    ldp x24, x25, [sp, #120]
+    .cfi_restore x24
+    .cfi_restore x25
+
+    ldp x26, x27, [sp, #136]
+    .cfi_restore x26
+    .cfi_restore x27
+
+    ldp x28, xFP, [sp, #152]    // Restore FP.
+    .cfi_restore x28
+    .cfi_restore x29
+
+    ldr xLR, [sp, #168]
+    .cfi_restore x30
+
+    add sp, sp, #176
+    .cfi_adjust_cfa_offset -176
 .endm
 
 .macro RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
@@ -359,11 +394,15 @@
 .endm
 
 .macro RETURN_IF_RESULT_IS_ZERO
-    brk 0
+    cbnz x0, 1f                // result non-zero branch over
+    ret                        // return
+1:
 .endm
 
 .macro RETURN_IF_RESULT_IS_NON_ZERO
-    brk 0
+    cbz x0, 1f                 // result zero branch over
+    ret                        // return
+1:
 .endm
 
     /*
@@ -1008,18 +1047,32 @@
 UNIMPLEMENTED art_quick_resolve_string
 
 // Macro to facilitate adding new allocation entrypoints.
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro TWO_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    brk 0
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    mov    x2, xSELF                  // pass Thread::Current
+    mov    x3, sp                     // pass SP
+    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+    DELIVER_PENDING_EXCEPTION
 END \name
 .endm
 
 // Macro to facilitate adding new array allocation entrypoints.
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
 .macro THREE_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
-    brk 0
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+    DELIVER_PENDING_EXCEPTION
 END \name
 .endm
 
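The filled-in downcall macros share one control-flow idiom: call the
entrypoint, return directly if the result signals success, otherwise fall
through into DELIVER_PENDING_EXCEPTION. For these allocation entrypoints the
\return argument is RETURN_IF_RESULT_IS_NON_ZERO, so a null result means an
exception is pending. The same shape in C++ (the function names are
invented):

    #include <cstdio>

    static void* AllocEntrypoint() { static int obj; return &obj; }

    void* Downcall(void* (*entrypoint)()) {
      void* result = entrypoint();  // bl \entrypoint
      if (result != nullptr) {      // cbz x0, 1f
        return result;              // ret
      }
      std::printf("deliver pending exception\n");  // DELIVER_PENDING_EXCEPTION
      return nullptr;
    }

    int main() { std::printf("%p\n", Downcall(AllocEntrypoint)); }
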
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 543e695..20dc53b 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -40,6 +40,14 @@
     }
   }
 
+  void SetUpRuntimeOptions(Runtime::Options *options) OVERRIDE {
+    // Use a smaller heap
+    for (std::pair<std::string, const void*>& pair : *options) {
+      if (pair.first.find("-Xmx") == 0) {
+        pair.first = "-Xmx4M";  // Smallest we can go.
+      }
+    }
+  }
 
   size_t Invoke3(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self) {
     // Push a transition back into managed code onto the linked list in thread.
@@ -62,8 +70,10 @@
     //       but compilation fails when declaring that.
 #elif defined(__arm__)
     __asm__ __volatile__(
-        "push {r1-r2,r9, lr}\n\t"   // Save the link and thread register
-        ".cfi_adjust_cfa_offset 16\n\t"
+        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
+        ".cfi_adjust_cfa_offset 52\n\t"
+        "sub sp, sp, #8\n\t"        // +8B, so 16B aligned with nullptr
+        ".cfi_adjust_cfa_offset 8\n\t"
         "mov r0, %[arg0]\n\t"       // Set arg0-arg2
         "mov r1, %[arg1]\n\t"       // TODO: Any way to use constraints like on x86?
         "mov r2, %[arg2]\n\t"
@@ -73,10 +83,10 @@
         ".cfi_adjust_cfa_offset 4\n\t"
         "mov r9, %[self]\n\t"       // Set the thread
         "blx %[code]\n\t"           // Call the stub
-        "pop {r1}\n\t"              // Pop nullptr
-        ".cfi_adjust_cfa_offset -4\n\t"
-        "pop {r1-r2,r9, lr}\n\t"    // Restore the link and thread register
-        ".cfi_adjust_cfa_offset -16\n\t"
+        "add sp, sp, #12\n\t"       // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -12\n\t"
+        "pop {r1-r12, lr}\n\t"      // Restore state
+        ".cfi_adjust_cfa_offset -52\n\t"
         "mov %[result], r0\n\t"     // Save the result
         : [result] "=r" (result)
           // Use the result from r0
@@ -85,6 +95,7 @@
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
+        ".cfi_adjust_cfa_offset 48\n\t"
         "stp xzr, x1, [sp]\n\t"        // nullptr(end of quick stack), x1
         "stp x2, x18, [sp, #16]\n\t"   // Save x2, x18(xSELF)
         "str x30, [sp, #32]\n\t"       // Save xLR
@@ -97,6 +108,7 @@
         "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
         "ldp x18, x30, [sp, #24]\n\t"  // Restore xSELF, xLR
         "add sp, sp, #48\n\t"          // Free stack space
+        ".cfi_adjust_cfa_offset -48\n\t"
         "mov %[result], x0\n\t"        // Save the result
         : [result] "=r" (result)
           // Use the result from r0
@@ -108,15 +120,16 @@
     __asm__ __volatile__(
         "pushq $0\n\t"                 // Push nullptr to terminate quick stack
         "pushq $0\n\t"                 // 16B alignment padding
+        ".cfi_adjust_cfa_offset 16\n\t"
         "call *%%rax\n\t"              // Call the stub
         "addq $16, %%rsp"              // Pop nullptr and padding
+        // ".cfi_adjust_cfa_offset -16\n\t"
         : "=a" (result)
           // Use the result from rax
         : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code)
           // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
         : "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
     // TODO: Should we clobber the other registers?
-    result = 0;
 #else
     LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
     result = 0;
@@ -266,6 +279,8 @@
 #endif
 
 TEST_F(StubTest, APutObj) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
 #if defined(__i386__) || defined(__arm__)
   Thread* self = Thread::Current();
   // Create an object
@@ -354,4 +369,230 @@
 #endif
 }
 
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_alloc_object_rosalloc(void);
+extern "C" void art_quick_alloc_object_resolved_rosalloc(void);
+extern "C" void art_quick_alloc_object_initialized_rosalloc(void);
+#endif
+
+TEST_F(StubTest, AllocObject) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  // TODO: Check the "Unresolved" allocation stubs
+
+  Thread* self = Thread::Current();
+  // Create an object
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+
+  SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
+                                                                      "Ljava/lang/Object;"));
+
+  // Play with it...
+
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  {
+    // Use an arbitrary method from c as the referrer.
+    size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
+                            reinterpret_cast<size_t>(c->GetVirtualMethod(0)),  // arbitrary
+                            0U,
+                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_rosalloc),
+                            self);
+
+    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
+    EXPECT_EQ(c.get(), obj->GetClass());
+    VerifyObject(obj);
+  }
+
+  {
+    // We can use nullptr in the second argument as we do not need a method here (not used in
+    // resolved/initialized cases)
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
+                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_resolved_rosalloc),
+                            self);
+
+    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
+    EXPECT_EQ(c.get(), obj->GetClass());
+    VerifyObject(obj);
+  }
+
+  {
+    // We can use nullptr in the second argument as we do not need a method here (not used in
+    // resolved/initialized cases)
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
+                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_initialized_rosalloc),
+                            self);
+
+    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
+    EXPECT_EQ(c.get(), obj->GetClass());
+    VerifyObject(obj);
+  }
+
+  // Failure tests.
+
+  // Out-of-memory.
+  {
+    Runtime::Current()->GetHeap()->SetIdealFootprint(1 * GB);
+
+    // Array helps to fill memory faster.
+    SirtRef<mirror::Class> ca(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
+                                                                         "[Ljava/lang/Object;"));
+    std::vector<SirtRef<mirror::Object>*> sirt_refs;
+    // Start allocating with 128K
+    size_t length = 128 * KB / 4;
+    while (length > 10) {
+      SirtRef<mirror::Object>* ref = new SirtRef<mirror::Object>(soa.Self(),
+                                              mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(),
+                                                                                         ca.get(),
+                                                                                         length/4));
+      if (self->IsExceptionPending() || ref->get() == nullptr) {
+        self->ClearException();
+        delete ref;
+
+        // Try a smaller length
+        length = length / 8;
+        // Use at most half the reported free space.
+        size_t mem = Runtime::Current()->GetHeap()->GetFreeMemory();
+        if (length * 8 > mem) {
+          length = mem / 8;
+        }
+      } else {
+        sirt_refs.push_back(ref);
+      }
+    }
+    LOG(DEBUG) << "Used " << sirt_refs.size() << " arrays to fill space.";
+
+    // Allocate simple objects till it fails.
+    while (!self->IsExceptionPending()) {
+      SirtRef<mirror::Object>* ref = new SirtRef<mirror::Object>(soa.Self(),
+                                                                 c->AllocObject(soa.Self()));
+      if (!self->IsExceptionPending() && ref->get() != nullptr) {
+        sirt_refs.push_back(ref);
+      } else {
+        delete ref;
+      }
+    }
+    self->ClearException();
+
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 0U,
+                            reinterpret_cast<uintptr_t>(&art_quick_alloc_object_initialized_rosalloc),
+                            self);
+
+    EXPECT_TRUE(self->IsExceptionPending());
+    self->ClearException();
+    EXPECT_EQ(reinterpret_cast<size_t>(nullptr), result);
+
+    // Release all the allocated objects.
+    // Need to go backward to release the SirtRefs in the right order.
+    auto it = sirt_refs.rbegin();
+    auto end = sirt_refs.rend();
+    for (; it != end; ++it) {
+      delete *it;
+    }
+  }
+
+  // Tests done.
+#else
+  LOG(INFO) << "Skipping alloc_object as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping alloc_object as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_alloc_array_rosalloc(void);
+extern "C" void art_quick_alloc_array_resolved_rosalloc(void);
+#endif
+
+TEST_F(StubTest, AllocObjectArray) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  // TODO: Check the "Unresolved" allocation stubs
+
+  Thread* self = Thread::Current();
+  // Create an object
+  ScopedObjectAccess soa(self);
+  // garbage is created during ClassLinker::Init
+
+  SirtRef<mirror::Class> c(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
+                                                                        "[Ljava/lang/Object;"));
+
+  // Needed to have a linked method.
+  SirtRef<mirror::Class> c_obj(soa.Self(), class_linker_->FindSystemClass(soa.Self(),
+                                                                          "Ljava/lang/Object;"));
+
+  // Play with it...
+
+  EXPECT_FALSE(self->IsExceptionPending());
+/*
+ * For some reason this does not work, as the type_idx is artificial and outside what the
+ * resolved types of c_obj allow...
+ *
+  {
+    // Use an arbitrary method from c_obj as the referrer.
+    size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
+                            reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)),  // arbitrary
+                            10U,
+                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_rosalloc),
+                            self);
+
+    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+    mirror::Array* obj = reinterpret_cast<mirror::Array*>(result);
+    EXPECT_EQ(c.get(), obj->GetClass());
+    VerifyObject(obj);
+    EXPECT_EQ(obj->GetLength(), 10);
+  }
+*/
+  {
+    // We can use nullptr in the second argument as we do not need a method here (not used in
+    // resolved/initialized cases)
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr), 10U,
+                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
+                            self);
+
+    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
+    EXPECT_TRUE(obj->IsArrayInstance());
+    EXPECT_TRUE(obj->IsObjectArray());
+    EXPECT_EQ(c.get(), obj->GetClass());
+    VerifyObject(obj);
+    mirror::Array* array = reinterpret_cast<mirror::Array*>(result);
+    EXPECT_EQ(array->GetLength(), 10);
+  }
+
+  // Failure tests.
+
+  // Out-of-memory.
+  {
+    size_t result = Invoke3(reinterpret_cast<size_t>(c.get()), reinterpret_cast<size_t>(nullptr),
+                            GB,  // that should fail...
+                            reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
+                            self);
+
+    EXPECT_TRUE(self->IsExceptionPending());
+    self->ClearException();
+    EXPECT_EQ(reinterpret_cast<size_t>(nullptr), result);
+  }
+
+  // Tests done.
+#else
+  LOG(INFO) << "Skipping alloc_array as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping alloc_array as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
 }  // namespace art
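
The -Xmx rewrite in SetUpRuntimeOptions above leans on Runtime::Options being a vector of
(string, pointer) pairs, as the range-for implies. A minimal standalone sketch of that
logic, with the type alias assumed for illustration:

    #include <string>
    #include <utility>
    #include <vector>

    // Assumed stand-in for Runtime::Options.
    using Options = std::vector<std::pair<std::string, const void*>>;

    // Replace any existing -Xmx<N> option with a 4M cap, as the fixture does.
    void CapHeapSize(Options* options) {
      for (std::pair<std::string, const void*>& pair : *options) {
        if (pair.first.find("-Xmx") == 0) {  // option string starts with -Xmx
          pair.first = "-Xmx4M";
        }
      }
    }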
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index a31ea58..bc9907b 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -52,7 +52,6 @@
      * Runtime::CreateCalleeSaveMethod(kRefsOnly)
      */
 MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
-    UNTESTED
     // R10 := Runtime::Current()
     movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
     movq (%r10), %r10
@@ -78,7 +77,6 @@
 END_MACRO
 
 MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
-    UNTESTED
     addq MACRO_LITERAL(8), %rsp
     CFI_ADJUST_CFA_OFFSET(-8)
     // TODO: optimize by not restoring callee-saves restored by the ABI
@@ -506,7 +504,6 @@
 
 MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    UNTESTED
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
     // Outgoing argument set up
     movq %rsp, %rcx                    // pass SP
@@ -519,19 +516,17 @@
 
 MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    UNTESTED
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
     // Outgoing argument set up
     movq %rsp, %r8                     // pass SP
     movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
     call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
     CALL_MACRO(return_macro, 2)        // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
 
 MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
-    UNTESTED
     testq %rax, %rax               // rax == 0 ?
     jz  1f                         // if rax == 0 goto 1
     ret                            // return
@@ -540,7 +535,6 @@
 END_MACRO
 
 MACRO0(RETURN_IF_EAX_ZERO)
-    UNTESTED
     testl %eax, %eax               // eax == 0 ?
     jnz  1f                        // if eax != 0 goto 1
     ret                            // return
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 9012f00..07d3a2a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -4215,6 +4215,13 @@
   DISALLOW_COPY_AND_ASSIGN(StringTable);
 };
 
+static const char* GetMethodSourceFile(MethodHelper* mh)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  DCHECK(mh != nullptr);
+  const char* source_file = mh->GetDeclaringClassSourceFile();
+  return (source_file != nullptr) ? source_file : "";
+}
+
 /*
  * The data we send to DDMS contains everything we have recorded.
  *
@@ -4287,7 +4294,7 @@
           mh.ChangeMethod(m);
           class_names.Add(mh.GetDeclaringClassDescriptor());
           method_names.Add(mh.GetName());
-          filenames.Add(mh.GetDeclaringClassSourceFile());
+          filenames.Add(GetMethodSourceFile(&mh));
         }
       }
 
@@ -4349,7 +4356,7 @@
         mh.ChangeMethod(record->stack[stack_frame].method);
         size_t class_name_index = class_names.IndexOf(mh.GetDeclaringClassDescriptor());
         size_t method_name_index = method_names.IndexOf(mh.GetName());
-        size_t file_name_index = filenames.IndexOf(mh.GetDeclaringClassSourceFile());
+        size_t file_name_index = filenames.IndexOf(GetMethodSourceFile(&mh));
         JDWP::Append2BE(bytes, class_name_index);
         JDWP::Append2BE(bytes, method_name_index);
         JDWP::Append2BE(bytes, file_name_index);
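
Routing every filename through GetMethodSourceFile means a class with no SourceFile
attribute (a generated proxy, for example) contributes "" rather than a null pointer to
the string table. The guard, stripped of ART types (a runnable sketch assuming only that
the source-file string may be null):

    #include <cstdio>

    // Same null-to-"" mapping as GetMethodSourceFile above.
    static const char* OrEmpty(const char* source_file) {
      return (source_file != nullptr) ? source_file : "";
    }

    int main() {
      std::printf("[%s]\n", OrEmpty(nullptr));        // prints [], no crash
      std::printf("[%s]\n", OrEmpty("Object.java"));  // prints [Object.java]
      return 0;
    }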
diff --git a/runtime/deoptimize_stack_visitor.cc b/runtime/deoptimize_stack_visitor.cc
index f2eaf00..3eb1792 100644
--- a/runtime/deoptimize_stack_visitor.cc
+++ b/runtime/deoptimize_stack_visitor.cc
@@ -78,7 +78,7 @@
   if (prev_shadow_frame_ != nullptr) {
     prev_shadow_frame_->SetLink(new_frame);
   } else {
-    exception_handler_->SetTopShadowFrame(new_frame);
+    self_->SetDeoptimizationShadowFrame(new_frame);
   }
   prev_shadow_frame_ = new_frame;
   return true;
diff --git a/runtime/deoptimize_stack_visitor.h b/runtime/deoptimize_stack_visitor.h
index c898e7d..c41b803 100644
--- a/runtime/deoptimize_stack_visitor.h
+++ b/runtime/deoptimize_stack_visitor.h
@@ -19,6 +19,7 @@
 
 #include "base/mutex.h"
 #include "stack.h"
+#include "thread.h"
 
 namespace art {
 
@@ -35,6 +36,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : StackVisitor(self, context), self_(self), exception_handler_(exception_handler),
         prev_shadow_frame_(nullptr) {
+    CHECK(!self_->HasDeoptimizationShadowFrame());
   }
 
   bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/entrypoints/portable/portable_thread_entrypoints.cc b/runtime/entrypoints/portable/portable_thread_entrypoints.cc
index 4f19964..9e62e0e 100644
--- a/runtime/entrypoints/portable/portable_thread_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_thread_entrypoints.cc
@@ -78,7 +78,7 @@
     visitor.WalkStack(true);
     self->SetDeoptimizationShadowFrame(visitor.GetShadowFrameCopy());
     self->SetDeoptimizationReturnValue(JValue());
-    self->SetException(ThrowLocation(), reinterpret_cast<mirror::Throwable*>(-1));
+    self->SetException(ThrowLocation(), Thread::GetDeoptimizationException());
   }
 }
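
Replacing the bare -1 cast with Thread::GetDeoptimizationException() keeps a single
well-known sentinel for "deoptimization pending" instead of scattered magic numbers. The
accessor itself is not part of this diff; presumably it is along these lines:

    // Presumed shape (not shown in this diff): a never-dereferenced sentinel
    // Throwable* shared by all deoptimization paths.
    static mirror::Throwable* GetDeoptimizationException() {
      return reinterpret_cast<mirror::Throwable*>(-1);
    }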
 
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index d5844b6..a91fdf1 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -28,7 +28,7 @@
     method_tracing_active_(is_deoptimization ||
                            Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()),
     handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_dex_pc_(0),
-    clear_exception_(false), top_shadow_frame_(nullptr), handler_frame_id_(kInvalidFrameId) {
+    clear_exception_(false), handler_frame_id_(kInvalidFrameId) {
 }
 
 void QuickExceptionHandler::FindCatch(const ThrowLocation& throw_location,
@@ -125,10 +125,6 @@
 }
 
 void QuickExceptionHandler::DoLongJump() {
-  if (is_deoptimization_) {
-    // TODO: proper return value.
-    self_->SetDeoptimizationShadowFrame(top_shadow_frame_);
-  }
   // Place context back on thread so it will be available when we continue.
   self_->ReleaseLongJumpContext(context_);
   context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_));
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index d06ce7c..ef3766c 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -66,10 +66,6 @@
     clear_exception_ = clear_exception;
   }
 
-  void SetTopShadowFrame(ShadowFrame* top_shadow_frame) {
-    top_shadow_frame_ = top_shadow_frame;
-  }
-
   void SetHandlerFrameId(size_t frame_id) {
     handler_frame_id_ = frame_id;
   }
@@ -88,8 +84,6 @@
   uint32_t handler_dex_pc_;
   // Should the exception be cleared as the catch block has no move-exception?
   bool clear_exception_;
-  // Deoptimization top shadow frame.
-  ShadowFrame* top_shadow_frame_;
   // Frame id of the catch handler or the upcall.
   size_t handler_frame_id_;
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 771680b..0fafbfa 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1932,92 +1932,102 @@
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (false) {
       LOG(INFO) << "Visiting stack roots in " << PrettyMethod(GetMethod())
-          << StringPrintf("@ PC:%04x", GetDexPc());
+                << StringPrintf("@ PC:%04x", GetDexPc());
     }
     ShadowFrame* shadow_frame = GetCurrentShadowFrame();
     if (shadow_frame != nullptr) {
-      mirror::ArtMethod* m = shadow_frame->GetMethod();
-      size_t num_regs = shadow_frame->NumberOfVRegs();
-      if (m->IsNative() || shadow_frame->HasReferenceArray()) {
-        // SIRT for JNI or References for interpreter.
-        for (size_t reg = 0; reg < num_regs; ++reg) {
+      VisitShadowFrame(shadow_frame);
+    } else {
+      VisitQuickFrame();
+    }
+    return true;
+  }
+
+  void VisitShadowFrame(ShadowFrame* shadow_frame) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* m = shadow_frame->GetMethod();
+    size_t num_regs = shadow_frame->NumberOfVRegs();
+    if (m->IsNative() || shadow_frame->HasReferenceArray()) {
+      // SIRT for JNI or References for interpreter.
+      for (size_t reg = 0; reg < num_regs; ++reg) {
+        mirror::Object* ref = shadow_frame->GetVRegReference(reg);
+        if (ref != nullptr) {
+          mirror::Object* new_ref = ref;
+          visitor_(&new_ref, reg, this);
+          if (new_ref != ref) {
+            shadow_frame->SetVRegReference(reg, new_ref);
+          }
+        }
+      }
+    } else {
+      // Java method.
+      // Portable path use DexGcMap and store in Method.native_gc_map_.
+      const uint8_t* gc_map = m->GetNativeGcMap();
+      CHECK(gc_map != nullptr) << PrettyMethod(m);
+      verifier::DexPcToReferenceMap dex_gc_map(gc_map);
+      uint32_t dex_pc = shadow_frame->GetDexPC();
+      const uint8_t* reg_bitmap = dex_gc_map.FindBitMap(dex_pc);
+      DCHECK(reg_bitmap != nullptr);
+      num_regs = std::min(dex_gc_map.RegWidth() * 8, num_regs);
+      for (size_t reg = 0; reg < num_regs; ++reg) {
+        if (TestBitmap(reg, reg_bitmap)) {
           mirror::Object* ref = shadow_frame->GetVRegReference(reg);
           if (ref != nullptr) {
             mirror::Object* new_ref = ref;
             visitor_(&new_ref, reg, this);
             if (new_ref != ref) {
-             shadow_frame->SetVRegReference(reg, new_ref);
-            }
-          }
-        }
-      } else {
-        // Java method.
-        // Portable path use DexGcMap and store in Method.native_gc_map_.
-        const uint8_t* gc_map = m->GetNativeGcMap();
-        CHECK(gc_map != nullptr) << PrettyMethod(m);
-        verifier::DexPcToReferenceMap dex_gc_map(gc_map);
-        uint32_t dex_pc = GetDexPc();
-        const uint8_t* reg_bitmap = dex_gc_map.FindBitMap(dex_pc);
-        DCHECK(reg_bitmap != nullptr);
-        num_regs = std::min(dex_gc_map.RegWidth() * 8, num_regs);
-        for (size_t reg = 0; reg < num_regs; ++reg) {
-          if (TestBitmap(reg, reg_bitmap)) {
-            mirror::Object* ref = shadow_frame->GetVRegReference(reg);
-            if (ref != nullptr) {
-              mirror::Object* new_ref = ref;
-              visitor_(&new_ref, reg, this);
-              if (new_ref != ref) {
-               shadow_frame->SetVRegReference(reg, new_ref);
-              }
+              shadow_frame->SetVRegReference(reg, new_ref);
             }
           }
         }
       }
-    } else {
-      mirror::ArtMethod* m = GetMethod();
-      // Process register map (which native and runtime methods don't have)
-      if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
-        const uint8_t* native_gc_map = m->GetNativeGcMap();
-        CHECK(native_gc_map != nullptr) << PrettyMethod(m);
-        mh_.ChangeMethod(m);
-        const DexFile::CodeItem* code_item = mh_.GetCodeItem();
-        DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be nullptr or how would we compile its instructions?
-        NativePcOffsetToReferenceMap map(native_gc_map);
-        size_t num_regs = std::min(map.RegWidth() * 8,
-                                   static_cast<size_t>(code_item->registers_size_));
-        if (num_regs > 0) {
-          const uint8_t* reg_bitmap = map.FindBitMap(GetNativePcOffset());
-          DCHECK(reg_bitmap != nullptr);
-          const VmapTable vmap_table(m->GetVmapTable());
-          uint32_t core_spills = m->GetCoreSpillMask();
-          uint32_t fp_spills = m->GetFpSpillMask();
-          size_t frame_size = m->GetFrameSizeInBytes();
-          // For all dex registers in the bitmap
-          mirror::ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
-          DCHECK(cur_quick_frame != nullptr);
-          for (size_t reg = 0; reg < num_regs; ++reg) {
-            // Does this register hold a reference?
-            if (TestBitmap(reg, reg_bitmap)) {
-              uint32_t vmap_offset;
-              if (vmap_table.IsInContext(reg, kReferenceVReg, &vmap_offset)) {
-                int vmap_reg = vmap_table.ComputeRegister(core_spills, vmap_offset, kReferenceVReg);
-                // This is sound as spilled GPRs will be word sized (ie 32 or 64bit).
-                mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(vmap_reg));
-                if (*ref_addr != nullptr) {
-                  visitor_(ref_addr, reg, this);
-                }
-              } else {
-                StackReference<mirror::Object>* ref_addr =
-                    reinterpret_cast<StackReference<mirror::Object>*>(
-                        GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size,
-                                    reg));
-                mirror::Object* ref = ref_addr->AsMirrorPtr();
-                if (ref != nullptr) {
-                  mirror::Object* new_ref = ref;
-                  visitor_(&new_ref, reg, this);
-                  if (ref != new_ref) {
-                    ref_addr->Assign(new_ref);
-                  }
+    }
+  }
+
+ private:
+  void VisitQuickFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::ArtMethod* m = GetMethod();
+    // Process register map (which native and runtime methods don't have)
+    if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
+      const uint8_t* native_gc_map = m->GetNativeGcMap();
+      CHECK(native_gc_map != nullptr) << PrettyMethod(m);
+      mh_.ChangeMethod(m);
+      const DexFile::CodeItem* code_item = mh_.GetCodeItem();
+      DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be nullptr or how would we compile its instructions?
+      NativePcOffsetToReferenceMap map(native_gc_map);
+      size_t num_regs = std::min(map.RegWidth() * 8,
+                                 static_cast<size_t>(code_item->registers_size_));
+      if (num_regs > 0) {
+        const uint8_t* reg_bitmap = map.FindBitMap(GetNativePcOffset());
+        DCHECK(reg_bitmap != nullptr);
+        const VmapTable vmap_table(m->GetVmapTable());
+        uint32_t core_spills = m->GetCoreSpillMask();
+        uint32_t fp_spills = m->GetFpSpillMask();
+        size_t frame_size = m->GetFrameSizeInBytes();
+        // For all dex registers in the bitmap
+        mirror::ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
+        DCHECK(cur_quick_frame != nullptr);
+        for (size_t reg = 0; reg < num_regs; ++reg) {
+          // Does this register hold a reference?
+          if (TestBitmap(reg, reg_bitmap)) {
+            uint32_t vmap_offset;
+            if (vmap_table.IsInContext(reg, kReferenceVReg, &vmap_offset)) {
+              int vmap_reg = vmap_table.ComputeRegister(core_spills, vmap_offset, kReferenceVReg);
+              // This is sound as spilled GPRs will be word sized (ie 32 or 64bit).
+              mirror::Object** ref_addr = reinterpret_cast<mirror::Object**>(GetGPRAddress(vmap_reg));
+              if (*ref_addr != nullptr) {
+                visitor_(ref_addr, reg, this);
+              }
+            } else {
+              StackReference<mirror::Object>* ref_addr =
+                  reinterpret_cast<StackReference<mirror::Object>*>(
+                      GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size,
+                                  reg));
+              mirror::Object* ref = ref_addr->AsMirrorPtr();
+              if (ref != nullptr) {
+                mirror::Object* new_ref = ref;
+                visitor_(&new_ref, reg, this);
+                if (ref != new_ref) {
+                  ref_addr->Assign(new_ref);
                 }
               }
             }
@@ -2025,10 +2035,8 @@
         }
       }
     }
-    return true;
   }
 
- private:
   static bool TestBitmap(size_t reg, const uint8_t* reg_vector) {
     return ((reg_vector[reg / kBitsPerByte] >> (reg % kBitsPerByte)) & 0x01) != 0;
   }
@@ -2085,6 +2093,14 @@
   if (tlsPtr_.single_step_control != nullptr) {
     tlsPtr_.single_step_control->VisitRoots(visitor, arg, thread_id, kRootDebugger);
   }
+  if (tlsPtr_.deoptimization_shadow_frame != nullptr) {
+    RootCallbackVisitor visitorToCallback(visitor, arg, thread_id);
+    ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitorToCallback);
+    for (ShadowFrame* shadow_frame = tlsPtr_.deoptimization_shadow_frame; shadow_frame != nullptr;
+        shadow_frame = shadow_frame->GetLink()) {
+      mapper.VisitShadowFrame(shadow_frame);
+    }
+  }
   // Visit roots on this thread's stack
   Context* context = GetLongJumpContext();
   RootCallbackVisitor visitorToCallback(visitor, arg, thread_id);
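
The GC-map walks above hinge on TestBitmap's one-bit-per-vreg encoding
(least-significant bit first within each byte). A self-contained worked example with an
illustrative bitmap value:

    #include <cstddef>
    #include <cstdint>

    static constexpr size_t kBitsPerByte = 8;

    // Same arithmetic as TestBitmap above.
    static bool TestBitmap(size_t reg, const uint8_t* reg_vector) {
      return ((reg_vector[reg / kBitsPerByte] >> (reg % kBitsPerByte)) & 0x01) != 0;
    }

    int main() {
      const uint8_t bitmap[] = {0x05};  // bits 0 and 2 set
      // Vregs 0 and 2 hold references; vreg 1 does not.
      return (TestBitmap(0, bitmap) && !TestBitmap(1, bitmap) &&
              TestBitmap(2, bitmap)) ? 0 : 1;
    }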
diff --git a/runtime/thread.h b/runtime/thread.h
index e5e4cae..f869285 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -696,6 +696,10 @@
 
   ShadowFrame* GetAndClearDeoptimizationShadowFrame(JValue* ret_val);
 
+  bool HasDeoptimizationShadowFrame() const {
+    return tlsPtr_.deoptimization_shadow_frame != nullptr;
+  }
+
   std::deque<instrumentation::InstrumentationStackFrame>* GetInstrumentationStack() {
     return tlsPtr_.instrumentation_stack;
   }