Support longs in the register allocator for x86_64.

Change-Id: I7fb6dfb761bc5cf9e5705682032855a0a70ca867
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c3a322c..cc995f7 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -226,7 +226,7 @@
 }
 
 template<typename T>
-void HGraphBuilder::Binop_32x(const Instruction& instruction, Primitive::Type type) {
+void HGraphBuilder::Binop_23x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegB(), type);
   HInstruction* second = LoadLocal(instruction.VRegC(), type);
   current_block_->AddInstruction(new (arena_) T(type, first, second));
@@ -501,22 +501,22 @@
     }
 
     case Instruction::ADD_INT: {
-      Binop_32x<HAdd>(instruction, Primitive::kPrimInt);
+      Binop_23x<HAdd>(instruction, Primitive::kPrimInt);
       break;
     }
 
     case Instruction::ADD_LONG: {
-      Binop_32x<HAdd>(instruction, Primitive::kPrimLong);
+      Binop_23x<HAdd>(instruction, Primitive::kPrimLong);
       break;
     }
 
     case Instruction::SUB_INT: {
-      Binop_32x<HSub>(instruction, Primitive::kPrimInt);
+      Binop_23x<HSub>(instruction, Primitive::kPrimInt);
       break;
     }
 
     case Instruction::SUB_LONG: {
-      Binop_32x<HSub>(instruction, Primitive::kPrimLong);
+      Binop_23x<HSub>(instruction, Primitive::kPrimLong);
       break;
     }
 
@@ -573,6 +573,11 @@
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       break;
 
+    case Instruction::CMP_LONG: {
+      Binop_23x<HCompare>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::NOP:
       break;
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 0852a26..ee32ca8 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -73,7 +73,7 @@
   bool InitializeParameters(uint16_t number_of_parameters);
 
   template<typename T>
-  void Binop_32x(const Instruction& instruction, Primitive::Type type);
+  void Binop_23x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
@@ -84,11 +84,8 @@
   template<typename T>
   void Binop_22s(const Instruction& instruction, bool reverse);
 
-  template<typename T>
-  void If_22t(const Instruction& instruction, int32_t dex_offset);
-
-  template<typename T>
-  void If_21t(const Instruction& instruction, int32_t dex_offset);
+  template<typename T> void If_21t(const Instruction& instruction, int32_t dex_offset);
+  template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset);
 
   void BuildReturn(const Instruction& instruction, Primitive::Type type);
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 83621e0..ae2f030 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -90,6 +90,7 @@
   virtual void SetupBlockedRegisters(bool* blocked_registers) const = 0;
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
+  virtual InstructionSet GetInstructionSet() const = 0;
 
   void RecordPcInfo(uint32_t dex_pc) {
     struct PcInfo pc_info;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ec3c815..d87c14b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -905,6 +905,48 @@
          locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(1));
 }
 
+void LocationsBuilderARM::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      Register output = locations->Out().AsArm().AsCoreRegister();
+      ArmManagedRegister left = locations->InAt(0).AsArm();
+      ArmManagedRegister right = locations->InAt(1).AsArm();
+      Label less, greater, done;
+      __ cmp(left.AsRegisterPairHigh(),
+             ShifterOperand(right.AsRegisterPairHigh()));  // Signed compare.
+      __ b(&less, LT);
+      __ b(&greater, GT);
+      __ cmp(left.AsRegisterPairLow(),
+             ShifterOperand(right.AsRegisterPairLow()));  // Unsigned compare.
+      __ LoadImmediate(output, 0);
+      __ b(&done, EQ);
+      __ b(&less, CC);
+
+      __ Bind(&greater);
+      __ LoadImmediate(output, 1);
+      __ b(&done);
+
+      __ Bind(&less);
+      __ LoadImmediate(output, -1);
+
+      __ Bind(&done);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+}
+
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 712a24c..c46c1b1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -171,6 +171,10 @@
     return &move_resolver_;
   }
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kArm;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f624f3c..572d494 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -81,12 +81,23 @@
                                                        bool* blocked_registers) const {
   switch (type) {
     case Primitive::kPrimLong: {
-      size_t reg = AllocateFreeRegisterInternal(
-          GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs);
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, kNumberOfRegisterPairs);
       X86ManagedRegister pair =
           X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
       blocked_registers[pair.AsRegisterPairLow()] = true;
       blocked_registers[pair.AsRegisterPairHigh()] = true;
+      // Block all other register pairs that share a register with `pair`.
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        X86ManagedRegister current =
+            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairLow() == pair.AsRegisterPairHigh()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) {
+          blocked_register_pairs[i] = true;
+        }
+      }
       return pair;
     }
 
@@ -901,6 +912,46 @@
   __ xorl(out.AsX86().AsCpuRegister(), Immediate(1));
 }
 
+void LocationsBuilderX86::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      Label less, greater, done;
+      Register output = locations->Out().AsX86().AsCpuRegister();
+      X86ManagedRegister left = locations->InAt(0).AsX86();
+      X86ManagedRegister right = locations->InAt(1).AsX86();
+      __ cmpl(left.AsRegisterPairHigh(), right.AsRegisterPairHigh());
+      __ j(kLess, &less);  // Signed compare.
+      __ j(kGreater, &greater);  // Signed compare.
+      __ cmpl(left.AsRegisterPairLow(), right.AsRegisterPairLow());
+      __ movl(output, Immediate(0));
+      __ j(kEqual, &done);
+      __ j(kBelow, &less);  // Unsigned compare.
+
+      __ Bind(&greater);
+      __ movl(output, Immediate(1));
+      __ jmp(&done);
+
+      __ Bind(&less);
+      __ movl(output, Immediate(-1));
+
+      __ Bind(&done);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+}
+
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index acc670e..8a8216a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -173,6 +173,10 @@
     return &move_resolver_;
   }
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kX86;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 283f1f5..dc1d616 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -228,7 +228,9 @@
   }
 }
 
-void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
+void CodeGeneratorX86_64::Move(HInstruction* instruction,
+                               Location location,
+                               HInstruction* move_for) {
   if (instruction->AsIntConstant() != nullptr) {
     Immediate imm(instruction->AsIntConstant()->GetValue());
     if (location.IsRegister()) {
@@ -383,7 +385,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
+  locations->SetOut(Location::RequiresRegister());
   comp->SetLocations(locations);
 }
 
@@ -444,6 +446,39 @@
   VisitCondition(comp);
 }
 
+void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong:
+      __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(0));
+  __ j(kEqual, &done);
+  __ j(kGreater, &greater);
+
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(-1));
+  __ jmp(&done);
+
+  __ Bind(&greater);
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1));
+
+  __ Bind(&done);
+}
+
 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
   // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
@@ -463,7 +498,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) {
-  // Will be generated at use site.
+  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -812,10 +847,13 @@
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
-    } else {
-      DCHECK(destination.IsStackSlot());
+    } else if (destination.IsStackSlot()) {
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
               source.AsX86_64().AsCpuRegister());
+    } else {
+      DCHECK(destination.IsDoubleStackSlot());
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
     }
   } else if (source.IsStackSlot()) {
     if (destination.IsRegister()) {
@@ -826,18 +864,27 @@
       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
+  } else if (source.IsDoubleStackSlot()) {
+    if (destination.IsRegister()) {
+      __ movq(destination.AsX86_64().AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsDoubleStackSlot());
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+    }
   } else {
     LOG(FATAL) << "Unimplemented";
   }
 }
 
-void ParallelMoveResolverX86_64::Exchange(CpuRegister reg, int mem) {
+void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
-  __ movl(Address(CpuRegister(RSP), mem), CpuRegister(reg));
-  __ movl(CpuRegister(reg), CpuRegister(TMP));
+  __ movl(Address(CpuRegister(RSP), mem), reg);
+  __ movl(reg, CpuRegister(TMP));
 }
 
-void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) {
+void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
   ScratchRegisterScope ensure_scratch(
       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
 
@@ -850,6 +897,25 @@
           CpuRegister(ensure_scratch.GetRegister()));
 }
 
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
+  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+  __ movq(Address(CpuRegister(RSP), mem), reg);
+  __ movq(reg, CpuRegister(TMP));
+}
+
+void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+  __ movq(CpuRegister(ensure_scratch.GetRegister()),
+          Address(CpuRegister(RSP), mem2 + stack_offset));
+  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
+          CpuRegister(ensure_scratch.GetRegister()));
+}
+
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   MoveOperands* move = moves_.Get(index);
   Location source = move->GetSource();
@@ -858,11 +924,17 @@
   if (source.IsRegister() && destination.IsRegister()) {
     __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
-    Exchange(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
+    Exchange32(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
-    Exchange(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
+    Exchange32(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
-    Exchange(destination.GetStackIndex(), source.GetStackIndex());
+    Exchange32(destination.GetStackIndex(), source.GetStackIndex());
+  } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
+    Exchange64(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
+  } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
+    Exchange64(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
+  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+    Exchange64(destination.GetStackIndex(), source.GetStackIndex());
   } else {
     LOG(FATAL) << "Unimplemented";
   }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index f07df29..d347a4f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -69,8 +69,10 @@
   X86_64Assembler* GetAssembler() const;
 
  private:
-  void Exchange(CpuRegister reg, int mem);
-  void Exchange(int mem1, int mem2);
+  void Exchange32(CpuRegister reg, int mem);
+  void Exchange32(int mem1, int mem2);
+  void Exchange64(CpuRegister reg, int mem);
+  void Exchange64(int mem1, int mem2);
 
   CodeGeneratorX86_64* const codegen_;
 
@@ -170,6 +172,10 @@
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kX86_64;
+  }
+
  private:
   // Helper method to move a value between two locations.
   void Move(Location destination, Location source);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index a49ce64..f033e2e 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -108,9 +108,11 @@
       } else {
         codegen_.DumpCoreRegister(output_, location.reg().RegId());
       }
-    } else {
-      DCHECK(location.IsStackSlot());
+    } else if (location.IsStackSlot()) {
       output_ << location.GetStackIndex() << "(sp)";
+    } else {
+      DCHECK(location.IsDoubleStackSlot());
+      output_ << "2x" << location.GetStackIndex() << "(sp)";
     }
   }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 503f31d..9292084 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -414,6 +414,7 @@
   M(ReturnVoid)                                            \
   M(StoreLocal)                                            \
   M(Sub)                                                   \
+  M(Compare)                                               \
 
 
 #define FORWARD_DECLARATION(type) class H##type;
@@ -986,6 +987,22 @@
 };
 
 
+// Instruction to check how two inputs compare to each other.
+// Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
+class HCompare : public HBinaryOperation {
+ public:
+  HCompare(Primitive::Type type, HInstruction* first, HInstruction* second)
+      : HBinaryOperation(Primitive::kPrimInt, first, second) {
+    DCHECK_EQ(type, first->GetType());
+    DCHECK_EQ(type, second->GetType());
+  }
+
+  DECLARE_INSTRUCTION(Compare);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HCompare);
+};
+
 // A local in the graph. Corresponds to a Dex register.
 class HLocal : public HTemplateInstruction<0> {
  public:
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1f4cb41..68130dd 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -55,7 +55,7 @@
          it.Advance()) {
       HInstruction* current = it.Current();
       if (current->NeedsEnvironment()) return false;
-      if (current->GetType() == Primitive::kPrimLong) return false;
+      if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false;
       if (current->GetType() == Primitive::kPrimFloat) return false;
       if (current->GetType() == Primitive::kPrimDouble) return false;
     }
@@ -139,7 +139,7 @@
         current->SetFrom(position + 1);
         current->SetRegister(output.reg().RegId());
         BlockRegister(output, position, position + 1, instruction->GetType());
-      } else if (output.IsStackSlot()) {
+      } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
         current->SetSpillSlot(output.GetStackIndex());
       }
       for (size_t i = 0; i < instruction->InputCount(); ++i) {
@@ -430,7 +430,7 @@
 // we spill `current` instead.
 bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
   size_t first_register_use = current->FirstRegisterUse();
-  if (current->FirstRegisterUse() == kNoLifetime) {
+  if (first_register_use == kNoLifetime) {
     AllocateSpillSlotFor(current);
     return false;
   }
@@ -559,6 +559,10 @@
   }
 }
 
+static bool NeedTwoSpillSlot(Primitive::Type type) {
+  return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
+}
+
 void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
   LiveInterval* parent = interval->GetParent();
 
@@ -581,6 +585,43 @@
   }
   size_t end = last_sibling->GetEnd();
 
+  if (NeedTwoSpillSlot(parent->GetType())) {
+    AllocateTwoSpillSlots(parent, end);
+  } else {
+    AllocateOneSpillSlot(parent, end);
+  }
+}
+
+void RegisterAllocator::AllocateTwoSpillSlots(LiveInterval* parent, size_t end) {
+  // Find an available spill slot.
+  size_t slot = 0;
+  for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
+    // We check if it is less rather than less or equal because the parallel move
+    // resolver does not work when a single spill slot needs to be exchanged with
+    // a double spill slot. The strict comparison avoids needing to exchange these
+    // locations at the same lifetime position.
+    if (spill_slots_.Get(slot) < parent->GetStart()
+        && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) {
+      break;
+    }
+  }
+
+  if (slot == spill_slots_.Size()) {
+    // We need a new spill slot.
+    spill_slots_.Add(end);
+    spill_slots_.Add(end);
+  } else if (slot == spill_slots_.Size() - 1) {
+    spill_slots_.Put(slot, end);
+    spill_slots_.Add(end);
+  } else {
+    spill_slots_.Put(slot, end);
+    spill_slots_.Put(slot + 1, end);
+  }
+
+  parent->SetSpillSlot(slot * kVRegSize);
+}
+
+void RegisterAllocator::AllocateOneSpillSlot(LiveInterval* parent, size_t end) {
   // Find an available spill slot.
   size_t slot = 0;
   for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
@@ -604,7 +645,11 @@
     return Location::RegisterLocation(ManagedRegister(interval->GetRegister()));
   } else {
     DCHECK(interval->GetParent()->HasSpillSlot());
-    return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+    if (NeedTwoSpillSlot(interval->GetType())) {
+      return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot());
+    } else {
+      return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+    }
   }
 }
 
@@ -750,7 +795,9 @@
     // We spill eagerly, so move must be at definition.
     InsertMoveAfter(interval->GetDefinedBy(),
                     Location::RegisterLocation(ManagedRegister(interval->GetRegister())),
-                    Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+                    NeedTwoSpillSlot(interval->GetType())
+                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
   UsePosition* use = current->GetFirstUse();
 
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index e63122f..7d4cd1a 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -93,6 +93,8 @@
 
   // Allocate a spill slot for the given interval.
   void AllocateSpillSlotFor(LiveInterval* interval);
+  void AllocateOneSpillSlot(LiveInterval* interval, size_t end);
+  void AllocateTwoSpillSlots(LiveInterval* interval, size_t end);
 
   // Connect adjacent siblings within blocks.
   void ConnectSiblings(LiveInterval* interval);