Fuse long and FP compare & condition on x86/x86-64 in Optimizing.

This is a preliminary implementation of fusing long/float/double
compares with conditions to avoid materializing the result from the
compare and condition.

The information from a HCompare is transferred to the HCondition if it
is legal.  There must be only a single use of the HCompare, the HCompare
and HCondition must be in the same block, the HCondition must not need
materialization.

Added GetOppositeCondition() to HCondition to return the flipped
condition.

Bug: 21120453
Change-Id: I1f1db206e6dc336270cd71070ed3232dedc754d6
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index daf7d67..329112a 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -119,6 +119,14 @@
   // Check if the selection negates/preserves the value of the condition and
   // if so, generate a suitable replacement instruction.
   HInstruction* if_condition = if_instruction->InputAt(0);
+
+  // Don't change FP compares.  The definition of compares involving NaNs forces
+  // the compares to be done as written by the user.
+  if (if_condition->IsCondition() &&
+      Primitive::IsFloatingPointType(if_condition->InputAt(0)->GetType())) {
+    return;
+  }
+
   HInstruction* replacement;
   if (NegatesCondition(true_value, false_value)) {
     replacement = GetOppositeCondition(if_condition);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 54155db..6b245b3 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -649,7 +649,7 @@
 
 void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction,
                                   Primitive::Type type,
-                                  HCompare::Bias bias,
+                                  ComparisonBias bias,
                                   uint32_t dex_pc) {
   HInstruction* first = LoadLocal(instruction.VRegB(), type);
   HInstruction* second = LoadLocal(instruction.VRegC(), type);
@@ -2303,27 +2303,27 @@
     }
 
     case Instruction::CMP_LONG: {
-      Binop_23x_cmp(instruction, Primitive::kPrimLong, HCompare::kNoBias, dex_pc);
+      Binop_23x_cmp(instruction, Primitive::kPrimLong, kNoBias, dex_pc);
       break;
     }
 
     case Instruction::CMPG_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kGtBias, dex_pc);
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, kGtBias, dex_pc);
       break;
     }
 
     case Instruction::CMPG_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kGtBias, dex_pc);
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, kGtBias, dex_pc);
       break;
     }
 
     case Instruction::CMPL_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kLtBias, dex_pc);
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, kLtBias, dex_pc);
       break;
     }
 
     case Instruction::CMPL_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kLtBias, dex_pc);
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, kLtBias, dex_pc);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index cae762b..e487255 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -139,7 +139,7 @@
 
   void Binop_23x_cmp(const Instruction& instruction,
                      Primitive::Type type,
-                     HCompare::Bias bias,
+                     ComparisonBias bias,
                      uint32_t dex_pc);
 
   template<typename T>
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 262b234..be71443 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -889,6 +889,180 @@
   UNUSED(exit);
 }
 
+void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond,
+                                                  Label* true_label,
+                                                  Label* false_label) {
+  bool gt_bias = cond->IsGtBias();
+  IfCondition if_cond = cond->GetCondition();
+  Condition ccode = X86Condition(if_cond);
+  switch (if_cond) {
+    case kCondEQ:
+      if (!gt_bias) {
+        __ j(kParityEven, false_label);
+      }
+      break;
+    case kCondNE:
+      if (!gt_bias) {
+        __ j(kParityEven, true_label);
+      }
+      break;
+    case kCondLT:
+      if (gt_bias) {
+        __ j(kParityEven, false_label);
+      }
+      ccode = kBelow;
+      break;
+    case kCondLE:
+      if (gt_bias) {
+        __ j(kParityEven, false_label);
+      }
+      ccode = kBelowEqual;
+      break;
+    case kCondGT:
+      if (gt_bias) {
+        __ j(kParityEven, true_label);
+      }
+      ccode = kAbove;
+      break;
+    case kCondGE:
+      if (gt_bias) {
+        __ j(kParityEven, true_label);
+      }
+      ccode = kAboveEqual;
+      break;
+  }
+  __ j(ccode, true_label);
+}
+
+void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
+                                                               Label* true_label,
+                                                               Label* false_label) {
+  LocationSummary* locations = cond->GetLocations();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+  IfCondition if_cond = cond->GetCondition();
+
+  Register left_low = left.AsRegisterPairLow<Register>();
+  Register left_high = left.AsRegisterPairHigh<Register>();
+  IfCondition true_high_cond = if_cond;
+  IfCondition false_high_cond = cond->GetOppositeCondition();
+  Condition final_condition = X86Condition(if_cond);
+
+  // Set the conditions for the test, remembering that == needs to be
+  // decided using the low words.
+  switch (if_cond) {
+    case kCondEQ:
+      false_high_cond = kCondNE;
+      break;
+    case kCondNE:
+      false_high_cond = kCondEQ;
+      break;
+    case kCondLT:
+      false_high_cond = kCondGT;
+      final_condition = kBelow;
+      break;
+    case kCondLE:
+      true_high_cond = kCondLT;
+      final_condition = kBelowEqual;
+      break;
+    case kCondGT:
+      false_high_cond = kCondLT;
+      final_condition = kAbove;
+      break;
+    case kCondGE:
+      true_high_cond = kCondGT;
+      final_condition = kAboveEqual;
+      break;
+  }
+
+  if (right.IsConstant()) {
+    int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+    int32_t val_low = Low32Bits(value);
+    int32_t val_high = High32Bits(value);
+
+    if (val_high == 0) {
+      __ testl(left_high, left_high);
+    } else {
+      __ cmpl(left_high, Immediate(val_high));
+    }
+    if (if_cond == kCondNE) {
+      __ j(X86Condition(true_high_cond), true_label);
+    } else if (if_cond == kCondEQ) {
+      __ j(X86Condition(false_high_cond), false_label);
+    } else {
+      __ j(X86Condition(true_high_cond), true_label);
+      __ j(X86Condition(false_high_cond), false_label);
+    }
+    // Must be equal high, so compare the lows.
+    if (val_low == 0) {
+      __ testl(left_low, left_low);
+    } else {
+      __ cmpl(left_low, Immediate(val_low));
+    }
+  } else {
+    Register right_low = right.AsRegisterPairLow<Register>();
+    Register right_high = right.AsRegisterPairHigh<Register>();
+
+    __ cmpl(left_high, right_high);
+    if (if_cond == kCondNE) {
+      __ j(X86Condition(true_high_cond), true_label);
+    } else if (if_cond == kCondEQ) {
+      __ j(X86Condition(false_high_cond), false_label);
+    } else {
+      __ j(X86Condition(true_high_cond), true_label);
+      __ j(X86Condition(false_high_cond), false_label);
+    }
+    // Must be equal high, so compare the lows.
+    __ cmpl(left_low, right_low);
+  }
+  // The last comparison might be unsigned.
+  __ j(final_condition, true_label);
+}
+
+void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr,
+                                                               HCondition* condition,
+                                                               Label* true_target,
+                                                               Label* false_target,
+                                                               Label* always_true_target) {
+  LocationSummary* locations = condition->GetLocations();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  // We don't want true_target as a nullptr.
+  if (true_target == nullptr) {
+    true_target = always_true_target;
+  }
+  bool falls_through = (false_target == nullptr);
+
+  // FP compares don't like null false_targets.
+  if (false_target == nullptr) {
+    false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+  }
+
+  Primitive::Type type = condition->InputAt(0)->GetType();
+  switch (type) {
+    case Primitive::kPrimLong:
+      GenerateLongComparesAndJumps(condition, true_target, false_target);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK(right.IsFpuRegister());
+      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPJumps(condition, true_target, false_target);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK(right.IsFpuRegister());
+      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      GenerateFPJumps(condition, true_target, false_target);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected compare type " << type;
+  }
+
+  if (!falls_through) {
+    __ jmp(false_target);
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
                                                         Label* true_target,
                                                         Label* false_target,
@@ -910,9 +1084,12 @@
         !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
     // Moves do not affect the eflags register, so if the condition is
     // evaluated just before the if, we don't need to evaluate it
-    // again.
+    // again.  We can't use the eflags on long/FP conditions if they are
+    // materialized due to the complex branching.
+    Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
     bool eflags_set = cond->IsCondition()
-        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction);
+        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
+        && type == Primitive::kPrimInt;
     if (materialized) {
       if (!eflags_set) {
         // Materialized condition, compare against 0.
@@ -927,6 +1104,16 @@
         __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
       }
     } else {
+      // Is this a long or FP comparison that has been folded into the HCondition?
+      if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+        // Generate the comparison directly.
+        GenerateCompareTestAndBranch(instruction->AsIf(),
+                                     cond->AsCondition(),
+                                     true_target,
+                                     false_target,
+                                     always_true_target);
+        return;
+      }
       Location lhs = cond->GetLocations()->InAt(0);
       Location rhs = cond->GetLocations()->InAt(1);
       // LHS is guaranteed to be in a register (see
@@ -1041,36 +1228,94 @@
 void LocationsBuilderX86::VisitCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
-  if (cond->NeedsMaterialization()) {
-    // We need a byte register.
-    locations->SetOut(Location::RegisterLocation(ECX));
+  // Handle the long/FP comparisons made in instruction simplification.
+  switch (cond->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+      if (cond->NeedsMaterialization()) {
+        locations->SetOut(Location::RequiresRegister());
+      }
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      if (cond->NeedsMaterialization()) {
+        locations->SetOut(Location::RequiresRegister());
+      }
+      break;
+    }
+    default:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::Any());
+      if (cond->NeedsMaterialization()) {
+        // We need a byte register.
+        locations->SetOut(Location::RegisterLocation(ECX));
+      }
+      break;
   }
 }
 
 void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) {
-  if (cond->NeedsMaterialization()) {
-    LocationSummary* locations = cond->GetLocations();
-    Register reg = locations->Out().AsRegister<Register>();
-    // Clear register: setcc only sets the low byte.
-    __ xorl(reg, reg);
-    Location lhs = locations->InAt(0);
-    Location rhs = locations->InAt(1);
-    if (rhs.IsRegister()) {
-      __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
-    } else if (rhs.IsConstant()) {
-      int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-      if (constant == 0) {
-        __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-      } else {
-      __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
-      }
-    } else {
-      __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
-    }
-    __ setb(X86Condition(cond->GetCondition()), reg);
+  if (!cond->NeedsMaterialization()) {
+    return;
   }
+
+  LocationSummary* locations = cond->GetLocations();
+  Location lhs = locations->InAt(0);
+  Location rhs = locations->InAt(1);
+  Register reg = locations->Out().AsRegister<Register>();
+  Label true_label, false_label;
+
+  switch (cond->InputAt(0)->GetType()) {
+    default: {
+      // Integer case.
+
+      // Clear output register: setcc only sets the low byte.
+      __ xorl(reg, reg);
+
+      if (rhs.IsRegister()) {
+        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
+      } else if (rhs.IsConstant()) {
+        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+        if (constant == 0) {
+          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
+        } else {
+          __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
+        }
+      } else {
+        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
+      }
+      __ setb(X86Condition(cond->GetCondition()), reg);
+      return;
+    }
+    case Primitive::kPrimLong:
+      GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+      break;
+    case Primitive::kPrimFloat:
+      __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+      GenerateFPJumps(cond, &true_label, &false_label);
+      break;
+    case Primitive::kPrimDouble:
+      __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>());
+      GenerateFPJumps(cond, &true_label, &false_label);
+      break;
+  }
+
+  // Convert the jumps into the result.
+  Label done_label;
+
+  // false case: result = 0;
+  __ Bind(&false_label);
+  __ xorl(reg, reg);
+  __ jmp(&done_label);
+
+  // True case: result = 1
+  __ Bind(&true_label);
+  __ movl(reg, Immediate(1));
+  __ Bind(&done_label);
 }
 
 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 623e832..65d6e0a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -177,7 +177,7 @@
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
-  void GenerateRemFP(HRem *rem);
+  void GenerateRemFP(HRem* rem);
   void HandleShift(HBinaryOperation* instruction);
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
@@ -201,6 +201,13 @@
                              Label* true_target,
                              Label* false_target,
                              Label* always_true_target);
+  void GenerateCompareTestAndBranch(HIf* if_inst,
+                                    HCondition* condition,
+                                    Label* true_target,
+                                    Label* false_target,
+                                    Label* always_true_target);
+  void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
+  void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   X86Assembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c9d19c8..ddaa60d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -833,6 +833,134 @@
   UNUSED(exit);
 }
 
+void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
+                                                     Label* true_label,
+                                                     Label* false_label) {
+  bool gt_bias = cond->IsGtBias();
+  IfCondition if_cond = cond->GetCondition();
+  Condition ccode = X86_64Condition(if_cond);
+  switch (if_cond) {
+    case kCondEQ:
+      if (!gt_bias) {
+        __ j(kParityEven, false_label);
+      }
+      break;
+    case kCondNE:
+      if (!gt_bias) {
+        __ j(kParityEven, true_label);
+      }
+      break;
+    case kCondLT:
+      if (gt_bias) {
+        __ j(kParityEven, false_label);
+      }
+      ccode = kBelow;
+      break;
+    case kCondLE:
+      if (gt_bias) {
+        __ j(kParityEven, false_label);
+      }
+      ccode = kBelowEqual;
+      break;
+    case kCondGT:
+      if (gt_bias) {
+        __ j(kParityEven, true_label);
+      }
+      ccode = kAbove;
+      break;
+    case kCondGE:
+      if (gt_bias) {
+        __ j(kParityEven, true_label);
+      }
+      ccode = kAboveEqual;
+      break;
+  }
+  __ j(ccode, true_label);
+}
+
+void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr,
+                                                                  HCondition* condition,
+                                                                  Label* true_target,
+                                                                  Label* false_target,
+                                                                  Label* always_true_target) {
+  LocationSummary* locations = condition->GetLocations();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  // We don't want true_target as a nullptr.
+  if (true_target == nullptr) {
+    true_target = always_true_target;
+  }
+  bool falls_through = (false_target == nullptr);
+
+  // FP compares don't like null false_targets.
+  if (false_target == nullptr) {
+    false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+  }
+
+  Primitive::Type type = condition->InputAt(0)->GetType();
+  switch (type) {
+    case Primitive::kPrimLong: {
+      CpuRegister left_reg = left.AsRegister<CpuRegister>();
+      if (right.IsConstant()) {
+        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+        if (IsInt<32>(value)) {
+          if (value == 0) {
+            __ testq(left_reg, left_reg);
+          } else {
+            __ cmpq(left_reg, Immediate(static_cast<int32_t>(value)));
+          }
+        } else {
+          // Value won't fit in an 32-bit integer.
+          __ cmpq(left_reg, codegen_->LiteralInt64Address(value));
+        }
+      } else if (right.IsDoubleStackSlot()) {
+        __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
+      } else {
+        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
+      }
+      __ j(X86_64Condition(condition->GetCondition()), true_target);
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      if (right.IsFpuRegister()) {
+        __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      } else if (right.IsConstant()) {
+        __ ucomiss(left.AsFpuRegister<XmmRegister>(),
+                   codegen_->LiteralFloatAddress(
+                     right.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(right.IsStackSlot());
+        __ ucomiss(left.AsFpuRegister<XmmRegister>(),
+                   Address(CpuRegister(RSP), right.GetStackIndex()));
+      }
+      GenerateFPJumps(condition, true_target, false_target);
+      break;
+    }
+    case Primitive::kPrimDouble: {
+      if (right.IsFpuRegister()) {
+        __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      } else if (right.IsConstant()) {
+        __ ucomisd(left.AsFpuRegister<XmmRegister>(),
+                   codegen_->LiteralDoubleAddress(
+                     right.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(right.IsDoubleStackSlot());
+        __ ucomisd(left.AsFpuRegister<XmmRegister>(),
+                   Address(CpuRegister(RSP), right.GetStackIndex()));
+      }
+      GenerateFPJumps(condition, true_target, false_target);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected condition type " << type;
+  }
+
+  if (!falls_through) {
+    __ jmp(false_target);
+  }
+}
+
 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
                                                            Label* true_target,
                                                            Label* false_target,
@@ -854,9 +982,13 @@
         !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
     // Moves do not affect the eflags register, so if the condition is
     // evaluated just before the if, we don't need to evaluate it
-    // again.
+    // again.  We can't use the eflags on FP conditions if they are
+    // materialized due to the complex branching.
+    Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
     bool eflags_set = cond->IsCondition()
-        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction);
+        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
+        && !Primitive::IsFloatingPointType(type);
+
     if (materialized) {
       if (!eflags_set) {
         // Materialized condition, compare against 0.
@@ -872,6 +1004,13 @@
         __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target);
       }
     } else {
+      // Is this a long or FP comparison that has been folded into the HCondition?
+      if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+        // Generate the comparison directly
+        GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
+                                     true_target, false_target, always_true_target);
+        return;
+      }
       Location lhs = cond->GetLocations()->InAt(0);
       Location rhs = cond->GetLocations()->InAt(1);
       if (rhs.IsRegister()) {
@@ -985,35 +1124,122 @@
 void LocationsBuilderX86_64::VisitCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  // Handle the long/FP comparisons made in instruction simplification.
+  switch (cond->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::Any());
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
+      break;
+    default:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::Any());
+      break;
+  }
   if (cond->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) {
-  if (cond->NeedsMaterialization()) {
-    LocationSummary* locations = cond->GetLocations();
-    CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
-    // Clear register: setcc only sets the low byte.
-    __ xorl(reg, reg);
-    Location lhs = locations->InAt(0);
-    Location rhs = locations->InAt(1);
-    if (rhs.IsRegister()) {
-      __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
-    } else if (rhs.IsConstant()) {
-      int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-      if (constant == 0) {
-        __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-      } else {
-        __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
-      }
-    } else {
-      __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
-    }
-    __ setcc(X86_64Condition(cond->GetCondition()), reg);
+  if (!cond->NeedsMaterialization()) {
+    return;
   }
+
+  LocationSummary* locations = cond->GetLocations();
+  Location lhs = locations->InAt(0);
+  Location rhs = locations->InAt(1);
+  CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
+  Label true_label, false_label;
+
+  switch (cond->InputAt(0)->GetType()) {
+    default:
+      // Integer case.
+
+      // Clear output register: setcc only sets the low byte.
+      __ xorl(reg, reg);
+
+      if (rhs.IsRegister()) {
+        __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+      } else if (rhs.IsConstant()) {
+        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+        if (constant == 0) {
+          __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+        } else {
+          __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
+        }
+      } else {
+        __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
+      }
+      __ setcc(X86_64Condition(cond->GetCondition()), reg);
+      return;
+    case Primitive::kPrimLong:
+      // Clear output register: setcc only sets the low byte.
+      __ xorl(reg, reg);
+
+      if (rhs.IsRegister()) {
+        __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+      } else if (rhs.IsConstant()) {
+        int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
+        if (IsInt<32>(value)) {
+          if (value == 0) {
+            __ testq(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+          } else {
+            __ cmpq(lhs.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+          }
+        } else {
+          // Value won't fit in an int.
+          __ cmpq(lhs.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
+        }
+      } else {
+        __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
+      }
+      __ setcc(X86_64Condition(cond->GetCondition()), reg);
+      return;
+    case Primitive::kPrimFloat: {
+      XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
+      if (rhs.IsConstant()) {
+        float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
+        __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
+      } else if (rhs.IsStackSlot()) {
+        __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
+      } else {
+        __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
+      }
+      GenerateFPJumps(cond, &true_label, &false_label);
+      break;
+    }
+    case Primitive::kPrimDouble: {
+      XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
+      if (rhs.IsConstant()) {
+        double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
+        __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
+      } else if (rhs.IsDoubleStackSlot()) {
+        __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
+      } else {
+        __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
+      }
+      GenerateFPJumps(cond, &true_label, &false_label);
+      break;
+    }
+  }
+
+  // Convert the jumps into the result.
+  Label done_label;
+
+  // false case: result = 0;
+  __ Bind(&false_label);
+  __ xorl(reg, reg);
+  __ jmp(&done_label);
+
+  // True case: result = 1
+  __ Bind(&true_label);
+  __ movl(reg, Immediate(1));
+  __ Bind(&done_label);
 }
 
 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c2aa56b..4b90381 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -183,7 +183,7 @@
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
-  void GenerateRemFP(HRem *rem);
+  void GenerateRemFP(HRem* rem);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -202,6 +202,12 @@
                              Label* true_target,
                              Label* false_target,
                              Label* always_true_target);
+  void GenerateCompareTestAndBranch(HIf* if_inst,
+                                    HCondition* condition,
+                                    Label* true_target,
+                                    Label* false_target,
+                                    Label* always_true_target);
+  void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   X86_64Assembler* const assembler_;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 62f90c2..337cf5b 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -54,6 +54,11 @@
   void VisitCheckCast(HCheckCast* instruction) OVERRIDE;
   void VisitAdd(HAdd* instruction) OVERRIDE;
   void VisitAnd(HAnd* instruction) OVERRIDE;
+  void VisitCondition(HCondition* instruction) OVERRIDE;
+  void VisitGreaterThan(HGreaterThan* condition) OVERRIDE;
+  void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) OVERRIDE;
+  void VisitLessThan(HLessThan* condition) OVERRIDE;
+  void VisitLessThanOrEqual(HLessThanOrEqual* condition) OVERRIDE;
   void VisitDiv(HDiv* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
   void VisitNeg(HNeg* instruction) OVERRIDE;
@@ -330,7 +335,11 @@
         block->RemoveInstruction(equal);
         RecordSimplification();
       }
+    } else {
+      VisitCondition(equal);
     }
+  } else {
+    VisitCondition(equal);
   }
 }
 
@@ -358,7 +367,11 @@
         block->RemoveInstruction(not_equal);
         RecordSimplification();
       }
+    } else {
+      VisitCondition(not_equal);
     }
+  } else {
+    VisitCondition(not_equal);
   }
 }
 
@@ -485,6 +498,76 @@
   }
 }
 
+void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitLessThan(HLessThan* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitLessThanOrEqual(HLessThanOrEqual* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
+  // Try to fold an HCompare into this HCondition.
+
+  // This simplification is currently only supported on x86 and x86_64.
+  // TODO: Implement it for ARM, ARM64 and MIPS64.
+  InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+  if (instruction_set != kX86 && instruction_set != kX86_64) {
+    return;
+  }
+
+  HInstruction* left = condition->GetLeft();
+  HInstruction* right = condition->GetRight();
+  // We can only replace an HCondition which compares a Compare to 0.
+  // Both 'dx' and 'jack' generate a compare to 0 when compiling a
+  // condition with a long, float or double comparison as input.
+  if (!left->IsCompare() || !right->IsConstant() || right->AsIntConstant()->GetValue() != 0) {
+    // Conversion is not possible.
+    return;
+  }
+
+  // Is the Compare only used for this purpose?
+  if (!left->GetUses().HasOnlyOneUse()) {
+    // Someone else also wants the result of the compare.
+    return;
+  }
+
+  if (!left->GetEnvUses().IsEmpty()) {
+    // There is a reference to the compare result in an environment. Do we really need it?
+    if (GetGraph()->IsDebuggable()) {
+      return;
+    }
+
+    // We have to ensure that there are no deopt points in the sequence.
+    if (left->HasAnyEnvironmentUseBefore(condition)) {
+      return;
+    }
+  }
+
+  // Clean up any environment uses from the HCompare, if any.
+  left->RemoveEnvironmentUsers();
+
+  // We have decided to fold the HCompare into the HCondition. Transfer the information.
+  condition->SetBias(left->AsCompare()->GetBias());
+
+  // Replace the operands of the HCondition.
+  condition->ReplaceInput(left->InputAt(0), 0);
+  condition->ReplaceInput(left->InputAt(1), 1);
+
+  // Remove the HCompare.
+  left->GetBlock()->RemoveInstruction(left);
+
+  RecordSimplification();
+}
+
 void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b82e37c..588ab70 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -940,6 +940,9 @@
                              GetRight()->AsLongConstant()->GetValue());
     if (GetResultType() == Primitive::kPrimLong) {
       return GetBlock()->GetGraph()->GetLongConstant(value);
+    } else if (GetResultType() == Primitive::kPrimBoolean) {
+      // This can be the result of an HCondition evaluation.
+      return GetBlock()->GetGraph()->GetIntConstant(static_cast<int32_t>(value));
     } else {
       DCHECK_EQ(GetResultType(), Primitive::kPrimInt);
       return GetBlock()->GetGraph()->GetIntConstant(static_cast<int32_t>(value));
@@ -1647,4 +1650,38 @@
   return os;
 }
 
+bool HInstruction::HasAnyEnvironmentUseBefore(HInstruction* other) {
+  // For now, assume that instructions in different blocks may use the
+  // environment.
+  // TODO: Use the control flow to decide if this is true.
+  if (GetBlock() != other->GetBlock()) {
+    return true;
+  }
+
+  // We know that we are in the same block. Walk from 'this' to 'other',
+  // checking to see if there is any instruction with an environment.
+  HInstruction* current = this;
+  for (; current != other && current != nullptr; current = current->GetNext()) {
+    // This is a conservative check, as the instruction result may not be in
+    // the referenced environment.
+    if (current->HasEnvironment()) {
+      return true;
+    }
+  }
+
+  // We should have been called with 'this' before 'other' in the block.
+  // Just confirm this.
+  DCHECK(current != nullptr);
+  return false;
+}
+
+void HInstruction::RemoveEnvironmentUsers() {
+  for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
+    HUseListNode<HEnvironment*>* user_node = use_it.Current();
+    HEnvironment* user = user_node->GetUser();
+    user->SetRawEnvAt(user_node->GetIndex(), nullptr);
+  }
+  env_uses_.Clear();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 04c3963..0db1ac3 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -325,6 +325,10 @@
     return invoke_type_;
   }
 
+  InstructionSet GetInstructionSet() const {
+    return instruction_set_;
+  }
+
  private:
   void VisitBlockForDominatorTree(HBasicBlock* block,
                                   HBasicBlock* predecessor,
@@ -1659,6 +1663,14 @@
 
   virtual bool NeedsDexCache() const { return false; }
 
+  // Does this instruction have any use in an environment before
+  // control flow hits 'other'?
+  bool HasAnyEnvironmentUseBefore(HInstruction* other);
+
+  // Remove all references to environment uses of this instruction.
+  // The caller must ensure that this is safe to do.
+  void RemoveEnvironmentUsers();
+
  protected:
   virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
   virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
@@ -2135,11 +2147,20 @@
   DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
 };
 
+// The comparison bias applies for floating point operations and indicates how NaN
+// comparisons are treated:
+enum ComparisonBias {
+  kNoBias,  // bias is not applicable (i.e. for long operation)
+  kGtBias,  // return 1 for NaN comparisons
+  kLtBias,  // return -1 for NaN comparisons
+};
+
 class HCondition : public HBinaryOperation {
  public:
   HCondition(HInstruction* first, HInstruction* second)
       : HBinaryOperation(Primitive::kPrimBoolean, first, second),
-        needs_materialization_(true) {}
+        needs_materialization_(true),
+        bias_(kNoBias) {}
 
   bool NeedsMaterialization() const { return needs_materialization_; }
   void ClearNeedsMaterialization() { needs_materialization_ = false; }
@@ -2152,11 +2173,24 @@
 
   virtual IfCondition GetCondition() const = 0;
 
+  virtual IfCondition GetOppositeCondition() const = 0;
+
+  bool IsGtBias() { return bias_ == kGtBias; }
+
+  void SetBias(ComparisonBias bias) { bias_ = bias; }
+
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return bias_ == other->AsCondition()->bias_;
+  }
+
  private:
   // For register allocation purposes, returns whether this instruction needs to be
   // materialized (that is, not just be in the processor flags).
   bool needs_materialization_;
 
+  // Needed if we merge a HCompare into a HCondition.
+  ComparisonBias bias_;
+
   DISALLOW_COPY_AND_ASSIGN(HCondition);
 };
 
@@ -2181,6 +2215,10 @@
     return kCondEQ;
   }
 
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondNE;
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HEqual);
 };
@@ -2205,6 +2243,10 @@
     return kCondNE;
   }
 
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondEQ;
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HNotEqual);
 };
@@ -2227,6 +2269,10 @@
     return kCondLT;
   }
 
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondGE;
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HLessThan);
 };
@@ -2249,6 +2295,10 @@
     return kCondLE;
   }
 
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondGT;
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual);
 };
@@ -2271,6 +2321,10 @@
     return kCondGT;
   }
 
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondLE;
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HGreaterThan);
 };
@@ -2293,6 +2347,10 @@
     return kCondGE;
   }
 
+  IfCondition GetOppositeCondition() const OVERRIDE {
+    return kCondLT;
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual);
 };
@@ -2302,18 +2360,10 @@
 // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
 class HCompare : public HBinaryOperation {
  public:
-  // The bias applies for floating point operations and indicates how NaN
-  // comparisons are treated:
-  enum Bias {
-    kNoBias,  // bias is not applicable (i.e. for long operation)
-    kGtBias,  // return 1 for NaN comparisons
-    kLtBias,  // return -1 for NaN comparisons
-  };
-
   HCompare(Primitive::Type type,
            HInstruction* first,
            HInstruction* second,
-           Bias bias,
+           ComparisonBias bias,
            uint32_t dex_pc)
       : HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias), dex_pc_(dex_pc) {
     DCHECK_EQ(type, first->GetType());
@@ -2338,6 +2388,8 @@
     return bias_ == other->AsCompare()->bias_;
   }
 
+  ComparisonBias GetBias() const { return bias_; }
+
   bool IsGtBias() { return bias_ == kGtBias; }
 
   uint32_t GetDexPc() const { return dex_pc_; }
@@ -2345,7 +2397,7 @@
   DECLARE_INSTRUCTION(Compare);
 
  private:
-  const Bias bias_;
+  const ComparisonBias bias_;
   const uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(HCompare);