ARM(64): Implement the isInfinite intrinsics
The initial implementation replaced the HInvoke node in the graph
with several other HIRs, exploiting the fact that subtracting an
infinity from itself yields a NaN value, i.e. the nodes were
equivalent to the expression (x - x != x - x) && (x == x) (which
performs mostly floating-point operations). It was subsequently
abandoned in favor of another HIR implementation using the same
algorithm as the current assembly code (with mostly integer
operations), since the latter was faster in some simple
microbenchmarks (isInfinite() in a loop).
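For reference, the abandoned floating-point formulation corresponds
to this C++ sketch (illustrative only, not code from this patch):

    // NaN is the only value that does not compare equal to itself:
    // for +/- infinity, x - x is NaN, so the first term holds; the
    // second term then rejects inputs that were NaN to begin with.
    bool IsInfiniteViaArithmetic(float x) {
      return (x - x != x - x) && (x == x);
    }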
While the HIR approach had some significant advantages, such as
being architecture-neutral (so all architectures supported by the
compiler benefited from the changes) and potentially enabling
further optimizations, it also had several limitations, the most
important being that it still needed an HInvoke node, which
defeated its purpose. The reason is that the algorithm requires a
raw conversion to an integer that preserves the bit representation
of the value, which does not seem to be expressible in any other
way; in particular, HTypeConversion performs a regular value
conversion (e.g. truncating a float to an int), not a
reinterpretation of the bits.
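To illustrate the distinction, here is a sketch (the helper name is
made up for this example):

    #include <cstdint>
    #include <cstring>

    // Bit-preserving reinterpretation, as the algorithm requires;
    // this is what Float.floatToRawIntBits() does in Java.
    uint32_t RawFloatBits(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      return bits;  // RawFloatBits(1.0f) == 0x3f800000
    }

    // By contrast, HTypeConversion expresses a value conversion:
    // static_cast<int32_t>(1.0f) == 1.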
Another major problem is that MIPS release 6 has specialized
floating-point classification instructions that are used in the
intrinsic implementation, and which the compiler is unable to use
in the general case (e.g. by recognizing a pattern in the graph),
so the HIR approach resulted in a regression. This could be solved
by doing architecture-specific optimizations earlier, but that
change is beyond the scope of this patch.
There were several other minor issues with the generated code, such
as left shifts not being merged into comparisons on ARM64. More
importantly, on ARM Double.isInfinite() resulted in a sequence of
14 instructions (compared to 6 in the current implementation)
because a long is stored in a register pair, so operations such as
left shifts have to be done with two instructions. This could be
worked around by changing the HIR representation, at the cost of
increased code complexity.
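For context, the integer algorithm implemented by the assembly
below boils down to the following C++ sketch (shown here for the
double case; on 32-bit ARM the 64-bit shift alone takes two
instructions because the value lives in a register pair):

    #include <cstdint>
    #include <cstring>

    bool IsInfinite(double d) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof(bits));
      // Shift out the sign bit; only +/- infinity matches the
      // remaining exponent-all-ones, mantissa-zero pattern.
      return (bits << 1) == (UINT64_C(0x7ff0000000000000) << 1);
    }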
Given all these issues, the final decision was to implement the
intrinsics using the standard architecture-specific approach.
Change-Id: I7d575b794dce298faf09cb5f65e41794fa334f19
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 863dd1c..39a1313 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -30,6 +30,10 @@
// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
static constexpr bool kRoundIsPlusPointFive = false;
+// Positive floating-point infinities.
+static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
+static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
+
// Recognize intrinsics from HInvoke nodes.
class IntrinsicsRecognizer : public HOptimization {
public:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 86b7bc1..146fea1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1985,6 +1985,56 @@
__ Bind(&done);
}
+void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
+ ArmAssembler* const assembler = GetAssembler();
+ LocationSummary* const locations = invoke->GetLocations();
+ const Register out = locations->Out().AsRegister<Register>();
+ // Shifting left by 1 bit makes the value encodable as an immediate operand;
+ // we don't care about the sign bit anyway.
+ constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
+
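+  // Move the raw bits of the input into a core register.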
+ __ vmovrs(out, locations->InAt(0).AsFpuRegister<SRegister>());
+ // We don't care about the sign bit, so shift left.
+ __ Lsl(out, out, 1);
+ __ eor(out, out, ShifterOperand(infinity));
+  // If the result is 0, then it has 32 leading zeros; otherwise it has fewer.
+ __ clz(out, out);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
+}
+
+void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+ ArmAssembler* const assembler = GetAssembler();
+ LocationSummary* const locations = invoke->GetLocations();
+ const Register out = locations->Out().AsRegister<Register>();
+  // The highest 32 bits of double-precision positive infinity, separated
+  // into two constants encodable as immediate operands.
+ constexpr uint32_t infinity_high = 0x7f000000U;
+ constexpr uint32_t infinity_high2 = 0x00f00000U;
+
+  static_assert((infinity_high | infinity_high2) ==
+                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double "
+                "precision positive infinity.");
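+  // Move the raw bits of the input into a pair of core registers
+  // (IP holds the low word, out the high word).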
+ __ vmovrrd(IP, out, FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+ __ eor(out, out, ShifterOperand(infinity_high));
+ __ eor(out, out, ShifterOperand(infinity_high2));
+ // We don't care about the sign bit, so shift left.
+ __ orr(out, IP, ShifterOperand(out, LSL, 1));
+  // If the result is 0, then it has 32 leading zeros; otherwise it has fewer.
+ __ clz(out, out);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
+}
+
UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
@@ -2001,8 +2051,6 @@
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 04ae3a6..4fa292d 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2201,9 +2201,46 @@
__ Bind(slow_path->GetExitLabel());
}
+static void GenIsInfinite(LocationSummary* locations,
+ bool is64bit,
+ vixl::MacroAssembler* masm) {
+ Operand infinity;
+ Register out;
+
+ if (is64bit) {
+ infinity = kPositiveInfinityDouble;
+ out = XRegisterFrom(locations->Out());
+ } else {
+ infinity = kPositiveInfinityFloat;
+ out = WRegisterFrom(locations->Out());
+ }
+
+ const Register zero = vixl::Assembler::AppropriateZeroRegFor(out);
+
+ MoveFPToInt(locations, is64bit, masm);
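+  // The raw bits of the input are now in the out register.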
+ __ Eor(out, out, infinity);
+ // We don't care about the sign bit, so shift left.
+ __ Cmp(zero, Operand(out, LSL, 1));
+ __ Cset(out, eq);
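+  // out is now 1 if the input was +/- infinity and 0 otherwise.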
+}
+
+void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+ GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+ GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
+}
+
UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM64, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM64, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 19c6a22..46195c1 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2283,10 +2283,10 @@
// If one, or more, of the exponent bits is zero, then the number can't be infinite.
if (type == Primitive::kPrimDouble) {
__ MoveFromFpuHigh(TMP, in);
- __ LoadConst32(AT, 0x7FF00000);
+ __ LoadConst32(AT, High32Bits(kPositiveInfinityDouble));
} else {
__ Mfc1(TMP, in);
- __ LoadConst32(AT, 0x7F800000);
+ __ LoadConst32(AT, kPositiveInfinityFloat);
}
__ Xor(TMP, TMP, AT);