ARM(64): Implement the isInfinite intrinsics
The initial implementation replaced the HInvoke node in the graph
with several other HIRs, exploiting the fact that subtracting an
infinity from itself yields a NaN value, i.e. the nodes were
equivalent to the expression (x - x != x - x) && (x == x) (which
performs mostly floating-point operations). It was subsequently
abandoned in favor of another HIR implementation using the same
algorithm as the current assembly code (with mostly integer
operations), since the latter was faster in some simple
microbenchmarks (isInfinite() in a loop).
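For reference, the abandoned floating-point formulation corresponds
to this C++ sketch (illustrative only, not code from this patch):

    // NaN is the only value that does not compare equal to itself:
    // for +/- infinity, x - x is NaN, so the first term holds; the
    // second term then rejects inputs that were NaN to begin with.
    bool IsInfiniteViaArithmetic(float x) {
      return (x - x != x - x) && (x == x);
    }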
While the HIR approach had some significant advantages, such as
being architecture-neutral (so all architectures supported by the
compiler benefited from the changes) and potentially enabling
further optimizations, it also had several limitations, the most
important being that it still needed an HInvoke node, which
defeated its purpose. The reason is that the algorithm requires a
raw conversion to an integer that preserves the bit representation
of the value, which does not seem to be expressible in any other
way; in particular, HTypeConversion performs a regular value
conversion (e.g. truncating a float to an int), not a
reinterpretation of the bits.
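To illustrate the distinction, here is a sketch (the helper name is
made up for this example):

    #include <cstdint>
    #include <cstring>

    // Bit-preserving reinterpretation, as the algorithm requires;
    // this is what Float.floatToRawIntBits() does in Java.
    uint32_t RawFloatBits(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      return bits;  // RawFloatBits(1.0f) == 0x3f800000
    }

    // By contrast, HTypeConversion expresses a value conversion:
    // static_cast<int32_t>(1.0f) == 1.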
Another major problem is that MIPS release 6 has specialized
floating-point classification instructions that are used in the
intrinsic implementation, and which the compiler is unable to use
in the general case (e.g. by recognizing a pattern in the graph),
so the HIR approach resulted in a regression. This could be solved
by doing architecture-specific optimizations earlier, but that
change is beyond the scope of this patch.
There were several other minor issues with the generated code, such
as left shifts not being merged into comparisons on ARM64. More
importantly, on ARM Double.isInfinite() resulted in a sequence of
14 instructions (compared to 6 in the current implementation)
because a long is stored in a register pair, so operations such as
left shifts have to be done with two instructions. This could be
worked around by changing the HIR representation, at the cost of
increased code complexity.
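For context, the integer algorithm implemented by the assembly
below boils down to the following C++ sketch (shown here for the
double case; on 32-bit ARM the 64-bit shift alone takes two
instructions because the value lives in a register pair):

    #include <cstdint>
    #include <cstring>

    bool IsInfinite(double d) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof(bits));
      // Shift out the sign bit; only +/- infinity matches the
      // remaining exponent-all-ones, mantissa-zero pattern.
      return (bits << 1) == (UINT64_C(0x7ff0000000000000) << 1);
    }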
Given all these issues, the final decision was to implement the
intrinsics using the standard architecture-specific approach.
Change-Id: I7d575b794dce298faf09cb5f65e41794fa334f19
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 863dd1c..39a1313 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -30,6 +30,10 @@
// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
static constexpr bool kRoundIsPlusPointFive = false;
+// Positive floating-point infinities.
+static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
+static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
+
// Recognize intrinsics from HInvoke nodes.
class IntrinsicsRecognizer : public HOptimization {
public:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 86b7bc1..146fea1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1985,6 +1985,56 @@
__ Bind(&done);
}
+void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
+ ArmAssembler* const assembler = GetAssembler();
+ LocationSummary* const locations = invoke->GetLocations();
+ const Register out = locations->Out().AsRegister<Register>();
+ // Shifting left by 1 bit makes the value encodable as an immediate operand;
+ // we don't care about the sign bit anyway.
+ constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
+
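+  // Move the raw bits of the input into a core register.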
+ __ vmovrs(out, locations->InAt(0).AsFpuRegister<SRegister>());
+ // We don't care about the sign bit, so shift left.
+ __ Lsl(out, out, 1);
+ __ eor(out, out, ShifterOperand(infinity));
+  // If the result is 0, then it has 32 leading zeros; otherwise it has fewer.
+ __ clz(out, out);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
+}
+
+void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+ ArmAssembler* const assembler = GetAssembler();
+ LocationSummary* const locations = invoke->GetLocations();
+ const Register out = locations->Out().AsRegister<Register>();
+  // The highest 32 bits of double-precision positive infinity, separated
+  // into two constants encodable as immediate operands.
+ constexpr uint32_t infinity_high = 0x7f000000U;
+ constexpr uint32_t infinity_high2 = 0x00f00000U;
+
+  static_assert((infinity_high | infinity_high2) ==
+                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double "
+                "precision positive infinity.");
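+  // Move the raw bits of the input into a pair of core registers
+  // (IP holds the low word, out the high word).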
+ __ vmovrrd(IP, out, FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+ __ eor(out, out, ShifterOperand(infinity_high));
+ __ eor(out, out, ShifterOperand(infinity_high2));
+ // We don't care about the sign bit, so shift left.
+ __ orr(out, IP, ShifterOperand(out, LSL, 1));
+  // If the result is 0, then it has 32 leading zeros; otherwise it has fewer.
+ __ clz(out, out);
+ // Any number less than 32 logically shifted right by 5 bits results in 0;
+ // the same operation on 32 yields 1.
+ __ Lsr(out, out, 5);
+}
+
UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
@@ -2001,8 +2051,6 @@
UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 04ae3a6..4fa292d 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2201,9 +2201,46 @@
__ Bind(slow_path->GetExitLabel());
}
+static void GenIsInfinite(LocationSummary* locations,
+ bool is64bit,
+ vixl::MacroAssembler* masm) {
+ Operand infinity;
+ Register out;
+
+ if (is64bit) {
+ infinity = kPositiveInfinityDouble;
+ out = XRegisterFrom(locations->Out());
+ } else {
+ infinity = kPositiveInfinityFloat;
+ out = WRegisterFrom(locations->Out());
+ }
+
+ const Register zero = vixl::Assembler::AppropriateZeroRegFor(out);
+
+ MoveFPToInt(locations, is64bit, masm);
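+  // The raw bits of the input are now in the out register.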
+ __ Eor(out, out, infinity);
+ // We don't care about the sign bit, so shift left.
+ __ Cmp(zero, Operand(out, LSL, 1));
+ __ Cset(out, eq);
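+  // out is now 1 if the input was +/- infinity and 0 otherwise.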
+}
+
+void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+ GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+ GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
+}
+
UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM64, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM64, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 19c6a22..46195c1 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2283,10 +2283,10 @@
// If one, or more, of the exponent bits is zero, then the number can't be infinite.
if (type == Primitive::kPrimDouble) {
__ MoveFromFpuHigh(TMP, in);
- __ LoadConst32(AT, 0x7FF00000);
+ __ LoadConst32(AT, High32Bits(kPositiveInfinityDouble));
} else {
__ Mfc1(TMP, in);
- __ LoadConst32(AT, 0x7F800000);
+ __ LoadConst32(AT, kPositiveInfinityFloat);
}
__ Xor(TMP, TMP, AT);