Revert "Basic SIMD reduction support."
Fails 530-checker-lse on arm64.
Bug: 64091002, 65212948
This reverts commit cfa59b49cde265dc5329a7e6956445f9f7a75f15.
Change-Id: Icb5d6c805516db0a1d911c3ede9a246ccef89a22
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 18a55c8..9095ecd 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -27,13 +27,12 @@
using helpers::ARM64EncodableConstantOrRegister;
using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
+using helpers::VRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
-using helpers::OutputRegister;
-using helpers::VRegisterFrom;
-using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
+using helpers::WRegisterFrom;
#define __ GetVIXLAssembler()->
@@ -128,51 +127,20 @@
}
}
-void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
+void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- VRegister src = VRegisterFrom(locations->InAt(0));
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Umov(OutputRegister(instruction), src.V4S(), 0);
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Umov(OutputRegister(instruction), src.V2D(), 0);
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- DCHECK_LE(2u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 4u);
- DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
+void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector unary operations.
@@ -201,46 +169,6 @@
}
}
-void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-}
-
-void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- VRegister src = VRegisterFrom(locations->InAt(0));
- VRegister dst = DRegisterFrom(locations->Out());
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ Addv(dst.S(), src.V4S());
- break;
- case HVecReduce::kMin:
- __ Sminv(dst.S(), src.V4S());
- break;
- case HVecReduce::kMax:
- __ Smaxv(dst.S(), src.V4S());
- break;
- }
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ Addp(dst.D(), src.V2D());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD min/max";
- UNREACHABLE();
- }
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -335,7 +263,6 @@
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
}
}
@@ -878,77 +805,6 @@
}
}
-void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
-
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- VRegister dst = VRegisterFrom(locations->Out());
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- // Zero out all other elements first.
- __ Movi(dst.V16B(), 0);
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- return;
- }
-
- // Set required elements.
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
- break;
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
- break;
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 7a11dff..527691d 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -73,11 +73,19 @@
}
}
-void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -104,14 +112,6 @@
}
}
-void LocationsBuilderARMVIXL::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -621,14 +621,6 @@
}
}
-void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c2fbf7f..6bf28ab 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -88,11 +88,19 @@
}
}
-void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -125,14 +133,6 @@
}
}
-void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-}
-
-void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -818,14 +818,6 @@
}
}
-void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 9d3a777..75bf7a7 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -91,11 +91,19 @@
}
}
-void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -128,14 +136,6 @@
}
}
-void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -822,14 +822,6 @@
}
}
-void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 37190f8..e7aec76 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -27,99 +27,9 @@
void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimLong:
- // Long needs extra temporary to load from the register pair.
- if (!is_zero) {
- locations->AddTemp(Location::RequiresFpuRegister());
- }
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
- locations->SetOut(is_zero ? Location::RequiresFpuRegister()
- : Location::SameAsFirstInput());
-
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- __ xorps(dst, dst);
- return;
- }
-
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<Register>());
- __ punpcklbw(dst, dst);
- __ punpcklwd(dst, dst);
- __ pshufd(dst, dst, Immediate(0));
- break;
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<Register>());
- __ punpcklwd(dst, dst);
- __ pshufd(dst, dst, Immediate(0));
- break;
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<Register>());
- __ pshufd(dst, dst, Immediate(0));
- break;
- case Primitive::kPrimLong: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
- __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
- __ punpckldq(dst, tmp);
- __ punpcklqdq(dst, dst);
- break;
- }
- case Primitive::kPrimFloat:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(dst, dst, Immediate(0));
- break;
- case Primitive::kPrimDouble:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(dst, dst, Immediate(0));
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimLong:
- // Long needs extra temporary to store into the register pair.
+ // Long needs extra temporary to load the register pair.
locations->AddTemp(Location::RequiresFpuRegister());
FALLTHROUGH_INTENDED;
case Primitive::kPrimBoolean:
@@ -127,8 +37,8 @@
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresRegister());
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
@@ -141,34 +51,48 @@
}
}
-void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ punpcklbw(reg, reg);
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
case Primitive::kPrimChar:
- case Primitive::kPrimShort: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
case Primitive::kPrimInt:
- DCHECK_LE(4u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 16u);
- __ movd(locations->Out().AsRegister<Register>(), src);
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ pshufd(reg, reg, Immediate(0));
break;
case Primitive::kPrimLong: {
XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
- __ pshufd(tmp, src, Immediate(1));
- __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
+ __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
+ __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
+ __ punpckldq(reg, tmp);
+ __ punpcklqdq(reg, reg);
break;
}
case Primitive::kPrimFloat:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ shufps(reg, reg, Immediate(0));
+ break;
case Primitive::kPrimDouble:
- DCHECK_LE(2u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 4u);
- DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ shufpd(reg, reg, Immediate(0));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -176,6 +100,22 @@
}
}
+void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
LocationSummary* locations = new (arena) LocationSummary(instruction);
@@ -197,73 +137,6 @@
}
}
-void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
- // Long reduction or min/max require a temporary.
- if (instruction->GetPackedType() == Primitive::kPrimLong ||
- instruction->GetKind() == HVecReduce::kMin ||
- instruction->GetKind() == HVecReduce::kMax) {
- instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
- }
-}
-
-void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ movaps(dst, src);
- __ phaddd(dst, dst);
- __ phaddd(dst, dst);
- break;
- case HVecReduce::kMin: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pminsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pminsd(dst, tmp);
- break;
- }
- case HVecReduce::kMax: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pmaxsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pmaxsd(dst, tmp);
- break;
- }
- }
- break;
- case Primitive::kPrimLong: {
- DCHECK_EQ(2u, instruction->GetVectorLength());
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ punpckhqdq(tmp, tmp);
- __ paddq(dst, tmp);
- break;
- case HVecReduce::kMin:
- case HVecReduce::kMax:
- LOG(FATAL) << "Unsupported SIMD type";
- }
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -948,91 +821,6 @@
}
}
-void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
-
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimLong:
- // Long needs extra temporary to load from register pairs.
- if (!is_zero) {
- locations->AddTemp(Location::RequiresFpuRegister());
- }
- FALLTHROUGH_INTENDED;
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- // Zero out all other elements first.
- __ xorps(dst, dst);
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- return;
- }
-
- // Set required elements.
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<Register>());
- break;
- case Primitive::kPrimLong: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ xorps(tmp, tmp);
- __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
- __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
- __ punpckldq(dst, tmp);
- break;
- }
- case Primitive::kPrimFloat:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
- break;
- case Primitive::kPrimDouble:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
@@ -1080,7 +868,6 @@
case 8: scale = TIMES_8; break;
default: break;
}
- // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -1115,7 +902,7 @@
__ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
+ __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 7051ba0..c7ee81c 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -27,8 +27,6 @@
void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -36,89 +34,11 @@
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
+ locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
- locations->SetOut(is_zero ? Location::RequiresFpuRegister()
- : Location::SameAsFirstInput());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- __ xorps(dst, dst);
- return;
- }
-
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklbw(dst, dst);
- __ punpcklwd(dst, dst);
- __ pshufd(dst, dst, Immediate(0));
- break;
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklwd(dst, dst);
- __ pshufd(dst, dst, Immediate(0));
- break;
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
- __ pshufd(dst, dst, Immediate(0));
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
- __ punpcklqdq(dst, dst);
- break;
- case Primitive::kPrimFloat:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(dst, dst, Immediate(0));
- break;
- case Primitive::kPrimDouble:
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(dst, dst, Immediate(0));
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
@@ -128,29 +48,44 @@
}
}
-void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklbw(reg, reg);
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
case Primitive::kPrimChar:
- case Primitive::kPrimShort: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
case Primitive::kPrimInt:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(locations->Out().AsRegister<CpuRegister>(), src);
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ pshufd(reg, reg, Immediate(0));
break;
case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(locations->Out().AsRegister<CpuRegister>(), src); // is 64-bit
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ __ punpcklqdq(reg, reg);
break;
case Primitive::kPrimFloat:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ shufps(reg, reg, Immediate(0));
+ break;
case Primitive::kPrimDouble:
- DCHECK_LE(2u, instruction->GetVectorLength());
- DCHECK_LE(instruction->GetVectorLength(), 4u);
- DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ shufpd(reg, reg, Immediate(0));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -158,6 +93,22 @@
}
}
+void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
LocationSummary* locations = new (arena) LocationSummary(instruction);
@@ -179,73 +130,6 @@
}
}
-void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
- CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
- // Long reduction or min/max require a temporary.
- if (instruction->GetPackedType() == Primitive::kPrimLong ||
- instruction->GetKind() == HVecReduce::kMin ||
- instruction->GetKind() == HVecReduce::kMax) {
- instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
- }
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ movaps(dst, src);
- __ phaddd(dst, dst);
- __ phaddd(dst, dst);
- break;
- case HVecReduce::kMin: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pminsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pminsd(dst, tmp);
- break;
- }
- case HVecReduce::kMax: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pmaxsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pmaxsd(dst, tmp);
- break;
- }
- }
- break;
- case Primitive::kPrimLong: {
- DCHECK_EQ(2u, instruction->GetVectorLength());
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- switch (instruction->GetKind()) {
- case HVecReduce::kSum:
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ punpckhqdq(tmp, tmp);
- __ paddq(dst, tmp);
- break;
- case HVecReduce::kMin:
- case HVecReduce::kMax:
- LOG(FATAL) << "Unsupported SIMD type";
- }
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -930,81 +814,6 @@
}
}
-void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- HInstruction* input = instruction->InputAt(0);
- bool is_zero = IsZeroBitPattern(input);
-
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimLong:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
- : Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
- DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
-
- // Zero out all other elements first.
- __ xorps(dst, dst);
-
- // Shorthand for any type of zero.
- if (IsZeroBitPattern(instruction->InputAt(0))) {
- return;
- }
-
- // Set required elements.
- switch (instruction->GetPackedType()) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- case Primitive::kPrimInt:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
- break;
- case Primitive::kPrimLong:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
- break;
- case Primitive::kPrimFloat:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
- break;
- case Primitive::kPrimDouble:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type";
- UNREACHABLE();
- }
-}
-
void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
@@ -1052,7 +861,6 @@
case 8: scale = TIMES_8; break;
default: break;
}
- // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -1087,7 +895,7 @@
__ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
+ __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index f8f4eb2..027ba77 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -285,19 +285,6 @@
return false;
}
-// Translates operation to reduction kind.
-static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) {
- if (reduction->IsVecAdd() || reduction->IsVecSub()) {
- return HVecReduce::kSum;
- } else if (reduction->IsVecMin()) {
- return HVecReduce::kMin;
- } else if (reduction->IsVecMax()) {
- return HVecReduce::kMax;
- }
- LOG(FATAL) << "Unsupported SIMD reduction";
- UNREACHABLE();
-}
-
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
@@ -347,8 +334,7 @@
vector_peeling_candidate_(nullptr),
vector_runtime_test_a_(nullptr),
vector_runtime_test_b_(nullptr),
- vector_map_(nullptr),
- vector_permanent_map_(nullptr) {
+ vector_map_(nullptr) {
}
void HLoopOptimization::Run() {
@@ -402,14 +388,11 @@
ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ArenaSafeMap<HInstruction*, HInstruction*> map(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
- ArenaSafeMap<HInstruction*, HInstruction*> perm(
- std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = &map;
- vector_permanent_map_ = &perm;
// Traverse.
TraverseLoopsInnerToOuter(top_loop_);
// Detach.
@@ -417,7 +400,6 @@
reductions_ = nullptr;
vector_refs_ = nullptr;
vector_map_ = nullptr;
- vector_permanent_map_ = nullptr;
}
}
@@ -621,6 +603,7 @@
// Vectorize loop, if possible and valid.
if (kEnableVectorization &&
TrySetSimpleLoopHeader(header, &main_phi) &&
+ reductions_->empty() && // TODO: possible with some effort
ShouldVectorize(node, body, trip_count) &&
TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
Vectorize(node, body, exit, trip_count);
@@ -819,13 +802,6 @@
/*unroll*/ 1);
}
- // Link reductions to their final uses.
- for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
- if (i->first->IsPhi()) {
- i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second));
- }
- }
-
// Remove the original loop by disconnecting the body block
// and removing all instructions from the header.
block->DisconnectAndDelete();
@@ -865,10 +841,21 @@
vector_header_->AddInstruction(cond);
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
vector_index_ = phi;
- vector_permanent_map_->clear(); // preserved over unrolling
for (uint32_t u = 0; u < unroll; u++) {
+ // Clear map, leaving loop invariants setup during unrolling.
+ if (u == 0) {
+ vector_map_->clear();
+ } else {
+ for (auto i = vector_map_->begin(); i != vector_map_->end(); ) {
+ if (i->second->IsVecReplicateScalar()) {
+ DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first));
+ ++i;
+ } else {
+ i = vector_map_->erase(i);
+ }
+ }
+ }
// Generate instruction map.
- vector_map_->clear();
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
DCHECK(vectorized_def);
@@ -885,17 +872,9 @@
}
}
}
- // Generate the induction.
vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
Insert(vector_body_, vector_index_);
}
- // Finalize phi inputs for the reductions (if any).
- for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
- if (!i->first->IsPhi()) {
- DCHECK(i->second->IsPhi());
- GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first);
- }
- }
// Finalize phi inputs for the loop index.
phi->AddInput(lo);
phi->AddInput(vector_index_);
@@ -931,23 +910,6 @@
}
return false;
}
- // Accept a left-hand-side reduction for
- // (1) supported vector type,
- // (2) vectorizable right-hand-side value.
- auto redit = reductions_->find(instruction);
- if (redit != reductions_->end()) {
- Primitive::Type type = instruction->GetType();
- if (TrySetVectorType(type, &restrictions) &&
- VectorizeUse(node, instruction, generate_code, type, restrictions)) {
- if (generate_code) {
- HInstruction* new_red = vector_map_->Get(instruction);
- vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
- vector_permanent_map_->Overwrite(redit->second, new_red);
- }
- return true;
- }
- return false;
- }
// Branch back okay.
if (instruction->IsGoto()) {
return true;
@@ -1003,21 +965,6 @@
}
return true;
}
- } else if (instruction->IsPhi()) {
- // Accept particular phi operations.
- if (reductions_->find(instruction) != reductions_->end()) {
- // Deal with vector restrictions.
- if (HasVectorRestrictions(restrictions, kNoReduction)) {
- return false;
- }
- // Accept a reduction.
- if (generate_code) {
- GenerateVecReductionPhi(instruction->AsPhi());
- }
- return true;
- }
- // TODO: accept right-hand-side induction?
- return false;
} else if (instruction->IsTypeConversion()) {
// Accept particular type conversions.
HTypeConversion* conversion = instruction->AsTypeConversion();
@@ -1208,14 +1155,14 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(4);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
default:
break;
@@ -1227,11 +1174,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1240,10 +1187,8 @@
*restrictions |= kNoDiv | kNoMul | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoReduction;
return TrySetVectorLength(2);
default:
return false;
@@ -1255,12 +1200,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |=
- kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1269,10 +1213,10 @@
*restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
+ *restrictions |= kNoMinMax; // -0.0 vs +0.0
return TrySetVectorLength(2);
default:
break;
@@ -1284,23 +1228,23 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1312,23 +1256,23 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
+ *restrictions |= kNoMinMax; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1361,16 +1305,9 @@
return;
}
// In vector code, explicit scalar expansion is needed.
- HInstruction* vector = nullptr;
- auto it = vector_permanent_map_->find(org);
- if (it != vector_permanent_map_->end()) {
- vector = it->second; // reuse during unrolling
- } else {
- vector = new (global_allocator_) HVecReplicateScalar(
- global_allocator_, org, type, vector_length_);
- vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
- }
- vector_map_->Put(org, vector);
+ HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
+ global_allocator_, org, type, vector_length_);
+ vector_map_->Put(org, Insert(vector_preheader_, vector));
}
}
@@ -1425,78 +1362,6 @@
vector_map_->Put(org, vector);
}
-void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
- DCHECK(reductions_->find(phi) != reductions_->end());
- DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
- HInstruction* vector = nullptr;
- if (vector_mode_ == kSequential) {
- HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, phi->GetType());
- vector_header_->AddPhi(new_phi);
- vector = new_phi;
- } else {
- // Link vector reduction back to prior unrolled update, or a first phi.
- auto it = vector_permanent_map_->find(phi);
- if (it != vector_permanent_map_->end()) {
- vector = it->second;
- } else {
- HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType);
- vector_header_->AddPhi(new_phi);
- vector = new_phi;
- }
- }
- vector_map_->Put(phi, vector);
-}
-
-void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
- HInstruction* new_phi = vector_map_->Get(phi);
- HInstruction* new_init = reductions_->Get(phi);
- HInstruction* new_red = vector_map_->Get(reduction);
- // Link unrolled vector loop back to new phi.
- for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) {
- DCHECK(new_phi->IsVecOperation());
- }
- // Prepare the new initialization.
- if (vector_mode_ == kVector) {
- // Generate a [initial, 0, .., 0] vector.
- new_init = Insert(
- vector_preheader_,
- new (global_allocator_) HVecSetScalars(
- global_allocator_, &new_init, phi->GetType(), vector_length_, 1));
- } else {
- new_init = ReduceAndExtractIfNeeded(new_init);
- }
- // Set the phi inputs.
- DCHECK(new_phi->IsPhi());
- new_phi->AsPhi()->AddInput(new_init);
- new_phi->AsPhi()->AddInput(new_red);
- // New feed value for next phi (safe mutation in iteration).
- reductions_->find(phi)->second = new_phi;
-}
-
-HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
- if (instruction->IsPhi()) {
- HInstruction* input = instruction->InputAt(1);
- if (input->IsVecOperation()) {
- Primitive::Type type = input->AsVecOperation()->GetPackedType();
- HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
- // Generate a vector reduction and scalar extract
- // x = REDUCE( [x_1, .., x_n] )
- // y = x_1
- // along the exit of the defining loop.
- HVecReduce::ReductionKind kind = GetReductionKind(input);
- HInstruction* reduce = new (global_allocator_) HVecReduce(
- global_allocator_, instruction, type, vector_length_, kind);
- exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
- instruction = new (global_allocator_) HVecExtractScalar(
- global_allocator_, reduce, type, vector_length_, 0);
- exit->InsertInstructionAfter(instruction, reduce);
- }
- }
- return instruction;
-}
-
#define GENERATE_VEC(x, y) \
if (vector_mode_ == kVector) { \
vector = (x); \
@@ -1677,9 +1542,10 @@
// Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
// (note whether the sign bit in wider precision is shifted in has no effect
// on the narrow precision computed by the idiom).
+ int64_t distance = 0;
if ((instruction->IsShr() ||
instruction->IsUShr()) &&
- IsInt64Value(instruction->InputAt(1), 1)) {
+ IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
// Test for (a + b + c) >> 1 for optional constant c.
HInstruction* a = nullptr;
HInstruction* b = nullptr;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index ba9126c..49be8a3 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -62,18 +62,17 @@
* Vectorization restrictions (bit mask).
*/
enum VectorRestrictions {
- kNone = 0, // no restrictions
- kNoMul = 1 << 0, // no multiplication
- kNoDiv = 1 << 1, // no division
- kNoShift = 1 << 2, // no shift
- kNoShr = 1 << 3, // no arithmetic shift right
- kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits
- kNoSignedHAdd = 1 << 5, // no signed halving add
- kNoUnroundedHAdd = 1 << 6, // no unrounded halving add
- kNoAbs = 1 << 7, // no absolute value
- kNoMinMax = 1 << 8, // no min/max
- kNoStringCharAt = 1 << 9, // no StringCharAt
- kNoReduction = 1 << 10, // no reduction
+ kNone = 0, // no restrictions
+ kNoMul = 1, // no multiplication
+ kNoDiv = 2, // no division
+ kNoShift = 4, // no shift
+ kNoShr = 8, // no arithmetic shift right
+ kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
+ kNoSignedHAdd = 32, // no signed halving add
+ kNoUnroundedHAdd = 64, // no unrounded halving add
+ kNoAbs = 128, // no absolute value
+ kNoMinMax = 256, // no min/max
+ kNoStringCharAt = 512, // no StringCharAt
};
/*
@@ -156,9 +155,6 @@
HInstruction* opb,
HInstruction* offset,
Primitive::Type type);
- void GenerateVecReductionPhi(HPhi* phi);
- void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
- HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
void GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
@@ -257,10 +253,6 @@
// Contents reside in phase-local heap memory.
ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
- // Permanent mapping used during vectorization synthesis.
- // Contents reside in phase-local heap memory.
- ArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
-
// Temporary vectorization bookkeeping.
VectorMode vector_mode_; // synthesis mode
HBasicBlock* vector_preheader_; // preheader of the new loop
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 869fdd4..f60d532 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1374,8 +1374,7 @@
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
M(VecReplicateScalar, VecUnaryOperation) \
- M(VecExtractScalar, VecUnaryOperation) \
- M(VecReduce, VecUnaryOperation) \
+ M(VecSumReduce, VecUnaryOperation) \
M(VecCnv, VecUnaryOperation) \
M(VecNeg, VecUnaryOperation) \
M(VecAbs, VecUnaryOperation) \
@@ -7031,17 +7030,6 @@
return false;
}
-// Returns true iff instruction is the given integral constant.
-inline bool IsInt64Value(HInstruction* instruction, int64_t value) {
- int64_t val = 0;
- return IsInt64AndGet(instruction, &val) && val == value;
-}
-
-// Returns true iff instruction is a zero bit pattern.
-inline bool IsZeroBitPattern(HInstruction* instruction) {
- return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
-}
-
#define INSTRUCTION_TYPE_CHECK(type, super) \
inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \
inline const H##type* HInstruction::As##type() const { \
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 886d75e..6261171 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -63,10 +63,6 @@
// GetVectorLength() x GetPackedType() operations simultaneously.
class HVecOperation : public HVariableInputSizeInstruction {
public:
- // A SIMD operation looks like a FPU location.
- // TODO: we could introduce SIMD types in HIR.
- static constexpr Primitive::Type kSIMDType = Primitive::kPrimDouble;
-
HVecOperation(ArenaAllocator* arena,
Primitive::Type packed_type,
SideEffects side_effects,
@@ -93,9 +89,10 @@
return vector_length_ * Primitive::ComponentSize(GetPackedType());
}
- // Returns the type of the vector operation.
+ // Returns the type of the vector operation: a SIMD operation looks like a FPU location.
+ // TODO: we could introduce SIMD types in HIR.
Primitive::Type GetType() const OVERRIDE {
- return kSIMDType;
+ return Primitive::kPrimDouble;
}
// Returns the true component type packed in a vector.
@@ -223,11 +220,8 @@
DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation);
};
-// Packed type consistency checker ("same vector length" integral types may mix freely).
+// Packed type consistency checker (same vector length integral types may mix freely).
inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type type) {
- if (input->IsPhi()) {
- return input->GetType() == HVecOperation::kSIMDType; // carries SIMD
- }
DCHECK(input->IsVecOperation());
Primitive::Type input_type = input->AsVecOperation()->GetPackedType();
switch (input_type) {
@@ -271,77 +265,27 @@
DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar);
};
-// Extracts a particular scalar from the given vector,
-// viz. extract[ x1, .. , xn ] = x_i.
-//
-// TODO: for now only i == 1 case supported.
-class HVecExtractScalar FINAL : public HVecUnaryOperation {
- public:
- HVecExtractScalar(ArenaAllocator* arena,
- HInstruction* input,
- Primitive::Type packed_type,
- size_t vector_length,
- size_t index,
- uint32_t dex_pc = kNoDexPc)
+// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
+// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
+class HVecSumReduce FINAL : public HVecUnaryOperation {
+ HVecSumReduce(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
: HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
- DCHECK_LT(index, vector_length);
- DCHECK_EQ(index, 0u);
}
- // Yields a single component in the vector.
- Primitive::Type GetType() const OVERRIDE {
- return GetPackedType();
- }
-
- // An extract needs to stay in place, since SIMD registers are not
- // kept alive across vector loop boundaries (yet).
- bool CanBeMoved() const OVERRIDE { return false; }
-
- DECLARE_INSTRUCTION(VecExtractScalar);
-
- private:
- DISALLOW_COPY_AND_ASSIGN(HVecExtractScalar);
-};
-
-// Reduces the given vector into the first element as sum/min/max,
-// viz. sum-reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum xi
-// and the "-" denotes "don't care" (implementation dependent).
-class HVecReduce FINAL : public HVecUnaryOperation {
- public:
- enum ReductionKind {
- kSum = 1,
- kMin = 2,
- kMax = 3
- };
-
- HVecReduce(ArenaAllocator* arena,
- HInstruction* input,
- Primitive::Type packed_type,
- size_t vector_length,
- ReductionKind kind,
- uint32_t dex_pc = kNoDexPc)
- : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc),
- kind_(kind) {
- DCHECK(HasConsistentPackedTypes(input, packed_type));
- }
-
- ReductionKind GetKind() const { return kind_; }
+ // TODO: probably integral promotion
+ Primitive::Type GetType() const OVERRIDE { return GetPackedType(); }
bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
- DCHECK(other->IsVecReduce());
- const HVecReduce* o = other->AsVecReduce();
- return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind();
- }
-
- DECLARE_INSTRUCTION(VecReduce);
+ DECLARE_INSTRUCTION(VecSumReduce);
private:
- const ReductionKind kind_;
-
- DISALLOW_COPY_AND_ASSIGN(HVecReduce);
+ DISALLOW_COPY_AND_ASSIGN(HVecSumReduce);
};
// Converts every component in the vector,
@@ -810,23 +754,20 @@
//
// Assigns the given scalar elements to a vector,
-// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m,
-// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n.
+// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
class HVecSetScalars FINAL : public HVecOperation {
- public:
HVecSetScalars(ArenaAllocator* arena,
HInstruction** scalars, // array
Primitive::Type packed_type,
size_t vector_length,
- size_t number_of_scalars,
uint32_t dex_pc = kNoDexPc)
: HVecOperation(arena,
packed_type,
SideEffects::None(),
- number_of_scalars,
+ /* number_of_inputs */ vector_length,
vector_length,
dex_pc) {
- for (size_t i = 0; i < number_of_scalars; i++) {
+ for (size_t i = 0; i < vector_length; i++) {
DCHECK(!scalars[i]->IsVecOperation());
SetRawInputAt(0, scalars[i]);
}
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index 5a56a2c..0238ea4 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -332,32 +332,4 @@
EXPECT_FALSE(v1->Equals(v3)); // different vector lengths
}
-TEST_F(NodesVectorTest, VectorKindMattersOnReduce) {
- HVecOperation* v0 = new (&allocator_)
- HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4);
-
- HVecReduce* v1 = new (&allocator_) HVecReduce(
- &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kSum);
- HVecReduce* v2 = new (&allocator_) HVecReduce(
- &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMin);
- HVecReduce* v3 = new (&allocator_) HVecReduce(
- &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMax);
-
- EXPECT_FALSE(v0->CanBeMoved());
- EXPECT_TRUE(v1->CanBeMoved());
- EXPECT_TRUE(v2->CanBeMoved());
- EXPECT_TRUE(v3->CanBeMoved());
-
- EXPECT_EQ(HVecReduce::kSum, v1->GetKind());
- EXPECT_EQ(HVecReduce::kMin, v2->GetKind());
- EXPECT_EQ(HVecReduce::kMax, v3->GetKind());
-
- EXPECT_TRUE(v1->Equals(v1));
- EXPECT_TRUE(v2->Equals(v2));
- EXPECT_TRUE(v3->Equals(v3));
-
- EXPECT_FALSE(v1->Equals(v2)); // different kinds
- EXPECT_FALSE(v1->Equals(v3));
-}
-
} // namespace art
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 1d9d28a..510619f 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -215,12 +215,12 @@
last_visited_latency_ = kArm64SIMDReplicateOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecExtractScalar(HVecExtractScalar* instr) {
- HandleSimpleArithmeticSIMD(instr);
+void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
}
-void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) {
- HandleSimpleArithmeticSIMD(instr);
+void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
}
void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
@@ -283,8 +283,8 @@
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) {
- last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
}
void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
@@ -307,10 +307,6 @@
HandleSimpleArithmeticSIMD(instr);
}
-void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
- HandleSimpleArithmeticSIMD(instr);
-}
-
void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
last_visited_latency_ = kArm64SIMDMulIntegerLatency;
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index e1a80ec..63d5b7d 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -83,8 +83,8 @@
M(SuspendCheck , unused) \
M(TypeConversion , unused) \
M(VecReplicateScalar , unused) \
- M(VecExtractScalar , unused) \
- M(VecReduce , unused) \
+ M(VecSetScalars , unused) \
+ M(VecSumReduce , unused) \
M(VecCnv , unused) \
M(VecNeg , unused) \
M(VecAbs , unused) \
@@ -103,7 +103,6 @@
M(VecShl , unused) \
M(VecShr , unused) \
M(VecUShr , unused) \
- M(VecSetScalars , unused) \
M(VecMultiplyAccumulate, unused) \
M(VecLoad , unused) \
M(VecStore , unused)
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
index 8208a9e..741b5fa 100644
--- a/test/661-checker-simd-reduc/src/Main.java
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -51,26 +51,6 @@
return sum;
}
- /// CHECK-START: int Main.reductionInt(int[]) loop_optimization (before)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: int Main.reductionInt(int[]) loop_optimization (after)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
- /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
- /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionInt(int[] x) {
int sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -79,28 +59,6 @@
return sum;
}
- /// CHECK-START: long Main.reductionLong(long[]) loop_optimization (before)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
- /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<Long0>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: long Main.reductionLong(long[]) loop_optimization (after)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
- /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
- /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Long0>>] loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
- /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
private static long reductionLong(long[] x) {
long sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -109,90 +67,6 @@
return sum;
}
- private static byte reductionByteM1(byte[] x) {
- byte sum = -1;
- for (int i = 0; i < x.length; i++) {
- sum += x[i];
- }
- return sum;
- }
-
- private static short reductionShortM1(short[] x) {
- short sum = -1;
- for (int i = 0; i < x.length; i++) {
- sum += x[i];
- }
- return sum;
- }
-
- private static char reductionCharM1(char[] x) {
- char sum = 0xffff;
- for (int i = 0; i < x.length; i++) {
- sum += x[i];
- }
- return sum;
- }
-
- /// CHECK-START: int Main.reductionIntM1(int[]) loop_optimization (before)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
- /// CHECK-DAG: <<ConsM1:i\d+>> IntConstant -1 loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<ConsM1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: int Main.reductionIntM1(int[]) loop_optimization (after)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<ConsM1:i\d+>> IntConstant -1 loop:none
- /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
- /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsM1>>] loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
- /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
- private static int reductionIntM1(int[] x) {
- int sum = -1;
- for (int i = 0; i < x.length; i++) {
- sum += x[i];
- }
- return sum;
- }
-
- /// CHECK-START: long Main.reductionLongM1(long[]) loop_optimization (before)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<LongM1:j\d+>> LongConstant -1 loop:none
- /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<LongM1>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: long Main.reductionLongM1(long[]) loop_optimization (after)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<LongM1:j\d+>> LongConstant -1 loop:none
- /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
- /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<LongM1>>] loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
- /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
- private static long reductionLongM1(long[] x) {
- long sum = -1L;
- for (int i = 0; i < x.length; i++) {
- sum += x[i];
- }
- return sum;
- }
-
private static byte reductionMinusByte(byte[] x) {
byte sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -217,26 +91,6 @@
return sum;
}
- /// CHECK-START: int Main.reductionMinusInt(int[]) loop_optimization (before)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Sub [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: int Main.reductionMinusInt(int[]) loop_optimization (after)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
- /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecSub [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
- /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMinusInt(int[] x) {
int sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -245,28 +99,6 @@
return sum;
}
- /// CHECK-START: long Main.reductionMinusLong(long[]) loop_optimization (before)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
- /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<Long0>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Sub [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: long Main.reductionMinusLong(long[]) loop_optimization (after)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
- /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
- /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Long0>>] loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecSub [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
- /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
private static long reductionMinusLong(long[] x) {
long sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -299,28 +131,6 @@
return min;
}
- /// CHECK-START: int Main.reductionMinInt(int[]) loop_optimization (before)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
- /// CHECK-DAG: <<ConsM:i\d+>> IntConstant 2147483647 loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<ConsM>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: InvokeStaticOrDirect [<<Phi2>>,<<Get>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: int Main.reductionMinInt(int[]) loop_optimization (after)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<ConsM:i\d+>> IntConstant 2147483647 loop:none
- /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
- /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsM>>] loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecMin [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
- /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMinInt(int[] x) {
int min = Integer.MAX_VALUE;
for (int i = 0; i < x.length; i++) {
@@ -361,28 +171,6 @@
return max;
}
- /// CHECK-START: int Main.reductionMaxInt(int[]) loop_optimization (before)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
- /// CHECK-DAG: <<ConsM:i\d+>> IntConstant -2147483648 loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<ConsM>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: InvokeStaticOrDirect [<<Phi2>>,<<Get>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: int Main.reductionMaxInt(int[]) loop_optimization (after)
- /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<ConsM:i\d+>> IntConstant -2147483648 loop:none
- /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
- /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsM>>] loop:none
- /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: VecMax [<<Phi2>>,<<Load>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
- /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMaxInt(int[] x) {
int max = Integer.MIN_VALUE;
for (int i = 0; i < x.length; i++) {
@@ -465,11 +253,6 @@
expectEquals(38070, reductionChar(xc));
expectEquals(365750, reductionInt(xi));
expectEquals(365750L, reductionLong(xl));
- expectEquals(-75, reductionByteM1(xb));
- expectEquals(-27467, reductionShortM1(xs));
- expectEquals(38069, reductionCharM1(xc));
- expectEquals(365749, reductionIntM1(xi));
- expectEquals(365749L, reductionLongM1(xl));
expectEquals(74, reductionMinusByte(xb));
expectEquals(27466, reductionMinusShort(xs));
expectEquals(27466, reductionMinusChar(xc));
diff --git a/test/665-checker-simd-zero/expected.txt b/test/665-checker-simd-zero/expected.txt
deleted file mode 100644
index b0aad4d..0000000
--- a/test/665-checker-simd-zero/expected.txt
+++ /dev/null
@@ -1 +0,0 @@
-passed
diff --git a/test/665-checker-simd-zero/info.txt b/test/665-checker-simd-zero/info.txt
deleted file mode 100644
index 55eca88..0000000
--- a/test/665-checker-simd-zero/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Functional tests on zero-out SIMD vectorization.
diff --git a/test/665-checker-simd-zero/src/Main.java b/test/665-checker-simd-zero/src/Main.java
deleted file mode 100644
index 66eea64..0000000
--- a/test/665-checker-simd-zero/src/Main.java
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Tests for zero vectorization.
- */
-public class Main {
-
- /// CHECK-START: void Main.zeroz(boolean[]) loop_optimization (before)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: void Main.zeroz(boolean[]) loop_optimization (after)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
- private static void zeroz(boolean[] x) {
- for (int i = 0; i < x.length; i++) {
- x[i] = false;
- }
- }
-
- /// CHECK-START: void Main.zerob(byte[]) loop_optimization (before)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: void Main.zerob(byte[]) loop_optimization (after)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
- private static void zerob(byte[] x) {
- for (int i = 0; i < x.length; i++) {
- x[i] = 0;
- }
- }
-
- /// CHECK-START: void Main.zeroc(char[]) loop_optimization (before)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: void Main.zeroc(char[]) loop_optimization (after)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
- private static void zeroc(char[] x) {
- for (int i = 0; i < x.length; i++) {
- x[i] = 0;
- }
- }
-
- /// CHECK-START: void Main.zeros(short[]) loop_optimization (before)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: void Main.zeros(short[]) loop_optimization (after)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
- private static void zeros(short[] x) {
- for (int i = 0; i < x.length; i++) {
- x[i] = 0;
- }
- }
-
- /// CHECK-START: void Main.zeroi(int[]) loop_optimization (before)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: void Main.zeroi(int[]) loop_optimization (after)
- /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
- /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
- private static void zeroi(int[] x) {
- for (int i = 0; i < x.length; i++) {
- x[i] = 0;
- }
- }
-
- /// CHECK-START: void Main.zerol(long[]) loop_optimization (before)
- /// CHECK-DAG: <<Zero:j\d+>> LongConstant 0 loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: void Main.zerol(long[]) loop_optimization (after)
- /// CHECK-DAG: <<Zero:j\d+>> LongConstant 0 loop:none
- /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
- private static void zerol(long[] x) {
- for (int i = 0; i < x.length; i++) {
- x[i] = 0;
- }
- }
-
- /// CHECK-START: void Main.zerof(float[]) loop_optimization (before)
- /// CHECK-DAG: <<Zero:f\d+>> FloatConstant 0 loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: void Main.zerof(float[]) loop_optimization (after)
- /// CHECK-DAG: <<Zero:f\d+>> FloatConstant 0 loop:none
- /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
- private static void zerof(float[] x) {
- for (int i = 0; i < x.length; i++) {
- x[i] = 0;
- }
- }
-
- /// CHECK-START: void Main.zerod(double[]) loop_optimization (before)
- /// CHECK-DAG: <<Zero:d\d+>> DoubleConstant 0 loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>> outer_loop:none
- //
- /// CHECK-START-ARM64: void Main.zerod(double[]) loop_optimization (after)
- /// CHECK-DAG: <<Zero:d\d+>> DoubleConstant 0 loop:none
- /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
- /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
- /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
- private static void zerod(double[] x) {
- for (int i = 0; i < x.length; i++) {
- x[i] = 0;
- }
- }
-
- public static void main(String[] args) {
- int total = 1111;
-
- boolean[] xz = new boolean[total];
- byte[] xb = new byte[total];
- char[] xc = new char[total];
- short[] xs = new short[total];
- int[] xi = new int[total];
- long[] xl = new long[total];
- float[] xf = new float[total];
- double[] xd = new double[total];
-
- for (int i = 0; i < total; i++) {
- xz[i] = true;
- xb[i] = 1;
- xc[i] = 1;
- xs[i] = 1;
- xi[i] = 1;
- xl[i] = 1;
- xf[i] = 1;
- xd[i] = 1;
- }
-
- for (int i = 0; i < total; i++) {
- expectEquals(true, xz[i]);
- expectEquals(1, xb[i]);
- expectEquals(1, xc[i]);
- expectEquals(1, xs[i]);
- expectEquals(1, xi[i]);
- expectEquals(1, xl[i]);
- expectEquals(1, xf[i]);
- expectEquals(1, xd[i]);
- }
-
- zeroz(xz);
- zerob(xb);
- zeroc(xc);
- zeros(xs);
- zeroi(xi);
- zerol(xl);
- zerof(xf);
- zerod(xd);
-
- for (int i = 0; i < total; i++) {
- expectEquals(false, xz[i]);
- expectEquals(0, xb[i]);
- expectEquals(0, xc[i]);
- expectEquals(0, xs[i]);
- expectEquals(0, xi[i]);
- expectEquals(0, xl[i]);
- expectEquals(0, xf[i]);
- expectEquals(0, xd[i]);
- }
-
- System.out.println("passed");
- }
-
- private static void expectEquals(boolean expected, boolean result) {
- if (expected != result) {
- throw new Error("Expected: " + expected + ", found: " + result);
- }
- }
-
- private static void expectEquals(int expected, int result) {
- if (expected != result) {
- throw new Error("Expected: " + expected + ", found: " + result);
- }
- }
-
- private static void expectEquals(long expected, long result) {
- if (expected != result) {
- throw new Error("Expected: " + expected + ", found: " + result);
- }
- }
-
- private static void expectEquals(float expected, float result) {
- if (expected != result) {
- throw new Error("Expected: " + expected + ", found: " + result);
- }
- }
-
- private static void expectEquals(double expected, double result) {
- if (expected != result) {
- throw new Error("Expected: " + expected + ", found: " + result);
- }
- }
-}