[optimizing] Use callee-save registers for x86
Add ESI, EDI, EBP to available registers for non-baseline mode. Ensure
that they aren't used when byte addressable registers are needed.
Change-Id: Ie7130d4084c2ae9cfcd1e47c26eb3e5dcac1ebd6
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ba5f7d8..ed3f949 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -216,6 +216,29 @@
}
}
+void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
+ // The DCHECKS below check that a register is not specified twice in
+ // the summary. The out location can overlap with an input, so we need
+ // to special case it.
+ if (location.IsRegister()) {
+ DCHECK(is_out || !blocked_core_registers_[location.reg()]);
+ blocked_core_registers_[location.reg()] = true;
+ } else if (location.IsFpuRegister()) {
+ DCHECK(is_out || !blocked_fpu_registers_[location.reg()]);
+ blocked_fpu_registers_[location.reg()] = true;
+ } else if (location.IsFpuRegisterPair()) {
+ DCHECK(is_out || !blocked_fpu_registers_[location.AsFpuRegisterPairLow<int>()]);
+ blocked_fpu_registers_[location.AsFpuRegisterPairLow<int>()] = true;
+ DCHECK(is_out || !blocked_fpu_registers_[location.AsFpuRegisterPairHigh<int>()]);
+ blocked_fpu_registers_[location.AsFpuRegisterPairHigh<int>()] = true;
+ } else if (location.IsRegisterPair()) {
+ DCHECK(is_out || !blocked_core_registers_[location.AsRegisterPairLow<int>()]);
+ blocked_core_registers_[location.AsRegisterPairLow<int>()] = true;
+ DCHECK(is_out || !blocked_core_registers_[location.AsRegisterPairHigh<int>()]);
+ blocked_core_registers_[location.AsRegisterPairHigh<int>()] = true;
+ }
+}
+
void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
LocationSummary* locations = instruction->GetLocations();
if (locations == nullptr) return;
@@ -234,46 +257,19 @@
// Mark all fixed input, temp and output registers as used.
for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
- Location loc = locations->InAt(i);
- // The DCHECKS below check that a register is not specified twice in
- // the summary.
- if (loc.IsRegister()) {
- DCHECK(!blocked_core_registers_[loc.reg()]);
- blocked_core_registers_[loc.reg()] = true;
- } else if (loc.IsFpuRegister()) {
- DCHECK(!blocked_fpu_registers_[loc.reg()]);
- blocked_fpu_registers_[loc.reg()] = true;
- } else if (loc.IsFpuRegisterPair()) {
- DCHECK(!blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()]);
- blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()] = true;
- DCHECK(!blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()]);
- blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()] = true;
- } else if (loc.IsRegisterPair()) {
- DCHECK(!blocked_core_registers_[loc.AsRegisterPairLow<int>()]);
- blocked_core_registers_[loc.AsRegisterPairLow<int>()] = true;
- DCHECK(!blocked_core_registers_[loc.AsRegisterPairHigh<int>()]);
- blocked_core_registers_[loc.AsRegisterPairHigh<int>()] = true;
- }
+ BlockIfInRegister(locations->InAt(i));
}
for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
Location loc = locations->GetTemp(i);
- // The DCHECKS below check that a register is not specified twice in
- // the summary.
- if (loc.IsRegister()) {
- DCHECK(!blocked_core_registers_[loc.reg()]);
- blocked_core_registers_[loc.reg()] = true;
- } else if (loc.IsFpuRegister()) {
- DCHECK(!blocked_fpu_registers_[loc.reg()]);
- blocked_fpu_registers_[loc.reg()] = true;
- } else {
- DCHECK(loc.GetPolicy() == Location::kRequiresRegister
- || loc.GetPolicy() == Location::kRequiresFpuRegister);
- }
+ BlockIfInRegister(loc);
+ }
+ Location result_location = locations->Out();
+ if (locations->OutputCanOverlapWithInputs()) {
+ BlockIfInRegister(result_location, /* is_out */ true);
}
- static constexpr bool kBaseline = true;
- SetupBlockedRegisters(kBaseline);
+ SetupBlockedRegisters(/* is_baseline */ true);
// Allocate all unallocated input locations.
for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
@@ -318,7 +314,6 @@
locations->SetTempAt(i, loc);
}
}
- Location result_location = locations->Out();
if (result_location.IsUnallocated()) {
switch (result_location.GetPolicy()) {
case Location::kAny:
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index f46a36d..5146afa 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -378,6 +378,7 @@
void InitLocationsBaseline(HInstruction* instruction);
size_t GetStackOffsetOfSavedRegister(size_t index);
void CompileInternal(CodeAllocator* allocator, bool is_baseline);
+ void BlockIfInRegister(Location location, bool is_out = false) const;
HGraph* const graph_;
const CompilerOptions& compiler_options_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3c8f62c..87efa6c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -37,14 +37,13 @@
static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
+static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
static constexpr int kC2ConditionMask = 0x400;
-// Marker for places that can be updated once we don't follow the quick ABI.
-static constexpr bool kFollowsQuickABI = true;
static constexpr int kFakeReturnRegister = Register(8);
class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
@@ -371,8 +370,15 @@
}
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
- : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters,
- kNumberOfRegisterPairs, (1 << kFakeReturnRegister), 0, compiler_options),
+ : CodeGenerator(graph,
+ kNumberOfCpuRegisters,
+ kNumberOfXmmRegisters,
+ kNumberOfRegisterPairs,
+ ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+ arraysize(kCoreCalleeSaves))
+ | (1 << kFakeReturnRegister),
+ 0,
+ compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -427,18 +433,18 @@
return Location();
}
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
// Don't allocate the dalvik style register pair passing.
blocked_register_pairs_[ECX_EDX] = true;
// Stack register is always reserved.
blocked_core_registers_[ESP] = true;
- // TODO: We currently don't use Quick's callee saved registers.
- DCHECK(kFollowsQuickABI);
- blocked_core_registers_[EBP] = true;
- blocked_core_registers_[ESI] = true;
- blocked_core_registers_[EDI] = true;
+ if (is_baseline) {
+ blocked_core_registers_[EBP] = true;
+ blocked_core_registers_[ESI] = true;
+ blocked_core_registers_[EDI] = true;
+ }
UpdateBlockedPairRegisters();
}
@@ -470,15 +476,33 @@
RecordPcInfo(nullptr, 0);
}
- if (!HasEmptyFrame()) {
- __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
- __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
+ if (HasEmptyFrame()) {
+ return;
}
+
+ for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ __ pushl(reg);
+ }
+ }
+
+ __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
}
void CodeGeneratorX86::GenerateFrameExit() {
- if (!HasEmptyFrame()) {
- __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ if (HasEmptyFrame()) {
+ return;
+ }
+
+ __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+
+ for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ __ popl(reg);
+ }
}
}
@@ -907,7 +931,8 @@
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
if (comp->NeedsMaterialization()) {
- locations->SetOut(Location::RequiresRegister());
+ // We need a byte register.
+ locations->SetOut(Location::RegisterLocation(ECX));
}
}
@@ -1345,8 +1370,10 @@
case Primitive::kPrimInt:
case Primitive::kPrimChar:
// Processing a Dex `int-to-byte' instruction.
- locations->SetInAt(0, Location::Any());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
+ // Make the output overlap to please the register allocator. This greatly simplifies
+ // the validation of the linear scan implementation.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
break;
default:
@@ -3161,15 +3188,16 @@
}
void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
+ // This location builder might end up asking for up to four registers, which is
+ // not currently possible for baseline. The situation in which we need four
+ // registers cannot be met by baseline though, because it has not run any
+ // optimization.
+
Primitive::Type value_type = instruction->GetComponentType();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
- DCHECK(kFollowsQuickABI);
- bool not_enough_registers = needs_write_barrier
- && !instruction->GetValue()->IsConstant()
- && !instruction->GetIndex()->IsConstant();
- bool needs_runtime_call = instruction->NeedsTypeCheck() || not_enough_registers;
+ bool needs_runtime_call = instruction->NeedsTypeCheck();
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
diff --git a/test/456-baseline-array-set/expected.txt b/test/456-baseline-array-set/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/456-baseline-array-set/expected.txt
diff --git a/test/456-baseline-array-set/info.txt b/test/456-baseline-array-set/info.txt
new file mode 100644
index 0000000..cf4137a
--- /dev/null
+++ b/test/456-baseline-array-set/info.txt
@@ -0,0 +1,3 @@
+Test for optimizing on x86, where we could run out
+of available registers when using the baseline register
+allocator.
diff --git a/test/456-baseline-array-set/src/Main.java b/test/456-baseline-array-set/src/Main.java
new file mode 100644
index 0000000..5475b41
--- /dev/null
+++ b/test/456-baseline-array-set/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+ public static void main(String[] args) {
+ doArrayAccess(new Integer(1), 0);
+ }
+
+ public static void doArrayAccess(Integer value, int index) {
+ try {
+ Integer[] array = new Integer[2];
+ // If we were to do optimization on the baseline register
+ // allocator, generating code for the array set would fail on x86.
+ array[index] = array[index + 1];
+ array[index] = value;
+ } catch (ArrayStoreException e) {
+ throw e;
+ }
+ }
+}