Improved and simplified loop optimizations.
Rationale:
Empty preheader simplification has been simplified
to a much more general empty block removal optimization
step. Incremental updating of induction variable
analysis enables repeated elimination or simplification
of induction cycles.
This enabled an extra layer of optimization for
e.g. Benchpress Loop (17.5us. -> 0.24us. -> 0.08us).
So the original 73x speedup is now multiplied
by another 3x, for a total of about 218x.
Test: 618-checker-induction et al.
Change-Id: I394699981481cdd5357e0531bce88cd48bd32879
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index aa3f268..adfe09b 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -343,14 +343,7 @@
for (i.Advance(); !i.Done(); i.Advance()) {
HInstruction* inst = i.Current();
DCHECK(!inst->IsControlFlow());
- if (!inst->HasSideEffects()
- && !inst->CanThrow()
- && !inst->IsSuspendCheck()
- && !inst->IsNativeDebugInfo()
- // If we added an explicit barrier then we should keep it.
- && !inst->IsMemoryBarrier()
- && !inst->IsParameterValue()
- && !inst->HasUses()) {
+ if (inst->IsDeadAndRemovable()) {
block->RemoveInstruction(inst);
MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction);
}
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 63850b3..df31e81 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -131,6 +131,14 @@
*/
void Replace(HInstruction* instruction, HInstruction* fetch, HInstruction* replacement);
+ /**
+ * Incrementally updates induction information for just the given loop.
+ */
+ void ReVisit(HLoopInformation* loop) {
+ induction_analysis_->induction_.erase(loop);
+ induction_analysis_->VisitLoop(loop);
+ }
+
private:
/*
* Enum used in IsConstant() request.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 93c6c20..33fa87d 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -69,33 +69,6 @@
i->GetNext() != nullptr && i->GetNext()->IsGoto();
}
-static HBasicBlock* TryRemovePreHeader(HBasicBlock* preheader, HBasicBlock* entry_block) {
- if (preheader->GetPredecessors().size() == 1) {
- HBasicBlock* entry = preheader->GetSinglePredecessor();
- HInstruction* anchor = entry->GetLastInstruction();
- // If the pre-header has a single predecessor we can remove it too if
- // either the pre-header just contains a goto, or if the predecessor
- // is not the entry block so we can push instructions backward
- // (moving computation into the entry block is too dangerous!).
- if (preheader->GetFirstInstruction() == nullptr ||
- preheader->GetFirstInstruction()->IsGoto() ||
- (entry != entry_block && anchor->IsGoto())) {
- // Push non-goto statements backward to empty the pre-header.
- for (HInstructionIterator it(preheader->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instruction = it.Current();
- if (!instruction->IsGoto()) {
- if (!instruction->CanBeMoved()) {
- return nullptr; // pushing failed to move all
- }
- it.Current()->MoveBefore(anchor);
- }
- }
- return entry;
- }
- }
- return nullptr;
-}
-
static void RemoveFromCycle(HInstruction* instruction) {
// A bit more elaborate than the usual instruction removal,
// since there may be a cycle in the use structure.
@@ -115,7 +88,8 @@
loop_allocator_(nullptr),
top_loop_(nullptr),
last_loop_(nullptr),
- iset_(nullptr) {
+ iset_(nullptr),
+ induction_simplication_count_(0) {
}
void HLoopOptimization::Run() {
@@ -211,11 +185,17 @@
void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
for ( ; node != nullptr; node = node->next) {
+ int current_induction_simplification_count = induction_simplication_count_;
if (node->inner != nullptr) {
TraverseLoopsInnerToOuter(node->inner);
}
- // Visit loop after its inner loops have been visited.
+ // Visit loop after its inner loops have been visited. If the induction of any inner
+ // loop has been simplified, recompute the induction information of this loop first.
+ if (current_induction_simplification_count != induction_simplication_count_) {
+ induction_range_.ReVisit(node->loop_info);
+ }
SimplifyInduction(node);
+ SimplifyBlocks(node);
RemoveIfEmptyLoop(node);
}
}
@@ -233,11 +213,41 @@
iset_->clear();
int32_t use_count = 0;
if (IsPhiInduction(phi, iset_) &&
- IsOnlyUsedAfterLoop(*node->loop_info, phi, &use_count) &&
+ IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
TryReplaceWithLastValue(phi, use_count, preheader)) {
for (HInstruction* i : *iset_) {
RemoveFromCycle(i);
}
+ induction_simplication_count_++;
+ }
+ }
+}
+
+void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
+ for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
+ HBasicBlock* block = it.Current();
+ // Remove instructions that are dead, usually resulting from eliminating induction cycles.
+ for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
+ HInstruction* instruction = i.Current();
+ if (instruction->IsDeadAndRemovable()) {
+ block->RemoveInstruction(instruction);
+ }
+ }
+ // Remove trivial control flow blocks from the loop body, again usually resulting
+ // from eliminating induction cycles.
+ if (block->GetPredecessors().size() == 1 &&
+ block->GetSuccessors().size() == 1 &&
+ block->GetFirstInstruction()->IsGoto()) {
+ HBasicBlock* pred = block->GetSinglePredecessor();
+ HBasicBlock* succ = block->GetSingleSuccessor();
+ if (succ->GetPredecessors().size() == 1) {
+ pred->ReplaceSuccessor(block, succ);
+ block->ClearDominanceInformation();
+ block->SetDominator(pred); // needed by next disconnect.
+ block->DisconnectAndDelete();
+ pred->AddDominatedBlock(succ);
+ succ->SetDominator(pred);
+ }
}
}
}
@@ -272,41 +282,31 @@
int32_t use_count = 0;
if (IsEmptyHeader(header, iset_) &&
IsEmptyBody(body, iset_) &&
- IsOnlyUsedAfterLoop(*node->loop_info, header->GetFirstPhi(), &use_count) &&
+ IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) {
- HBasicBlock* entry = TryRemovePreHeader(preheader, graph_->GetEntryBlock());
body->DisconnectAndDelete();
exit->RemovePredecessor(header);
header->RemoveSuccessor(exit);
header->ClearDominanceInformation();
header->SetDominator(preheader); // needed by next disconnect.
header->DisconnectAndDelete();
- // If allowed, remove preheader too, which may expose next outer empty loop
- // Otherwise, link preheader directly to exit to restore the flow graph.
- if (entry != nullptr) {
- entry->ReplaceSuccessor(preheader, exit);
- entry->AddDominatedBlock(exit);
- exit->SetDominator(entry);
- preheader->DisconnectAndDelete();
- } else {
- preheader->AddSuccessor(exit);
- preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator
- preheader->AddDominatedBlock(exit);
- exit->SetDominator(preheader);
- }
+ preheader->AddSuccessor(exit);
+ preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator
+ preheader->AddDominatedBlock(exit);
+ exit->SetDominator(preheader);
// Update hierarchy.
RemoveLoop(node);
}
}
-bool HLoopOptimization::IsOnlyUsedAfterLoop(const HLoopInformation& loop_info,
+bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
/*out*/ int32_t* use_count) {
for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
HInstruction* user = use.GetUser();
if (iset_->find(user) == iset_->end()) { // not excluded?
HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
- if (other_loop_info != nullptr && other_loop_info->IsIn(loop_info)) {
+ if (other_loop_info != nullptr && other_loop_info->IsIn(*loop_info)) {
return false;
}
++*use_count;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index b2bf1c8..9c4b462 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -46,7 +46,7 @@
inner(nullptr),
previous(nullptr),
next(nullptr) {}
- const HLoopInformation* const loop_info;
+ HLoopInformation* const loop_info;
LoopNode* outer;
LoopNode* inner;
LoopNode* previous;
@@ -61,9 +61,10 @@
void TraverseLoopsInnerToOuter(LoopNode* node);
void SimplifyInduction(LoopNode* node);
+ void SimplifyBlocks(LoopNode* node);
void RemoveIfEmptyLoop(LoopNode* node);
- bool IsOnlyUsedAfterLoop(const HLoopInformation& loop_info,
+ bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
/*out*/ int32_t* use_count);
void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement);
@@ -87,6 +88,11 @@
// Contents reside in phase-local heap memory.
ArenaSet<HInstruction*>* iset_;
+ // Counter that tracks how many induction cycles have been simplified. Useful
+ // to trigger incremental updates of induction variable analysis of outer loops
+ // when the induction of inner loops has changed.
+ int32_t induction_simplication_count_;
+
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 828c0e5..348f99d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1931,6 +1931,19 @@
return !HasEnvironmentUses() && GetUses().HasExactlyOneElement();
}
+ bool IsDeadAndRemovable() const {
+ return
+ !HasSideEffects() &&
+ !CanThrow() &&
+ !IsSuspendCheck() &&
+ !IsControlFlow() &&
+ !IsNativeDebugInfo() &&
+ !IsParameterValue() &&
+ !HasUses() &&
+ // If we added an explicit barrier then we should keep it.
+ !IsMemoryBarrier();
+ }
+
// Does this instruction strictly dominate `other_instruction`?
// Returns false if this instruction and `other_instruction` are the same.
// Aborts if this instruction and `other_instruction` are both phis.
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index 0ea85da..5c789cd 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -134,17 +134,20 @@
/// CHECK-DAG: Phi loop:<<Loop>> outer_loop:none
/// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none
/// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-NOT: BoundsCheck
//
/// CHECK-START: void Main.deadCycleWithException(int) loop_optimization (after)
/// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-NOT: Phi loop:<<Loop>> outer_loop:none
/// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none
- /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-NOT: ArrayGet loop:<<Loop>> outer_loop:none
static void deadCycleWithException(int k) {
int dead = 0;
for (int i = 0; i < a.length; i++) {
a[i] = 4;
- // Increment value of dead cycle may throw exception.
+ // Increment value of dead cycle may throw exception. Dynamic
+ // BCE takes care of the bounds check though, which enables
+ // removing the ArrayGet after removing the dead cycle.
dead += a[k];
}
}
@@ -180,7 +183,17 @@
return closed; // only needs last value
}
- // TODO: move closed form even further out?
+ /// CHECK-START: int Main.closedFormNested() loop_optimization (before)
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi loop:<<Loop2>> outer_loop:<<Loop1>>
+ /// CHECK-DAG: Return [<<Phi1>>] loop:none
+ //
+ /// CHECK-START: int Main.closedFormNested() loop_optimization (after)
+ /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:none
+ /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:loop{{B\d+}}
+ /// CHECK-DAG: Return loop:none
static int closedFormNested() {
int closed = 0;
for (int i = 0; i < 10; i++) {
@@ -191,6 +204,27 @@
return closed; // only needs last-value
}
+ /// CHECK-START: int Main.closedFormNestedAlt() loop_optimization (before)
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi loop:<<Loop2>> outer_loop:<<Loop1>>
+ /// CHECK-DAG: Return [<<Phi1>>] loop:none
+ //
+ /// CHECK-START: int Main.closedFormNestedAlt() loop_optimization (after)
+ /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:none
+ /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:loop{{B\d+}}
+ /// CHECK-DAG: Return loop:none
+ static int closedFormNestedAlt() {
+ int closed = 12345;
+ for (int i = 0; i < 17; i++) {
+ for (int j = 0; j < 23; j++) {
+ closed += 7;
+ }
+ }
+ return closed; // only needs last-value
+ }
+
// TODO: taken test around closed form?
static int closedFormInductionUpN(int n) {
int closed = 12345;
@@ -220,9 +254,20 @@
}
// TODO: move closed form even further out?
- static int closedFormNestedNN(int n) {
- int closed = 0;
+ static int closedFormNestedNAlt(int n) {
+ int closed = 12345;
for (int i = 0; i < n; i++) {
+ for (int j = 0; j < 23; j++) {
+ closed += 7;
+ }
+ }
+ return closed; // only needs last-value
+ }
+
+ // TODO: move closed form even further out?
+ static int closedFormNestedMN(int m, int n) {
+ int closed = 0;
+ for (int i = 0; i < m; i++) {
for (int j = 0; j < n; j++) {
closed++;
}
@@ -230,6 +275,17 @@
return closed; // only needs last-value
}
+ // TODO: move closed form even further out?
+ static int closedFormNestedMNAlt(int m, int n) {
+ int closed = 12345;
+ for (int i = 0; i < m; i++) {
+ for (int j = 0; j < n; j++) {
+ closed += 7;
+ }
+ }
+ return closed; // only needs last-value
+ }
+
/// CHECK-START: int Main.mainIndexReturned() loop_optimization (before)
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:{{B\d+}} outer_loop:none
/// CHECK-DAG: Return [<<Phi>>] loop:none
@@ -444,12 +500,15 @@
expectEquals(12395, closedFormInductionUp());
expectEquals(12295, closedFormInductionInAndDown(12345));
expectEquals(10 * 10, closedFormNested());
+ expectEquals(12345 + 17 * 23 * 7, closedFormNestedAlt());
for (int n = -4; n < 10; n++) {
int tc = (n <= 0) ? 0 : n;
expectEquals(12345 + tc * 5, closedFormInductionUpN(n));
expectEquals(12345 - tc * 5, closedFormInductionInAndDownN(12345, n));
expectEquals(tc * 10, closedFormNestedN(n));
- expectEquals(tc * tc, closedFormNestedNN(n));
+ expectEquals(12345 + tc * 23 * 7, closedFormNestedNAlt(n));
+ expectEquals(tc * (tc + 1), closedFormNestedMN(n, n + 1));
+ expectEquals(12345 + tc * (tc + 1) * 7, closedFormNestedMNAlt(n, n + 1));
}
expectEquals(10, mainIndexReturned());