Fix stack overflow for mutual recursion.
There was a bug where the PC handed to the stack overflow handler lay
in the method which generated the stack overflow. Because the stack
overflow check is emitted before the method is stored on the stack,
that PC was not necessarily in the method at the top of the stack.
This is now fixed by always restoring the link register before
branching to the throw entrypoint, so the handler sees a PC that
matches the frame at the top of the stack.
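
To make the fix concrete, here is a rough sketch of the ARM slow path
now generated (illustrative only: spill_size stands for
(num_core_spills_ + num_fp_spills_) * 4, and rSELF and the entrypoint
offset are symbolic names, not the exact emitted encoding):

    ldr  lr, [sp, #(spill_size - 4)]        @ restore the caller's return address
    add  sp, sp, #spill_size                @ pop the spill area
    ldr  r12, [rSELF, #pThrowStackOverflow] @ load the throw entrypoint
    bx   r12                                @ branch without linking, so the PC
                                            @ the handler sees (LR) matches the
                                            @ frame at the top of the stack

Because the entrypoint never returns and we jump rather than call, no
safepoint PC needs to be recorded after the branch.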
Slight code size regression on ARM/Mips (unmeasured). On ARM the
regression is 4 bytes of code per stack overflow check. Some of this
regression is mitigated by having one fewer GC safepoint.
Also adds a test case for the StackOverflowError issue (from bdc).
Tests passing: ARM, X86, Mips
Phone booting: ARM
Bug: https://code.google.com/p/android/issues/detail?id=66411
Bug: 12967914
Change-Id: I96fe667799458b58d1f86671e051968f7be78d5d
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index c63de69..f29e04b 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -223,6 +223,9 @@
case kOpBlx:
opcode = kThumbBlxR;
break;
+ case kOpBx:
+ opcode = kThumbBx;
+ break;
default:
LOG(FATAL) << "Bad opcode " << op;
}
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index f018c61..8e31a25 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -16,6 +16,7 @@
#include "dex/compiler_ir.h"
#include "dex/compiler_internals.h"
+#include "dex/quick/arm/arm_lir.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"
@@ -544,7 +545,9 @@
ThreadOffset func_offset(-1);
int v1 = lab->operands[2];
int v2 = lab->operands[3];
- bool target_x86 = (cu_->instruction_set == kX86);
+ const bool target_x86 = cu_->instruction_set == kX86;
+ const bool target_arm = cu_->instruction_set == kArm || cu_->instruction_set == kThumb2;
+ const bool target_mips = cu_->instruction_set == kMips;
switch (lab->operands[0]) {
case kThrowNullPointer:
func_offset = QUICK_ENTRYPOINT_OFFSET(pThrowNullPointer);
@@ -602,21 +605,40 @@
func_offset =
QUICK_ENTRYPOINT_OFFSET(pThrowNoSuchMethod);
break;
- case kThrowStackOverflow:
+ case kThrowStackOverflow: {
func_offset = QUICK_ENTRYPOINT_OFFSET(pThrowStackOverflow);
// Restore stack alignment
+ int r_tgt = 0;
+ const int spill_size = (num_core_spills_ + num_fp_spills_) * 4;
if (target_x86) {
- OpRegImm(kOpAdd, TargetReg(kSp), frame_size_);
+ // - 4 to leave link register on stack.
+ OpRegImm(kOpAdd, TargetReg(kSp), frame_size_ - 4);
+ ClobberCalleeSave();
+ } else if (target_arm) {
+ r_tgt = r12;
+ LoadWordDisp(TargetReg(kSp), spill_size - 4, TargetReg(kLr));
+ OpRegImm(kOpAdd, TargetReg(kSp), spill_size);
+ ClobberCalleeSave();
+ LoadWordDisp(rARM_SELF, func_offset.Int32Value(), r_tgt);
} else {
- OpRegImm(kOpAdd, TargetReg(kSp), (num_core_spills_ + num_fp_spills_) * 4);
+ DCHECK(target_mips);
+ DCHECK_EQ(num_fp_spills_, 0); // FP spills currently don't happen on mips.
+ // LR is offset 0 since we push in reverse order.
+ LoadWordDisp(TargetReg(kSp), 0, TargetReg(kLr));
+ OpRegImm(kOpAdd, TargetReg(kSp), spill_size);
+ ClobberCalleeSave();
+ r_tgt = CallHelperSetup(func_offset); // Doesn't clobber LR.
+ DCHECK_NE(r_tgt, TargetReg(kLr));
}
- break;
+ CallHelper(r_tgt, func_offset, false /* MarkSafepointPC */, false /* UseLink */);
+ continue;
+ }
default:
LOG(FATAL) << "Unexpected throw kind: " << lab->operands[0];
}
ClobberCalleeSave();
int r_tgt = CallHelperSetup(func_offset);
- CallHelper(r_tgt, func_offset, true /* MarkSafepointPC */);
+ CallHelper(r_tgt, func_offset, true /* MarkSafepointPC */, true /* UseLink */);
}
}
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 2a0a23c..3a39669 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -33,7 +33,7 @@
/*
* To save scheduling time, helper calls are broken into two parts: generation of
- * the helper target address, and the actuall call to the helper. Because x86
+ * the helper target address, and the actual call to the helper. Because x86
* has a memory call operation, part 1 is a NOP for x86. For other targets,
* load arguments between the two parts.
*/
@@ -42,12 +42,13 @@
}
/* NOTE: if r_tgt is a temp, it will be freed following use */
-LIR* Mir2Lir::CallHelper(int r_tgt, ThreadOffset helper_offset, bool safepoint_pc) {
+LIR* Mir2Lir::CallHelper(int r_tgt, ThreadOffset helper_offset, bool safepoint_pc, bool use_link) {
LIR* call_inst;
+ OpKind op = use_link ? kOpBlx : kOpBx;
if (cu_->instruction_set == kX86) {
- call_inst = OpThreadMem(kOpBlx, helper_offset);
+ call_inst = OpThreadMem(op, helper_offset);
} else {
- call_inst = OpReg(kOpBlx, r_tgt);
+ call_inst = OpReg(op, r_tgt);
FreeTemp(r_tgt);
}
if (safepoint_pc) {
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index 278fcef..dfa837c 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -138,7 +138,6 @@
#define r_FRESULT1 r_F1
// Regs not used for Mips.
-#define rMIPS_LR INVALID_REG
#define rMIPS_PC INVALID_REG
// RegisterLocation templates return values (r_V0, or r_V0/r_V1).
@@ -278,6 +277,7 @@
#define rMIPS_RET1 r_RESULT1
#define rMIPS_INVOKE_TGT r_T9
#define rMIPS_COUNT INVALID_REG
+#define rMIPS_LR r_RA
enum MipsShiftEncodings {
kMipsLsl = 0x0,
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index a37ebd1..d41204f 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -432,7 +432,7 @@
// Shared by all targets - implemented in gen_invoke.cc.
int CallHelperSetup(ThreadOffset helper_offset);
- LIR* CallHelper(int r_tgt, ThreadOffset helper_offset, bool safepoint_pc);
+ LIR* CallHelper(int r_tgt, ThreadOffset helper_offset, bool safepoint_pc, bool use_link = true);
void CallRuntimeHelperImm(ThreadOffset helper_offset, int arg0, bool safepoint_pc);
void CallRuntimeHelperReg(ThreadOffset helper_offset, int arg0, bool safepoint_pc);
void CallRuntimeHelperRegLocation(ThreadOffset helper_offset, RegLocation arg0,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e883432..93612dc 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -319,6 +319,7 @@
{ kX86Jmp8, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xEB, 0, 0, 0, 0, 0 }, "Jmp8", "!0t" },
{ kX86Jmp32, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp32", "!0t" },
{ kX86JmpR, kJmp, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xFF, 0, 0, 4, 0, 0 }, "JmpR", "!0r" },
+ { kX86JmpT, kJmp, IS_UNARY_OP | IS_BRANCH | IS_LOAD, { THREAD_PREFIX, 0, 0xFF, 0, 0, 4, 0, 0 }, "JmpT", "fs:[!0d]" },
{ kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH | REG_USE0, { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" },
{ kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD | REG_USE0, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" },
{ kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD | REG_USE01, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" },
@@ -451,6 +452,8 @@
return 2; // opcode + rel8
} else if (lir->opcode == kX86Jmp32) {
return 5; // opcode + rel32
+ } else if (lir->opcode == kX86JmpT) {
+ return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit
} else {
DCHECK(lir->opcode == kX86JmpR);
return 2; // opcode + modrm
@@ -1349,7 +1352,13 @@
EmitRegCond(entry, lir->operands[0], lir->operands[1]);
break;
case kJmp: // lir operands - 0: rel
- EmitJmp(entry, lir->operands[0]);
+ if (entry->opcode == kX86JmpT) {
+ // This works since the instruction format for jmp and call is basically the same and
+ // EmitCallThread loads opcode info.
+ EmitCallThread(entry, lir->operands[0]);
+ } else {
+ EmitJmp(entry, lir->operands[0]);
+ }
break;
case kJcc: // lir operands - 0: rel, 1: CC, target assigned
EmitJcc(entry, lir->operands[0], lir->operands[1]);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 2be2aa9..6738866 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -224,15 +224,15 @@
LockTemp(rX86_ARG2);
/* Build frame, return address already on stack */
+ // TODO: 64 bit.
OpRegImm(kOpSub, rX86_SP, frame_size_ - 4);
/*
* We can safely skip the stack overflow check if we're
* a leaf *and* our frame size < fudge factor.
*/
- bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
- (static_cast<size_t>(frame_size_) <
- Thread::kStackOverflowReservedBytes));
+ const bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
+ (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
NewLIR0(kPseudoMethodEntry);
/* Spill core callee saves */
SpillCoreRegs();
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index c519bfe..d03d4f8 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -296,6 +296,7 @@
X86OpCode opcode = kX86Bkpt;
switch (op) {
case kOpBlx: opcode = kX86CallT; break;
+ case kOpBx: opcode = kX86JmpT; break;
default:
LOG(FATAL) << "Bad opcode: " << op;
break;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 643a3d5..8a813e8 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -362,6 +362,7 @@
kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned
kX86JmpR, // jmp reg; lir operands - 0: reg
+ kX86JmpT, // jmp fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
kX86CallR, // call reg; lir operands - 0: reg
kX86CallM, // call [base + disp]; lir operands - 0: base, 1: disp
kX86CallA, // call [base + index * scale + disp]
diff --git a/runtime/oat.cc b/runtime/oat.cc
index c01f77c..6fe5d10 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
namespace art {
const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '0', '7', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '0', '8', '\0' };
OatHeader::OatHeader() {
memset(this, 0, sizeof(*this));
diff --git a/test/018-stack-overflow/expected.txt b/test/018-stack-overflow/expected.txt
index 7797816..98b45b7 100644
--- a/test/018-stack-overflow/expected.txt
+++ b/test/018-stack-overflow/expected.txt
@@ -1,2 +1,3 @@
-caught SOE
+caught SOE in testSelfRecursion
+caught SOE in testMutualRecursion
SOE test done
diff --git a/test/018-stack-overflow/src/Main.java b/test/018-stack-overflow/src/Main.java
index f79c269..41adabc 100644
--- a/test/018-stack-overflow/src/Main.java
+++ b/test/018-stack-overflow/src/Main.java
@@ -19,17 +19,46 @@
*/
public class Main {
public static void main(String args[]) {
+ testSelfRecursion();
+ testMutualRecursion();
+ System.out.println("SOE test done");
+ }
+
+ private static void testSelfRecursion() {
try {
stackOverflowTestSub(0.0, 0.0, 0.0);
}
catch (StackOverflowError soe) {
- System.out.println("caught SOE");
+ System.out.println("caught SOE in testSelfRecursion");
}
- System.out.println("SOE test done");
}
- private static void stackOverflowTestSub(double pad1, double pad2,
- double pad3) {
+ private static void stackOverflowTestSub(double pad1, double pad2, double pad3) {
stackOverflowTestSub(pad1, pad2, pad3);
}
+
+ private static void testMutualRecursion() {
+ try {
+ foo(0.0, 0.0, 0.0);
+ }
+ catch (StackOverflowError soe) {
+ System.out.println("caught SOE in testMutualRecursion");
+ }
+ }
+
+ private static void foo(double pad1, double pad2, double pad3) {
+ bar(pad1, pad2, pad3);
+ }
+
+ private static void bar(double pad1, double pad2, double pad3) {
+ baz(pad1, pad2, pad3);
+ }
+
+ private static void baz(double pad1, double pad2, double pad3) {
+ qux(pad1, pad2, pad3);
+ }
+
+ private static void qux(double pad1, double pad2, double pad3) {
+ foo(pad1, pad2, pad3);
+ }
}