MIPS32: Pass more arguments in registers.
Specifically, use A0-A3, T0-T1 for non-floats and F8-F19 for floats.
Test: booted MIPS32R2 in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R2) on CI20
Test: test-art-target-gtest (MIPS32R2) on CI20
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R6) in QEMU
Test: test-art-target-gtest (MIPS32R6) in QEMU
Test: test-art-host-gtest
Change-Id: Ib8b0310a109d9f3d70119c1e605e54b013e60728
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index fe82878..bf1d4ea 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -134,13 +134,23 @@
// | Method* | ---
// | RA |
// | ... | callee saves
+ // | T1 | arg5
+ // | T0 | arg4
// | A3 | arg3
// | A2 | arg2
// | A1 | arg1
+ // | F19 |
+ // | F18 | f_arg5
+ // | F17 |
+ // | F16 | f_arg4
// | F15 |
- // | F14 | f_arg1
+ // | F14 | f_arg3
// | F13 |
- // | F12 | f_arg0
+ // | F12 | f_arg2
+ // | F11 |
+ // | F10 | f_arg1
+ // | F9 |
+ // | F8 | f_arg0
// | | padding
// | A0/Method* | <- sp
static constexpr bool kSplitPairAcrossRegisterAndStack = false;
@@ -148,14 +158,14 @@
static constexpr bool kQuickSoftFloatAbi = false;
static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
static constexpr bool kQuickSkipOddFpRegisters = true;
- static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs.
- static constexpr size_t kNumQuickFprArgs = 4; // 2 arguments passed in FPRs. Floats can be passed
- // only in even numbered registers and each double
- // occupies two registers.
+ static constexpr size_t kNumQuickGprArgs = 5; // 5 arguments passed in GPRs.
+ static constexpr size_t kNumQuickFprArgs = 12; // 6 arguments passed in FPRs. Floats can be
+ // passed only in even numbered registers and each
+ // double occupies two registers.
static constexpr bool kGprFprLockstep = false;
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg.
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 32; // Offset of first GPR arg.
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 76; // Offset of return address.
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 8; // Offset of first FPR arg.
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 56; // Offset of first GPR arg.
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 108; // Offset of return address.
static size_t GprIndexToGprOffset(uint32_t gpr_index) {
return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
}
@@ -187,7 +197,7 @@
// | F12 | f_arg0
// | | padding
// | A0/Method* | <- sp
- // NOTE: for Mip64, when A0 is skipped, F0 is also skipped.
+ // NOTE: for MIPS64, when A0 is skipped, F12 is also skipped.
static constexpr bool kSplitPairAcrossRegisterAndStack = false;
static constexpr bool kAlignPairRegister = false;
static constexpr bool kQuickSoftFloatAbi = false;
@@ -197,7 +207,7 @@
static constexpr size_t kNumQuickFprArgs = 7; // 7 arguments passed in FPRs.
static constexpr bool kGprFprLockstep = true;
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 24; // Offset of first FPR arg (F1).
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 24; // Offset of first FPR arg (F13).
static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg (A1).
static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 200; // Offset of return address.
static size_t GprIndexToGprOffset(uint32_t gpr_index) {
@@ -501,10 +511,16 @@
case Primitive::kPrimDouble:
case Primitive::kPrimLong:
if (kQuickSoftFloatAbi || (cur_type_ == Primitive::kPrimLong)) {
- if (cur_type_ == Primitive::kPrimLong && kAlignPairRegister && gpr_index_ == 0) {
- // Currently, this is only for ARM and MIPS, where the first available parameter
- // register is R1 (on ARM) or A1 (on MIPS). So we skip it, and use R2 (on ARM) or
- // A2 (on MIPS) instead.
+ if (cur_type_ == Primitive::kPrimLong &&
+#if defined(__mips__) && !defined(__LP64__)
+ (gpr_index_ == 0 || gpr_index_ == 2) &&
+#else
+ gpr_index_ == 0 &&
+#endif
+ kAlignPairRegister) {
+ // Currently, this is only for ARM and MIPS, where we align long parameters with
+ // even-numbered registers by skipping R1 (on ARM) or A1/A3 (on MIPS) and using
+ // R2 (on ARM) or A2/T0 (on MIPS) instead.
IncGprIndex();
}
is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) &&
@@ -2086,6 +2102,41 @@
// Note that the native code pointer will be automatically set by artFindNativeMethod().
}
+#if defined(__mips__) && !defined(__LP64__)
+ // On MIPS32 if the first two arguments are floating-point, we need to know their types
+ // so that art_quick_generic_jni_trampoline can correctly extract them from the stack
+ // and load into floating-point registers.
+ // Possible arrangements of the first two floating-point arguments on the stack (32-bit FPU
+ // view):
+ // (1)
+ // | DOUBLE | DOUBLE | other args, if any
+ // | F12 | F13 | F14 | F15 |
+ // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16
+ // (2)
+ // | DOUBLE | FLOAT | (PAD) | other args, if any
+ // | F12 | F13 | F14 | |
+ // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16
+ // (3)
+ // | FLOAT | (PAD) | DOUBLE | other args, if any
+ // | F12 | | F14 | F15 |
+ // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16
+ // (4)
+ // | FLOAT | FLOAT | other args, if any
+ // | F12 | F14 |
+ // | SP+0 | SP+4 | SP+8
+ // As you can see, only the last case (4) is special. In all others we can just
+ // load F12/F13 and F14/F15 in the same manner.
+ // Set bit 0 of the native code address to 1 in this case (valid code addresses
+ // are always a multiple of 4 on MIPS32, so we have 2 spare bits available).
+ if (nativeCode != nullptr &&
+ shorty != nullptr &&
+ shorty_len >= 3 &&
+ shorty[1] == 'F' &&
+ shorty[2] == 'F') {
+ nativeCode = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(nativeCode) | 1);
+ }
+#endif
+
// Return native code addr(lo) and bottom of alloca address(hi).
return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(visitor.GetBottomOfUsedArea()),
reinterpret_cast<uintptr_t>(nativeCode));