MIPS32: Improve stack alignment; use sdc1/ldc1 where possible.
- Ensure that SP is a multiple of 16 at all times, and
- Use ldc1/sdc1 to load/store FPU registers from/to 8-byte-aligned
locations wherever possible.
Use `export ART_MIPS32_CHECK_ALIGNMENT=true` when building Android
to enable the new runtime alignment checks.
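
For reference, a minimal sketch of the pattern this change introduces
(the register and offset choices below mirror SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
in quick_entrypoints_mips.S):

    CHECK_ALIGNMENT $sp, $t1  # debug-only; traps if SP is not a multiple of 16
    sdc1 $f20, 16($sp)        # store the $f20/$f21 pair in one instruction;
                              # 16($sp) is 8-byte aligned because SP is 16-byte aligned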
Test: Boot & run tests on 32-bit version of QEMU, and CI-20.
Test: test/testrunner/testrunner.py --target --optimizing --32
Test: test-art-host-gtest
Test: test-art-target-gtest
Change-Id: Ia667004573f419fd006098fcfadf5834239cb485
diff --git a/build/art.go b/build/art.go
index 1bcaf51..452b348 100644
--- a/build/art.go
+++ b/build/art.go
@@ -97,6 +97,11 @@
asflags = append(asflags, "-DART_ENABLE_ADDRESS_SANITIZER=1")
}
+ if envTrue(ctx, "ART_MIPS32_CHECK_ALIGNMENT") {
+ // Enable the use of the MIPS32 CHECK_ALIGNMENT macro for debugging purposes.
+ asflags = append(asflags, "-DART_MIPS32_CHECK_ALIGNMENT")
+ }
+
return cflags, asflags
}
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 3ba107a..2f65e8c 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1300,7 +1300,7 @@
// automatically unspilled when the scratch scope object is destroyed).
ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters());
// If V0 spills onto the stack, SP-relative offsets need to be adjusted.
- int stack_offset = ensure_scratch.IsSpilled() ? kMipsWordSize : 0;
+ int stack_offset = ensure_scratch.IsSpilled() ? kStackAlignment : 0;
for (int i = 0; i <= (double_slot ? 1 : 0); i++, stack_offset += kMipsWordSize) {
__ LoadFromOffset(kLoadWord,
Register(ensure_scratch.GetRegister()),
diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc
index 36e932c..b63914f 100644
--- a/compiler/optimizing/emit_swap_mips_test.cc
+++ b/compiler/optimizing/emit_swap_mips_test.cc
@@ -238,14 +238,14 @@
DataType::Type::kInt32,
nullptr);
const char* expected =
- "addiu $sp, $sp, -4\n"
+ "addiu $sp, $sp, -16\n"
"sw $v0, 0($sp)\n"
- "lw $v0, 56($sp)\n"
- "lw $t8, 52($sp)\n"
- "sw $v0, 52($sp)\n"
- "sw $t8, 56($sp)\n"
+ "lw $v0, 68($sp)\n"
+ "lw $t8, 64($sp)\n"
+ "sw $v0, 64($sp)\n"
+ "sw $t8, 68($sp)\n"
"lw $v0, 0($sp)\n"
- "addiu $sp, $sp, 4\n";
+ "addiu $sp, $sp, 16\n";
DriverWrapper(moves_, expected, "TwoStackSlots");
}
@@ -261,18 +261,18 @@
DataType::Type::kInt64,
nullptr);
const char* expected =
- "addiu $sp, $sp, -4\n"
+ "addiu $sp, $sp, -16\n"
"sw $v0, 0($sp)\n"
- "lw $v0, 60($sp)\n"
- "lw $t8, 52($sp)\n"
- "sw $v0, 52($sp)\n"
- "sw $t8, 60($sp)\n"
- "lw $v0, 64($sp)\n"
- "lw $t8, 56($sp)\n"
- "sw $v0, 56($sp)\n"
- "sw $t8, 64($sp)\n"
+ "lw $v0, 72($sp)\n"
+ "lw $t8, 64($sp)\n"
+ "sw $v0, 64($sp)\n"
+ "sw $t8, 72($sp)\n"
+ "lw $v0, 76($sp)\n"
+ "lw $t8, 68($sp)\n"
+ "sw $v0, 68($sp)\n"
+ "sw $t8, 76($sp)\n"
"lw $v0, 0($sp)\n"
- "addiu $sp, $sp, 4\n";
+ "addiu $sp, $sp, 16\n";
DriverWrapper(moves_, expected, "TwoDoubleStackSlots");
}
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index fde55cb..1e82c4b 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -330,10 +330,10 @@
static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
- 0x08, 0x00, 0x80, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
+ 0x08, 0x00, 0x80, 0x14, 0xF0, 0xFF, 0xBD, 0x27,
0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
- 0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
+ 0x09, 0x00, 0x20, 0x00, 0x10, 0x00, 0xBD, 0x27,
};
static constexpr uint8_t expected_asm_kMips_adjust_tail[] = {
0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F,
@@ -342,7 +342,7 @@
};
static constexpr uint8_t expected_cfi_kMips_adjust[] = {
0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
- 0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
+ 0x50, 0x0E, 0x50, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40,
};
// 0x00000000: addiu sp, sp, -64
@@ -356,8 +356,8 @@
// 0x00000010: sdc1 f22, +40(sp)
// 0x00000014: sdc1 f20, +32(sp)
// 0x00000018: bnez a0, 0x0000003c ; +36
-// 0x0000001c: addiu sp, sp, -4
-// 0x00000020: .cfi_def_cfa_offset: 68
+// 0x0000001c: addiu sp, sp, -16
+// 0x00000020: .cfi_def_cfa_offset: 80
// 0x00000020: sw ra, +0(sp)
// 0x00000024: nal
// 0x00000028: lui at, 2
@@ -365,7 +365,7 @@
// 0x00000030: addu at, at, ra
// 0x00000034: lw ra, +0(sp)
// 0x00000038: jr at
-// 0x0000003c: addiu sp, sp, 4
+// 0x0000003c: addiu sp, sp, 16
// 0x00000040: .cfi_def_cfa_offset: 64
// 0x00000040: nop
// ...
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index cbb2c0e..9545ca6 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -1863,20 +1863,20 @@
}
void MipsAssembler::Push(Register rs) {
- IncreaseFrameSize(kMipsWordSize);
+ IncreaseFrameSize(kStackAlignment);
Sw(rs, SP, 0);
}
void MipsAssembler::Pop(Register rd) {
Lw(rd, SP, 0);
- DecreaseFrameSize(kMipsWordSize);
+ DecreaseFrameSize(kStackAlignment);
}
void MipsAssembler::PopAndReturn(Register rd, Register rt) {
bool reordering = SetReorder(false);
Lw(rd, SP, 0);
Jr(rt);
- DecreaseFrameSize(kMipsWordSize); // Single instruction in delay slot.
+ DecreaseFrameSize(kStackAlignment); // Single instruction in delay slot.
SetReorder(reordering);
}
@@ -4588,7 +4588,7 @@
Addu(AT, AT, RA);
Lw(RA, SP, 0);
Jr(AT);
- DecreaseFrameSize(kMipsWordSize);
+ DecreaseFrameSize(kStackAlignment);
break;
case Branch::kLongCondBranch:
// The comment on case 'Branch::kLongUncondBranch' applies here as well.
@@ -4608,7 +4608,7 @@
Addu(AT, AT, RA);
Lw(RA, SP, 0);
Jr(AT);
- DecreaseFrameSize(kMipsWordSize);
+ DecreaseFrameSize(kStackAlignment);
break;
case Branch::kLongCall:
DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 9397be4..b027d3a 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -2803,7 +2803,7 @@
oss <<
".set noreorder\n"
"addiu $t0, $t1, 0x5678\n"
- "addiu $sp, $sp, -4\n"
+ "addiu $sp, $sp, -16\n"
"sw $ra, 0($sp)\n"
"bltzal $zero, .+4\n"
"lui $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
@@ -2811,11 +2811,11 @@
"addu $at, $at, $ra\n"
"lw $ra, 0($sp)\n"
"jalr $zero, $at\n"
- "addiu $sp, $sp, 4\n" <<
+ "addiu $sp, $sp, 16\n" <<
RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
"addiu $t0, $t1, 0x5678\n"
- "addiu $sp, $sp, -4\n"
+ "addiu $sp, $sp, -16\n"
"sw $ra, 0($sp)\n"
"bltzal $zero, .+4\n"
"lui $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
@@ -2823,7 +2823,7 @@
"addu $at, $at, $ra\n"
"lw $ra, 0($sp)\n"
"jalr $zero, $at\n"
- "addiu $sp, $sp, 4\n";
+ "addiu $sp, $sp, 16\n";
std::string expected = oss.str();
DriverStr(expected, "LongBranchReorder");
EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u);
diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S
index 50095ae..fa51059 100644
--- a/runtime/arch/mips/asm_support_mips.S
+++ b/runtime/arch/mips/asm_support_mips.S
@@ -173,4 +173,30 @@
.set pop
.endm
+// This utility macro is used to check whether the address contained in
+// a register is suitably aligned. Default usage is to confirm that the
+// address stored in $sp is a multiple of 16. It can be used for other
+// alignments, and for other base address registers, if needed.
+//
+// Enable this macro by running the shell command:
+//
+// export ART_MIPS32_CHECK_ALIGNMENT=true
+//
+// NOTE: The value of alignment must be a power of 2, and must fit in an
+// unsigned 15-bit integer. The macro won't behave as expected if these
+// conditions aren't met.
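+//
+// Example usage (as in the callers added elsewhere in this change):
+// verify that the address held in $a1 is a multiple of 8, using $t1
+// as the scratch register:
+//
+//   CHECK_ALIGNMENT $a1, $t1, 8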
+//
+.macro CHECK_ALIGNMENT ba=$sp, tmp=$at, alignment=16
+#ifdef ART_MIPS32_CHECK_ALIGNMENT
+ .set push
+ .set noat
+ .set noreorder
+ andi \tmp, \ba, \alignment-1
+ beqz \tmp, .+12 # Skip break instruction if base address register (ba) is aligned
+ nop
+ break
+ .set pop
+#endif
+.endm
+
#endif // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_S_
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 2edd63f..bec5238 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -19,7 +19,7 @@
#include "asm_support.h"
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 96
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 112
#define FRAME_SIZE_SAVE_REFS_ONLY 48
#define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
#define FRAME_SIZE_SAVE_EVERYTHING 256
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index ca1de0a..3f362de 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -42,7 +42,16 @@
// Core registers come first, from the highest down to the lowest.
for (uint32_t core_reg : HighToLowBits(frame_info.CoreSpillMask())) {
- gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
+ // If the $ZERO register shows up in the list of registers to
+ // be saved, it was included only to properly align the floating
+ // point register save locations to addresses which are
+ // multiples of 8. We only store the address of a register in
+ // gprs_ if the register is not the $ZERO register. The $ZERO
+ // register is read-only so there's never a reason to save it
+ // on the stack.
+ if (core_reg != 0u) {
+ gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes());
+ }
++spill_pos;
}
DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask()));
@@ -97,7 +106,9 @@
void MipsContext::DoLongJump() {
uintptr_t gprs[kNumberOfCoreRegisters];
- uint32_t fprs[kNumberOfFRegisters];
+ // Align fprs[] so that art_quick_do_long_jump() can load FPU
+ // registers from it using the ldc1 instruction.
+ uint32_t fprs[kNumberOfFRegisters] __attribute__((aligned(8)));
for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) {
gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : MipsContext::kBadGprBase + i;
}
diff --git a/runtime/arch/mips/jni_entrypoints_mips.S b/runtime/arch/mips/jni_entrypoints_mips.S
index 5c95071..2c0e750 100644
--- a/runtime/arch/mips/jni_entrypoints_mips.S
+++ b/runtime/arch/mips/jni_entrypoints_mips.S
@@ -28,8 +28,9 @@
.cfi_adjust_cfa_offset 48
sw $ra, 32($sp)
.cfi_rel_offset 31, 32
- SDu $f14, $f15, 24, $sp, $t0
- SDu $f12, $f13, 16, $sp, $t0
+ CHECK_ALIGNMENT $sp, $t0
+ sdc1 $f14, 24($sp)
+ sdc1 $f12, 16($sp)
sw $a3, 12($sp)
.cfi_rel_offset 7, 12
sw $a2, 8($sp)
@@ -45,8 +46,9 @@
lw $a1, 4($sp)
lw $a2, 8($sp)
lw $a3, 12($sp)
- LDu $f12, $f13, 16, $sp, $t0
- LDu $f14, $f15, 24, $sp, $t0
+ CHECK_ALIGNMENT $sp, $t0
+ ldc1 $f12, 16($sp)
+ ldc1 $f14, 24($sp)
lw $ra, 32($sp)
beq $v0, $zero, .Lno_native_code_found
addiu $sp, $sp, 48 # restore the stack
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index f6204bd..ee3f17d 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -37,45 +37,49 @@
* Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVES + ARG_SLOT_SIZE bytes on the stack
*/
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
- addiu $sp, $sp, -96
- .cfi_adjust_cfa_offset 96
+ addiu $sp, $sp, -112
+ .cfi_adjust_cfa_offset 112
// Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 96)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 112)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS) size not as expected."
#endif
- sw $ra, 92($sp)
- .cfi_rel_offset 31, 92
- sw $s8, 88($sp)
- .cfi_rel_offset 30, 88
- sw $gp, 84($sp)
- .cfi_rel_offset 28, 84
- sw $s7, 80($sp)
- .cfi_rel_offset 23, 80
- sw $s6, 76($sp)
- .cfi_rel_offset 22, 76
- sw $s5, 72($sp)
- .cfi_rel_offset 21, 72
- sw $s4, 68($sp)
- .cfi_rel_offset 20, 68
- sw $s3, 64($sp)
- .cfi_rel_offset 19, 64
- sw $s2, 60($sp)
- .cfi_rel_offset 18, 60
- sw $s1, 56($sp)
- .cfi_rel_offset 17, 56
- sw $s0, 52($sp)
- .cfi_rel_offset 16, 52
+ sw $ra, 108($sp)
+ .cfi_rel_offset 31, 108
+ sw $s8, 104($sp)
+ .cfi_rel_offset 30, 104
+ sw $gp, 100($sp)
+ .cfi_rel_offset 28, 100
+ sw $s7, 96($sp)
+ .cfi_rel_offset 23, 96
+ sw $s6, 92($sp)
+ .cfi_rel_offset 22, 92
+ sw $s5, 88($sp)
+ .cfi_rel_offset 21, 88
+ sw $s4, 84($sp)
+ .cfi_rel_offset 20, 84
+ sw $s3, 80($sp)
+ .cfi_rel_offset 19, 80
+ sw $s2, 76($sp)
+ .cfi_rel_offset 18, 76
+ sw $s1, 72($sp)
+ .cfi_rel_offset 17, 72
+ sw $s0, 68($sp)
+ .cfi_rel_offset 16, 68
+ // 4-byte placeholder for register $zero, used to align the
+ // following double precision floating point registers.
- SDu $f30, $f31, 44, $sp, $t1
- SDu $f28, $f29, 36, $sp, $t1
- SDu $f26, $f27, 28, $sp, $t1
- SDu $f24, $f25, 20, $sp, $t1
- SDu $f22, $f23, 12, $sp, $t1
- SDu $f20, $f21, 4, $sp, $t1
+ CHECK_ALIGNMENT $sp, $t1
+ sdc1 $f30, 56($sp)
+ sdc1 $f28, 48($sp)
+ sdc1 $f26, 40($sp)
+ sdc1 $f24, 32($sp)
+ sdc1 $f22, 24($sp)
+ sdc1 $f20, 16($sp)
- # 1 word for holding Method*
+ # 1 word for holding Method* plus 12 bytes of padding to keep SP
+ # a multiple of 16.
lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
lw $t0, 0($t0)
@@ -216,12 +220,13 @@
.cfi_rel_offset 6, 60
sw $a1, 56($sp)
.cfi_rel_offset 5, 56
- SDu $f18, $f19, 48, $sp, $t8
- SDu $f16, $f17, 40, $sp, $t8
- SDu $f14, $f15, 32, $sp, $t8
- SDu $f12, $f13, 24, $sp, $t8
- SDu $f10, $f11, 16, $sp, $t8
- SDu $f8, $f9, 8, $sp, $t8
+ CHECK_ALIGNMENT $sp, $t8
+ sdc1 $f18, 48($sp)
+ sdc1 $f16, 40($sp)
+ sdc1 $f14, 32($sp)
+ sdc1 $f12, 24($sp)
+ sdc1 $f10, 16($sp)
+ sdc1 $f8, 8($sp)
# bottom will hold Method*
.endm
@@ -320,12 +325,13 @@
lw $a2, 60($sp)
.cfi_restore 6
RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1
- LDu $f18, $f19, 48, $sp, $t8
- LDu $f16, $f17, 40, $sp, $t8
- LDu $f14, $f15, 32, $sp, $t8
- LDu $f12, $f13, 24, $sp, $t8
- LDu $f10, $f11, 16, $sp, $t8
- LDu $f8, $f9, 8, $sp, $t8
+ CHECK_ALIGNMENT $sp, $t8
+ ldc1 $f18, 48($sp)
+ ldc1 $f16, 40($sp)
+ ldc1 $f14, 32($sp)
+ ldc1 $f12, 24($sp)
+ ldc1 $f10, 16($sp)
+ ldc1 $f8, 8($sp)
addiu $sp, $sp, 112 # Pop frame.
.cfi_adjust_cfa_offset -112
.endm
@@ -412,22 +418,23 @@
1:
.cpload $ra
- SDu $f30, $f31, 136, $sp, $t1
- SDu $f28, $f29, 128, $sp, $t1
- SDu $f26, $f27, 120, $sp, $t1
- SDu $f24, $f25, 112, $sp, $t1
- SDu $f22, $f23, 104, $sp, $t1
- SDu $f20, $f21, 96, $sp, $t1
- SDu $f18, $f19, 88, $sp, $t1
- SDu $f16, $f17, 80, $sp, $t1
- SDu $f14, $f15, 72, $sp, $t1
- SDu $f12, $f13, 64, $sp, $t1
- SDu $f10, $f11, 56, $sp, $t1
- SDu $f8, $f9, 48, $sp, $t1
- SDu $f6, $f7, 40, $sp, $t1
- SDu $f4, $f5, 32, $sp, $t1
- SDu $f2, $f3, 24, $sp, $t1
- SDu $f0, $f1, 16, $sp, $t1
+ CHECK_ALIGNMENT $sp, $t1
+ sdc1 $f30, 136($sp)
+ sdc1 $f28, 128($sp)
+ sdc1 $f26, 120($sp)
+ sdc1 $f24, 112($sp)
+ sdc1 $f22, 104($sp)
+ sdc1 $f20, 96($sp)
+ sdc1 $f18, 88($sp)
+ sdc1 $f16, 80($sp)
+ sdc1 $f14, 72($sp)
+ sdc1 $f12, 64($sp)
+ sdc1 $f10, 56($sp)
+ sdc1 $f8, 48($sp)
+ sdc1 $f6, 40($sp)
+ sdc1 $f4, 32($sp)
+ sdc1 $f2, 24($sp)
+ sdc1 $f0, 16($sp)
# 3 words padding and 1 word for holding Method*
@@ -460,22 +467,23 @@
addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack
.cfi_adjust_cfa_offset -ARG_SLOT_SIZE
- LDu $f30, $f31, 136, $sp, $t1
- LDu $f28, $f29, 128, $sp, $t1
- LDu $f26, $f27, 120, $sp, $t1
- LDu $f24, $f25, 112, $sp, $t1
- LDu $f22, $f23, 104, $sp, $t1
- LDu $f20, $f21, 96, $sp, $t1
- LDu $f18, $f19, 88, $sp, $t1
- LDu $f16, $f17, 80, $sp, $t1
- LDu $f14, $f15, 72, $sp, $t1
- LDu $f12, $f13, 64, $sp, $t1
- LDu $f10, $f11, 56, $sp, $t1
- LDu $f8, $f9, 48, $sp, $t1
- LDu $f6, $f7, 40, $sp, $t1
- LDu $f4, $f5, 32, $sp, $t1
- LDu $f2, $f3, 24, $sp, $t1
- LDu $f0, $f1, 16, $sp, $t1
+ CHECK_ALIGNMENT $sp, $t1
+ ldc1 $f30, 136($sp)
+ ldc1 $f28, 128($sp)
+ ldc1 $f26, 120($sp)
+ ldc1 $f24, 112($sp)
+ ldc1 $f22, 104($sp)
+ ldc1 $f20, 96($sp)
+ ldc1 $f18, 88($sp)
+ ldc1 $f16, 80($sp)
+ ldc1 $f14, 72($sp)
+ ldc1 $f12, 64($sp)
+ ldc1 $f10, 56($sp)
+ ldc1 $f8, 48($sp)
+ ldc1 $f6, 40($sp)
+ ldc1 $f4, 32($sp)
+ ldc1 $f2, 24($sp)
+ ldc1 $f0, 16($sp)
lw $ra, 252($sp)
.cfi_restore 31
@@ -665,7 +673,8 @@
b .Losr_exit
sw $v1, 4($a2) # store v0/v1 into result
.Losr_fp_result:
- SDu $f0, $f1, 0, $a2, $t0 # store f0/f1 into result
+ CHECK_ALIGNMENT $a2, $t0, 8
+ sdc1 $f0, 0($a2) # store f0/f1 into result
.Losr_exit:
lw $ra, 44($sp)
.cfi_restore 31
@@ -701,26 +710,28 @@
END art_quick_osr_stub
/*
- * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_
+ * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_.
+ * Note that fprs_ is expected to be an address that is a multiple of 8.
* FIXME: just guessing about the shape of the jmpbuf. Where will pc be?
*/
ENTRY art_quick_do_long_jump
- LDu $f0, $f1, 0*8, $a1, $t1
- LDu $f2, $f3, 1*8, $a1, $t1
- LDu $f4, $f5, 2*8, $a1, $t1
- LDu $f6, $f7, 3*8, $a1, $t1
- LDu $f8, $f9, 4*8, $a1, $t1
- LDu $f10, $f11, 5*8, $a1, $t1
- LDu $f12, $f13, 6*8, $a1, $t1
- LDu $f14, $f15, 7*8, $a1, $t1
- LDu $f16, $f17, 8*8, $a1, $t1
- LDu $f18, $f19, 9*8, $a1, $t1
- LDu $f20, $f21, 10*8, $a1, $t1
- LDu $f22, $f23, 11*8, $a1, $t1
- LDu $f24, $f25, 12*8, $a1, $t1
- LDu $f26, $f27, 13*8, $a1, $t1
- LDu $f28, $f29, 14*8, $a1, $t1
- LDu $f30, $f31, 15*8, $a1, $t1
+ CHECK_ALIGNMENT $a1, $t1, 8
+ ldc1 $f0, 0*8($a1)
+ ldc1 $f2, 1*8($a1)
+ ldc1 $f4, 2*8($a1)
+ ldc1 $f6, 3*8($a1)
+ ldc1 $f8, 4*8($a1)
+ ldc1 $f10, 5*8($a1)
+ ldc1 $f12, 6*8($a1)
+ ldc1 $f14, 7*8($a1)
+ ldc1 $f16, 8*8($a1)
+ ldc1 $f18, 9*8($a1)
+ ldc1 $f20, 10*8($a1)
+ ldc1 $f22, 11*8($a1)
+ ldc1 $f24, 12*8($a1)
+ ldc1 $f26, 13*8($a1)
+ ldc1 $f28, 14*8($a1)
+ ldc1 $f30, 15*8($a1)
.set push
.set nomacro
@@ -1067,7 +1078,8 @@
jalr $zero, $ra
sw $v1, 4($t0) # store the other half of the result
5:
- SDu $f0, $f1, 0, $t0, $t1 # store floating point result
+ CHECK_ALIGNMENT $t0, $t1, 8
+ sdc1 $f0, 0($t0) # store floating point result
jalr $zero, $ra
nop
@@ -1225,7 +1237,8 @@
jalr $zero, $ra
sw $v1, 4($t0) # store the other half of the result
6:
- SDu $f0, $f1, 0, $t0, $t1 # store floating point result
+ CHECK_ALIGNMENT $t0, $t1, 8
+ sdc1 $f0, 0($t0) # store floating point result
jalr $zero, $ra
nop
@@ -2252,7 +2265,7 @@
move $a0, rSELF # pass Thread::Current
move $a2, $v0 # pass result
move $a3, $v1
- addiu $sp, $sp, -24 # reserve arg slots
+ addiu $sp, $sp, -32 # reserve arg slots
la $t9, artQuickGenericJniEndTrampoline
jalr $t9
s.d $f0, 16($sp) # pass result_f
@@ -3243,7 +3256,8 @@
lhu $v0, 16($sp) # Move char from JValue result to return value register.
.Lstore_double_result:
.Lstore_float_result:
- LDu $f0, $f1, 16, $sp, $t0 # Move double/float from JValue result to return value register.
+ CHECK_ALIGNMENT $sp, $t0
+ ldc1 $f0, 16($sp) # Move double/float from JValue result to return value register.
b .Lcleanup_and_return
nop
.Lstore_long_result:
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index 45a21ab..8c86252 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -35,8 +35,24 @@
static constexpr uint32_t kMipsCalleeSaveArgSpills =
(1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) | (1 << art::mips::T0) |
(1 << art::mips::T1);
+// We want to save all floating point register pairs at addresses
+// which are multiples of 8 so that we can eliminate use of the
+// SDu/LDu macros by using sdc1/ldc1 to store/load floating point
+// register values with a single instruction. Because integer
+// registers are stored at the top of the frame, to achieve having
+// the floating point register pairs aligned on multiples of 8 the
+// number of integer registers saved must be even. Previously, the
+// only case in which we saved floating point registers beneath an
+// odd number of integer registers was when "type" is
+// CalleeSaveType::kSaveAllCalleeSaves. (There are other cases in
+// which an odd number of integer registers are saved but those
+// cases don't save any floating point registers. If no floating
+// point registers are saved we don't care if the number of integer
+// registers saved is odd or even). To save an even number of
+// integer registers in this particular case we add the ZERO
+// register to the list of registers which get saved.
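+// As a sketch, with the $ZERO placeholder the resulting 112-byte
+// kSaveAllCalleeSaves frame (see SETUP_SAVE_ALL_CALLEE_SAVES_FRAME in
+// quick_entrypoints_mips.S) is laid out as:
+//   108..68: $ra, $s8, $gp, $s7..$s0 (11 words)
+//   64:      placeholder word for $zero (alignment only, never restored)
+//   56..16:  $f30/$f31 .. $f20/$f21 (six 8-byte-aligned register pairs)
+//   0..15:   Method* plus 12 bytes of padding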
static constexpr uint32_t kMipsCalleeSaveAllSpills =
- (1 << art::mips::S0) | (1 << art::mips::S1);
+ (1 << art::mips::ZERO) | (1 << art::mips::S0) | (1 << art::mips::S1);
static constexpr uint32_t kMipsCalleeSaveEverythingSpills =
(1 << art::mips::AT) | (1 << art::mips::V0) | (1 << art::mips::V1) |
(1 << art::mips::A0) | (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) |