/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ |
| 18 | #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ |
| 19 | |
| 20 | #include "scheduler.h" |
| 21 | |
Vladimir Marko | 0a51605 | 2019-10-14 13:00:44 +0000 | [diff] [blame] | 22 | namespace art { |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 23 | namespace arm64 { |
| 24 | |
// Latency estimates consumed by the ARM64 scheduling latency visitor below.
// Values are abstract cost units for the scheduler's heuristics, not exact
// cycle counts of any particular core — presumably tuned empirically; see
// upstream commit history for provenance.
static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

// Calls are modeled in two parts: the internal setup/overhead cost and the
// latency of the call result itself.
static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;


static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

// SIMD (Advanced SIMD / NEON) operation latencies, used for vectorized code.
static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 57 | |
| 58 | class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { |
| 59 | public: |
| 60 | // Default visitor for instructions not handled specifically below. |
Yi Kong | 3940254 | 2019-03-24 02:47:16 -0700 | [diff] [blame] | 61 | void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 62 | last_visited_latency_ = kArm64IntegerOpLatency; |
| 63 | } |
| 64 | |
| 65 | // We add a second unused parameter to be able to use this macro like the others |
| 66 | // defined in `nodes.h`. |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 67 | #define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \ |
| 68 | M(ArrayGet , unused) \ |
| 69 | M(ArrayLength , unused) \ |
| 70 | M(ArraySet , unused) \ |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 71 | M(BoundsCheck , unused) \ |
| 72 | M(Div , unused) \ |
| 73 | M(InstanceFieldGet , unused) \ |
| 74 | M(InstanceOf , unused) \ |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 75 | M(LoadString , unused) \ |
| 76 | M(Mul , unused) \ |
| 77 | M(NewArray , unused) \ |
| 78 | M(NewInstance , unused) \ |
| 79 | M(Rem , unused) \ |
| 80 | M(StaticFieldGet , unused) \ |
| 81 | M(SuspendCheck , unused) \ |
| 82 | M(TypeConversion , unused) \ |
| 83 | M(VecReplicateScalar , unused) \ |
Aart Bik | 0148de4 | 2017-09-05 09:25:01 -0700 | [diff] [blame] | 84 | M(VecExtractScalar , unused) \ |
| 85 | M(VecReduce , unused) \ |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 86 | M(VecCnv , unused) \ |
| 87 | M(VecNeg , unused) \ |
| 88 | M(VecAbs , unused) \ |
| 89 | M(VecNot , unused) \ |
| 90 | M(VecAdd , unused) \ |
| 91 | M(VecHalvingAdd , unused) \ |
| 92 | M(VecSub , unused) \ |
| 93 | M(VecMul , unused) \ |
| 94 | M(VecDiv , unused) \ |
| 95 | M(VecMin , unused) \ |
| 96 | M(VecMax , unused) \ |
| 97 | M(VecAnd , unused) \ |
| 98 | M(VecAndNot , unused) \ |
| 99 | M(VecOr , unused) \ |
| 100 | M(VecXor , unused) \ |
| 101 | M(VecShl , unused) \ |
| 102 | M(VecShr , unused) \ |
| 103 | M(VecUShr , unused) \ |
Aart Bik | 0148de4 | 2017-09-05 09:25:01 -0700 | [diff] [blame] | 104 | M(VecSetScalars , unused) \ |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 105 | M(VecMultiplyAccumulate, unused) \ |
| 106 | M(VecLoad , unused) \ |
| 107 | M(VecStore , unused) |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 108 | |
Vladimir Marko | e394622 | 2018-05-04 14:18:47 +0100 | [diff] [blame] | 109 | #define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \ |
| 110 | M(BinaryOperation , unused) \ |
| 111 | M(Invoke , unused) |
| 112 | |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 113 | #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ |
| 114 | M(BitwiseNegatedRight, unused) \ |
| 115 | M(MultiplyAccumulate, unused) \ |
Anton Kirilov | 74234da | 2017-01-13 14:42:47 +0000 | [diff] [blame] | 116 | M(IntermediateAddress, unused) \ |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 117 | M(IntermediateAddressIndex, unused) \ |
Anton Kirilov | 74234da | 2017-01-13 14:42:47 +0000 | [diff] [blame] | 118 | M(DataProcWithShifterOp, unused) |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 119 | |
| 120 | #define DECLARE_VISIT_INSTRUCTION(type, unused) \ |
Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 121 | void Visit##type(H##type* instruction) override; |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 122 | |
| 123 | FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
Vladimir Marko | e394622 | 2018-05-04 14:18:47 +0100 | [diff] [blame] | 124 | FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 125 | FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
| 126 | FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) |
| 127 | |
| 128 | #undef DECLARE_VISIT_INSTRUCTION |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 129 | |
| 130 | private: |
| 131 | void HandleSimpleArithmeticSIMD(HVecOperation *instr); |
| 132 | void HandleVecAddress(HVecMemoryOperation* instruction, size_t size); |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 133 | }; |
| 134 | |
| 135 | class HSchedulerARM64 : public HScheduler { |
| 136 | public: |
Vladimir Marko | ced0483 | 2018-07-26 14:42:17 +0100 | [diff] [blame] | 137 | explicit HSchedulerARM64(SchedulingNodeSelector* selector) |
| 138 | : HScheduler(&arm64_latency_visitor_, selector) {} |
Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 139 | ~HSchedulerARM64() override {} |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 140 | |
Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 141 | bool IsSchedulable(const HInstruction* instruction) const override { |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 142 | #define CASE_INSTRUCTION_KIND(type, unused) case \ |
| 143 | HInstruction::InstructionKind::k##type: |
| 144 | switch (instruction->GetKind()) { |
| 145 | FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) |
| 146 | return true; |
| 147 | FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND) |
| 148 | return true; |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 149 | FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND) |
| 150 | return true; |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 151 | default: |
| 152 | return HScheduler::IsSchedulable(instruction); |
| 153 | } |
| 154 | #undef CASE_INSTRUCTION_KIND |
| 155 | } |
| 156 | |
Artem Serov | 89ff8b2 | 2017-11-20 11:51:05 +0000 | [diff] [blame] | 157 | // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized |
| 158 | // loop boundaries. This is a workaround for the lack of notion of SIMD register in the compiler; |
| 159 | // around a call we have to save/restore all live SIMD&FP registers (only lower 64 bits of |
| 160 | // SIMD&FP registers are callee saved) so don't reorder such vector instructions. |
| 161 | // |
| 162 | // TODO: remove this when a proper support of SIMD registers is introduced to the compiler. |
Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 163 | bool IsSchedulingBarrier(const HInstruction* instr) const override { |
Artem Serov | 89ff8b2 | 2017-11-20 11:51:05 +0000 | [diff] [blame] | 164 | return HScheduler::IsSchedulingBarrier(instr) || |
| 165 | instr->IsVecReduce() || |
| 166 | instr->IsVecExtractScalar() || |
| 167 | instr->IsVecSetScalars() || |
| 168 | instr->IsVecReplicateScalar(); |
| 169 | } |
| 170 | |
Alexandre Rames | 22aa54b | 2016-10-18 09:32:29 +0100 | [diff] [blame] | 171 | private: |
| 172 | SchedulingLatencyVisitorARM64 arm64_latency_visitor_; |
| 173 | DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64); |
| 174 | }; |
| 175 | |
| 176 | } // namespace arm64 |
| 177 | } // namespace art |
| 178 | |
| 179 | #endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ |