/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ |
| 18 | #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ |
| 19 | |
| 20 | #include "code_generator_arm_vixl.h" |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 21 | #include "scheduler.h" |
| 22 | |
Vladimir Marko | 0a51605 | 2019-10-14 13:00:44 +0000 | [diff] [blame] | 23 | namespace art { |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 24 | namespace arm { |
Roland Levillain | 9983e30 | 2017-07-14 14:34:22 +0100 | [diff] [blame] | 25 | // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 26 | typedef CodeGeneratorARMVIXL CodeGeneratorARMType; |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 27 | |
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using a differential evolution approach on various benchmarks.
// NOTE(review): the values are tuned scheduling weights; this header does not state
// a unit for them, so do not assume they are exact cycle counts.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
| 51 | |
| 52 | class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { |
| 53 | public: |
| 54 | explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) |
| 55 | : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} |
| 56 | |
| 57 | // Default visitor for instructions not handled specifically below. |
Yi Kong | 3940254 | 2019-03-24 02:47:16 -0700 | [diff] [blame] | 58 | void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 59 | last_visited_latency_ = kArmIntegerOpLatency; |
| 60 | } |
| 61 | |
| 62 | // We add a second unused parameter to be able to use this macro like the others |
| 63 | // defined in `nodes.h`. |
Alex Light | 3a73ffb | 2021-01-25 14:11:05 +0000 | [diff] [blame] | 64 | #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ |
| 65 | M(ArrayGet, unused) \ |
| 66 | M(ArrayLength, unused) \ |
| 67 | M(ArraySet, unused) \ |
| 68 | M(Add, unused) \ |
| 69 | M(Sub, unused) \ |
| 70 | M(And, unused) \ |
| 71 | M(Or, unused) \ |
| 72 | M(Ror, unused) \ |
| 73 | M(Xor, unused) \ |
| 74 | M(Shl, unused) \ |
| 75 | M(Shr, unused) \ |
| 76 | M(UShr, unused) \ |
| 77 | M(Mul, unused) \ |
| 78 | M(Div, unused) \ |
| 79 | M(Condition, unused) \ |
| 80 | M(Compare, unused) \ |
| 81 | M(BoundsCheck, unused) \ |
| 82 | M(PredicatedInstanceFieldGet, unused) \ |
| 83 | M(InstanceFieldGet, unused) \ |
| 84 | M(InstanceFieldSet, unused) \ |
| 85 | M(InstanceOf, unused) \ |
| 86 | M(Invoke, unused) \ |
| 87 | M(LoadString, unused) \ |
| 88 | M(NewArray, unused) \ |
| 89 | M(NewInstance, unused) \ |
| 90 | M(Rem, unused) \ |
| 91 | M(StaticFieldGet, unused) \ |
| 92 | M(StaticFieldSet, unused) \ |
| 93 | M(SuspendCheck, unused) \ |
| 94 | M(TypeConversion, unused) |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 95 | |
| 96 | #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ |
| 97 | M(BitwiseNegatedRight, unused) \ |
| 98 | M(MultiplyAccumulate, unused) \ |
| 99 | M(IntermediateAddress, unused) \ |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 100 | M(IntermediateAddressIndex, unused) \ |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 101 | M(DataProcWithShifterOp, unused) |
| 102 | |
| 103 | #define DECLARE_VISIT_INSTRUCTION(type, unused) \ |
Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 104 | void Visit##type(H##type* instruction) override; |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 105 | |
| 106 | FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
| 107 | FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
| 108 | FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) |
| 109 | |
| 110 | #undef DECLARE_VISIT_INSTRUCTION |
| 111 | |
| 112 | private: |
xueliang.zhong | bf9e21a | 2017-06-15 11:01:11 +0100 | [diff] [blame] | 113 | bool CanGenerateTest(HCondition* cond); |
| 114 | void HandleGenerateConditionWithZero(IfCondition cond); |
| 115 | void HandleGenerateLongTestConstant(HCondition* cond); |
| 116 | void HandleGenerateLongTest(HCondition* cond); |
| 117 | void HandleGenerateLongComparesAndJumps(); |
| 118 | void HandleGenerateTest(HCondition* cond); |
| 119 | void HandleGenerateConditionGeneric(HCondition* cond); |
| 120 | void HandleGenerateEqualLong(HCondition* cond); |
| 121 | void HandleGenerateConditionLong(HCondition* cond); |
| 122 | void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond); |
| 123 | void HandleCondition(HCondition* instr); |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 124 | void HandleBinaryOperationLantencies(HBinaryOperation* instr); |
| 125 | void HandleBitwiseOperationLantencies(HBinaryOperation* instr); |
| 126 | void HandleShiftLatencies(HBinaryOperation* instr); |
| 127 | void HandleDivRemConstantIntegralLatencies(int32_t imm); |
| 128 | void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); |
| 129 | void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); |
| 130 | void HandleGenerateDataProcInstruction(bool internal_latency = false); |
| 131 | void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); |
| 132 | void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); |
| 133 | |
| 134 | // The latency setting for each HInstruction depends on how CodeGenerator may generate code, |
| 135 | // latency visitors may query CodeGenerator for such information for accurate latency settings. |
| 136 | CodeGeneratorARMType* codegen_; |
| 137 | }; |
| 138 | |
| 139 | class HSchedulerARM : public HScheduler { |
| 140 | public: |
Vladimir Marko | ced0483 | 2018-07-26 14:42:17 +0100 | [diff] [blame] | 141 | HSchedulerARM(SchedulingNodeSelector* selector, |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 142 | SchedulingLatencyVisitorARM* arm_latency_visitor) |
Vladimir Marko | ced0483 | 2018-07-26 14:42:17 +0100 | [diff] [blame] | 143 | : HScheduler(arm_latency_visitor, selector) {} |
Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 144 | ~HSchedulerARM() override {} |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 145 | |
Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 146 | bool IsSchedulable(const HInstruction* instruction) const override { |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 147 | #define CASE_INSTRUCTION_KIND(type, unused) case \ |
| 148 | HInstruction::InstructionKind::k##type: |
| 149 | switch (instruction->GetKind()) { |
| 150 | FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) |
| 151 | return true; |
| 152 | FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) |
| 153 | return true; |
| 154 | default: |
| 155 | return HScheduler::IsSchedulable(instruction); |
| 156 | } |
| 157 | #undef CASE_INSTRUCTION_KIND |
| 158 | } |
| 159 | |
| 160 | private: |
| 161 | DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); |
| 162 | }; |
| 163 | |
| 164 | } // namespace arm |
| 165 | } // namespace art |
| 166 | |
| 167 | #endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ |