blob: d11222d9f4d6d1eeee19171c56cf47243fcd3987 [file] [log] [blame]
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
18#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
19
20#include "code_generator_arm_vixl.h"
xueliang.zhongf7caf682017-03-01 16:07:02 +000021#include "scheduler.h"
22
Vladimir Marko0a516052019-10-14 13:00:44 +000023namespace art {
xueliang.zhongf7caf682017-03-01 16:07:02 +000024namespace arm {
Roland Levillain9983e302017-07-14 14:34:22 +010025// TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
xueliang.zhongf7caf682017-03-01 16:07:02 +000026typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
xueliang.zhongf7caf682017-03-01 16:07:02 +000027
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using differential evolution approach on various benchmarks.
// NOTE(review): these are abstract scheduling costs consumed by the latency
// visitor below, presumably not literal cycle counts of any specific core —
// confirm against the tuning experiments before treating them as hardware data.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
// Latency of the internal work of a call (beyond the branch itself).
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
51
// AArch32 latency visitor: assigns a scheduling latency to each HInstruction
// by writing the `last_visited_*` fields inherited from
// SchedulingLatencyVisitor. One Visit method is declared (via macros) per
// scheduled instruction kind; their definitions live outside this header.
class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
 public:
  // `codegen` must be (or wrap) a CodeGeneratorARMType; the down_cast is
  // unchecked here, so callers are responsible for passing the right kind.
  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
      : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}

  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArmIntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
  M(ArrayGet, unused)                         \
  M(ArrayLength, unused)                      \
  M(ArraySet, unused)                         \
  M(Add, unused)                              \
  M(Sub, unused)                              \
  M(And, unused)                              \
  M(Or, unused)                               \
  M(Ror, unused)                              \
  M(Xor, unused)                              \
  M(Shl, unused)                              \
  M(Shr, unused)                              \
  M(UShr, unused)                             \
  M(Mul, unused)                              \
  M(Div, unused)                              \
  M(Condition, unused)                        \
  M(Compare, unused)                          \
  M(BoundsCheck, unused)                      \
  M(PredicatedInstanceFieldGet, unused)       \
  M(InstanceFieldGet, unused)                 \
  M(InstanceFieldSet, unused)                 \
  M(InstanceOf, unused)                       \
  M(Invoke, unused)                           \
  M(LoadString, unused)                       \
  M(NewArray, unused)                         \
  M(NewInstance, unused)                      \
  M(Rem, unused)                              \
  M(StaticFieldGet, unused)                   \
  M(StaticFieldSet, unused)                   \
  M(SuspendCheck, unused)                     \
  M(TypeConversion, unused)

// Instructions shared between ARM and ARM64 back ends (see
// FOR_EACH_SCHEDULED_SHARED_INSTRUCTION users below).
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

// Declares `void VisitFoo(HFoo*) override;` for each instruction kind above.
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) override;

  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  // Helpers mirroring the code generator's emission paths; presumably they
  // accumulate the latency the corresponding code emission would incur rather
  // than emitting code — definitions are outside this header, confirm there.
  // NOTE(review): "Lantencies" below is a typo for "Latencies", but the
  // definitions live in the implementation file, so renaming must be done
  // in both files together; left as-is here.
  bool CanGenerateTest(HCondition* cond);
  void HandleGenerateConditionWithZero(IfCondition cond);
  void HandleGenerateLongTestConstant(HCondition* cond);
  void HandleGenerateLongTest(HCondition* cond);
  void HandleGenerateLongComparesAndJumps();
  void HandleGenerateTest(HCondition* cond);
  void HandleGenerateConditionGeneric(HCondition* cond);
  void HandleGenerateEqualLong(HCondition* cond);
  void HandleGenerateConditionLong(HCondition* cond);
  void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
  void HandleCondition(HCondition* instr);
  void HandleBinaryOperationLantencies(HBinaryOperation* instr);
  void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
  void HandleShiftLatencies(HBinaryOperation* instr);
  void HandleDivRemConstantIntegralLatencies(int32_t imm);
  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleGenerateDataProcInstruction(bool internal_latency = false);
  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);

  // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
  // latency visitors may query CodeGenerator for such information for accurate latency settings.
  CodeGeneratorARMType* codegen_;
};
138
139class HSchedulerARM : public HScheduler {
140 public:
Vladimir Markoced04832018-07-26 14:42:17 +0100141 HSchedulerARM(SchedulingNodeSelector* selector,
xueliang.zhongf7caf682017-03-01 16:07:02 +0000142 SchedulingLatencyVisitorARM* arm_latency_visitor)
Vladimir Markoced04832018-07-26 14:42:17 +0100143 : HScheduler(arm_latency_visitor, selector) {}
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100144 ~HSchedulerARM() override {}
xueliang.zhongf7caf682017-03-01 16:07:02 +0000145
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100146 bool IsSchedulable(const HInstruction* instruction) const override {
xueliang.zhongf7caf682017-03-01 16:07:02 +0000147#define CASE_INSTRUCTION_KIND(type, unused) case \
148 HInstruction::InstructionKind::k##type:
149 switch (instruction->GetKind()) {
150 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
151 return true;
152 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
153 return true;
154 default:
155 return HScheduler::IsSchedulable(instruction);
156 }
157#undef CASE_INSTRUCTION_KIND
158 }
159
160 private:
161 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
162};
163
164} // namespace arm
165} // namespace art
166
167#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_