| /* |
| * Copyright (C) 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ |
| #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ |
| |
| #include "arch/instruction_set.h" |
| #include "arch/instruction_set_features.h" |
| #include "base/bit_field.h" |
| #include "driver/compiler_options.h" |
| #include "globals.h" |
| #include "locations.h" |
| #include "memory_region.h" |
| #include "nodes.h" |
| #include "stack_map_stream.h" |
| |
| namespace art { |
| |
// Size of one virtual register (vreg) slot; used in this file as the unit for
// stack offset computations.
static constexpr size_t kVRegSize = 4;

// Bit pattern of the double-precision value 2^32.
static constexpr int64_t k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Bit pattern of the double-precision value 2^31.
static constexpr int64_t k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Largest value of a primitive integer (2^31 - 1).
static constexpr int32_t kPrimIntMax = 0x7fffffff;
// Largest value of a primitive long (2^63 - 1).
static constexpr int64_t kPrimLongMax = INT64_C(0x7fffffffffffffff);
| |
// Forward declarations of types that this header only uses by pointer,
// by reference, or as template arguments.
class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;

template <typename Alloc>
class SrcMap;

// `SrcMap` instantiated with the standard allocator for `SrcMapElem`.
typedef SrcMap<std::allocator<SrcMapElem>> DefaultSrcMap;
| |
| class CodeAllocator { |
| public: |
| CodeAllocator() {} |
| virtual ~CodeAllocator() {} |
| |
| virtual uint8_t* Allocate(size_t size) = 0; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(CodeAllocator); |
| }; |
| |
// Plain record pairing a dex pc with a native pc. `CodeGenerator` keeps a
// growable array of these (`pc_infos_`).
struct PcInfo {
  // Program counter on the dex side.
  uint32_t dex_pc;
  // Matching program counter on the native side.
  // NOTE(review): whether this is an absolute address or an offset into the
  // generated code is not visible here.
  uintptr_t native_pc;
};
| |
| class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { |
| public: |
| SlowPathCode() {} |
| virtual ~SlowPathCode() {} |
| |
| virtual void EmitNativeCode(CodeGenerator* codegen) = 0; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(SlowPathCode); |
| }; |
| |
| class CodeGenerator { |
| public: |
| // Compiles the graph to executable instructions. Returns whether the compilation |
| // succeeded. |
| void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false); |
| void CompileOptimized(CodeAllocator* allocator); |
| static CodeGenerator* Create(HGraph* graph, |
| InstructionSet instruction_set, |
| const InstructionSetFeatures& isa_features, |
| const CompilerOptions& compiler_options); |
| virtual ~CodeGenerator() {} |
| |
| HGraph* GetGraph() const { return graph_; } |
| |
| HBasicBlock* GetNextBlockToEmit() const; |
| HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const; |
| bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; |
| |
| size_t GetStackSlotOfParameter(HParameterValue* parameter) const { |
| // Note that this follows the current calling convention. |
| return GetFrameSize() |
| + kVRegSize // Art method |
| + parameter->GetIndex() * kVRegSize; |
| } |
| |
| virtual void Initialize() = 0; |
| virtual void Finalize(CodeAllocator* allocator); |
| virtual void GenerateFrameEntry() = 0; |
| virtual void GenerateFrameExit() = 0; |
| virtual void Bind(HBasicBlock* block) = 0; |
| virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; |
| virtual Assembler* GetAssembler() = 0; |
| virtual size_t GetWordSize() const = 0; |
| virtual size_t GetFloatingPointSpillSlotSize() const = 0; |
| virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; |
| void InitializeCodeGeneration(size_t number_of_spill_slots, |
| size_t maximum_number_of_live_core_registers, |
| size_t maximum_number_of_live_fp_registers, |
| size_t number_of_out_slots, |
| const GrowableArray<HBasicBlock*>& block_order); |
| int32_t GetStackSlot(HLocal* local) const; |
| Location GetTemporaryLocation(HTemporary* temp) const; |
| |
| uint32_t GetFrameSize() const { return frame_size_; } |
| void SetFrameSize(uint32_t size) { frame_size_ = size; } |
| uint32_t GetCoreSpillMask() const { return core_spill_mask_; } |
| uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; } |
| |
| size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; } |
| size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; } |
| virtual void SetupBlockedRegisters(bool is_baseline) const = 0; |
| |
| virtual void ComputeSpillMask() { |
| core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; |
| DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; |
| fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; |
| } |
| |
| static uint32_t ComputeRegisterMask(const int* registers, size_t length) { |
| uint32_t mask = 0; |
| for (size_t i = 0, e = length; i < e; ++i) { |
| mask |= (1 << registers[i]); |
| } |
| return mask; |
| } |
| |
| virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; |
| virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; |
| virtual InstructionSet GetInstructionSet() const = 0; |
| |
| const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } |
| |
| // Saves the register in the stack. Returns the size taken on stack. |
| virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; |
| // Restores the register from the stack. Returns the size taken on stack. |
| virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; |
| virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { |
| UNUSED(stack_index, reg_id); |
| UNIMPLEMENTED(FATAL); |
| UNREACHABLE(); |
| } |
| virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { |
| UNUSED(stack_index, reg_id); |
| UNIMPLEMENTED(FATAL); |
| UNREACHABLE(); |
| } |
| virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; |
| |
| bool IsCoreCalleeSaveRegister(int reg) const { |
| return (core_callee_save_mask_ & (1 << reg)) != 0; |
| } |
| |
| bool IsFloatingPointCalleeSaveRegister(int reg) const { |
| return (fpu_callee_save_mask_ & (1 << reg)) != 0; |
| } |
| |
| void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); |
| bool CanMoveNullCheckToUser(HNullCheck* null_check); |
| void MaybeRecordImplicitNullCheck(HInstruction* instruction); |
| |
| void AddSlowPath(SlowPathCode* slow_path) { |
| slow_paths_.Add(slow_path); |
| } |
| |
| void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const; |
| void BuildVMapTable(std::vector<uint8_t>* vector) const; |
| void BuildNativeGCMap( |
| std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; |
| void BuildStackMaps(std::vector<uint8_t>* vector); |
| void SaveLiveRegisters(LocationSummary* locations); |
| void RestoreLiveRegisters(LocationSummary* locations); |
| |
| bool IsLeafMethod() const { |
| return is_leaf_; |
| } |
| |
| void MarkNotLeaf() { |
| is_leaf_ = false; |
| requires_current_method_ = true; |
| } |
| |
| void SetRequiresCurrentMethod() { |
| requires_current_method_ = true; |
| } |
| |
| bool RequiresCurrentMethod() const { |
| return requires_current_method_; |
| } |
| |
| // Clears the spill slots taken by loop phis in the `LocationSummary` of the |
| // suspend check. This is called when the code generator generates code |
| // for the suspend check at the back edge (instead of where the suspend check |
| // is, which is the loop entry). At this point, the spill slots for the phis |
| // have not been written to. |
| void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const; |
| |
| bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } |
| bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } |
| |
| // Helper that returns the pointer offset of an index in an object array. |
| // Note: this method assumes we always have the same pointer size, regardless |
| // of the architecture. |
| static size_t GetCacheOffset(uint32_t index); |
| |
| void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2); |
| |
| static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) { |
| if (kIsDebugBuild) { |
| if (type == Primitive::kPrimNot && value->IsIntConstant()) { |
| CHECK_EQ(value->AsIntConstant()->GetValue(), 0); |
| } |
| } |
| return type == Primitive::kPrimNot && !value->IsIntConstant(); |
| } |
| |
| void AddAllocatedRegister(Location location) { |
| allocated_registers_.Add(location); |
| } |
| |
| void AllocateLocations(HInstruction* instruction); |
| |
| // Tells whether the stack frame of the compiled method is |
| // considered "empty", that is either actually having a size of zero, |
| // or just containing the saved return address register. |
| bool HasEmptyFrame() const { |
| return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); |
| } |
| |
| static int32_t GetInt32ValueOf(HConstant* constant) { |
| if (constant->IsIntConstant()) { |
| return constant->AsIntConstant()->GetValue(); |
| } else if (constant->IsNullConstant()) { |
| return 0; |
| } else { |
| DCHECK(constant->IsFloatConstant()); |
| return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); |
| } |
| } |
| |
| static int64_t GetInt64ValueOf(HConstant* constant) { |
| if (constant->IsIntConstant()) { |
| return constant->AsIntConstant()->GetValue(); |
| } else if (constant->IsNullConstant()) { |
| return 0; |
| } else if (constant->IsFloatConstant()) { |
| return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); |
| } else if (constant->IsLongConstant()) { |
| return constant->AsLongConstant()->GetValue(); |
| } else { |
| DCHECK(constant->IsDoubleConstant()); |
| return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()); |
| } |
| } |
| |
| protected: |
| CodeGenerator(HGraph* graph, |
| size_t number_of_core_registers, |
| size_t number_of_fpu_registers, |
| size_t number_of_register_pairs, |
| uint32_t core_callee_save_mask, |
| uint32_t fpu_callee_save_mask, |
| const CompilerOptions& compiler_options) |
| : frame_size_(0), |
| core_spill_mask_(0), |
| fpu_spill_mask_(0), |
| first_register_slot_in_slow_path_(0), |
| blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)), |
| blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)), |
| blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)), |
| number_of_core_registers_(number_of_core_registers), |
| number_of_fpu_registers_(number_of_fpu_registers), |
| number_of_register_pairs_(number_of_register_pairs), |
| core_callee_save_mask_(core_callee_save_mask), |
| fpu_callee_save_mask_(fpu_callee_save_mask), |
| graph_(graph), |
| compiler_options_(compiler_options), |
| pc_infos_(graph->GetArena(), 32), |
| slow_paths_(graph->GetArena(), 8), |
| block_order_(nullptr), |
| current_block_index_(0), |
| is_leaf_(true), |
| requires_current_method_(false), |
| stack_map_stream_(graph->GetArena()) {} |
| |
| // Register allocation logic. |
| void AllocateRegistersLocally(HInstruction* instruction) const; |
| |
| // Backend specific implementation for allocating a register. |
| virtual Location AllocateFreeRegister(Primitive::Type type) const = 0; |
| |
| static size_t FindFreeEntry(bool* array, size_t length); |
| static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length); |
| |
| virtual Location GetStackLocation(HLoadLocal* load) const = 0; |
| |
| virtual ParallelMoveResolver* GetMoveResolver() = 0; |
| virtual HGraphVisitor* GetLocationBuilder() = 0; |
| virtual HGraphVisitor* GetInstructionVisitor() = 0; |
| |
| // Returns the location of the first spilled entry for floating point registers, |
| // relative to the stack pointer. |
| uint32_t GetFpuSpillStart() const { |
| return GetFrameSize() - FrameEntrySpillSize(); |
| } |
| |
| uint32_t GetFpuSpillSize() const { |
| return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize(); |
| } |
| |
| uint32_t GetCoreSpillSize() const { |
| return POPCOUNT(core_spill_mask_) * GetWordSize(); |
| } |
| |
| uint32_t FrameEntrySpillSize() const { |
| return GetFpuSpillSize() + GetCoreSpillSize(); |
| } |
| |
| bool HasAllocatedCalleeSaveRegisters() const { |
| // We check the core registers against 1 because it always comprises the return PC. |
| return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) |
| || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); |
| } |
| |
| bool CallPushesPC() const { |
| InstructionSet instruction_set = GetInstructionSet(); |
| return instruction_set == kX86 || instruction_set == kX86_64; |
| } |
| |
| // Arm64 has its own type for a label, so we need to templatize this method |
| // to share the logic. |
| template <typename T> |
| T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const { |
| block = FirstNonEmptyBlock(block); |
| return raw_pointer_to_labels_array + block->GetBlockId(); |
| } |
| |
| // Frame size required for this method. |
| uint32_t frame_size_; |
| uint32_t core_spill_mask_; |
| uint32_t fpu_spill_mask_; |
| uint32_t first_register_slot_in_slow_path_; |
| |
| // Registers that were allocated during linear scan. |
| RegisterSet allocated_registers_; |
| |
| // Arrays used when doing register allocation to know which |
| // registers we can allocate. `SetupBlockedRegisters` updates the |
| // arrays. |
| bool* const blocked_core_registers_; |
| bool* const blocked_fpu_registers_; |
| bool* const blocked_register_pairs_; |
| size_t number_of_core_registers_; |
| size_t number_of_fpu_registers_; |
| size_t number_of_register_pairs_; |
| const uint32_t core_callee_save_mask_; |
| const uint32_t fpu_callee_save_mask_; |
| |
| private: |
| void InitLocationsBaseline(HInstruction* instruction); |
| size_t GetStackOffsetOfSavedRegister(size_t index); |
| void CompileInternal(CodeAllocator* allocator, bool is_baseline); |
| void BlockIfInRegister(Location location, bool is_out = false) const; |
| |
| HGraph* const graph_; |
| const CompilerOptions& compiler_options_; |
| |
| GrowableArray<PcInfo> pc_infos_; |
| GrowableArray<SlowPathCode*> slow_paths_; |
| |
| // The order to use for code generation. |
| const GrowableArray<HBasicBlock*>* block_order_; |
| |
| // The current block index in `block_order_` of the block |
| // we are generating code for. |
| size_t current_block_index_; |
| |
| // Whether the method is a leaf method. |
| bool is_leaf_; |
| |
| // Whether an instruction in the graph accesses the current method. |
| bool requires_current_method_; |
| |
| StackMapStream stack_map_stream_; |
| |
| DISALLOW_COPY_AND_ASSIGN(CodeGenerator); |
| }; |
| |
| template <typename C, typename F> |
| class CallingConvention { |
| public: |
| CallingConvention(const C* registers, |
| size_t number_of_registers, |
| const F* fpu_registers, |
| size_t number_of_fpu_registers) |
| : registers_(registers), |
| number_of_registers_(number_of_registers), |
| fpu_registers_(fpu_registers), |
| number_of_fpu_registers_(number_of_fpu_registers) {} |
| |
| size_t GetNumberOfRegisters() const { return number_of_registers_; } |
| size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; } |
| |
| C GetRegisterAt(size_t index) const { |
| DCHECK_LT(index, number_of_registers_); |
| return registers_[index]; |
| } |
| |
| F GetFpuRegisterAt(size_t index) const { |
| DCHECK_LT(index, number_of_fpu_registers_); |
| return fpu_registers_[index]; |
| } |
| |
| size_t GetStackOffsetOf(size_t index) const { |
| // We still reserve the space for parameters passed by registers. |
| // Add one for the method pointer. |
| return (index + 1) * kVRegSize; |
| } |
| |
| private: |
| const C* registers_; |
| const size_t number_of_registers_; |
| const F* fpu_registers_; |
| const size_t number_of_fpu_registers_; |
| |
| DISALLOW_COPY_AND_ASSIGN(CallingConvention); |
| }; |
| |
| } // namespace art |
| |
| #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ |