blob: b8f4572abd26103700c5be5527063826505777c1 [file] [log] [blame]
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"
namespace art {
// Size of one virtual register slot; used below to scale slot indices into
// stack offsets.
static constexpr size_t kVRegSize = 4;

// Bit pattern of the double value 2^32.
static constexpr int64_t k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Bit pattern of the double value 2^31.
static constexpr int64_t k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Largest value of a primitive int (2^31 - 1).
static constexpr int32_t kPrimIntMax = INT32_C(0x7fffffff);
// Largest value of a primitive long (2^63 - 1).
static constexpr int64_t kPrimLongMax = INT64_C(0x7fffffffffffffff);
// Forward declarations. This header only names these types through pointers,
// references or template arguments, so their full definitions are not needed here.
class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
// SrcMap instantiated with the standard allocator for SrcMapElem; this is the
// type taken by CodeGenerator::BuildMappingTable.
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
// Abstract allocation interface handed to the code generator (the Compile*
// entry points and CodeGenerator::Finalize take a CodeAllocator*).
class CodeAllocator {
 public:
  CodeAllocator() = default;
  virtual ~CodeAllocator() = default;

  // Implemented by subclasses.
  // NOTE(review): presumably returns a buffer of at least `size` bytes that
  // receives the generated code -- confirm with the implementers.
  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
// Pairs a dex pc with a native pc. CodeGenerator keeps a GrowableArray of
// these in pc_infos_.
struct PcInfo {
// Program counter in the dex code.
uint32_t dex_pc;
// Program counter in the native code.
// NOTE(review): whether this is an absolute address or an offset into the
// generated code is not visible in this header.
uintptr_t native_pc;
};
// Base class for slow paths registered through CodeGenerator::AddSlowPath.
// Derives from ArenaObject with the kArenaAllocSlowPaths allocation kind.
class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() = default;
  virtual ~SlowPathCode() = default;

  // Emits the native code of this slow path using `codegen`.
  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
class CodeGenerator {
public:
// Compiles the graph to executable instructions. These entry points return
// void; no success/failure status is returned to the caller.
// Baseline compilation entry point; `is_leaf` defaults to false.
void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
// Optimized compilation entry point.
void CompileOptimized(CodeAllocator* allocator);
// Factory taking the graph, the target instruction set, its features and the
// compiler options.
// NOTE(review): presumably returns the backend matching `instruction_set`; what
// happens for an unsupported instruction set is not visible in this header.
static CodeGenerator* Create(HGraph* graph,
InstructionSet instruction_set,
const InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options);
virtual ~CodeGenerator() {}
// Returns the graph this code generator was constructed with.
HGraph* GetGraph() const { return graph_; }
// Block ordering helpers, defined out of line. FirstNonEmptyBlock is also used
// by CommonGetLabelOf to select the block whose label is returned.
HBasicBlock* GetNextBlockToEmit() const;
HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
// Returns the stack offset of `parameter`: the frame size, plus one vreg slot
// for the ART method, plus kVRegSize for each parameter index before it.
size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
  // Note that this follows the current calling convention.
  const size_t art_method_slot = kVRegSize;
  const size_t offset_in_parameters = parameter->GetIndex() * kVRegSize;
  return GetFrameSize() + art_method_slot + offset_in_parameters;
}
// Hooks implemented by each backend. All are pure virtual except Finalize,
// which is declared without `= 0` and therefore has a base implementation
// defined outside this header.
virtual void Initialize() = 0;
virtual void Finalize(CodeAllocator* allocator);
virtual void GenerateFrameEntry() = 0;
virtual void GenerateFrameExit() = 0;
virtual void Bind(HBasicBlock* block) = 0;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
virtual Assembler* GetAssembler() = 0;
// Word size of the target; GetCoreSpillSize() multiplies it by the number of
// spilled core registers.
virtual size_t GetWordSize() const = 0;
// Size of one floating point spill slot; see GetFpuSpillSize().
virtual size_t GetFloatingPointSpillSlotSize() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
// Prepares code generation from the register allocation results.
// NOTE(review): defined out of line; presumably computes the frame size and
// records `block_order` -- confirm in the implementation file.
void InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_number_of_live_core_registers,
size_t maximum_number_of_live_fp_registers,
size_t number_of_out_slots,
const GrowableArray<HBasicBlock*>& block_order);
int32_t GetStackSlot(HLocal* local) const;
Location GetTemporaryLocation(HTemporary* temp) const;
// Frame size accessors.
uint32_t GetFrameSize() const { return frame_size_; }
void SetFrameSize(uint32_t size) { frame_size_ = size; }
// Spill masks: initialized to 0 by the constructor and assigned in
// ComputeSpillMask().
uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
// Register counts passed to the constructor.
size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
// Backend hook. It is const, yet can still write to the blocked register
// arrays because the members are `bool* const`: the pointers are constant, the
// pointed-to flags are not.
virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
// Computes the spill masks as the allocated registers that are also
// callee-save, separately for core and floating point registers. Virtual, so
// backends may override it.
virtual void ComputeSpillMask() {
  const auto allocated_core = allocated_registers_.GetCoreRegisters();
  core_spill_mask_ = allocated_core & core_callee_save_mask_;
  DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";

  const auto allocated_fpu = allocated_registers_.GetFloatingPointRegisters();
  fpu_spill_mask_ = allocated_fpu & fpu_callee_save_mask_;
}
// Builds a bit mask with bit `registers[i]` set for each of the `length`
// entries of `registers`. Every entry must be in the range [0, 31]; an empty
// array yields 0.
static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
  uint32_t mask = 0;
  for (size_t i = 0; i < length; ++i) {
    // Shift an unsigned one: `1 << 31` on a signed int shifts into the sign
    // bit and would only reach `mask` through an implicit signed-to-unsigned
    // conversion.
    mask |= (1u << registers[i]);
  }
  return mask;
}
// Write a representation of register `reg` to `stream` (backend specific).
virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
// Instruction set targeted by the backend; CallPushesPC() uses it to detect
// x86 and x86-64.
virtual InstructionSet GetInstructionSet() const = 0;
// Returns the compiler options supplied at construction.
const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
// Saves the register in the stack. Returns the size taken on stack.
virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
// Restores the register from the stack. Returns the size taken on stack.
virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
// Floating point counterparts of SaveCoreRegister / RestoreCoreRegister.
virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
// Backend query for `type`.
// NOTE(review): presumably true when a value of `type` needs a register pair -- confirm.
virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
// Returns whether we should split long moves in parallel moves.
// The base implementation answers false; backends may override.
virtual bool ShouldSplitLongMoves() const { return false; }
// Returns whether core register `reg` is set in the callee-save mask given to
// the constructor. `reg` must be in the range [0, 31].
bool IsCoreCalleeSaveRegister(int reg) const {
  // Unsigned shift: avoids shifting a signed 1 into the sign bit for
  // register 31 and matches the uint32_t type of the mask.
  return (core_callee_save_mask_ & (1u << reg)) != 0;
}
// Returns whether floating point register `reg` is set in the callee-save mask
// given to the constructor. `reg` must be in the range [0, 31].
bool IsFloatingPointCalleeSaveRegister(int reg) const {
  // Unsigned shift: avoids shifting a signed 1 into the sign bit for
  // register 31 and matches the uint32_t type of the mask.
  return (fpu_callee_save_mask_ & (1u << reg)) != 0;
}
// Records pc information for `instruction` at `dex_pc`.
// NOTE(review): defined out of line; presumably appends to pc_infos_ and/or
// stack_map_stream_ -- confirm in the implementation file.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
// Implicit null check support, defined out of line.
bool CanMoveNullCheckToUser(HNullCheck* null_check);
void MaybeRecordImplicitNullCheck(HInstruction* instruction);
// Registers `slow_path` by appending it to slow_paths_.
void AddSlowPath(SlowPathCode* slow_path) {
slow_paths_.Add(slow_path);
}
// Table builders, defined out of line; each takes an output byte vector.
// BuildStackMaps is the only non-const member of the group.
void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
void BuildVMapTable(std::vector<uint8_t>* vector) const;
void BuildNativeGCMap(
std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
void BuildStackMaps(std::vector<uint8_t>* vector);
// Save / restore the live registers described by `locations` (defined out of line).
void SaveLiveRegisters(LocationSummary* locations);
void RestoreLiveRegisters(LocationSummary* locations);
// Whether the method is still considered a leaf. The constructor starts with
// true; MarkNotLeaf() clears it.
bool IsLeafMethod() const { return is_leaf_; }

// Records that the method is not a leaf. This also flags the current method
// as required.
void MarkNotLeaf() {
  requires_current_method_ = true;
  is_leaf_ = false;
}

// Flags the current method as required without touching the leaf state.
void SetRequiresCurrentMethod() { requires_current_method_ = true; }

// Whether the current method has been flagged as required, either directly or
// through MarkNotLeaf().
bool RequiresCurrentMethod() const { return requires_current_method_; }
// Clears the spill slots taken by loop phis in the `LocationSummary` of the
// suspend check. This is called when the code generator generates code
// for the suspend check at the back edge (instead of where the suspend check
// is, which is the loop entry). At this point, the spill slots for the phis
// have not been written to.
void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
// Expose the blocked register arrays allocated in the constructor. The
// returned pointers allow modification even though these accessors are const.
bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
// Helper that returns the pointer offset of an index in an object array.
// Note: this method assumes we always have the same pointer size, regardless
// of the architecture.
static size_t GetCacheOffset(uint32_t index);
// Emits the two moves `from1` -> `to1` and `from2` -> `to2`.
// NOTE(review): defined out of line; presumably both moves go through the
// backend's ParallelMoveResolver (see GetMoveResolver) -- confirm.
void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);
// Returns whether storing `value` into a field or element of type `type`
// needs a write barrier: only reference (kPrimNot) stores whose value is not
// an int constant do. In debug builds, an int constant stored as a reference
// is checked to be 0.
static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
  if (type != Primitive::kPrimNot) {
    // Non-reference stores never need a barrier; `value` is not inspected.
    return false;
  }
  if (!value->IsIntConstant()) {
    return true;
  }
  if (kIsDebugBuild) {
    CHECK_EQ(value->AsIntConstant()->GetValue(), 0);
  }
  return false;
}
// Adds `location` to the set of allocated registers. That set feeds
// ComputeSpillMask() and HasAllocatedCalleeSaveRegisters().
void AddAllocatedRegister(Location location) {
allocated_registers_.Add(location);
}
// Defined out of line.
// NOTE(review): presumably builds the LocationSummary of `instruction` through
// the backend's location builder (see GetLocationBuilder) -- confirm.
void AllocateLocations(HInstruction* instruction);
// Tells whether the frame of the compiled method is "empty": its size is zero
// or, on targets where the call instruction pushes the return PC, exactly one
// word (the saved return address).
bool HasEmptyFrame() const {
  const size_t return_pc_size = CallPushesPC() ? GetWordSize() : 0;
  return GetFrameSize() == return_pc_size;
}
// Returns the 32-bit value of `constant`: the value of an int constant, 0 for
// a null constant, and the bit_cast of the value to int32_t for a float
// constant. Any other kind of constant trips the DCHECK.
static int32_t GetInt32ValueOf(HConstant* constant) {
  if (constant->IsIntConstant()) {
    return constant->AsIntConstant()->GetValue();
  }
  if (constant->IsNullConstant()) {
    return 0;
  }
  // Only float constants are expected past this point.
  DCHECK(constant->IsFloatConstant());
  return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
}
// Returns the 64-bit value of `constant`. Int and long constants yield their
// value, a null constant yields 0, a float constant yields the bit_cast of its
// value to int32_t (then converted to int64_t), and a double constant yields
// the bit_cast of its value to int64_t. Any other kind of constant trips the
// DCHECK.
static int64_t GetInt64ValueOf(HConstant* constant) {
  if (constant->IsIntConstant()) {
    return constant->AsIntConstant()->GetValue();
  }
  if (constant->IsNullConstant()) {
    return 0;
  }
  if (constant->IsFloatConstant()) {
    return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
  }
  if (constant->IsLongConstant()) {
    return constant->AsLongConstant()->GetValue();
  }
  // Only double constants are expected past this point.
  DCHECK(constant->IsDoubleConstant());
  return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
}
protected:
// Constructor for backends. Stores the register counts and callee-save masks,
// and allocates the three blocked-register bool arrays from the graph's arena
// using the corresponding counts. The initial state is: zero frame size, zero
// spill masks, no block order, block index 0, is_leaf_ == true and
// requires_current_method_ == false. `compiler_options` is kept by reference,
// so it must outlive this object.
// NOTE(review): the second arguments of pc_infos_ (32) and slow_paths_ (8) are
// presumably initial capacities -- confirm against GrowableArray.
CodeGenerator(HGraph* graph,
size_t number_of_core_registers,
size_t number_of_fpu_registers,
size_t number_of_register_pairs,
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options)
: frame_size_(0),
core_spill_mask_(0),
fpu_spill_mask_(0),
first_register_slot_in_slow_path_(0),
blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
number_of_core_registers_(number_of_core_registers),
number_of_fpu_registers_(number_of_fpu_registers),
number_of_register_pairs_(number_of_register_pairs),
core_callee_save_mask_(core_callee_save_mask),
fpu_callee_save_mask_(fpu_callee_save_mask),
graph_(graph),
compiler_options_(compiler_options),
pc_infos_(graph->GetArena(), 32),
slow_paths_(graph->GetArena(), 8),
block_order_(nullptr),
current_block_index_(0),
is_leaf_(true),
requires_current_method_(false),
stack_map_stream_(graph->GetArena()) {}
// Register allocation logic.
void AllocateRegistersLocally(HInstruction* instruction) const;
// Backend specific implementation for allocating a register.
virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
// Search helpers over a bool array of `length` entries, defined out of line.
// NOTE(review): presumably an entry counts as free when its flag is false,
// matching the blocked register arrays -- confirm.
static size_t FindFreeEntry(bool* array, size_t length);
static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
virtual Location GetStackLocation(HLoadLocal* load) const = 0;
// Collaborators provided by the backend.
virtual ParallelMoveResolver* GetMoveResolver() = 0;
virtual HGraphVisitor* GetLocationBuilder() = 0;
virtual HGraphVisitor* GetInstructionVisitor() = 0;
// Returns the location of the first spilled entry for floating point registers,
// relative to the stack pointer. Computed as the frame size minus the total
// frame entry spill size.
uint32_t GetFpuSpillStart() const {
return GetFrameSize() - FrameEntrySpillSize();
}
// Space taken by spilled floating point registers: one floating point spill
// slot per bit set in fpu_spill_mask_.
uint32_t GetFpuSpillSize() const {
return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
}
// Space taken by spilled core registers: one word per bit set in
// core_spill_mask_.
uint32_t GetCoreSpillSize() const {
return POPCOUNT(core_spill_mask_) * GetWordSize();
}
// Total spill space: floating point plus core registers.
uint32_t FrameEntrySpillSize() const {
return GetFpuSpillSize() + GetCoreSpillSize();
}
bool HasAllocatedCalleeSaveRegisters() const {
// We check the core registers against 1 because it always comprises the return PC.
return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
|| (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
}
bool CallPushesPC() const {
InstructionSet instruction_set = GetInstructionSet();
return instruction_set == kX86 || instruction_set == kX86_64;
}
// Arm64 has its own type for a label, so we need to templatize this method
// to share the logic.
template <typename T>
T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
block = FirstNonEmptyBlock(block);
return raw_pointer_to_labels_array + block->GetBlockId();
}
// Frame size required for this method.
uint32_t frame_size_;
// Bit masks of the registers to spill; assigned in ComputeSpillMask().
uint32_t core_spill_mask_;
uint32_t fpu_spill_mask_;
// Initialized to 0 by the constructor; not otherwise used in this header.
uint32_t first_register_slot_in_slow_path_;
// Registers that were allocated during linear scan.
RegisterSet allocated_registers_;
// Arrays used when doing register allocation to know which
// registers we can allocate. `SetupBlockedRegisters` updates the
// arrays.
bool* const blocked_core_registers_;
bool* const blocked_fpu_registers_;
bool* const blocked_register_pairs_;
// Element counts used to allocate the three arrays above, in the same order.
size_t number_of_core_registers_;
size_t number_of_fpu_registers_;
size_t number_of_register_pairs_;
// Bit masks of the callee-save registers, fixed at construction. Bit `r` set
// means register `r` is callee-save (see IsCoreCalleeSaveRegister and
// IsFloatingPointCalleeSaveRegister).
const uint32_t core_callee_save_mask_;
const uint32_t fpu_callee_save_mask_;
private:
// Internal helpers, defined out of line.
void InitLocationsBaseline(HInstruction* instruction);
size_t GetStackOffsetOfSavedRegister(size_t index);
// NOTE(review): presumably the shared implementation behind CompileBaseline and
// CompileOptimized, selected by `is_baseline` -- confirm in the implementation file.
void CompileInternal(CodeAllocator* allocator, bool is_baseline);
void BlockIfInRegister(Location location, bool is_out = false) const;
// Graph passed to the constructor; returned by GetGraph().
HGraph* const graph_;
// Held by reference: the options object must outlive this code generator.
const CompilerOptions& compiler_options_;
// Constructed with the graph's arena.
GrowableArray<PcInfo> pc_infos_;
// Slow paths registered through AddSlowPath().
GrowableArray<SlowPathCode*> slow_paths_;
// The order to use for code generation.
const GrowableArray<HBasicBlock*>* block_order_;
// The current block index in `block_order_` of the block
// we are generating code for.
size_t current_block_index_;
// Whether the method is a leaf method.
bool is_leaf_;
// Whether an instruction in the graph accesses the current method.
bool requires_current_method_;
// Constructed with the graph's arena.
StackMapStream stack_map_stream_;
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};
// Describes the registers used to pass arguments. `C` is the core register
// type and `F` the floating point register type. The register arrays are not
// copied: only the pointers are stored, so the arrays must outlive this object.
template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers) {}

  // Number of entries in the core and floating point register arrays.
  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  // Returns the core register at `index`; the index is DCHECKed to be in range.
  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  // Returns the floating point register at `index`; the index is DCHECKed to
  // be in range.
  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  // Returns the stack offset of the argument at `index`.
  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add one for the method pointer.
    const size_t slot = index + 1;
    return slot * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_