Merge "Deduplicate stackmaps at BitTable level."
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index da6c711..e1b6575 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -292,6 +292,12 @@
}
}
+template<typename Writer, typename Builder>
+ALWAYS_INLINE static void EncodeTable(Writer& out, const Builder& bit_table) {
+ out.WriteBit(false); // Is not deduped.
+ bit_table.Encode(out);
+}
+
size_t StackMapStream::PrepareForFillIn() {
DCHECK_EQ(out_.size(), 0u);
@@ -309,13 +315,13 @@
EncodeUnsignedLeb128(&out_, fp_spill_mask_);
EncodeUnsignedLeb128(&out_, num_dex_registers_);
BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&out_, out_.size() * kBitsPerByte);
- stack_maps_.Encode(out);
- inline_infos_.Encode(out);
- register_masks_.Encode(out);
- stack_masks_.Encode(out);
- dex_register_masks_.Encode(out);
- dex_register_maps_.Encode(out);
- dex_register_catalog_.Encode(out);
+ EncodeTable(out, stack_maps_);
+ EncodeTable(out, inline_infos_);
+ EncodeTable(out, register_masks_);
+ EncodeTable(out, stack_masks_);
+ EncodeTable(out, dex_register_masks_);
+ EncodeTable(out, dex_register_maps_);
+ EncodeTable(out, dex_register_catalog_);
return out_.size();
}
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 42f9789..16a9216 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -758,4 +758,48 @@
stack_map2.GetStackMaskIndex());
}
+TEST(StackMapTest, TestDedupeBitTables) {
+ MallocArenaPool pool;
+ ArenaStack arena_stack(&pool);
+ ScopedArenaAllocator allocator(&arena_stack);
+ StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 2);
+
+ stream.BeginStackMapEntry(0, 64 * kPcAlign);
+ stream.AddDexRegisterEntry(Kind::kInStack, 0);
+ stream.AddDexRegisterEntry(Kind::kConstant, -2);
+ stream.EndStackMapEntry();
+
+ stream.EndMethod();
+ std::vector<uint8_t> memory(stream.PrepareForFillIn());
+ MemoryRegion region(memory.data(), memory.size());
+ stream.FillInCodeInfo(region);
+
+ std::vector<uint8_t> out;
+ CodeInfo::DedupeMap dedupe_map;
+ size_t deduped1 = CodeInfo::Dedupe(&out, memory.data(), &dedupe_map);
+ size_t deduped2 = CodeInfo::Dedupe(&out, memory.data(), &dedupe_map);
+
+ for (size_t deduped : { deduped1, deduped2 }) {
+ CodeInfo code_info(out.data() + deduped);
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign)));
+ ASSERT_EQ(0u, stack_map.GetDexPc());
+ ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
+
+ ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind());
+ ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind());
+ ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes());
+ ASSERT_EQ(-2, dex_register_map[1].GetConstant());
+ }
+
+ ASSERT_GT(memory.size() * 2, out.size());
+}
+
} // namespace art
diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc
index 20ae19a..d34f7b5 100644
--- a/dex2oat/linker/oat_writer.cc
+++ b/dex2oat/linker/oat_writer.cc
@@ -26,6 +26,7 @@
#include "base/bit_vector-inl.h"
#include "base/enums.h"
#include "base/file_magic.h"
+#include "base/indenter.h"
#include "base/logging.h" // For VLOG
#include "base/os.h"
#include "base/safe_map.h"
@@ -65,6 +66,7 @@
#include "profile/profile_compilation_info.h"
#include "quicken_info.h"
#include "scoped_thread_state_change-inl.h"
+#include "stack_map.h"
#include "utils/dex_cache_arrays_layout-inl.h"
#include "vdex_file.h"
#include "verifier/verifier_deps.h"
@@ -1443,6 +1445,8 @@
class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
public:
+ static constexpr bool kDebugVerifyDedupedCodeInfo = false;
+
InitMapMethodVisitor(OatWriter* writer, size_t offset)
: OatDexMethodVisitor(writer, offset) {}
@@ -1456,15 +1460,24 @@
DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset(), 0u);
ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
- uint32_t map_size = map.size() * sizeof(map[0]);
- if (map_size != 0u) {
- size_t offset = dedupe_map_.GetOrCreate(
- map.data(),
- [this, map_size]() {
- uint32_t new_offset = offset_;
- offset_ += map_size;
- return new_offset;
- });
+ if (map.size() != 0u) {
+ // Deduplicate the inner bittables within the CodeInfo.
+ std::vector<uint8_t>* data = &writer_->code_info_data_;
+ size_t offset = dedupe_code_info_.GetOrCreate(map.data(), [=]() {
+ size_t deduped_offset = CodeInfo::Dedupe(data, map.data(), &dedupe_bit_table_);
+ if (kDebugVerifyDedupedCodeInfo) {
+ InstructionSet isa = writer_->GetCompilerOptions().GetInstructionSet();
+ MethodInfo method_info(compiled_method->GetMethodInfo().data());
+ std::stringstream old_code_info;
+ VariableIndentationOutputStream old_vios(&old_code_info);
+ std::stringstream new_code_info;
+ VariableIndentationOutputStream new_vios(&new_code_info);
+ CodeInfo(map.data()).Dump(&old_vios, 0, true, isa, method_info);
+ CodeInfo(data->data() + deduped_offset).Dump(&new_vios, 0, true, isa, method_info);
+ DCHECK_EQ(old_code_info.str(), new_code_info.str());
+ }
+ return offset_ + deduped_offset;
+ });
// Code offset is not initialized yet, so set the map offset to 0u-offset.
DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
oat_class->method_headers_[method_offsets_index_].SetVmapTableOffset(0u - offset);
@@ -1476,9 +1489,13 @@
}
private:
- // Deduplication is already done on a pointer basis by the compiler driver,
- // so we can simply compare the pointers to find out if things are duplicated.
- SafeMap<const uint8_t*, uint32_t> dedupe_map_;
+ // Deduplicate at CodeInfo level. The value is byte offset within code_info_data_.
+ // This deduplicates the whole CodeInfo object without going into the inner tables.
+ // The compiler already deduplicated the pointers but it did not dedupe the tables.
+ SafeMap<const uint8_t*, size_t> dedupe_code_info_;
+
+ // Deduplicate at BitTable level. The value is bit offset within code_info_data_.
+ std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less> dedupe_bit_table_;
};
class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor {
@@ -2028,68 +2045,6 @@
}
};
-class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor {
- public:
- WriteMapMethodVisitor(OatWriter* writer,
- OutputStream* out,
- const size_t file_offset,
- size_t relative_offset)
- : OatDexMethodVisitor(writer, relative_offset),
- out_(out),
- file_offset_(file_offset) {}
-
- bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE {
- OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
- const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
-
- if (HasCompiledCode(compiled_method)) {
- size_t file_offset = file_offset_;
- OutputStream* out = out_;
-
- uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset();
- uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_;
- ++method_offsets_index_;
-
- DCHECK((compiled_method->GetVmapTable().size() == 0u && map_offset == 0u) ||
- (compiled_method->GetVmapTable().size() != 0u && map_offset != 0u))
- << compiled_method->GetVmapTable().size() << " " << map_offset << " "
- << dex_file_->PrettyMethod(it.GetMemberIndex());
-
- // If vdex is enabled, only emit the map for compiled code. The quickening info
- // is emitted in the vdex already.
- if (map_offset != 0u) {
- // Transform map_offset to actual oat data offset.
- map_offset = (code_offset - compiled_method->CodeDelta()) - map_offset;
- DCHECK_NE(map_offset, 0u);
- DCHECK_LE(map_offset, offset_) << dex_file_->PrettyMethod(it.GetMemberIndex());
-
- ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
- size_t map_size = map.size() * sizeof(map[0]);
- if (map_offset == offset_) {
- // Write deduplicated map (code info for Optimizing or transformation info for dex2dex).
- if (UNLIKELY(!out->WriteFully(map.data(), map_size))) {
- ReportWriteFailure(it);
- return false;
- }
- offset_ += map_size;
- }
- }
- DCHECK_OFFSET_();
- }
-
- return true;
- }
-
- private:
- OutputStream* const out_;
- size_t const file_offset_;
-
- void ReportWriteFailure(const ClassDataItemIterator& it) {
- PLOG(ERROR) << "Failed to write map for "
- << dex_file_->PrettyMethod(it.GetMemberIndex()) << " to " << out_->GetLocation();
- }
-};
-
class OatWriter::WriteMethodInfoVisitor : public OatDexMethodVisitor {
public:
WriteMethodInfoVisitor(OatWriter* writer,
@@ -2236,7 +2191,7 @@
InitMapMethodVisitor visitor(this, offset);
bool success = VisitDexMethods(&visitor);
DCHECK(success);
- offset = visitor.GetOffset();
+ offset += code_info_data_.size();
}
{
InitMethodInfoVisitor visitor(this, offset);
@@ -3090,13 +3045,11 @@
size_t OatWriter::WriteMaps(OutputStream* out, size_t file_offset, size_t relative_offset) {
{
- size_t vmap_tables_offset = relative_offset;
- WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset);
- if (UNLIKELY(!VisitDexMethods(&visitor))) {
+ if (UNLIKELY(!out->WriteFully(code_info_data_.data(), code_info_data_.size()))) {
return 0;
}
- relative_offset = visitor.GetOffset();
- size_vmap_table_ = relative_offset - vmap_tables_offset;
+ relative_offset += code_info_data_.size();
+ size_vmap_table_ = code_info_data_.size();
}
{
size_t method_infos_offset = relative_offset;
diff --git a/dex2oat/linker/oat_writer.h b/dex2oat/linker/oat_writer.h
index 1618810..d14ddab 100644
--- a/dex2oat/linker/oat_writer.h
+++ b/dex2oat/linker/oat_writer.h
@@ -384,6 +384,8 @@
dchecked_vector<debug::MethodDebugInfo> method_info_;
+ std::vector<uint8_t> code_info_data_;
+
const CompilerDriver* compiler_driver_;
const CompilerOptions& compiler_options_;
ImageWriter* image_writer_;
diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h
index 07c1611..7d8de39 100644
--- a/libartbase/base/bit_memory_region.h
+++ b/libartbase/base/bit_memory_region.h
@@ -28,11 +28,33 @@
// abstracting away the bit start offset to avoid needing to pass it as an argument everywhere.
class BitMemoryRegion FINAL : public ValueObject {
public:
+ struct Less {
+ constexpr bool operator()(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) const {
+ if (lhs.size_in_bits() != rhs.size_in_bits()) {
+ return lhs.size_in_bits() < rhs.size_in_bits();
+ }
+ size_t bit = 0;
+ constexpr size_t kNumBits = BitSizeOf<uint32_t>();
+ for (; bit + kNumBits <= lhs.size_in_bits(); bit += kNumBits) {
+ uint32_t lhs_bits = lhs.LoadBits(bit, kNumBits);
+ uint32_t rhs_bits = rhs.LoadBits(bit, kNumBits);
+ if (lhs_bits != rhs_bits) {
+ return lhs_bits < rhs_bits;
+ }
+ }
+ size_t num_bits = lhs.size_in_bits() - bit;
+ return lhs.LoadBits(bit, num_bits) < rhs.LoadBits(bit, num_bits);
+ }
+ };
+
BitMemoryRegion() = default;
+ ALWAYS_INLINE BitMemoryRegion(void* data, size_t bit_start, size_t bit_size)
+ : data_(reinterpret_cast<uintptr_t*>(AlignDown(data, sizeof(uintptr_t)))),
+ bit_start_(bit_start + 8 * (reinterpret_cast<uintptr_t>(data) % sizeof(uintptr_t))),
+ bit_size_(bit_size) {
+ }
ALWAYS_INLINE explicit BitMemoryRegion(MemoryRegion region)
- : data_(reinterpret_cast<uintptr_t*>(AlignDown(region.pointer(), sizeof(uintptr_t)))),
- bit_start_(8 * (reinterpret_cast<uintptr_t>(region.pointer()) % sizeof(uintptr_t))),
- bit_size_(region.size_in_bits()) {
+ : BitMemoryRegion(region.begin(), /* bit_start */ 0, region.size_in_bits()) {
}
ALWAYS_INLINE BitMemoryRegion(MemoryRegion region, size_t bit_offset, size_t bit_length)
: BitMemoryRegion(region) {
@@ -176,9 +198,8 @@
class BitMemoryReader {
public:
- explicit BitMemoryReader(const uint8_t* data, size_t bit_offset = 0) {
- MemoryRegion region(const_cast<uint8_t*>(data), BitsToBytesRoundUp(bit_offset));
- finished_region_ = BitMemoryRegion(region, 0, bit_offset);
+ explicit BitMemoryReader(const uint8_t* data, size_t bit_offset = 0)
+ : finished_region_(const_cast<uint8_t*>(data), /* bit_start */ 0, bit_offset) {
DCHECK_EQ(GetBitOffset(), bit_offset);
}
@@ -188,10 +209,19 @@
return finished_region_.Extend(bit_length);
}
+ // Get the most recently read bits.
+ ALWAYS_INLINE BitMemoryRegion Tail(size_t bit_length) {
+ return finished_region_.Subregion(finished_region_.size_in_bits() - bit_length, bit_length);
+ }
+
ALWAYS_INLINE uint32_t ReadBits(size_t bit_length) {
return finished_region_.Extend(bit_length).LoadBits(0, bit_length);
}
+ ALWAYS_INLINE bool ReadBit() {
+ return finished_region_.Extend(1).LoadBit(0);
+ }
+
private:
// Represents all of the bits which were read so far. There is no upper bound.
// Therefore, by definition, the "cursor" is always at the end of the region.
@@ -215,6 +245,7 @@
ALWAYS_INLINE BitMemoryRegion Allocate(size_t bit_length) {
out_->resize(BitsToBytesRoundUp(bit_offset_ + bit_length));
BitMemoryRegion region(MemoryRegion(out_->data(), out_->size()), bit_offset_, bit_length);
+ DCHECK_LE(bit_length, std::numeric_limits<size_t>::max() - bit_offset_) << "Overflow";
bit_offset_ += bit_length;
return region;
}
@@ -223,6 +254,14 @@
Allocate(bit_length).StoreBits(0, value, bit_length);
}
+ ALWAYS_INLINE void WriteBit(bool value) {
+ Allocate(1).StoreBit(0, value);
+ }
+
+ ALWAYS_INLINE void WriteRegion(const BitMemoryRegion& region) {
+ Allocate(region.size_in_bits()).StoreBits(0, region, region.size_in_bits());
+ }
+
private:
Vector* out_;
size_t bit_offset_;
diff --git a/runtime/oat.h b/runtime/oat.h
index 3939eec..c286f46 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- // Last oat version changed reason: Remove frame info from OatQuickMethodHeader.
- static constexpr uint8_t kOatVersion[] = { '1', '5', '6', '\0' };
+ // Last oat version changed reason: Deduplicate stackmaps at BitTable level.
+ static constexpr uint8_t kOatVersion[] = { '1', '5', '7', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index 62b9f35..e8746bc 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -31,6 +31,24 @@
: CodeInfo(header->GetOptimizedCodeInfoPtr(), flags) {
}
+template<typename Accessor>
+ALWAYS_INLINE static void DecodeTable(BitTable<Accessor>& table,
+ BitMemoryReader& reader,
+ const uint8_t* data) {
+ bool is_deduped = reader.ReadBit();
+ if (is_deduped) {
+ // 'data' points to the start of the reader's data.
+ uint32_t current_bit_offset = reader.GetBitOffset();
+ uint32_t bit_offset_backwards = DecodeVarintBits(reader) - current_bit_offset;
+ uint32_t byte_offset_backwards = BitsToBytesRoundUp(bit_offset_backwards);
+ BitMemoryReader reader2(data - byte_offset_backwards,
+ byte_offset_backwards * kBitsPerByte - bit_offset_backwards);
+ table.Decode(reader2);
+ } else {
+ table.Decode(reader);
+ }
+}
+
void CodeInfo::Decode(const uint8_t* data, DecodeFlags flags) {
const uint8_t* begin = data;
frame_size_in_bytes_ = DecodeUnsignedLeb128(&data);
@@ -38,19 +56,57 @@
fp_spill_mask_ = DecodeUnsignedLeb128(&data);
number_of_dex_registers_ = DecodeUnsignedLeb128(&data);
BitMemoryReader reader(data, /* bit_offset */ 0);
- stack_maps_.Decode(reader);
- inline_infos_.Decode(reader);
+ DecodeTable(stack_maps_, reader, data);
+ DecodeTable(inline_infos_, reader, data);
if (flags & DecodeFlags::InlineInfoOnly) {
return;
}
- register_masks_.Decode(reader);
- stack_masks_.Decode(reader);
- dex_register_masks_.Decode(reader);
- dex_register_maps_.Decode(reader);
- dex_register_catalog_.Decode(reader);
+ DecodeTable(register_masks_, reader, data);
+ DecodeTable(stack_masks_, reader, data);
+ DecodeTable(dex_register_masks_, reader, data);
+ DecodeTable(dex_register_maps_, reader, data);
+ DecodeTable(dex_register_catalog_, reader, data);
size_in_bits_ = (data - begin) * kBitsPerByte + reader.GetBitOffset();
}
+template<typename Accessor>
+ALWAYS_INLINE static void DedupeTable(BitMemoryWriter<std::vector<uint8_t>>& writer,
+ BitMemoryReader& reader,
+ CodeInfo::DedupeMap* dedupe_map) {
+ bool is_deduped = reader.ReadBit();
+ DCHECK(!is_deduped);
+ BitTable<Accessor> bit_table(reader);
+ BitMemoryRegion region = reader.Tail(bit_table.BitSize());
+ auto it = dedupe_map->insert(std::make_pair(region, writer.GetBitOffset() + 1 /* dedupe bit */));
+ if (it.second /* new bit table */ || region.size_in_bits() < 32) {
+ writer.WriteBit(false); // Is not deduped.
+ writer.WriteRegion(region);
+ } else {
+ writer.WriteBit(true); // Is deduped.
+ EncodeVarintBits(writer, writer.GetBitOffset() - it.first->second);
+ }
+}
+
+size_t CodeInfo::Dedupe(std::vector<uint8_t>* out, const uint8_t* in, DedupeMap* dedupe_map) {
+ // Remember the current offset in the output buffer so that we can return it later.
+ const size_t result = out->size();
+ // Copy the header which encodes QuickMethodFrameInfo.
+ EncodeUnsignedLeb128(out, DecodeUnsignedLeb128(&in));
+ EncodeUnsignedLeb128(out, DecodeUnsignedLeb128(&in));
+ EncodeUnsignedLeb128(out, DecodeUnsignedLeb128(&in));
+ EncodeUnsignedLeb128(out, DecodeUnsignedLeb128(&in));
+ BitMemoryReader reader(in, /* bit_offset */ 0);
+ BitMemoryWriter<std::vector<uint8_t>> writer(out, /* bit_offset */ out->size() * kBitsPerByte);
+ DedupeTable<StackMap>(writer, reader, dedupe_map);
+ DedupeTable<InlineInfo>(writer, reader, dedupe_map);
+ DedupeTable<RegisterMask>(writer, reader, dedupe_map);
+ DedupeTable<MaskInfo>(writer, reader, dedupe_map);
+ DedupeTable<MaskInfo>(writer, reader, dedupe_map);
+ DedupeTable<DexRegisterMapInfo>(writer, reader, dedupe_map);
+ DedupeTable<DexRegisterInfo>(writer, reader, dedupe_map);
+ return result;
+}
+
BitTable<StackMap>::const_iterator CodeInfo::BinarySearchNativePc(uint32_t packed_pc) const {
return std::partition_point(
stack_maps_.begin(),
@@ -217,10 +273,7 @@
bool verbose,
InstructionSet instruction_set,
const MethodInfo& method_info) const {
- vios->Stream()
- << "CodeInfo"
- << " BitSize=" << size_in_bits_
- << "\n";
+ vios->Stream() << "CodeInfo\n";
ScopedIndentation indent1(vios);
DumpTable<StackMap>(vios, "StackMaps", stack_maps_, verbose);
DumpTable<RegisterMask>(vios, "RegisterMasks", register_masks_, verbose);
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 928f0f2..909aaa5 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -418,6 +418,16 @@
DecodeUnsignedLeb128(&data));
}
+ typedef std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less> DedupeMap;
+
+ // Copy CodeInfo data while de-duplicating the internal bit tables.
+ // The 'out' vector must be reused between Dedupe calls (it does not have to be empty).
+ // The 'dedupe_map' stores the bit offsets of bit tables within the 'out' vector.
+ // It returns the byte offset of the copied CodeInfo within the 'out' vector.
+ static size_t Dedupe(std::vector<uint8_t>* out,
+ const uint8_t* in,
+ /*inout*/ DedupeMap* dedupe_map);
+
private:
// Returns lower bound (first stack map whose pc is greater than or equal to the desired one).
// It ignores catch stack maps at the end (it is the same as if they had maximum pc value).