Use partial TLAB regions
Instead of having 256K TLAB regions, have 256K TLABs split into
16K regions. This fixes pathological cases with multithreaded
allocation that caused many GCs since each thread reserving
256K would often bump the counter past the GC start threshold. Now
threads only bump the counter every 16K.
System wide results (average of 5 samples on N6P):
Total GC time 60s after starting shell: 45s -> 24s
Average .Heap PSS 60s after starting shell: 57900k -> 58682k
BinaryTrees gets around 5% slower, numbers are noisy.
Boot time: 13.302 -> 12.899 (average of 100 runs)
Bug: 35872915
Bug: 36216292
Test: test-art-host
(cherry picked from commit bf48003fa32d2845f2213c0ba31af6677715662d)
Change-Id: I5ab22420124eeadc0a53519c70112274101dfb39
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4a25610..28dd627 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -135,6 +135,13 @@
static const char* kRegionSpaceName = "main space (region space)";
+// If true, we log all GCs in both the foreground and background. Used for debugging.
+static constexpr bool kLogAllGCs = false;
+
+// How much we grow the TLAB if we can do it.
+static constexpr size_t kPartialTlabSize = 16 * KB;
+static constexpr bool kUsePartialTlabs = true;
+
#if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
// 300 MB (0x12c00000) - (default non-moving space capacity).
static uint8_t* const kPreferredAllocSpaceBegin =
@@ -2762,7 +2769,7 @@
const std::vector<uint64_t>& pause_times = GetCurrentGcIteration()->GetPauseTimes();
// Print the GC if it is an explicit GC (e.g. Runtime.gc()) or a slow GC
// (mutator time blocked >= long_pause_log_threshold_).
- bool log_gc = gc_cause == kGcCauseExplicit;
+ bool log_gc = kLogAllGCs || gc_cause == kGcCauseExplicit;
if (!log_gc && CareAboutPauseTimes()) {
// GC for alloc pauses the allocating thread, so consider it as a pause.
log_gc = duration > long_gc_log_threshold_ ||
@@ -4182,7 +4189,21 @@
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
const AllocatorType allocator_type = GetCurrentAllocator();
- if (allocator_type == kAllocatorTypeTLAB) {
+ if (kUsePartialTlabs && alloc_size <= self->TlabRemainingCapacity()) {
+ DCHECK_GT(alloc_size, self->TlabSize());
+ // There is enough space if we grow the TLAB. Let's do that. This increases the
+ // TLAB bytes.
+ const size_t min_expand_size = alloc_size - self->TlabSize();
+ const size_t expand_bytes = std::max(
+ min_expand_size,
+ std::min(self->TlabRemainingCapacity() - self->TlabSize(), kPartialTlabSize));
+ if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, expand_bytes, grow))) {
+ return nullptr;
+ }
+ *bytes_tl_bulk_allocated = expand_bytes;
+ self->ExpandTlab(expand_bytes);
+ DCHECK_LE(alloc_size, self->TlabSize());
+ } else if (allocator_type == kAllocatorTypeTLAB) {
DCHECK(bump_pointer_space_ != nullptr);
const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
@@ -4202,15 +4223,18 @@
if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
space::RegionSpace::kRegionSize,
grow))) {
+ const size_t new_tlab_size = kUsePartialTlabs
+ ? std::max(alloc_size, kPartialTlabSize)
+ : gc::space::RegionSpace::kRegionSize;
// Try to allocate a tlab.
- if (!region_space_->AllocNewTlab(self)) {
+ if (!region_space_->AllocNewTlab(self, new_tlab_size)) {
// Failed to allocate a tlab. Try non-tlab.
return region_space_->AllocNonvirtual<false>(alloc_size,
bytes_allocated,
usable_size,
bytes_tl_bulk_allocated);
}
- *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
+ *bytes_tl_bulk_allocated = new_tlab_size;
// Fall-through to using the TLAB below.
} else {
// Check OOME for a non-tlab allocation.