Use partial TLAB regions
Instead of having 256K TLAB regions, have 256K TLABs split into
16K regions. This fixes pathological cases with multithreaded
allocation that caused many GCs since each thread reserving
256K would often bump the counter past the GC start threshold. Now
threads only bump the counter every 16K.
System wide results (average of 5 samples on N6P):
Total GC time 60s after starting shell: 45s -> 24s
Average .Heap PSS 60s after starting shell: 57900k -> 58682k
BinaryTrees gets around 5% slower, numbers are noisy.
Boot time: 13.302 -> 12.899 (average of 100 runs)
Bug: 35872915
Bug: 36216292
Test: test-art-host
(cherry picked from commit bf48003fa32d2845f2213c0ba31af6677715662d)
Change-Id: I5ab22420124eeadc0a53519c70112274101dfb39
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4a25610..28dd627 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -135,6 +135,13 @@
static const char* kRegionSpaceName = "main space (region space)";
+// If true, we log all GCs in both the foreground and background. Used for debugging.
+static constexpr bool kLogAllGCs = false;
+
+// How much we grow the TLAB if we can do it.
+static constexpr size_t kPartialTlabSize = 16 * KB;
+static constexpr bool kUsePartialTlabs = true;
+
#if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
// 300 MB (0x12c00000) - (default non-moving space capacity).
static uint8_t* const kPreferredAllocSpaceBegin =
@@ -2762,7 +2769,7 @@
const std::vector<uint64_t>& pause_times = GetCurrentGcIteration()->GetPauseTimes();
// Print the GC if it is an explicit GC (e.g. Runtime.gc()) or a slow GC
// (mutator time blocked >= long_pause_log_threshold_).
- bool log_gc = gc_cause == kGcCauseExplicit;
+ bool log_gc = kLogAllGCs || gc_cause == kGcCauseExplicit;
if (!log_gc && CareAboutPauseTimes()) {
// GC for alloc pauses the allocating thread, so consider it as a pause.
log_gc = duration > long_gc_log_threshold_ ||
@@ -4182,7 +4189,21 @@
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
const AllocatorType allocator_type = GetCurrentAllocator();
- if (allocator_type == kAllocatorTypeTLAB) {
+ if (kUsePartialTlabs && alloc_size <= self->TlabRemainingCapacity()) {
+ DCHECK_GT(alloc_size, self->TlabSize());
+ // There is enough space if we grow the TLAB. Let's do that. This increases the
+ // TLAB bytes.
+ const size_t min_expand_size = alloc_size - self->TlabSize();
+ const size_t expand_bytes = std::max(
+ min_expand_size,
+ std::min(self->TlabRemainingCapacity() - self->TlabSize(), kPartialTlabSize));
+ if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, expand_bytes, grow))) {
+ return nullptr;
+ }
+ *bytes_tl_bulk_allocated = expand_bytes;
+ self->ExpandTlab(expand_bytes);
+ DCHECK_LE(alloc_size, self->TlabSize());
+ } else if (allocator_type == kAllocatorTypeTLAB) {
DCHECK(bump_pointer_space_ != nullptr);
const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
@@ -4202,15 +4223,18 @@
if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
space::RegionSpace::kRegionSize,
grow))) {
+ const size_t new_tlab_size = kUsePartialTlabs
+ ? std::max(alloc_size, kPartialTlabSize)
+ : gc::space::RegionSpace::kRegionSize;
// Try to allocate a tlab.
- if (!region_space_->AllocNewTlab(self)) {
+ if (!region_space_->AllocNewTlab(self, new_tlab_size)) {
// Failed to allocate a tlab. Try non-tlab.
return region_space_->AllocNonvirtual<false>(alloc_size,
bytes_allocated,
usable_size,
bytes_tl_bulk_allocated);
}
- *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize;
+ *bytes_tl_bulk_allocated = new_tlab_size;
// Fall-through to using the TLAB below.
} else {
// Check OOME for a non-tlab allocation.