Thread local bump pointer allocator.

Added a thread local allocator to the heap. Each thread has three
pointers which describe its thread local buffer: start, cur, and
end. When the remaining space in the thread local buffer isn't large
enough for an allocation, the allocator requests a new thread
local buffer from the bump pointer allocator.
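
A minimal sketch of the per-thread fast path, assuming raw byte pointers
on Thread for start/cur/end; TLABSize and AllocTLAB match the names used
in the diff below, but the field layout and the SetTLAB helper are
assumptions, not the actual ART code:

  #include <cstddef>
  #include <cstdint>

  class Thread {
   public:
    // Remaining space in the current thread local buffer.
    size_t TLABSize() const { return static_cast<size_t>(tlab_end_ - tlab_cur_); }

    // Bump allocation inside the buffer; the caller has already checked
    // that TLABSize() >= bytes.
    void* AllocTLAB(size_t bytes) {
      uint8_t* result = tlab_cur_;
      tlab_cur_ += bytes;
      return result;
    }

    // Installed when the bump pointer space hands out a new buffer
    // (AllocNewTLAB in the diff below).
    void SetTLAB(uint8_t* start, uint8_t* end) {
      tlab_start_ = start;
      tlab_cur_ = start;
      tlab_end_ = end;
    }

   private:
    uint8_t* tlab_start_ = nullptr;  // First byte of the thread local buffer.
    uint8_t* tlab_cur_ = nullptr;    // Next free byte (the bump pointer).
    uint8_t* tlab_end_ = nullptr;    // One past the last usable byte.
  };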

The bump pointer space had to be modified to accommodate thread
local buffers. These buffers are called "blocks"; a block is a
buffer which contains a set of adjacent objects. Blocks aren't
necessarily full and may have wasted memory towards the end. Each
block has an 8 byte header which specifies its size and is required
for traversing bump pointer spaces.
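
Roughly, the layout looks like the sketch below; BlockHeader and NextBlock
are hypothetical names, only the 8 byte size field and the back-to-back
packing of blocks come from the description above:

  #include <cstdint>

  // Hypothetical 8 byte header preceding each block's objects.
  struct BlockHeader {
    uint64_t size_;  // Bytes of object data following the header (assumed layout).
  };
  static_assert(sizeof(BlockHeader) == 8, "traversal relies on an 8 byte header");

  // Skip from one block header to the next while walking the space.
  inline BlockHeader* NextBlock(BlockHeader* header) {
    return reinterpret_cast<BlockHeader*>(
        reinterpret_cast<uint8_t*>(header) + sizeof(BlockHeader) + header->size_);
  }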

Memory usage is in between the plain bump pointer allocator and
ROSAlloc, since madvising the unused tail of each block limits wasted
RAM to an average of half a page per block (only the partially used
page at the end of a block stays resident).

Added a runtime option, -XX:UseTLAB, which specifies whether or
not to use the thread local allocator. It's a no-op if the garbage
collector is not the semispace collector.
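
How the option is wired up isn't shown in this diff; a hedged sketch of
the intent, with simplified stand-in enums (only kAllocatorTypeTLAB
appears in the diff, the other identifiers are assumptions):

  // Simplified stand-ins for the real ART enums.
  enum AllocatorType { kAllocatorTypeBumpPointer, kAllocatorTypeTLAB, kAllocatorTypeRosAlloc };
  enum CollectorType { kCollectorTypeSS, kCollectorTypeMS, kCollectorTypeCMS };

  // With the semispace (SS) collector, -XX:UseTLAB switches the default allocator from the
  // plain bump pointer allocator to the thread local one; with any other collector the flag
  // is ignored, matching the "no-op" behaviour described above.
  AllocatorType ChooseAllocator(bool use_tlab, CollectorType collector) {
    if (collector != kCollectorTypeSS) {
      return kAllocatorTypeRosAlloc;  // Placeholder for whatever that collector normally uses.
    }
    return use_tlab ? kAllocatorTypeTLAB : kAllocatorTypeBumpPointer;
  }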

TODO: Smarter block accounting, so that we don't have to read objects
one by one until we either hit the end of the block or see
GetClass() == null, which signifies that the block isn't 100% full.
This would provide a slight speedup to BumpPointerSpace::Walk.
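
The traversal the TODO refers to looks roughly like the per-block loop
below; Object here is a stand-in for mirror::Object (GetClass and SizeOf
are real methods on it, the rest of the scaffolding is assumed):

  #include <cstddef>
  #include <cstdint>

  // Stand-in for mirror::Object, just enough to express the loop.
  struct Object {
    Object* GetClass() const;  // Null in the never-filled tail of a partly full block.
    size_t SizeOf() const;     // Object size in bytes, already rounded to the space's alignment.
  };

  // Visit objects until the end of the block or the first null class pointer.
  template <typename Visitor>
  void WalkBlock(uint8_t* pos, uint8_t* block_end, const Visitor& visitor) {
    while (pos < block_end) {
      Object* obj = reinterpret_cast<Object*>(pos);
      if (obj->GetClass() == nullptr) {
        break;  // The rest of the block was never allocated.
      }
      visitor(obj);
      pos += obj->SizeOf();
    }
  }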

Timings: -XX:HeapMinFree=4m -XX:HeapMaxFree=8m -Xmx48m
ritzperf memalloc:
Dalvik -Xgc:concurrent: 11678
Dalvik -Xgc:noconcurrent: 6697
-Xgc:MS: 5978
-Xgc:SS: 4271
-Xgc:CMS: 4150
-Xgc:SS -XX:UseTLAB: 3255

Bug: 9986565
Bug: 12042213

Change-Id: Ib7e1d4b199a8199f3b1de94b0a7b6e1730689cad
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 99f084a..9fb5760 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -32,7 +32,7 @@
 namespace art {
 namespace gc {
 
-template <bool kInstrumented, typename PreFenceVisitor>
+template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
 inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Class* klass,
                                                       size_t byte_count, AllocatorType allocator,
                                                       const PreFenceVisitor& pre_fence_visitor) {
@@ -43,13 +43,13 @@
   self->AssertThreadSuspensionIsAllowable();
  // Need to check that we aren't the large object allocator since the large object allocation
  // code path calls this function. If we didn't check we would have an infinite loop.
-  if (allocator != kAllocatorTypeLOS && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
+  if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
     return AllocLargeObject<kInstrumented, PreFenceVisitor>(self, klass, byte_count,
                                                             pre_fence_visitor);
   }
   mirror::Object* obj;
-  size_t bytes_allocated;
   AllocationTimer alloc_timer(this, &obj);
+  size_t bytes_allocated;
   obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated);
   if (UNLIKELY(obj == nullptr)) {
     obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &klass);
@@ -89,7 +89,11 @@
   } else {
     DCHECK(!Dbg::IsAllocTrackingEnabled());
   }
-  if (concurrent_gc_) {
+  // concurrent_gc_ isn't known at compile time, so for the BumpPointer and TLAB allocators we
+  // avoid checking it at all: AllocatorMayHaveConcurrentGC(allocator) is false for them, which
+  // lets the entire if statement be optimized out. For the other allocators it is also a
+  // constant, since the allocator_type should be constant propagated.
+  if (AllocatorMayHaveConcurrentGC(allocator) && concurrent_gc_) {
     CheckConcurrentGC(self, new_num_bytes_allocated, obj);
   }
   if (kIsDebugBuild) {
@@ -105,15 +109,15 @@
 inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class* klass,
                                               size_t byte_count,
                                               const PreFenceVisitor& pre_fence_visitor) {
-  return AllocObjectWithAllocator<kInstrumented, PreFenceVisitor>(self, klass, byte_count,
-                                                                  kAllocatorTypeLOS,
-                                                                  pre_fence_visitor);
+  return AllocObjectWithAllocator<kInstrumented, false, PreFenceVisitor>(self, klass, byte_count,
+                                                                         kAllocatorTypeLOS,
+                                                                         pre_fence_visitor);
 }
 
 template <const bool kInstrumented, const bool kGrow>
 inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type,
                                            size_t alloc_size, size_t* bytes_allocated) {
-  if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(alloc_size))) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) {
     return nullptr;
   }
   if (kInstrumented) {
@@ -153,6 +157,21 @@
       DCHECK(ret == nullptr || large_object_space_->Contains(ret));
       break;
     }
+    case kAllocatorTypeTLAB: {
+      alloc_size = RoundUp(alloc_size, space::BumpPointerSpace::kAlignment);
+      if (UNLIKELY(self->TLABSize() < alloc_size)) {
+        // Try allocating a new thread local buffer; if the allocation fails, the space must be
+        // full so return nullptr.
+        if (!bump_pointer_space_->AllocNewTLAB(self, alloc_size + kDefaultTLABSize)) {
+          return nullptr;
+        }
+      }
+      // The allocation can't fail.
+      ret = self->AllocTLAB(alloc_size);
+      DCHECK(ret != nullptr);
+      *bytes_allocated = alloc_size;
+      break;
+    }
     default: {
       LOG(FATAL) << "Invalid allocator type";
       ret = nullptr;
@@ -194,14 +213,14 @@
   return byte_count >= kLargeObjectThreshold && have_zygote_space_ && c->IsPrimitiveArray();
 }
 
-template <const bool kGrow>
-inline bool Heap::IsOutOfMemoryOnAllocation(size_t alloc_size) {
+template <bool kGrow>
+inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size) {
   size_t new_footprint = num_bytes_allocated_ + alloc_size;
   if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
     if (UNLIKELY(new_footprint > growth_limit_)) {
       return true;
     }
-    if (!concurrent_gc_) {
+    if (!AllocatorMayHaveConcurrentGC(allocator_type) || !concurrent_gc_) {
       if (!kGrow) {
         return true;
       }