New free list large object space.

More memory efficient, uses a std::set instead of multiset. Each
large object allocation has sizeof(size_t) * 2 bytes of overhead
without taking into account the std::set overhead. If debug is
enabled, then objects are mprotected as they are freed.

Added a large object test to space test.

Change-Id: I3e714e1afbed49208a4a7e60f9ef7d701a56801b
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 74f7b1b..668ff1d 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -150,16 +150,6 @@
     }
   }
 
-  // Allocate the large object space.
-  const bool kUseFreeListSpaceForLOS  = false;
-  if (kUseFreeListSpaceForLOS) {
-    large_object_space_ = space::FreeListSpace::Create("large object space", NULL, capacity);
-  } else {
-    large_object_space_ = space::LargeObjectMapSpace::Create("large object space");
-  }
-  CHECK(large_object_space_ != NULL) << "Failed to create large object space";
-  AddDiscontinuousSpace(large_object_space_);
-
   alloc_space_ = space::DlMallocSpace::Create(Runtime::Current()->IsZygote() ? "zygote space" : "alloc space",
                                               initial_size,
                                               growth_limit, capacity,
@@ -168,6 +158,16 @@
   alloc_space_->SetFootprintLimit(alloc_space_->Capacity());
   AddContinuousSpace(alloc_space_);
 
+  // Allocate the large object space.
+  const bool kUseFreeListSpaceForLOS = false;
+  if (kUseFreeListSpaceForLOS) {
+    large_object_space_ = space::FreeListSpace::Create("large object space", NULL, capacity);
+  } else {
+    large_object_space_ = space::LargeObjectMapSpace::Create("large object space");
+  }
+  CHECK(large_object_space_ != NULL) << "Failed to create large object space";
+  AddDiscontinuousSpace(large_object_space_);
+
   // Compute heap capacity. Continuous spaces are sorted in order of Begin().
   byte* heap_begin = continuous_spaces_.front()->Begin();
   size_t heap_capacity = continuous_spaces_.back()->End() - continuous_spaces_.front()->Begin();
@@ -448,8 +448,7 @@
   DCHECK_GE(byte_count, sizeof(mirror::Object));
 
   mirror::Object* obj = NULL;
-  size_t size = 0;
-  size_t bytes_allocated;
+  size_t bytes_allocated = 0;
   uint64_t allocation_start = 0;
   if (UNLIKELY(kMeasureAllocationTime)) {
     allocation_start = NanoTime() / kTimeAdjust;
@@ -457,14 +456,12 @@
 
   // We need to have a zygote space or else our newly allocated large object can end up in the
   // Zygote resulting in it being prematurely freed.
-  // We can only do this for primive objects since large objects will not be within the card table
+  // We can only do this for primitive objects since large objects will not be within the card table
   // range. This also means that we rely on SetClass not dirtying the object's card.
   bool large_object_allocation =
       byte_count >= large_object_threshold_ && have_zygote_space_ && c->IsPrimitiveArray();
   if (UNLIKELY(large_object_allocation)) {
-    size = RoundUp(byte_count, kPageSize);
-    obj = Allocate(self, large_object_space_, size, &bytes_allocated);
-    DCHECK(obj == NULL || size == bytes_allocated);
+    obj = Allocate(self, large_object_space_, byte_count, &bytes_allocated);
     // Make sure that our large object didn't get placed anywhere within the space interval or else
     // it breaks the immune range.
     DCHECK(obj == NULL ||
@@ -472,9 +469,6 @@
            reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
   } else {
     obj = Allocate(self, alloc_space_, byte_count, &bytes_allocated);
-    DCHECK(obj == NULL || size <= bytes_allocated);
-    size = bytes_allocated;
-
     // Ensure that we did not allocate into a zygote space.
     DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
   }
@@ -484,12 +478,12 @@
 
     // Record allocation after since we want to use the atomic add for the atomic fence to guard
     // the SetClass since we do not want the class to appear NULL in another thread.
-    RecordAllocation(size, obj);
+    RecordAllocation(bytes_allocated, obj);
 
     if (Dbg::IsAllocTrackingEnabled()) {
       Dbg::RecordAllocation(c, byte_count);
     }
-    if (static_cast<size_t>(num_bytes_allocated_) >= concurrent_start_bytes_) {
+    if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_) >= concurrent_start_bytes_)) {
       // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
       SirtRef<mirror::Object> ref(self, obj);
       RequestConcurrentGC(self);
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index 849157f..5481141 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -45,9 +45,8 @@
             << ") not in bounds of allocation space " << *this;
     }
     size_t allocation_size = AllocationSizeNonvirtual(result);
-    if (bytes_allocated != NULL) {
-      *bytes_allocated = allocation_size;
-    }
+    DCHECK(bytes_allocated != NULL);
+    *bytes_allocated = allocation_size;
     num_bytes_allocated_ += allocation_size;
     total_bytes_allocated_ += allocation_size;
     ++total_objects_allocated_;
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index c498ef8..6d52c26 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -79,7 +79,7 @@
 
   // Perform a mspace_inspect_all which calls back for each allocation chunk. The chunk may not be
   // in use, indicated by num_bytes equaling zero.
-  void Walk(WalkCallback callback, void* arg);
+  void Walk(WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
 
   // Returns the number of bytes that the space has currently obtained from the system. This is
   // greater or equal to the amount of live data in the space.
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 832c6fa..a174c0a 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -66,9 +66,8 @@
   large_objects_.push_back(obj);
   mem_maps_.Put(obj, mem_map);
   size_t allocation_size = mem_map->Size();
-  if (bytes_allocated != NULL) {
-    *bytes_allocated = allocation_size;
-  }
+  DCHECK(bytes_allocated != NULL);
+  *bytes_allocated = allocation_size;
   num_bytes_allocated_ += allocation_size;
   total_bytes_allocated_ += allocation_size;
   ++num_objects_allocated_;
@@ -141,89 +140,97 @@
       end_(end),
       mem_map_(mem_map),
       lock_("free list space lock", kAllocSpaceLock) {
-  chunks_.resize(Size() / kAlignment + 1);
-  // Add a dummy chunk so we don't need to handle chunks having no next chunk.
-  chunks_.back().SetSize(kAlignment, false);
-  // Start out with one large free chunk.
-  AddFreeChunk(begin_, end_ - begin_, NULL);
+  free_end_ = end - begin;
 }
 
 FreeListSpace::~FreeListSpace() {}
 
-void FreeListSpace::AddFreeChunk(void* address, size_t size, Chunk* previous) {
-  Chunk* chunk = ChunkFromAddr(address);
-  chunk->SetSize(size, true);
-  chunk->SetPrevious(previous);
-  Chunk* next_chunk = GetNextChunk(chunk);
-  next_chunk->SetPrevious(chunk);
-  free_chunks_.insert(chunk);
-}
-
-FreeListSpace::Chunk* FreeListSpace::ChunkFromAddr(void* address) {
-  size_t offset = reinterpret_cast<byte*>(address) - Begin();
-  DCHECK(IsAligned<kAlignment>(offset));
-  DCHECK_LT(offset, Size());
-  return &chunks_[offset / kAlignment];
-}
-
-void* FreeListSpace::AddrFromChunk(Chunk* chunk) {
-  return reinterpret_cast<void*>(Begin() + (chunk - &chunks_.front()) * kAlignment);
-}
-
-void FreeListSpace::RemoveFreeChunk(Chunk* chunk) {
-  // TODO: C++0x
-  // TODO: Improve performance, this might be slow.
-  std::pair<FreeChunks::iterator, FreeChunks::iterator> range = free_chunks_.equal_range(chunk);
-  for (FreeChunks::iterator it = range.first; it != range.second; ++it) {
-    if (*it == chunk) {
-      free_chunks_.erase(it);
-      return;
-    }
+void FreeListSpace::Walk(DlMallocSpace::WalkCallback callback, void* arg) {
+  MutexLock mu(Thread::Current(), lock_);
+  uintptr_t free_end_start = reinterpret_cast<uintptr_t>(end_) - free_end_;
+  AllocationHeader* cur_header = reinterpret_cast<AllocationHeader*>(Begin());
+  while (reinterpret_cast<uintptr_t>(cur_header) < free_end_start) {
+    cur_header = cur_header->GetNextNonFree();
+    size_t alloc_size = cur_header->AllocationSize();
+    byte* byte_start = reinterpret_cast<byte*>(cur_header->GetObjectAddress());
+    byte* byte_end = byte_start + alloc_size - sizeof(AllocationHeader);
+    callback(byte_start, byte_end, alloc_size, arg);
+    callback(NULL, NULL, 0, arg);
+    cur_header = reinterpret_cast<AllocationHeader*>(byte_end);
   }
 }
 
-void FreeListSpace::Walk(DlMallocSpace::WalkCallback callback, void* arg) {
-  MutexLock mu(Thread::Current(), lock_);
-  for (Chunk* chunk = &chunks_.front(); chunk < &chunks_.back(); ) {
-    if (!chunk->IsFree()) {
-      size_t size = chunk->GetSize();
-      void* begin = AddrFromChunk(chunk);
-      void* end = reinterpret_cast<void*>(reinterpret_cast<byte*>(begin) + size);
-      callback(begin, end, size, arg);
-      callback(NULL, NULL, 0, arg);
-    }
-    chunk = GetNextChunk(chunk);
+void FreeListSpace::RemoveFreePrev(AllocationHeader* header) {
+  CHECK(!header->IsFree());
+  CHECK_GT(header->GetPrevFree(), size_t(0));
+  FreeBlocks::iterator found = free_blocks_.lower_bound(header);
+  CHECK(found != free_blocks_.end());
+  CHECK_EQ(*found, header);
+  free_blocks_.erase(found);
+}
+
+FreeListSpace::AllocationHeader* FreeListSpace::GetAllocationHeader(const mirror::Object* obj) {
+  DCHECK(Contains(obj));
+  return reinterpret_cast<AllocationHeader*>(reinterpret_cast<uintptr_t>(obj) -
+      sizeof(AllocationHeader));
+}
+
+FreeListSpace::AllocationHeader* FreeListSpace::AllocationHeader::GetNextNonFree() {
+  // We know that there has to be at least one object after us or else we would have
+  // coalesced with the free end region. May be worth investigating a better way to do this
+  // as it may be expensive for large allocations.
+  for (uintptr_t pos = reinterpret_cast<uintptr_t>(this);; pos += kAlignment) {
+    AllocationHeader* cur = reinterpret_cast<AllocationHeader*>(pos);
+    if (!cur->IsFree()) return cur;
   }
 }
 
 size_t FreeListSpace::Free(Thread* self, mirror::Object* obj) {
   MutexLock mu(self, lock_);
-  CHECK(Contains(obj));
-  // Check adjacent chunks to see if we need to combine.
-  Chunk* chunk = ChunkFromAddr(obj);
-  CHECK(!chunk->IsFree());
-
-  size_t allocation_size = chunk->GetSize();
-  if (kIsDebugBuild) {
-    memset(obj, 0xEB, allocation_size);
+  DCHECK(Contains(obj));
+  AllocationHeader* header = GetAllocationHeader(obj);
+  CHECK(IsAligned<kAlignment>(header));
+  size_t allocation_size = header->AllocationSize();
+  DCHECK_GT(allocation_size, size_t(0));
+  DCHECK(IsAligned<kAlignment>(allocation_size));
+  // Look at the next chunk.
+  AllocationHeader* next_header = header->GetNextAllocationHeader();
+  // Calculate the start of the end free block.
+  uintptr_t free_end_start = reinterpret_cast<uintptr_t>(end_) - free_end_;
+  size_t header_prev_free = header->GetPrevFree();
+  size_t new_free_size = allocation_size;
+  if (header_prev_free) {
+    new_free_size += header_prev_free;
+    RemoveFreePrev(header);
   }
-  madvise(obj, allocation_size, MADV_DONTNEED);
-  num_objects_allocated_--;
-  num_bytes_allocated_ -= allocation_size;
-  Chunk* prev = chunk->GetPrevious();
-  Chunk* next = GetNextChunk(chunk);
-
-  // Combine any adjacent free chunks
-  size_t extra_size = chunk->GetSize();
-  if (next->IsFree()) {
-    extra_size += next->GetSize();
-    RemoveFreeChunk(next);
-  }
-  if (prev != NULL && prev->IsFree()) {
-    RemoveFreeChunk(prev);
-    AddFreeChunk(AddrFromChunk(prev), prev->GetSize() + extra_size, prev->GetPrevious());
+  if (reinterpret_cast<uintptr_t>(next_header) >= free_end_start) {
+    // Easy case, the next chunk is the end free region.
+    CHECK_EQ(reinterpret_cast<uintptr_t>(next_header), free_end_start);
+    free_end_ += new_free_size;
   } else {
-    AddFreeChunk(AddrFromChunk(chunk), extra_size, prev);
+    AllocationHeader* new_free_header;
+    DCHECK(IsAligned<kAlignment>(next_header));
+    if (next_header->IsFree()) {
+      // Find the next chunk by reading each page until we hit one with non-zero chunk.
+      AllocationHeader* next_next_header = next_header->GetNextNonFree();
+      DCHECK(IsAligned<kAlignment>(next_next_header));
+      DCHECK(IsAligned<kAlignment>(next_next_header->AllocationSize()));
+      RemoveFreePrev(next_next_header);
+      new_free_header = next_next_header;
+      new_free_size += next_next_header->GetPrevFree();
+    } else {
+      new_free_header = next_header;
+    }
+    new_free_header->prev_free_ = new_free_size;
+    free_blocks_.insert(new_free_header);
+  }
+  --num_objects_allocated_;
+  DCHECK_LE(allocation_size, num_bytes_allocated_);
+  num_bytes_allocated_ -= allocation_size;
+  madvise(header, allocation_size, MADV_DONTNEED);
+  if (kIsDebugBuild) {
+    // Can't disallow reads since we use them to find next chunks during coalescing.
+    mprotect(header, allocation_size, PROT_READ);
   }
   return allocation_size;
 }
@@ -232,53 +239,91 @@
   return mem_map_->HasAddress(obj);
 }
 
-FreeListSpace::Chunk* FreeListSpace::GetNextChunk(Chunk* chunk) {
-  return chunk + chunk->GetSize() / kAlignment;
-}
-
 size_t FreeListSpace::AllocationSize(const mirror::Object* obj) {
-  Chunk* chunk = ChunkFromAddr(const_cast<mirror::Object*>(obj));
-  CHECK(!chunk->IsFree());
-  return chunk->GetSize();
+  AllocationHeader* header = GetAllocationHeader(obj);
+  DCHECK(Contains(obj));
+  DCHECK(!header->IsFree());
+  return header->AllocationSize();
 }
 
 mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
   MutexLock mu(self, lock_);
-  num_bytes = RoundUp(num_bytes, kAlignment);
-  Chunk temp;
-  temp.SetSize(num_bytes);
+  size_t allocation_size = RoundUp(num_bytes + sizeof(AllocationHeader), kAlignment);
+  AllocationHeader temp;
+  temp.SetPrevFree(allocation_size);
+  temp.SetAllocationSize(0);
+  AllocationHeader* new_header;
   // Find the smallest chunk at least num_bytes in size.
-  FreeChunks::iterator found = free_chunks_.lower_bound(&temp);
-  if (found == free_chunks_.end()) {
-    // Out of memory, or too much fragmentation.
-    return NULL;
-  }
-  Chunk* chunk = *found;
-  free_chunks_.erase(found);
-  CHECK(chunk->IsFree());
-  void* addr = AddrFromChunk(chunk);
-  size_t chunk_size = chunk->GetSize();
-  chunk->SetSize(num_bytes);
-  if (chunk_size > num_bytes) {
-    // Split the chunk into two chunks.
-    Chunk* new_chunk = GetNextChunk(chunk);
-    AddFreeChunk(AddrFromChunk(new_chunk), chunk_size - num_bytes, chunk);
+  FreeBlocks::iterator found = free_blocks_.lower_bound(&temp);
+  if (found != free_blocks_.end()) {
+    AllocationHeader* header = *found;
+    free_blocks_.erase(found);
+
+    // Fit our object in the previous free header space.
+    new_header = header->GetPrevFreeAllocationHeader();
+
+    // Remove the newly allocated block from the header and update the prev_free_.
+    header->prev_free_ -= allocation_size;
+    if (header->prev_free_ > 0) {
+      // If there is remaining space, insert back into the free set.
+      free_blocks_.insert(header);
+    }
+  } else {
+    // Try to steal some memory from the free space at the end of the space.
+    if (LIKELY(free_end_ >= allocation_size)) {
+      // Fit our object at the start of the end free block.
+      new_header = reinterpret_cast<AllocationHeader*>(end_ - free_end_);
+      free_end_ -= allocation_size;
+    } else {
+      return NULL;
+    }
   }
 
-  if (bytes_allocated != NULL) {
-    *bytes_allocated = num_bytes;
+  DCHECK(bytes_allocated != NULL);
+  *bytes_allocated = allocation_size;
+
+  // Need to do these inside of the lock.
+  ++num_objects_allocated_;
+  ++total_objects_allocated_;
+  num_bytes_allocated_ += allocation_size;
+  total_bytes_allocated_ += allocation_size;
+
+  // We always put our object at the start of the free block, there can not be another free block
+  // before it.
+  if (kIsDebugBuild) {
+    mprotect(new_header, allocation_size, PROT_READ | PROT_WRITE);
   }
-  num_objects_allocated_++;
-  total_objects_allocated_++;
-  num_bytes_allocated_ += num_bytes;
-  total_bytes_allocated_ += num_bytes;
-  return reinterpret_cast<mirror::Object*>(addr);
+  new_header->SetPrevFree(0);
+  new_header->SetAllocationSize(allocation_size);
+  return new_header->GetObjectAddress();
 }
 
 void FreeListSpace::Dump(std::ostream& os) const {
+  MutexLock mu(Thread::Current(), const_cast<Mutex&>(lock_));
   os << GetName() << " -"
      << " begin: " << reinterpret_cast<void*>(Begin())
-     << " end: " << reinterpret_cast<void*>(End());
+     << " end: " << reinterpret_cast<void*>(End()) << "\n";
+  uintptr_t free_end_start = reinterpret_cast<uintptr_t>(end_) - free_end_;
+  AllocationHeader* cur_header = reinterpret_cast<AllocationHeader*>(Begin());
+  while (reinterpret_cast<uintptr_t>(cur_header) < free_end_start) {
+    byte* free_start = reinterpret_cast<byte*>(cur_header);
+    cur_header = cur_header->GetNextNonFree();
+    byte* free_end = reinterpret_cast<byte*>(cur_header);
+    if (free_start != free_end) {
+      os << "Free block at address: " << reinterpret_cast<const void*>(free_start)
+         << " of length " << free_end - free_start << " bytes\n";
+    }
+    size_t alloc_size = cur_header->AllocationSize();
+    byte* byte_start = reinterpret_cast<byte*>(cur_header->GetObjectAddress());
+    byte* byte_end = byte_start + alloc_size - sizeof(AllocationHeader);
+    os << "Large object at address: " << reinterpret_cast<const void*>(free_start)
+       << " of length " << byte_end - byte_start << " bytes\n";
+    cur_header = reinterpret_cast<AllocationHeader*>(byte_end);
+  }
+  if (free_end_) {
+    os << "Free block at address: " << reinterpret_cast<const void*>(free_end_start)
+       << " of length " << free_end_ << " bytes\n";
+  }
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 31659db..a703e86 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -85,7 +85,7 @@
   size_t AllocationSize(const mirror::Object* obj);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* ptr);
-  void Walk(DlMallocSpace::WalkCallback, void* arg);
+  void Walk(DlMallocSpace::WalkCallback, void* arg) LOCKS_EXCLUDED(lock_);
   // TODO: disabling thread safety analysis as this may be called when we already hold lock_.
   bool Contains(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS;
 
@@ -103,7 +103,6 @@
 };
 
 // A continuous large object space with a free-list to handle holes.
-// TODO: this implementation is buggy.
 class FreeListSpace : public LargeObjectSpace {
  public:
   virtual ~FreeListSpace();
@@ -113,7 +112,7 @@
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* obj);
   bool Contains(const mirror::Object* obj) const;
-  void Walk(DlMallocSpace::WalkCallback callback, void* arg);
+  void Walk(DlMallocSpace::WalkCallback callback, void* arg) LOCKS_EXCLUDED(lock_);
 
   // Address at which the space begins.
   byte* Begin() const {
@@ -135,57 +134,100 @@
  private:
   static const size_t kAlignment = kPageSize;
 
-  class Chunk {
+  class AllocationHeader {
    public:
-    static const size_t kFreeFlag = 0x80000000;
+    // Returns the allocation size, includes the header.
+    size_t AllocationSize() const {
+      return alloc_size_;
+    }
 
-    struct SortBySize {
-      bool operator()(const Chunk* a, const Chunk* b) const {
-        return a->GetSize() < b->GetSize();
+    // Updates the allocation size in the header, the allocation size includes the header itself.
+    void SetAllocationSize(size_t size) {
+      DCHECK(IsAligned<kPageSize>(size));
+      alloc_size_ = size;
+    }
+
+    bool IsFree() const {
+      return AllocationSize() == 0;
+    }
+
+    // Returns the previous free allocation header by using the prev_free_ member to figure out
+    // where it is. If prev free is 0 then we just return ourself.
+    AllocationHeader* GetPrevFreeAllocationHeader() {
+      return reinterpret_cast<AllocationHeader*>(reinterpret_cast<uintptr_t>(this) - prev_free_);
+    }
+
+    // Returns the address of the object associated with this allocation header.
+    mirror::Object* GetObjectAddress() {
+      return reinterpret_cast<mirror::Object*>(reinterpret_cast<uintptr_t>(this) + sizeof(*this));
+    }
+
+    // Returns the next allocation header after the object associated with this allocation header.
+    AllocationHeader* GetNextAllocationHeader() {
+      DCHECK_NE(alloc_size_, 0U);
+      return reinterpret_cast<AllocationHeader*>(reinterpret_cast<uintptr_t>(this) + alloc_size_);
+    }
+
+    // Returns how many free bytes there is before the block.
+    size_t GetPrevFree() const {
+      return prev_free_;
+    }
+
+    // Update the size of the free block prior to the allocation.
+    void SetPrevFree(size_t prev_free) {
+      DCHECK(IsAligned<kPageSize>(prev_free));
+      prev_free_ = prev_free;
+    }
+
+    // Finds and returns the next non free allocation header after ourself.
+    // TODO: Optimize, currently O(n) for n free following pages.
+    AllocationHeader* GetNextNonFree();
+
+    // Used to implement best fit object allocation. Each allocation has an AllocationHeader which
+    // contains the size of the previous free block preceding it. Implemented in such a way that we
+    // can also find the iterator for any allocation header pointer.
+    class SortByPrevFree {
+     public:
+      bool operator()(const AllocationHeader* a, const AllocationHeader* b) const {
+        if (a->GetPrevFree() < b->GetPrevFree()) return true;
+        if (a->GetPrevFree() > b->GetPrevFree()) return false;
+        if (a->AllocationSize() < b->AllocationSize()) return true;
+        if (a->AllocationSize() > b->AllocationSize()) return false;
+        return reinterpret_cast<uintptr_t>(a) < reinterpret_cast<uintptr_t>(b);
       }
     };
 
-    bool IsFree() const {
-      return (m_size & kFreeFlag) != 0;
-    }
-
-    void SetSize(size_t size, bool is_free = false) {
-      m_size = size | (is_free ? kFreeFlag : 0);
-    }
-
-    size_t GetSize() const {
-      return m_size & (kFreeFlag - 1);
-    }
-
-    Chunk* GetPrevious() {
-      return m_previous;
-    }
-
-    void SetPrevious(Chunk* previous) {
-      m_previous = previous;
-      DCHECK(m_previous == NULL ||
-            (m_previous != NULL && m_previous + m_previous->GetSize() / kAlignment == this));
-    }
-
    private:
-    size_t m_size;
-    Chunk* m_previous;
+    // Contains the size of the previous free block, if 0 then the memory preceding us is an
+    // allocation.
+    size_t prev_free_;
+
+    // Allocation size of this object, 0 means that the allocation header is free memory.
+    size_t alloc_size_;
+
+    friend class FreeListSpace;
   };
 
   FreeListSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end);
-  void AddFreeChunk(void* address, size_t size, Chunk* previous) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  Chunk* ChunkFromAddr(void* address) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  void* AddrFromChunk(Chunk* chunk) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  void RemoveFreeChunk(Chunk* chunk) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  Chunk* GetNextChunk(Chunk* chunk);
 
-  typedef std::multiset<Chunk*, Chunk::SortBySize> FreeChunks;
+  // Removes header from the free blocks set by finding the corresponding iterator and erasing it.
+  void RemoveFreePrev(AllocationHeader* header) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+  // Finds the allocation header corresponding to obj.
+  AllocationHeader* GetAllocationHeader(const mirror::Object* obj);
+
+  typedef std::set<AllocationHeader*, AllocationHeader::SortByPrevFree,
+                   accounting::GCAllocator<AllocationHeader*> > FreeBlocks;
+
   byte* const begin_;
   byte* const end_;
+
+  // There is not footer for any allocations at the end of the space, so we keep track of how much
+  // free space there is at the end manually.
   UniquePtr<MemMap> mem_map_;
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  std::vector<Chunk> chunks_ GUARDED_BY(lock_);
-  FreeChunks free_chunks_ GUARDED_BY(lock_);
+  size_t free_end_ GUARDED_BY(lock_);
+  FreeBlocks free_blocks_ GUARDED_BY(lock_);
 };
 
 }  // namespace space
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 66b8c11..455168c 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "dlmalloc_space.h"
+#include "large_object_space.h"
 
 #include "common_test.h"
 #include "globals.h"
@@ -37,6 +38,11 @@
   }
 };
 
+static size_t test_rand(size_t* seed) {
+  *seed = *seed * 1103515245 + 12345;
+  return *seed;
+}
+
 TEST_F(SpaceTest, Init) {
   {
     // Init < max == growth
@@ -80,6 +86,7 @@
 // allocations after the ZygoteSpace is created. The test should also do some GCs to ensure that
 // the GC works with the ZygoteSpace.
 TEST_F(SpaceTest, ZygoteSpace) {
+    size_t dummy = 0;
     DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
     ASSERT_TRUE(space != NULL);
 
@@ -88,11 +95,11 @@
     Thread* self = Thread::Current();
 
     // Succeeds, fits without adjusting the footprint limit.
-    mirror::Object* ptr1 = space->Alloc(self, 1 * MB, NULL);
+    mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
 
     // Fails, requires a higher footprint limit.
-    mirror::Object* ptr2 = space->Alloc(self, 8 * MB, NULL);
+    mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
     EXPECT_TRUE(ptr2 == NULL);
 
     // Succeeds, adjusts the footprint.
@@ -102,11 +109,11 @@
     EXPECT_LE(8U * MB, ptr3_bytes_allocated);
 
     // Fails, requires a higher footprint limit.
-    mirror::Object* ptr4 = space->Alloc(self, 8 * MB, NULL);
+    mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
     EXPECT_TRUE(ptr4 == NULL);
 
     // Also fails, requires a higher allowed footprint.
-    mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, NULL);
+    mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy);
     EXPECT_TRUE(ptr5 == NULL);
 
     // Release some memory.
@@ -116,7 +123,7 @@
     EXPECT_LE(8U * MB, free3);
 
     // Succeeds, now that memory has been freed.
-    void* ptr6 = space->AllocWithGrowth(self, 9 * MB, NULL);
+    void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
     EXPECT_TRUE(ptr6 != NULL);
 
     // Final clean up.
@@ -125,22 +132,22 @@
     EXPECT_LE(1U * MB, free1);
 
     // Make sure that the zygote space isn't directly at the start of the space.
-    space->Alloc(self, 1U * MB, NULL);
+    space->Alloc(self, 1U * MB, &dummy);
     space = space->CreateZygoteSpace("alloc space");
 
     // Make space findable to the heap, will also delete space when runtime is cleaned up
     AddContinuousSpace(space);
 
     // Succeeds, fits without adjusting the footprint limit.
-    ptr1 = space->Alloc(self, 1 * MB, NULL);
+    ptr1 = space->Alloc(self, 1 * MB, &dummy);
     EXPECT_TRUE(ptr1 != NULL);
 
     // Fails, requires a higher footprint limit.
-    ptr2 = space->Alloc(self, 8 * MB, NULL);
+    ptr2 = space->Alloc(self, 8 * MB, &dummy);
     EXPECT_TRUE(ptr2 == NULL);
 
     // Succeeds, adjusts the footprint.
-    ptr3 = space->AllocWithGrowth(self, 2 * MB, NULL);
+    ptr3 = space->AllocWithGrowth(self, 2 * MB, &dummy);
     EXPECT_TRUE(ptr3 != NULL);
     space->Free(self, ptr3);
 
@@ -151,6 +158,7 @@
 }
 
 TEST_F(SpaceTest, AllocAndFree) {
+  size_t dummy = 0;
   DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
   ASSERT_TRUE(space != NULL);
   Thread* self = Thread::Current();
@@ -159,11 +167,11 @@
   AddContinuousSpace(space);
 
   // Succeeds, fits without adjusting the footprint limit.
-  mirror::Object* ptr1 = space->Alloc(self, 1 * MB, NULL);
+  mirror::Object* ptr1 = space->Alloc(self, 1 * MB, &dummy);
   EXPECT_TRUE(ptr1 != NULL);
 
   // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = space->Alloc(self, 8 * MB, NULL);
+  mirror::Object* ptr2 = space->Alloc(self, 8 * MB, &dummy);
   EXPECT_TRUE(ptr2 == NULL);
 
   // Succeeds, adjusts the footprint.
@@ -173,11 +181,11 @@
   EXPECT_LE(8U * MB, ptr3_bytes_allocated);
 
   // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, NULL);
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy);
   EXPECT_TRUE(ptr4 == NULL);
 
   // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, NULL);
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy);
   EXPECT_TRUE(ptr5 == NULL);
 
   // Release some memory.
@@ -187,7 +195,7 @@
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
-  void* ptr6 = space->AllocWithGrowth(self, 9 * MB, NULL);
+  void* ptr6 = space->AllocWithGrowth(self, 9 * MB, &dummy);
   EXPECT_TRUE(ptr6 != NULL);
 
   // Final clean up.
@@ -196,6 +204,67 @@
   EXPECT_LE(1U * MB, free1);
 }
 
+TEST_F(SpaceTest, LargeObjectTest) {
+  size_t rand_seed = 0;
+  for (size_t i = 0; i < 2; ++i) {
+    LargeObjectSpace* los = NULL;
+    if (i == 0) {
+      los = space::LargeObjectMapSpace::Create("large object space");
+    } else {
+      los = space::FreeListSpace::Create("large object space", NULL, 128 * MB);
+    }
+
+    static const size_t num_allocations = 64;
+    static const size_t max_allocation_size = 0x100000;
+    std::vector<std::pair<mirror::Object*, size_t> > requests;
+
+    for (size_t phase = 0; phase < 2; ++phase) {
+      while (requests.size() < num_allocations) {
+        size_t request_size = test_rand(&rand_seed) % max_allocation_size;
+        size_t allocation_size = 0;
+        mirror::Object* obj = los->Alloc(Thread::Current(), request_size, &allocation_size);
+        ASSERT_TRUE(obj != NULL);
+        ASSERT_EQ(allocation_size, los->AllocationSize(obj));
+        ASSERT_GE(allocation_size, request_size);
+        // Fill in our magic value.
+        byte magic = (request_size & 0xFF) | 1;
+        memset(obj, magic, request_size);
+        requests.push_back(std::make_pair(obj, request_size));
+      }
+
+      // "Randomly" shuffle the requests.
+      for (size_t k = 0; k < 10; ++k) {
+        for (size_t j = 0; j < requests.size(); ++j) {
+          std::swap(requests[j], requests[test_rand(&rand_seed) % requests.size()]);
+        }
+      }
+
+      // Free 1 / 2 the allocations the first phase, and all the second phase.
+      size_t limit = !phase ? requests.size() / 2 : 0;
+      while (requests.size() > limit) {
+        mirror::Object* obj = requests.back().first;
+        size_t request_size = requests.back().second;
+        requests.pop_back();
+        byte magic = (request_size & 0xFF) | 1;
+        for (size_t k = 0; k < request_size; ++k) {
+          ASSERT_EQ(reinterpret_cast<const byte*>(obj)[k], magic);
+        }
+        ASSERT_GE(los->Free(Thread::Current(), obj), request_size);
+      }
+    }
+
+    size_t bytes_allocated = 0;
+    // Checks that the coalescing works.
+    mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated);
+    EXPECT_TRUE(obj != NULL);
+    los->Free(Thread::Current(), obj);
+
+    EXPECT_EQ(0U, los->GetBytesAllocated());
+    EXPECT_EQ(0U, los->GetObjectsAllocated());
+    delete los;
+  }
+}
+
 TEST_F(SpaceTest, AllocAndFreeList) {
   DlMallocSpace* space(DlMallocSpace::Create("test", 4 * MB, 16 * MB, 16 * MB, NULL));
   ASSERT_TRUE(space != NULL);
@@ -207,7 +276,9 @@
   // Succeeds, fits without adjusting the max allowed footprint.
   mirror::Object* lots_of_objects[1024];
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    lots_of_objects[i] = space->Alloc(self, 16, NULL);
+    size_t allocation_size = 0;
+    lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
   }
 
@@ -219,7 +290,9 @@
 
   // Succeeds, fits by adjusting the max allowed footprint.
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    lots_of_objects[i] = space->AllocWithGrowth(self, 1024, NULL);
+    size_t allocation_size = 0;
+    lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size);
+    EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
     EXPECT_TRUE(lots_of_objects[i] != NULL);
   }
 
@@ -230,11 +303,6 @@
   }
 }
 
-static size_t test_rand() {
-  // TODO: replace this with something random yet deterministic
-  return rand();
-}
-
 void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(DlMallocSpace* space, intptr_t object_size,
                                                     int round, size_t growth_limit) {
   if (((object_size > 0 && object_size >= static_cast<intptr_t>(growth_limit))) ||
@@ -267,6 +335,7 @@
   size_t last_object = 0;  // last object for which allocation succeeded
   size_t amount_allocated = 0;  // amount of space allocated
   Thread* self = Thread::Current();
+  size_t rand_seed = 123456789;
   for (size_t i = 0; i < max_objects; i++) {
     size_t alloc_fails = 0;  // number of failed allocations
     size_t max_fails = 30;  // number of times we fail allocation before giving up
@@ -275,22 +344,24 @@
       if (object_size > 0) {
         alloc_size = object_size;
       } else {
-        alloc_size = test_rand() % static_cast<size_t>(-object_size);
+        alloc_size = test_rand(&rand_seed) % static_cast<size_t>(-object_size);
         if (alloc_size < 8) {
           alloc_size = 8;
         }
       }
       mirror::Object* object;
+      size_t bytes_allocated = 0;
       if (round <= 1) {
-        object = space->Alloc(self, alloc_size, NULL);
+        object = space->Alloc(self, alloc_size, &bytes_allocated);
       } else {
-        object = space->AllocWithGrowth(self, alloc_size, NULL);
+        object = space->AllocWithGrowth(self, alloc_size, &bytes_allocated);
       }
       footprint = mspace_footprint(mspace);
       EXPECT_GE(space->Size(), footprint);  // invariant
       if (object != NULL) {  // allocation succeeded
         lots_of_objects.get()[i] = object;
         size_t allocation_size = space->AllocationSize(object);
+        EXPECT_EQ(bytes_allocated, allocation_size);
         if (object_size > 0) {
           EXPECT_GE(allocation_size, static_cast<size_t>(object_size));
         } else {
@@ -360,10 +431,11 @@
   // All memory was released, try a large allocation to check freed memory is being coalesced
   mirror::Object* large_object;
   size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4);
+  size_t bytes_allocated = 0;
   if (round <= 1) {
-    large_object = space->Alloc(self, three_quarters_space, NULL);
+    large_object = space->Alloc(self, three_quarters_space, &bytes_allocated);
   } else {
-    large_object = space->AllocWithGrowth(self, three_quarters_space, NULL);
+    large_object = space->AllocWithGrowth(self, three_quarters_space, &bytes_allocated);
   }
   EXPECT_TRUE(large_object != NULL);