Enable concurrent sweeping for non-concurrent GC.

Refactored the GarbageCollector so that all of the phases are run by
the collector's RunPhases virtual method. This lets each GC decide
which phases should be concurrent and reduces how much baked-in GC
logic resides in GarbageCollector.

Enabled concurrent sweeping in the semi-space and non-concurrent
mark sweep GCs. Added a swap-semi-spaces boolean to the semi-space
collector, which can be changed with a setter (SetSwapSemiSpaces).

Fixed tests to pass with the GSS collector; there was an error related
to the large object space limit.

Before (EvaluateAndApplyChanges):
GSS paused GC time 7.81s/7.81s, score: 3920

After (EvaluateAndApplyChanges):
GSS paused GC time 6.94s/7.71s, score: 3900

The benchmark score doesn't go up since the GC happens in the allocating
thread. There is a slight reduction in pause times experienced by
other threads (about 0.87s total, 7.81s -> 6.94s).

Added options for pre-sweeping GC heap verification and pre-sweeping
rosalloc verification.

Bug: 14226004
Bug: 14250892
Bug: 14386356

Change-Id: Ib557d0590c1ed82a639d0f0281ba67cf8cae938c
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index ed140e0..646fce6 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -70,7 +70,7 @@
 template<size_t kAlignment> template<typename Visitor>
 inline void SpaceBitmap<kAlignment>::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end,
                                                       const Visitor& visitor) const {
-  DCHECK_LT(visit_begin, visit_end);
+  DCHECK_LE(visit_begin, visit_end);
 #if 0
   for (uintptr_t i = visit_begin; i < visit_end; i += kAlignment) {
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(i);
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 5c7cce2..a805809 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -65,8 +65,9 @@
     return offset / kAlignment / kBitsPerWord;
   }
 
-  static uintptr_t IndexToOffset(size_t index) ALWAYS_INLINE {
-    return static_cast<uintptr_t>(index * kAlignment * kBitsPerWord);
+  template<typename T>
+  static T IndexToOffset(T index) {
+    return static_cast<T>(index * kAlignment * kBitsPerWord);
   }
 
   // Bits are packed in the obvious way.
@@ -158,8 +159,8 @@
   }
 
   // Size in bytes of the memory that the bitmaps spans.
-  size_t HeapSize() const {
-    return IndexToOffset(Size() / kWordSize);
+  uint64_t HeapSize() const {
+    return IndexToOffset<uint64_t>(Size() / kWordSize);
   }
 
   uintptr_t HeapBegin() const {
@@ -167,8 +168,8 @@
   }
 
   // The maximum address which the bitmap can span. (HeapBegin() <= object < HeapLimit()).
-  uintptr_t HeapLimit() const {
-    return HeapBegin() + static_cast<uintptr_t>(HeapSize());
+  uint64_t HeapLimit() const {
+    return static_cast<uint64_t>(HeapBegin()) + HeapSize();
   }
 
   // Set the max address which can covered by the bitmap.
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index ab26a9c..ce7c75a 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -33,10 +33,7 @@
 
   ~ConcurrentCopying() {}
 
-  virtual void InitializePhase() OVERRIDE {}
-  virtual void MarkingPhase() OVERRIDE {}
-  virtual void ReclaimPhase() OVERRIDE {}
-  virtual void FinishPhase() OVERRIDE {}
+  virtual void RunPhases() OVERRIDE {}
   virtual GcType GetGcType() const OVERRIDE {
     return kGcTypePartial;
   }
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 615ec98..f9a6abe 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -14,10 +14,7 @@
  * limitations under the License.
  */
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-
 #include <stdio.h>
-#include <cutils/trace.h>
 
 #include "garbage_collector.h"
 
@@ -46,9 +43,6 @@
   ResetCumulativeStatistics();
 }
 
-void GarbageCollector::PausePhase() {
-}
-
 void GarbageCollector::RegisterPause(uint64_t nano_length) {
   pause_times_.push_back(nano_length);
 }
@@ -62,7 +56,6 @@
 }
 
 void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
-  ThreadList* thread_list = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   uint64_t start_time = NanoTime();
   timings_.Reset();
@@ -70,88 +63,12 @@
   duration_ns_ = 0;
   clear_soft_references_ = clear_soft_references;
   gc_cause_ = gc_cause;
-
   // Reset stats.
   freed_bytes_ = 0;
   freed_large_object_bytes_ = 0;
   freed_objects_ = 0;
   freed_large_objects_ = 0;
-
-  CollectorType collector_type = GetCollectorType();
-  switch (collector_type) {
-    case kCollectorTypeMS:      // Fall through.
-    case kCollectorTypeSS:      // Fall through.
-    case kCollectorTypeGSS: {
-      InitializePhase();
-      // Pause is the entire length of the GC.
-      uint64_t pause_start = NanoTime();
-      ATRACE_BEGIN("Application threads suspended");
-      // Mutator lock may be already exclusively held when we do garbage collections for changing
-      // the current collector / allocator during process state updates.
-      if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
-        // PreGcRosAllocVerification() is called in Heap::TransitionCollector().
-        RevokeAllThreadLocalBuffers();
-        MarkingPhase();
-        PausePhase();
-        ReclaimPhase();
-        // PostGcRosAllocVerification() is called in Heap::TransitionCollector().
-      } else {
-        ATRACE_BEGIN("Suspending mutator threads");
-        thread_list->SuspendAll();
-        ATRACE_END();
-        GetHeap()->PreGcRosAllocVerification(&timings_);
-        RevokeAllThreadLocalBuffers();
-        MarkingPhase();
-        PausePhase();
-        ReclaimPhase();
-        GetHeap()->PostGcRosAllocVerification(&timings_);
-        ATRACE_BEGIN("Resuming mutator threads");
-        thread_list->ResumeAll();
-        ATRACE_END();
-      }
-      ATRACE_END();
-      RegisterPause(NanoTime() - pause_start);
-      FinishPhase();
-      break;
-    }
-    case kCollectorTypeCMS: {
-      InitializePhase();
-      CHECK(!Locks::mutator_lock_->IsExclusiveHeld(self));
-      {
-        ReaderMutexLock mu(self, *Locks::mutator_lock_);
-        MarkingPhase();
-      }
-      uint64_t pause_start = NanoTime();
-      ATRACE_BEGIN("Suspending mutator threads");
-      thread_list->SuspendAll();
-      ATRACE_END();
-      ATRACE_BEGIN("All mutator threads suspended");
-      GetHeap()->PreGcRosAllocVerification(&timings_);
-      PausePhase();
-      RevokeAllThreadLocalBuffers();
-      GetHeap()->PostGcRosAllocVerification(&timings_);
-      ATRACE_END();
-      uint64_t pause_end = NanoTime();
-      ATRACE_BEGIN("Resuming mutator threads");
-      thread_list->ResumeAll();
-      ATRACE_END();
-      RegisterPause(pause_end - pause_start);
-      {
-        ReaderMutexLock mu(self, *Locks::mutator_lock_);
-        ReclaimPhase();
-      }
-      FinishPhase();
-      break;
-    }
-    case kCollectorTypeCC: {
-      // To be implemented.
-      break;
-    }
-    default: {
-      LOG(FATAL) << "Unreachable collector type=" << static_cast<size_t>(collector_type);
-      break;
-    }
-  }
+  RunPhases();  // Run all the GC phases.
   // Add the current timings to the cumulative timings.
   cumulative_timings_.AddLogger(timings_);
   // Update cumulative statistics with how many bytes the GC iteration freed.
@@ -159,6 +76,12 @@
   total_freed_bytes_ += GetFreedBytes() + GetFreedLargeObjectBytes();
   uint64_t end_time = NanoTime();
   duration_ns_ = end_time - start_time;
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+    // The entire GC was paused, clear the fake pauses which might be in the pause times and add
+    // the whole GC duration.
+    pause_times_.clear();
+    RegisterPause(duration_ns_);
+  }
   total_time_ns_ += GetDurationNs();
   for (uint64_t pause_time : pause_times_) {
     pause_histogram_.AddValue(pause_time / 1000);
@@ -213,6 +136,16 @@
   total_freed_bytes_ = 0;
 }
 
+GarbageCollector::ScopedPause::ScopedPause(GarbageCollector* collector)
+    : start_time_(NanoTime()), collector_(collector) {  // Clock starts before SuspendAll().
+  Runtime::Current()->GetThreadList()->SuspendAll();
+}
+
+GarbageCollector::ScopedPause::~ScopedPause() {
+  collector_->RegisterPause(NanoTime() - start_time_);  // Measured before ResumeAll() runs.
+  Runtime::Current()->GetThreadList()->ResumeAll();
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index b19ac3f..ca4a1d5 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -35,6 +35,16 @@
 
 class GarbageCollector {
  public:
+  class SCOPED_LOCKABLE ScopedPause {  // Suspends all threads for the lifetime of the object.
+   public:
+    explicit ScopedPause(GarbageCollector* collector) EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_);
+    ~ScopedPause() UNLOCK_FUNCTION();  // Registers the pause length, then resumes threads.
+
+   private:
+    const uint64_t start_time_;  // NanoTime() sampled in the constructor.
+    GarbageCollector* const collector_;  // Collector whose RegisterPause() gets the length.
+  };
+
   GarbageCollector(Heap* heap, const std::string& name);
   virtual ~GarbageCollector() { }
 
@@ -125,20 +135,8 @@
   }
 
  protected:
-  // The initial phase. Done without mutators paused.
-  virtual void InitializePhase() = 0;
-
-  // Mark all reachable objects, done concurrently.
-  virtual void MarkingPhase() = 0;
-
-  // Phase of the GC which is run with mutator lock exclusively held.
-  virtual void PausePhase();
-
-  // Called with mutators running.
-  virtual void ReclaimPhase() = 0;
-
-  // Called after the GC is finished. Done without mutators paused.
-  virtual void FinishPhase() = 0;
+  // Run all of the GC phases.
+  virtual void RunPhases() = 0;
 
   // Revoke all the thread-local buffers.
   virtual void RevokeAllThreadLocalBuffers() = 0;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 007eb23..9cd740e 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -130,9 +130,37 @@
     // Always clear soft references if a non-sticky collection.
     clear_soft_references_ = GetGcType() != collector::kGcTypeSticky;
   }
-  // Do any pre GC verification.
-  timings_.NewSplit("PreGcVerification");
-  heap_->PreGcVerification(this);
+}
+
+void MarkSweep::RunPhases() {  // Runs all mark sweep phases and the heap verification hooks.
+  Thread* self = Thread::Current();
+  InitializePhase();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  if (IsConcurrent()) {  // Mark under the shared mutator lock, then pause.
+    GetHeap()->PreGcVerification(this);  // Takes its own pause when a check is enabled.
+    {
+      ReaderMutexLock mu(self, *Locks::mutator_lock_);
+      MarkingPhase();
+    }
+    ScopedPause pause(this);  // Held until the end of this branch.
+    GetHeap()->PrePauseRosAllocVerification(this);
+    PausePhase();
+    RevokeAllThreadLocalBuffers();
+  } else {  // Non concurrent: marking also runs inside the pause.
+    ScopedPause pause(this);
+    GetHeap()->PreGcVerificationPaused(this);
+    MarkingPhase();
+    GetHeap()->PrePauseRosAllocVerification(this);
+    PausePhase();
+    RevokeAllThreadLocalBuffers();
+  }
+  {
+    // Sweeping is always done concurrently, even for non concurrent mark sweep.
+    ReaderMutexLock mu(self, *Locks::mutator_lock_);
+    ReclaimPhase();
+  }
+  GetHeap()->PostGcVerification(this);  // Takes its own pause when a check is enabled.
+  FinishPhase();
+}
 
 void MarkSweep::ProcessReferences(Thread* self) {
@@ -166,7 +194,7 @@
   }
   ProcessReferences(self);
   {
-    timings_.NewSplit("SwapStacks");
+    TimingLogger::ScopedSplit split("SwapStacks", &timings_);
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     heap_->SwapStacks(self);
     live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
@@ -177,13 +205,11 @@
   timings_.StartSplit("PreSweepingGcVerification");
   heap_->PreSweepingGcVerification(this);
   timings_.EndSplit();
-  if (IsConcurrent()) {
-    // Disallow new system weaks to prevent a race which occurs when someone adds a new system
-    // weak before we sweep them. Since this new system weak may not be marked, the GC may
-    // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
-    // reference to a string that is about to be swept.
-    Runtime::Current()->DisallowNewSystemWeaks();
-  }
+  // Disallow new system weaks to prevent a race which occurs when someone adds a new system
+  // weak before we sweep them. Since this new system weak may not be marked, the GC may
+  // incorrectly sweep it. This also fixes a race where interning may attempt to return a strong
+  // reference to a string that is about to be swept.
+  Runtime::Current()->DisallowNewSystemWeaks();
 }
 
 void MarkSweep::PreCleanCards() {
@@ -265,9 +291,7 @@
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
   Thread* self = Thread::Current();
   SweepSystemWeaks(self);
-  if (IsConcurrent()) {
-    Runtime::Current()->AllowNewSystemWeaks();
-  }
+  Runtime::Current()->AllowNewSystemWeaks();
   {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
 
@@ -1256,9 +1280,6 @@
 
 void MarkSweep::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
-  // Can't enqueue references if we hold the mutator lock.
-  timings_.NewSplit("PostGcVerification");
-  heap_->PostGcVerification(this);
   if (kCountScannedTypes) {
     VLOG(gc) << "MarkSweep scanned classes=" << class_count_ << " arrays=" << array_count_
              << " other=" << other_count_;
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 41a7764..0c5a0da 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -56,11 +56,12 @@
 
   ~MarkSweep() {}
 
-  virtual void InitializePhase() OVERRIDE;
-  virtual void MarkingPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void PausePhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void ReclaimPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  virtual void FinishPhase() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void RunPhases() OVERRIDE NO_THREAD_SAFETY_ANALYSIS;
+  void InitializePhase();
+  void MarkingPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PausePhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ReclaimPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FinishPhase() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MarkReachableObjects()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 3b9e853..65bbbd2 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -106,7 +106,37 @@
       bytes_promoted_since_last_whole_heap_collection_(0),
       whole_heap_collection_(true),
       whole_heap_collection_interval_counter_(0),
-      collector_name_(name_) {
+      collector_name_(name_),
+      swap_semi_spaces_(true) {
+}
+
+void SemiSpace::RunPhases() {  // Runs all semi space phases and the heap verification hooks.
+  Thread* self = Thread::Current();
+  InitializePhase();
+  // Semi-space collector is special since it is sometimes called with the mutators suspended
+  // during the zygote creation and collector transitions. If we already exclusively hold the
+  // mutator lock, then we can't lock it again since it will cause a deadlock.
+  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {  // No ScopedPause on this path.
+    GetHeap()->PreGcVerificationPaused(this);
+    GetHeap()->PrePauseRosAllocVerification(this);
+    MarkingPhase();
+    ReclaimPhase();
+    GetHeap()->PostGcVerificationPaused(this);
+  } else {
+    Locks::mutator_lock_->AssertNotHeld(self);
+    {
+      ScopedPause pause(this);  // Pause covers verification and marking only.
+      GetHeap()->PreGcVerificationPaused(this);
+      GetHeap()->PrePauseRosAllocVerification(this);
+      MarkingPhase();
+    }
+    {
+      ReaderMutexLock mu(self, *Locks::mutator_lock_);  // Reclaim under the shared lock.
+      ReclaimPhase();
+    }
+    GetHeap()->PostGcVerification(this);  // Takes its own pause when a check is enabled.
+  }
+  FinishPhase();
+}
 
 void SemiSpace::InitializePhase() {
@@ -119,9 +149,6 @@
   bytes_moved_ = 0;
   objects_moved_ = 0;
   self_ = Thread::Current();
-  // Do any pre GC verification.
-  timings_.NewSplit("PreGcVerification");
-  heap_->PreGcVerification(this);
   CHECK(from_space_->CanMoveObjects()) << "Attempting to move from " << *from_space_;
   // Set the initial bitmap.
   to_space_live_bitmap_ = to_space_->GetLiveBitmap();
@@ -140,6 +167,7 @@
 }
 
 void SemiSpace::MarkingPhase() {
+  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self_));
   if (kStoreStackTraces) {
     Locks::mutator_lock_->AssertExclusiveHeld(self_);
     // Store the stack traces into the runtime fault string in case we get a heap corruption
@@ -214,12 +242,51 @@
     heap_->RevokeAllThreadLocalAllocationStacks(self_);
   }
   heap_->SwapStacks(self_);
-  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
-  MarkRoots();
-  // Mark roots of immune spaces.
-  UpdateAndMarkModUnion();
-  // Recursively mark remaining objects.
-  MarkReachableObjects();
+  {
+    WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
+    MarkRoots();
+    // Mark roots of immune spaces.
+    UpdateAndMarkModUnion();
+    // Recursively mark remaining objects.
+    MarkReachableObjects();
+  }
+  ProcessReferences(self_);
+  {
+    ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
+    SweepSystemWeaks();
+  }
+  timings_.NewSplit("RecordFree");
+  // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked
+  // before they are properly counted.
+  RevokeAllThreadLocalBuffers();
+  // Record freed memory.
+  uint64_t from_bytes = from_space_->GetBytesAllocated();
+  uint64_t to_bytes = bytes_moved_;
+  uint64_t from_objects = from_space_->GetObjectsAllocated();
+  uint64_t to_objects = objects_moved_;
+  CHECK_LE(to_objects, from_objects);
+  int64_t freed_bytes = from_bytes - to_bytes;
+  int64_t freed_objects = from_objects - to_objects;
+  freed_bytes_.FetchAndAdd(freed_bytes);
+  freed_objects_.FetchAndAdd(freed_objects);
+  // Note: Freed bytes can be negative if we copy from a compacted space to a free-list backed
+  // space.
+  heap_->RecordFree(freed_objects, freed_bytes);
+
+  // Clear and protect the from space.
+  from_space_->Clear();
+  VLOG(heap) << "Protecting space " << *from_space_;
+  if (kProtectFromSpace) {
+    from_space_->GetMemMap()->Protect(PROT_NONE);
+  } else {
+    from_space_->GetMemMap()->Protect(PROT_READ);
+  }
+  if (swap_semi_spaces_) {
+    heap_->SwapSemiSpaces();
+  }
+  timings_.StartSplit("PreSweepingGcVerification");
+  heap_->PreSweepingGcVerification(this);
+  timings_.EndSplit();
 }
 
 void SemiSpace::UpdateAndMarkModUnion() {
@@ -383,28 +450,6 @@
 
 void SemiSpace::ReclaimPhase() {
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
-  ProcessReferences(self_);
-  {
-    ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
-    SweepSystemWeaks();
-  }
-  // Record freed memory.
-  uint64_t from_bytes = from_space_->GetBytesAllocated();
-  uint64_t to_bytes = bytes_moved_;
-  uint64_t from_objects = from_space_->GetObjectsAllocated();
-  uint64_t to_objects = objects_moved_;
-  CHECK_LE(to_objects, from_objects);
-  int64_t freed_bytes = from_bytes - to_bytes;
-  int64_t freed_objects = from_objects - to_objects;
-  freed_bytes_.FetchAndAdd(freed_bytes);
-  freed_objects_.FetchAndAdd(freed_objects);
-  // Note: Freed bytes can be negative if we copy form a compacted space to a free-list backed
-  // space.
-  heap_->RecordFree(freed_objects, freed_bytes);
-
-  timings_.StartSplit("PreSweepingGcVerification");
-  heap_->PreSweepingGcVerification(this);
-  timings_.EndSplit();
   {
     WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     // Reclaim unmarked objects.
@@ -419,16 +464,6 @@
     TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
     GetHeap()->UnBindBitmaps();
   }
-  // TODO: Do this before doing verification since the from space may have objects which weren't
-  // moved and point to dead objects.
-  from_space_->Clear();
-  // Protect the from space.
-  VLOG(heap) << "Protecting space " << *from_space_;
-  if (kProtectFromSpace) {
-    from_space_->GetMemMap()->Protect(PROT_NONE);
-  } else {
-    from_space_->GetMemMap()->Protect(PROT_READ);
-  }
   if (saved_bytes_ > 0) {
     VLOG(heap) << "Avoided dirtying " << PrettySize(saved_bytes_);
   }
@@ -765,9 +800,6 @@
 
 void SemiSpace::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
-  Heap* heap = GetHeap();
-  timings_.NewSplit("PostGcVerification");
-  heap->PostGcVerification(this);
   // Null the "to" and "from" spaces since compacting from one to the other isn't valid until
   // further action is done by the heap.
   to_space_ = nullptr;
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 51b0869..d468561 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -61,12 +61,13 @@
 
   ~SemiSpace() {}
 
-  virtual void InitializePhase() OVERRIDE;
-  virtual void MarkingPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+  virtual void RunPhases() OVERRIDE NO_THREAD_SAFETY_ANALYSIS;
+  virtual void InitializePhase();
+  virtual void MarkingPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  virtual void ReclaimPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+  virtual void ReclaimPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
-  virtual void FinishPhase() OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual void FinishPhase() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void MarkReachableObjects()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
   virtual GcType GetGcType() const OVERRIDE {
@@ -82,6 +83,12 @@
   // Set the space where we copy objects from.
   void SetFromSpace(space::ContinuousMemMapAllocSpace* from_space);
 
+  // Set whether or not we swap the semi spaces in the heap. This needs to be done with mutators
+  // suspended.
+  void SetSwapSemiSpaces(bool swap_semi_spaces) {
+    swap_semi_spaces_ = swap_semi_spaces;
+  }
+
   // Initializes internal structures.
   void Init();
 
@@ -253,6 +260,9 @@
   // collections.
   static constexpr int kDefaultWholeHeapCollectionInterval = 5;
 
+  // Whether or not we swap the semi spaces in the heap during the marking phase.
+  bool swap_semi_spaces_;
+
  private:
   friend class BitmapSetSlowPathVisitor;
   DISALLOW_COPY_AND_ASSIGN(SemiSpace);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a9799b9..b57fc69 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -93,8 +93,9 @@
            CollectorType foreground_collector_type, CollectorType background_collector_type,
            size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
            size_t long_pause_log_threshold, size_t long_gc_log_threshold,
-           bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
-           bool verify_post_gc_heap, bool verify_pre_gc_rosalloc,
+           bool ignore_max_footprint, bool use_tlab,
+           bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
+           bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
            bool verify_post_gc_rosalloc)
     : non_moving_space_(nullptr),
       rosalloc_space_(nullptr),
@@ -136,9 +137,11 @@
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
       verify_pre_gc_heap_(verify_pre_gc_heap),
+      verify_pre_sweeping_heap_(verify_pre_sweeping_heap),
       verify_post_gc_heap_(verify_post_gc_heap),
       verify_mod_union_table_(false),
       verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
+      verify_pre_sweeping_rosalloc_(verify_pre_sweeping_rosalloc),
       verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
       allocation_rate_(0),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
@@ -1455,7 +1458,6 @@
     usleep(1000);
   }
   tl->SuspendAll();
-  PreGcRosAllocVerification(&semi_space_collector_->GetTimings());
   switch (collector_type) {
     case kCollectorTypeSS:
       // Fall-through.
@@ -1490,7 +1492,6 @@
     }
   }
   ChangeCollector(collector_type);
-  PostGcRosAllocVerification(&semi_space_collector_->GetTimings());
   tl->ResumeAll();
   // Can't call into java code with all threads suspended.
   EnqueueClearedReferences();
@@ -1805,6 +1806,8 @@
   CHECK(kMovingCollector);
   CHECK_NE(target_space, source_space) << "In-place compaction currently unsupported";
   if (target_space != source_space) {
+    // Don't swap spaces since this isn't a typical semi space collection.
+    semi_space_collector_->SetSwapSemiSpaces(false);
     semi_space_collector_->SetFromSpace(source_space);
     semi_space_collector_->SetToSpace(target_space);
     semi_space_collector_->Run(kGcCauseCollectorTransition, false);
@@ -1876,6 +1879,7 @@
       semi_space_collector_->SetFromSpace(bump_pointer_space_);
       semi_space_collector_->SetToSpace(temp_space_);
       collector = semi_space_collector_;
+      semi_space_collector_->SetSwapSemiSpaces(true);
     } else if (collector_type_ == kCollectorTypeCC) {
       gc_type = concurrent_copying_collector_->GetGcType();
       collector = concurrent_copying_collector_;
@@ -1895,14 +1899,7 @@
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
   ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
-  if (compacting_gc) {
-    runtime->GetThreadList()->SuspendAll();
-    collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
-    SwapSemiSpaces();
-    runtime->GetThreadList()->ResumeAll();
-  } else {
-    collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
-  }
+  collector->Run(gc_cause, clear_soft_references || runtime->IsZygote());
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
   RequestHeapTrim();
@@ -1930,7 +1927,7 @@
     std::ostringstream pause_string;
     for (size_t i = 0; i < pause_times.size(); ++i) {
         pause_string << PrettyDuration((pause_times[i] / 1000) * 1000)
-                     << ((i != pause_times.size() - 1) ? ", " : "");
+                     << ((i != pause_times.size() - 1) ? "," : "");
     }
     LOG(INFO) << gc_cause << " " << collector->GetName()
               << " GC freed "  <<  collector->GetFreedObjects() << "("
@@ -2367,99 +2364,110 @@
 static void IdentityMarkHeapReferenceCallback(mirror::HeapReference<mirror::Object>*, void*) {
 }
 
-void Heap::PreGcVerification(collector::GarbageCollector* gc) {
-  ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  Thread* self = Thread::Current();
-
+void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) {
+  Thread* const self = Thread::Current();
+  TimingLogger* const timings = &gc->GetTimings();
   if (verify_pre_gc_heap_) {
-    thread_list->SuspendAll();
-    {
-      ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      if (!VerifyHeapReferences()) {
-        LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed";
-      }
+    TimingLogger::ScopedSplit split("PreGcVerifyHeapReferences", timings);
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    if (!VerifyHeapReferences()) {
+      LOG(FATAL) << "Pre " << gc->GetName() << " heap verification failed";
     }
-    thread_list->ResumeAll();
   }
-
   // Check that all objects which reference things in the live stack are on dirty cards.
   if (verify_missing_card_marks_) {
-    thread_list->SuspendAll();
-    {
-      ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      SwapStacks(self);
-      // Sort the live stack so that we can quickly binary search it later.
-      if (!VerifyMissingCardMarks()) {
-        LOG(FATAL) << "Pre " << gc->GetName() << " missing card mark verification failed";
-      }
-      SwapStacks(self);
+    TimingLogger::ScopedSplit split("PreGcVerifyMissingCardMarks", timings);
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    SwapStacks(self);
+    // Sort the live stack so that we can quickly binary search it later.
+    if (!VerifyMissingCardMarks()) {
+      LOG(FATAL) << "Pre " << gc->GetName() << " missing card mark verification failed";
     }
-    thread_list->ResumeAll();
+    SwapStacks(self);
   }
-
   if (verify_mod_union_table_) {
-    thread_list->SuspendAll();
+    TimingLogger::ScopedSplit split("PreGcVerifyModUnionTables", timings);
     ReaderMutexLock reader_lock(self, *Locks::heap_bitmap_lock_);
     for (const auto& table_pair : mod_union_tables_) {
       accounting::ModUnionTable* mod_union_table = table_pair.second;
       mod_union_table->UpdateAndMarkReferences(IdentityMarkHeapReferenceCallback, nullptr);
       mod_union_table->Verify();
     }
-    thread_list->ResumeAll();
+  }
+}
+
+void Heap::PreGcVerification(collector::GarbageCollector* gc) {
+  // Pause only for checks PreGcVerificationPaused runs; rosalloc is verified in the GC pause.
+  if (verify_pre_gc_heap_ || verify_missing_card_marks_ || verify_mod_union_table_) {
+    collector::GarbageCollector::ScopedPause pause(gc);
+    PreGcVerificationPaused(gc);
+  }
+}
+
+void Heap::PrePauseRosAllocVerification(collector::GarbageCollector* gc) {
+  // TODO: Add a new runtime option for this?
+  if (verify_pre_gc_rosalloc_) {
+    RosAllocVerification(&gc->GetTimings(), "PreGcRosAllocVerification");
   }
 }
 
 void Heap::PreSweepingGcVerification(collector::GarbageCollector* gc) {
+  Thread* const self = Thread::Current();
+  TimingLogger* const timings = &gc->GetTimings();
   // Called before sweeping occurs since we want to make sure we are not going so reclaim any
   // reachable objects.
-  if (verify_post_gc_heap_) {
-    Thread* self = Thread::Current();
+  if (verify_pre_sweeping_heap_) {
+    TimingLogger::ScopedSplit split("PreSweepingVerifyHeapReferences", timings);
     CHECK_NE(self->GetState(), kRunnable);
-    {
-      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-      // Swapping bound bitmaps does nothing.
-      gc->SwapBitmaps();
-      SwapSemiSpaces();
-      if (!VerifyHeapReferences()) {
-        LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed";
-      }
-      SwapSemiSpaces();
-      gc->SwapBitmaps();
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    // Swapping bound bitmaps does nothing.
+    gc->SwapBitmaps();
+    SwapSemiSpaces();  // NOTE(review): SemiSpace::MarkingPhase may swap before this call; confirm.
+    if (!VerifyHeapReferences()) {
+      LOG(FATAL) << "Pre sweeping " << gc->GetName() << " GC verification failed";
+    }
+    SwapSemiSpaces();
+    gc->SwapBitmaps();
+  }
+  if (verify_pre_sweeping_rosalloc_) {
+    RosAllocVerification(timings, "PreSweepingRosAllocVerification");
+  }
+}
+
+void Heap::PostGcVerificationPaused(collector::GarbageCollector* gc) {
+  // Runs the enabled post GC checks; callers invoke this with the mutators already suspended.
+  Thread* const self = Thread::Current();
+  TimingLogger* const timings = &gc->GetTimings();
+  if (verify_system_weaks_) {  // NOTE(review): the cast below assumes gc is a MarkSweep.
+    ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_);
+    collector::MarkSweep* mark_sweep = down_cast<collector::MarkSweep*>(gc);
+    mark_sweep->VerifySystemWeaks();
+  }
+  if (verify_post_gc_rosalloc_) {
+    RosAllocVerification(timings, "PostGcRosAllocVerification");
+  }
+  if (verify_post_gc_heap_) {
+    TimingLogger::ScopedSplit split("PostGcVerifyHeapReferences", timings);
+    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    if (!VerifyHeapReferences()) {
+      LOG(FATAL) << "Post " << gc->GetName() << " heap verification failed";
     }
   }
 }
 
 void Heap::PostGcVerification(collector::GarbageCollector* gc) {
-  if (verify_system_weaks_) {
-    Thread* self = Thread::Current();
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    collector::MarkSweep* mark_sweep = down_cast<collector::MarkSweep*>(gc);
-    mark_sweep->VerifySystemWeaks();
+  if (verify_system_weaks_ || verify_post_gc_rosalloc_ || verify_post_gc_heap_) {
+    collector::GarbageCollector::ScopedPause pause(gc);  // Pause only when a check is enabled.
+    PostGcVerificationPaused(gc);  // Run the post GC checks, not the pre GC ones.
   }
 }
 
-void Heap::PreGcRosAllocVerification(TimingLogger* timings) {
-  if (verify_pre_gc_rosalloc_) {
-    TimingLogger::ScopedSplit split("PreGcRosAllocVerification", timings);
-    for (const auto& space : continuous_spaces_) {
-      if (space->IsRosAllocSpace()) {
-        VLOG(heap) << "PreGcRosAllocVerification : " << space->GetName();
-        space::RosAllocSpace* rosalloc_space = space->AsRosAllocSpace();
-        rosalloc_space->Verify();
-      }
-    }
-  }
-}
-
-void Heap::PostGcRosAllocVerification(TimingLogger* timings) {
-  if (verify_post_gc_rosalloc_) {
-    TimingLogger::ScopedSplit split("PostGcRosAllocVerification", timings);
-    for (const auto& space : continuous_spaces_) {
-      if (space->IsRosAllocSpace()) {
-        VLOG(heap) << "PostGcRosAllocVerification : " << space->GetName();
-        space::RosAllocSpace* rosalloc_space = space->AsRosAllocSpace();
-        rosalloc_space->Verify();
-      }
+void Heap::RosAllocVerification(TimingLogger* timings, const char* name) {
+  TimingLogger::ScopedSplit split(name, timings);
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsRosAllocSpace()) {
+      VLOG(heap) << name << " : " << space->GetName();
+      space->AsRosAllocSpace()->Verify();
     }
   }
 }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index c37bb05..631397b 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -152,8 +152,9 @@
                 CollectorType foreground_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
-                bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
-                bool verify_post_gc_heap, bool verify_pre_gc_rosalloc,
+                bool ignore_max_footprint, bool use_tlab,
+                bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
+                bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
                 bool verify_post_gc_rosalloc);
 
   ~Heap();
@@ -449,10 +450,7 @@
   void RevokeRosAllocThreadLocalBuffers(Thread* thread);
   void RevokeAllThreadLocalBuffers();
   void AssertAllBumpPointerSpaceThreadLocalBuffersAreRevoked();
-
-  void PreGcRosAllocVerification(TimingLogger* timings)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void PostGcRosAllocVerification(TimingLogger* timings)
+  void RosAllocVerification(TimingLogger* timings, const char* name)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   accounting::HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
@@ -666,11 +664,18 @@
                      Locks::heap_bitmap_lock_,
                      Locks::thread_suspend_count_lock_);
 
-  void PreGcVerification(collector::GarbageCollector* gc);
+  void PreGcVerification(collector::GarbageCollector* gc)
+      LOCKS_EXCLUDED(Locks::mutator_lock_);
+  void PreGcVerificationPaused(collector::GarbageCollector* gc)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PrePauseRosAllocVerification(collector::GarbageCollector* gc)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void PreSweepingGcVerification(collector::GarbageCollector* gc)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void PostGcVerification(collector::GarbageCollector* gc)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      LOCKS_EXCLUDED(Locks::mutator_lock_);
+  void PostGcVerificationPaused(collector::GarbageCollector* gc)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Update the watermark for the native allocated bytes based on the current number of native
   // bytes allocated and the target utilization ratio.
@@ -857,28 +862,35 @@
   const bool verify_missing_card_marks_;
   const bool verify_system_weaks_;
   const bool verify_pre_gc_heap_;
+  const bool verify_pre_sweeping_heap_;
   const bool verify_post_gc_heap_;
   const bool verify_mod_union_table_;
   bool verify_pre_gc_rosalloc_;
+  bool verify_pre_sweeping_rosalloc_;
   bool verify_post_gc_rosalloc_;
 
   // RAII that temporarily disables the rosalloc verification during
   // the zygote fork.
   class ScopedDisableRosAllocVerification {
    private:
-    Heap* heap_;
-    bool orig_verify_pre_gc_;
-    bool orig_verify_post_gc_;
+    Heap* const heap_;
+    const bool orig_verify_pre_gc_;
+    const bool orig_verify_pre_sweeping_;
+    const bool orig_verify_post_gc_;
+
    public:
     explicit ScopedDisableRosAllocVerification(Heap* heap)
         : heap_(heap),
           orig_verify_pre_gc_(heap_->verify_pre_gc_rosalloc_),
+          orig_verify_pre_sweeping_(heap_->verify_pre_sweeping_rosalloc_),
           orig_verify_post_gc_(heap_->verify_post_gc_rosalloc_) {
       heap_->verify_pre_gc_rosalloc_ = false;
+      heap_->verify_pre_sweeping_rosalloc_ = false;
       heap_->verify_post_gc_rosalloc_ = false;
     }
     ~ScopedDisableRosAllocVerification() {
       heap_->verify_pre_gc_rosalloc_ = orig_verify_pre_gc_;
+      heap_->verify_pre_sweeping_rosalloc_ = orig_verify_pre_sweeping_;
       heap_->verify_post_gc_rosalloc_ = orig_verify_post_gc_;
     }
   };
@@ -955,6 +967,7 @@
   const bool running_on_valgrind_;
   const bool use_tlab_;
 
+  friend class collector::GarbageCollector;
   friend class collector::MarkSweep;
   friend class collector::SemiSpace;
   friend class ReferenceQueue;
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index ce11b3d..dc2769e 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -94,9 +94,8 @@
   mark_bitmap_->CopyFrom(live_bitmap_.get());
 }
 
-// TODO: Use something cleaner than 0xFFFFFFFF.
 LargeObjectMapSpace::LargeObjectMapSpace(const std::string& name)
-    : LargeObjectSpace(name, reinterpret_cast<byte*>(0xFFFFFFFF), nullptr),
+    : LargeObjectSpace(name, nullptr, nullptr),
       lock_("large object map space lock", kAllocSpaceLock) {}
 
 LargeObjectMapSpace* LargeObjectMapSpace::Create(const std::string& name) {
@@ -123,7 +122,15 @@
   size_t allocation_size = mem_map->Size();
   DCHECK(bytes_allocated != nullptr);
-  begin_ = std::min(begin_, reinterpret_cast<byte*>(obj));
-  end_ = std::max(end_, reinterpret_cast<byte*>(obj) + allocation_size);
+  // begin_ and end_ are both constructed as nullptr, so null means "not set yet". A plain
+  // std::min against a null begin_ would leave it at nullptr forever.
+  byte* obj_begin = reinterpret_cast<byte*>(obj);
+  if (begin_ == nullptr || obj_begin < begin_) {
+    begin_ = obj_begin;
+  }
+  byte* obj_end = obj_begin + allocation_size;
+  if (end_ == nullptr || obj_end > end_) {
+    end_ = obj_end;
+  }
   *bytes_allocated = allocation_size;
   if (usable_size != nullptr) {
     *usable_size = allocation_size;