Merge "Revert "Improve the thread flip.""
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 42816a0..7afe6f9 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -435,8 +435,10 @@
   gc_barrier_->Init(self, 0);
   ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);
   FlipCallback flip_callback(this);
+  heap_->ThreadFlipBegin(self);  // Sync with JNI critical calls.
   size_t barrier_count = Runtime::Current()->FlipThreadRoots(
       &thread_flip_visitor, &flip_callback, this);
+  heap_->ThreadFlipEnd(self);
   {
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
     gc_barrier_->Increment(self, barrier_count);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 638c1d8..39f26e7 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -878,13 +878,9 @@
   MutexLock mu(self, *thread_flip_lock_);
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
-  if (thread_flip_running_) {
-    TimingLogger::ScopedTiming split("IncrementDisableThreadFlip",
-                                     GetCurrentGcIteration()->GetTimings());
-    while (thread_flip_running_) {
-      has_waited = true;
-      thread_flip_cond_->Wait(self);
-    }
+  while (thread_flip_running_) {
+    has_waited = true;
+    thread_flip_cond_->Wait(self);
   }
   ++disable_thread_flip_count_;
   if (has_waited) {
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 216d8a7..3aa1fc2 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -224,7 +224,6 @@
         thread_to_pass = this;
       }
       MutexLock mu(thread_to_pass, *Locks::thread_suspend_count_lock_);
-      ScopedTransitioningToRunnable scoped_transitioning_to_runnable(this);
       old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
       DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
       while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 0457ba0..b35a614 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1217,8 +1217,10 @@
   ScopedTrace trace(__FUNCTION__);
   VLOG(threads) << this << " self-suspending";
   // Make thread appear suspended to other threads, release mutator_lock_.
+  tls32_.suspended_at_suspend_check = true;
   // Transition to suspended and back to runnable, re-acquire share on mutator_lock_.
   ScopedThreadSuspension(this, kSuspended);
+  tls32_.suspended_at_suspend_check = false;
   VLOG(threads) << this << " self-reviving";
 }
 
@@ -1633,7 +1635,7 @@
   }
   tlsPtr_.flip_function = nullptr;
   tlsPtr_.thread_local_mark_stack = nullptr;
-  tls32_.is_transitioning_to_runnable = false;
+  tls32_.suspended_at_suspend_check = false;
 }
 
 bool Thread::IsStillStarting() const {
@@ -1771,7 +1773,7 @@
   CHECK(tlsPtr_.checkpoint_function == nullptr);
   CHECK_EQ(checkpoint_overflow_.size(), 0u);
   CHECK(tlsPtr_.flip_function == nullptr);
-  CHECK_EQ(tls32_.is_transitioning_to_runnable, false);
+  CHECK_EQ(tls32_.suspended_at_suspend_check, false);
 
   // Make sure we processed all deoptimization requests.
   CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
diff --git a/runtime/thread.h b/runtime/thread.h
index 1c2d4ab..840b781 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1085,12 +1085,8 @@
     return tlsPtr_.nested_signal_state;
   }
 
-  bool IsTransitioningToRunnable() const {
-    return tls32_.is_transitioning_to_runnable;
-  }
-
-  void SetIsTransitioningToRunnable(bool value) {
-    tls32_.is_transitioning_to_runnable = value;
+  bool IsSuspendedAtSuspendCheck() const {
+    return tls32_.suspended_at_suspend_check;
   }
 
   void PushVerifier(verifier::MethodVerifier* verifier);
@@ -1268,7 +1264,7 @@
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
       thread_exit_check_count(0), handling_signal_(false),
-      is_transitioning_to_runnable(false), ready_for_debug_invoke(false),
+      suspended_at_suspend_check(false), ready_for_debug_invoke(false),
       debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true),
       disable_thread_flip_count(0) {
     }
@@ -1310,10 +1306,10 @@
     // True if signal is being handled by this thread.
     bool32_t handling_signal_;
 
-    // True if the thread is in TransitionFromSuspendedToRunnable(). This is used to distinguish the
-    // non-runnable threads (eg. kNative, kWaiting) that are about to transition to runnable from
-    // the rest of them.
-    bool32_t is_transitioning_to_runnable;
+    // True if the thread is suspended in FullSuspendCheck(). This is
+    // used to distinguish runnable threads that are suspended due to
+    // a normal suspend check from other threads.
+    bool32_t suspended_at_suspend_check;
 
     // True if the thread has been suspended by a debugger event. This is
     // used to invoke method from the debugger which is only allowed when
@@ -1592,26 +1588,6 @@
   Thread* const self_;
 };
 
-class ScopedTransitioningToRunnable : public ValueObject {
- public:
-  explicit ScopedTransitioningToRunnable(Thread* self)
-      : self_(self) {
-    DCHECK_EQ(self, Thread::Current());
-    if (kUseReadBarrier) {
-      self_->SetIsTransitioningToRunnable(true);
-    }
-  }
-
-  ~ScopedTransitioningToRunnable() {
-    if (kUseReadBarrier) {
-      self_->SetIsTransitioningToRunnable(false);
-    }
-  }
-
- private:
-  Thread* const self_;
-};
-
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread);
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 688514c..419ecec 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -405,8 +405,6 @@
   Locks::thread_suspend_count_lock_->AssertNotHeld(self);
   CHECK_NE(self->GetState(), kRunnable);
 
-  collector->GetHeap()->ThreadFlipBegin(self);  // Sync with JNI critical calls.
-
   SuspendAllInternal(self, self, nullptr);
 
   // Run the flip callback for the collector.
@@ -416,31 +414,26 @@
   collector->RegisterPause(NanoTime() - start_time);
 
   // Resume runnable threads.
-  size_t runnable_thread_count = 0;
+  std::vector<Thread*> runnable_threads;
   std::vector<Thread*> other_threads;
   {
-    TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
     MutexLock mu(self, *Locks::thread_list_lock_);
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     --suspend_all_count_;
     for (const auto& thread : list_) {
-      // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked by
-      // a checkpoint) may cause the flip function to be run for a runnable/suspended thread before
-      // a runnable thread runs it for itself or we run it for a suspended thread below.
-      thread->SetFlipFunction(thread_flip_visitor);
       if (thread == self) {
         continue;
       }
-      // Resume early the threads that were runnable but are suspended just for this thread flip or
-      // about to transition from non-runnable (eg. kNative at the SOA entry in a JNI function) to
-      // runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable), or waiting
-      // for the thread flip to end at the JNI critical section entry (kWaitingForGcThreadFlip),
-      ThreadState state = thread->GetState();
-      if (state == kWaitingForGcThreadFlip ||
-          thread->IsTransitioningToRunnable()) {
+      // Set the flip function for both runnable and suspended threads
+      // because Thread::DumpState/DumpJavaStack() (invoked by a
+      // checkpoint) may cause the flip function to be run for a
+      // runnable/suspended thread before a runnable threads runs it
+      // for itself or we run it for a suspended thread below.
+      thread->SetFlipFunction(thread_flip_visitor);
+      if (thread->IsSuspendedAtSuspendCheck()) {
         // The thread will resume right after the broadcast.
         thread->ModifySuspendCount(self, -1, nullptr, false);
-        ++runnable_thread_count;
+        runnable_threads.push_back(thread);
       } else {
         other_threads.push_back(thread);
       }
@@ -448,11 +441,8 @@
     Thread::resume_cond_->Broadcast(self);
   }
 
-  collector->GetHeap()->ThreadFlipEnd(self);
-
   // Run the closure on the other threads and let them resume.
   {
-    TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
     ReaderMutexLock mu(self, *Locks::mutator_lock_);
     for (const auto& thread : other_threads) {
       Closure* flip_func = thread->GetFlipFunction();
@@ -461,15 +451,11 @@
       }
     }
     // Run it for self.
-    Closure* flip_func = self->GetFlipFunction();
-    if (flip_func != nullptr) {
-      flip_func->Run(self);
-    }
+    thread_flip_visitor->Run(self);
   }
 
   // Resume other threads.
   {
-    TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
     MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (const auto& thread : other_threads) {
       thread->ModifySuspendCount(self, -1, nullptr, false);
@@ -477,7 +463,7 @@
     Thread::resume_cond_->Broadcast(self);
   }
 
-  return runnable_thread_count + other_threads.size() + 1;  // +1 for self.
+  return runnable_threads.size() + other_threads.size() + 1;  // +1 for self.
 }
 
 void ThreadList::SuspendAll(const char* cause, bool long_suspend) {