Fail threads attaching during runtime shutdown.

Introduce a counter that tracks threads being born, and make runtime
shutdown wait for any in-flight thread births to complete. Don't allow
thread birth to begin once runtime shutdown has started; attaching
threads now report failure instead.
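
For reference, a minimal sketch of the birth/shutdown handshake this
change implements, written with standard C++ primitives instead of the
runtime's Mutex/ConditionVariable (the struct and names below are
illustrative only, not the runtime API):

    #include <condition_variable>
    #include <cstddef>
    #include <mutex>

    // Illustrative stand-in for the Runtime shutdown state; the real change
    // uses Locks::runtime_shutdown_lock_, shutdown_cond_, threads_being_born_,
    // shutting_down_started_ and shutting_down_.
    struct ShutdownState {
      std::mutex lock;
      std::condition_variable cond;
      std::size_t threads_being_born = 0;
      bool shutting_down_started = false;
      bool shutting_down = false;

      // Called before spawning/attaching a thread; fails once shutdown has begun.
      bool StartThreadBirth() {
        std::lock_guard<std::mutex> mu(lock);
        if (shutting_down) {
          return false;  // Caller reports JNI_ERR or throws InternalError.
        }
        ++threads_being_born;
        return true;
      }

      // Called once the new thread has finished Thread::Init and is registered.
      void EndThreadBirth() {
        std::lock_guard<std::mutex> mu(lock);
        --threads_being_born;
        if (shutting_down_started && threads_being_born == 0) {
          cond.notify_all();
        }
      }

      // Runtime::~Runtime(): wait for in-flight births, then forbid new ones.
      void BeginShutdown() {
        std::unique_lock<std::mutex> mu(lock);
        shutting_down_started = true;
        cond.wait(mu, [this] { return threads_being_born == 0; });
        shutting_down = true;
      }
    };

In the actual change the counter and flags live on Runtime and are
guarded by the new Locks::runtime_shutdown_lock_.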

Bug: 7000936

Change-Id: Ib0d78f78c0ff126a4b5d3b5a6f1a2ff8f5061ae9
diff --git a/src/compiler.cc b/src/compiler.cc
index af8292f..edadc12 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -1019,7 +1019,7 @@
     WorkerThread* worker = reinterpret_cast<WorkerThread*>(arg);
     Runtime* runtime = Runtime::Current();
     if (worker->spawn_) {
-      runtime->AttachCurrentThread("Compiler Worker", true, NULL);
+      CHECK(runtime->AttachCurrentThread("Compiler Worker", true, NULL));
     }
     worker->Run();
     if (worker->spawn_) {
diff --git a/src/debugger.cc b/src/debugger.cc
index 8477054..ee52984 100644
--- a/src/debugger.cc
+++ b/src/debugger.cc
@@ -1492,8 +1492,14 @@
         // query all threads, so it's easier if we just don't tell them about this thread.
         return;
       }
-      if (thread_group_ == NULL || t->GetThreadGroup(soa_) == thread_group_) {
-        thread_ids_.push_back(gRegistry->Add(soa_.Decode<Object*>(t->GetPeer())));
+      bool should_add = (thread_group_ == NULL);
+      Object* peer = soa_.Decode<Object*>(t->GetPeer());
+      if (!should_add) {
+        Object* group = soa_.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(peer);
+        should_add = (group == thread_group_);
+      }
+      if (should_add) {
+        thread_ids_.push_back(gRegistry->Add(peer));
       }
     }
 
diff --git a/src/heap.cc b/src/heap.cc
index 3ab6419..703549f 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -869,6 +869,10 @@
   Locks::mutator_lock_->AssertNotHeld(self);
   DCHECK_EQ(self->GetState(), kWaitingPerformingGc);
 
+  if (self->IsHandlingStackOverflow()) {
+    LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
+  }
+
   // Ensure there is only one GC at a time.
   bool start_collect = false;
   while (!start_collect) {
@@ -975,7 +979,7 @@
       }
     }
 
-    WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     if (gc_type == kGcTypePartial) {
       // Copy the mark bits over from the live bits, do this as early as possible or else we can
       // accidentally un-mark roots.
@@ -1921,13 +1925,24 @@
 
 void Heap::RequestConcurrentGC() {
   // Make sure that we can do a concurrent GC.
-  if (requesting_gc_ || !Runtime::Current()->IsFinishedStarting() ||
-      Runtime::Current()->IsShuttingDown() || !Runtime::Current()->IsConcurrentGcEnabled()) {
+  Runtime* runtime = Runtime::Current();
+  if (requesting_gc_ || runtime == NULL || !runtime->IsFinishedStarting() ||
+      !runtime->IsConcurrentGcEnabled()) {
+    return;
+  }
+  Thread* self = Thread::Current();
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    if (runtime->IsShuttingDown()) {
+      return;
+    }
+  }
+  if (self->IsHandlingStackOverflow()) {
     return;
   }
 
   requesting_gc_ = true;
-  JNIEnv* env = Thread::Current()->GetJniEnv();
+  JNIEnv* env = self->GetJniEnv();
   DCHECK(WellKnownClasses::java_lang_Daemons != NULL);
   DCHECK(WellKnownClasses::java_lang_Daemons_requestGC != NULL);
   env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
@@ -1937,8 +1952,11 @@
 }
 
 void Heap::ConcurrentGC(Thread* self) {
-  if (Runtime::Current()->IsShuttingDown() || !concurrent_gc_) {
-    return;
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    if (Runtime::Current()->IsShuttingDown() || !concurrent_gc_) {
+      return;
+    }
   }
 
   // TODO: We shouldn't need a WaitForConcurrentGcToComplete here since only
@@ -1976,13 +1994,20 @@
       return;
     }
   }
-  if (!Runtime::Current()->IsFinishedStarting() || Runtime::Current()->IsShuttingDown()) {
-    // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time)
-    // Also: we do not wish to start a heap trim if the runtime is shutting down.
-    return;
+
+  Thread* self = Thread::Current();
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    Runtime* runtime = Runtime::Current();
+    if (runtime == NULL || !runtime->IsFinishedStarting() || runtime->IsShuttingDown()) {
+      // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time)
+      // Also: we do not wish to start a heap trim if the runtime is shutting down (a racy check
+      // as we don't hold the lock while requesting the trim).
+      return;
+    }
   }
   last_trim_time_ = ms_time;
-  JNIEnv* env = Thread::Current()->GetJniEnv();
+  JNIEnv* env = self->GetJniEnv();
   DCHECK(WellKnownClasses::java_lang_Daemons != NULL);
   DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != NULL);
   env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
diff --git a/src/jdwp/jdwp_main.cc b/src/jdwp/jdwp_main.cc
index a09c488..0c3a9a0 100644
--- a/src/jdwp/jdwp_main.cc
+++ b/src/jdwp/jdwp_main.cc
@@ -284,7 +284,7 @@
 
 void JdwpState::Run() {
   Runtime* runtime = Runtime::Current();
-  runtime->AttachCurrentThread("JDWP", true, runtime->GetSystemThreadGroup());
+  CHECK(runtime->AttachCurrentThread("JDWP", true, runtime->GetSystemThreadGroup()));
 
   VLOG(jdwp) << "JDWP: thread running";
 
diff --git a/src/jni_internal.cc b/src/jni_internal.cc
index 0f93461..75ab1f0 100644
--- a/src/jni_internal.cc
+++ b/src/jni_internal.cc
@@ -481,9 +481,13 @@
     thread_group = args->group;
   }
 
-  runtime->AttachCurrentThread(thread_name, as_daemon, thread_group);
-  *p_env = Thread::Current()->GetJniEnv();
-  return JNI_OK;
+  if (!runtime->AttachCurrentThread(thread_name, as_daemon, thread_group)) {
+    *p_env = NULL;
+    return JNI_ERR;
+  } else {
+    *p_env = Thread::Current()->GetJniEnv();
+    return JNI_OK;
+  }
 }
 
 class SharedLibrary {
@@ -761,13 +765,11 @@
   }
 
   static jboolean ExceptionCheck(JNIEnv* env) {
-    ScopedObjectAccess soa(env);
-    return soa.Self()->IsExceptionPending() ? JNI_TRUE : JNI_FALSE;
+    return static_cast<JNIEnvExt*>(env)->self->IsExceptionPending() ? JNI_TRUE : JNI_FALSE;
   }
 
   static void ExceptionClear(JNIEnv* env) {
-    ScopedObjectAccess soa(env);
-    soa.Self()->ClearException();
+    static_cast<JNIEnvExt*>(env)->self->ClearException();
   }
 
   static void ExceptionDescribe(JNIEnv* env) {
@@ -907,8 +909,7 @@
 
   static jboolean IsSameObject(JNIEnv* env, jobject obj1, jobject obj2) {
     ScopedObjectAccess soa(env);
-    return (soa.Decode<Object*>(obj1) == soa.Decode<Object*>(obj2))
-        ? JNI_TRUE : JNI_FALSE;
+    return (soa.Decode<Object*>(obj1) == soa.Decode<Object*>(obj2)) ? JNI_TRUE : JNI_FALSE;
   }
 
   static jobject AllocObject(JNIEnv* env, jclass java_class) {
diff --git a/src/jni_internal_test.cc b/src/jni_internal_test.cc
index 329b51c..469a26c 100644
--- a/src/jni_internal_test.cc
+++ b/src/jni_internal_test.cc
@@ -1244,7 +1244,7 @@
   {
     CheckJniAbortCatcher check_jni_abort_catcher;
     env_->DeleteLocalRef(s);
-    check_jni_abort_catcher.Check("native code passing in reference to invalid local reference: 0x1400001");
+    check_jni_abort_catcher.Check("native code passing in reference to invalid local reference: 0x1500001");
   }
 
   s = env_->NewStringUTF("");
@@ -1325,7 +1325,7 @@
   {
     CheckJniAbortCatcher check_jni_abort_catcher;
     env_->DeleteGlobalRef(o);
-    check_jni_abort_catcher.Check("native code passing in reference to invalid global reference: 0x100056");
+    check_jni_abort_catcher.Check("native code passing in reference to invalid global reference: 0x10005a");
   }
 
   jobject o1 = env_->NewGlobalRef(s);
diff --git a/src/locks.cc b/src/locks.cc
index 20bf81c..312b021 100644
--- a/src/locks.cc
+++ b/src/locks.cc
@@ -20,41 +20,45 @@
 
 namespace art {
 
-ReaderWriterMutex* Locks::mutator_lock_ = NULL;
-Mutex* Locks::thread_list_lock_ = NULL;
+Mutex* Locks::abort_lock_ = NULL;
 Mutex* Locks::classlinker_classes_lock_ = NULL;
 ReaderWriterMutex* Locks::heap_bitmap_lock_ = NULL;
-Mutex* Locks::abort_lock_ = NULL;
 Mutex* Locks::logging_lock_ = NULL;
-Mutex* Locks::unexpected_signal_lock_ = NULL;
+ReaderWriterMutex* Locks::mutator_lock_ = NULL;
+Mutex* Locks::runtime_shutdown_lock_ = NULL;
+Mutex* Locks::thread_list_lock_ = NULL;
 Mutex* Locks::thread_suspend_count_lock_ = NULL;
+Mutex* Locks::unexpected_signal_lock_ = NULL;
 
 void Locks::Init() {
   if (logging_lock_ != NULL) {
     // Already initialized.
-    DCHECK(mutator_lock_ != NULL);
-    DCHECK(thread_list_lock_ != NULL);
+    DCHECK(abort_lock_ != NULL);
     DCHECK(classlinker_classes_lock_ != NULL);
     DCHECK(heap_bitmap_lock_ != NULL);
-    DCHECK(abort_lock_ != NULL);
     DCHECK(logging_lock_ != NULL);
-    DCHECK(unexpected_signal_lock_ != NULL);
+    DCHECK(mutator_lock_ != NULL);
+    DCHECK(thread_list_lock_ != NULL);
     DCHECK(thread_suspend_count_lock_ != NULL);
+    DCHECK(unexpected_signal_lock_ != NULL);
   } else {
     logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
     abort_lock_ = new Mutex("abort lock", kAbortLock, true);
-    DCHECK(mutator_lock_ == NULL);
-    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
-    DCHECK(thread_list_lock_ == NULL);
-    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
+
     DCHECK(classlinker_classes_lock_ == NULL);
     classlinker_classes_lock_ = new Mutex("ClassLinker classes lock", kClassLinkerClassesLock);
     DCHECK(heap_bitmap_lock_ == NULL);
     heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
-    DCHECK(unexpected_signal_lock_ == NULL);
-    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
+    DCHECK(mutator_lock_ == NULL);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
+    DCHECK(runtime_shutdown_lock_ == NULL);
+    runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", kRuntimeShutdownLock);
+    DCHECK(thread_list_lock_ == NULL);
+    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
     DCHECK(thread_suspend_count_lock_ == NULL);
     thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
+    DCHECK(unexpected_signal_lock_ == NULL);
+    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
   }
 }
 
diff --git a/src/locks.h b/src/locks.h
index c5821d8..cd2f8b7 100644
--- a/src/locks.h
+++ b/src/locks.h
@@ -42,10 +42,11 @@
   kLoadLibraryLock = 7,
   kClassLinkerClassesLock = 8,
   kThreadListLock = 9,
-  kHeapBitmapLock = 10,
-  kMonitorLock = 11,
-  kMutatorLock = 12,
-  kZygoteCreationLock = 13,
+  kRuntimeShutdownLock = 10,
+  kHeapBitmapLock = 11,
+  kMonitorLock = 12,
+  kMutatorLock = 13,
+  kZygoteCreationLock = 14,
   kMaxMutexLevel = kMutatorLock,
 };
 std::ostream& operator<<(std::ostream& os, const LockLevel& rhs);
@@ -118,9 +119,12 @@
   // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
   static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
 
+  // Guards shutdown of the runtime.
+  static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
+
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
 
   // Guards lists of classes within the class linker.
   static Mutex* classlinker_classes_lock_ ACQUIRED_AFTER(thread_list_lock_);
diff --git a/src/mutex.cc b/src/mutex.cc
index 47ad582..522f849 100644
--- a/src/mutex.cc
+++ b/src/mutex.cc
@@ -98,14 +98,15 @@
 
 BaseMutex::BaseMutex(const char* name, LockLevel level) : level_(level), name_(name) {}
 
-static void CheckUnattachedThread(LockLevel level) {
+static void CheckUnattachedThread(LockLevel level) NO_THREAD_SAFETY_ANALYSIS {
   // The check below enumerates the cases where we expect not to be able to sanity check locks
-  // on a thread. TODO: tighten this check.
+  // on a thread. Lock checking is disabled to avoid deadlock when checking the shutdown lock.
+  // TODO: tighten this check.
   if (kDebugLocking) {
     Runtime* runtime = Runtime::Current();
     CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown() ||
-          level == kDefaultMutexLevel  || level == kThreadListLock ||
-          level == kLoggingLock || level == kAbortLock);
+          level == kDefaultMutexLevel  || level == kRuntimeShutdownLock ||
+          level == kThreadListLock || level == kLoggingLock || level == kAbortLock);
   }
 }
 
@@ -195,7 +196,9 @@
   if (rc != 0) {
     errno = rc;
     // TODO: should we just not log at all if shutting down? this could be the logging mutex!
-    bool shutting_down = Runtime::Current()->IsShuttingDown();
+    MutexLock mu(*Locks::runtime_shutdown_lock_);
+    Runtime* runtime = Runtime::Current();
+    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_mutex_destroy failed for " << name_;
   }
 }
@@ -326,8 +329,10 @@
   if (rc != 0) {
     errno = rc;
     // TODO: should we just not log at all if shutting down? this could be the logging mutex!
-    bool shutting_down = Runtime::Current()->IsShuttingDown();
-    PLOG(shutting_down ? WARNING : FATAL) << "pthread_mutex_destroy failed for " << name_;
+    MutexLock mu(*Locks::runtime_shutdown_lock_);
+    Runtime* runtime = Runtime::Current();
+    bool shutting_down = runtime == NULL || runtime->IsShuttingDown();
+    PLOG(shutting_down ? WARNING : FATAL) << "pthread_rwlock_destroy failed for " << name_;
   }
 #endif
 }
@@ -581,7 +586,9 @@
   int rc = pthread_cond_destroy(&cond_);
   if (rc != 0) {
     errno = rc;
-    bool shutting_down = Runtime::Current()->IsShuttingDown();
+    MutexLock mu(*Locks::runtime_shutdown_lock_);
+    Runtime* runtime = Runtime::Current();
+    bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_cond_destroy failed for " << name_;
   }
 }
diff --git a/src/oat/runtime/support_jni.cc b/src/oat/runtime/support_jni.cc
index 60bcf08..e1ae530 100644
--- a/src/oat/runtime/support_jni.cc
+++ b/src/oat/runtime/support_jni.cc
@@ -47,6 +47,7 @@
 // Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
 extern uint32_t JniMethodStart(Thread* self) UNLOCK_FUNCTION(GlobalSynchronizatio::mutator_lock_) {
   JNIEnvExt* env = self->GetJniEnv();
+  DCHECK(env != NULL);
   uint32_t saved_local_ref_cookie = env->local_ref_cookie;
   env->local_ref_cookie = env->locals.GetSegmentState();
   self->TransitionFromRunnableToSuspended(kNative);
diff --git a/src/oat/runtime/support_throw.cc b/src/oat/runtime/support_throw.cc
index e68e946..887e743 100644
--- a/src/oat/runtime/support_throw.cc
+++ b/src/oat/runtime/support_throw.cc
@@ -19,6 +19,7 @@
 #include "object_utils.h"
 #include "runtime_support.h"
 #include "thread.h"
+#include "well_known_classes.h"
 
 namespace art {
 
@@ -87,18 +88,31 @@
   thread->DeliverException();
 }
 
-extern "C" void artThrowStackOverflowFromCode(Thread* thread, AbstractMethod** sp)
+extern "C" void artThrowStackOverflowFromCode(Thread* self, AbstractMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  FinishCalleeSaveFrameSetup(thread, sp, Runtime::kSaveAll);
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  CHECK(!self->IsHandlingStackOverflow()) << "Recursive stack overflow.";
   // Remove extra entry pushed onto second stack during method tracing.
   if (Runtime::Current()->IsMethodTracingActive()) {
-    TraceMethodUnwindFromCode(thread);
+    TraceMethodUnwindFromCode(self);
   }
-  thread->SetStackEndForStackOverflow();  // Allow space on the stack for constructor to execute.
-  thread->ThrowNewExceptionF("Ljava/lang/StackOverflowError;", "stack size %s",
-                             PrettySize(thread->GetStackSize()).c_str());
-  thread->ResetDefaultStackEnd();  // Return to default stack size.
-  thread->DeliverException();
+  self->SetStackEndForStackOverflow();  // Allow space on the stack for constructor to execute.
+  JNIEnvExt* env = self->GetJniEnv();
+  std::string msg("stack size ");
+  msg += PrettySize(self->GetStackSize());
+  // Use low-level JNI routine and pre-baked error class to avoid class linking operations that
+  // would consume more stack.
+  int rc = ::art::ThrowNewException(env, WellKnownClasses::java_lang_StackOverflowError,
+                                    msg.c_str(), NULL);
+  if (rc != JNI_OK) {
+    // TODO: ThrowNewException failed presumably because of an OOME, we continue to throw the OOME
+    //       or die in the CHECK below. We may want to throw a pre-baked StackOverflowError
+    //       instead.
+    LOG(ERROR) << "Couldn't throw new StackOverflowError because JNI ThrowNew failed.";
+    CHECK(self->IsExceptionPending());
+  }
+  self->ResetDefaultStackEnd();  // Return to default stack size.
+  self->DeliverException();
 }
 
 extern "C" void artThrowNoSuchMethodFromCode(int32_t method_idx, Thread* self,
diff --git a/src/runtime.cc b/src/runtime.cc
index 2b9a28d..61d93f8 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -75,7 +75,10 @@
       abstract_method_error_stub_array_(NULL),
       resolution_method_(NULL),
       system_class_loader_(NULL),
+      threads_being_born_(0),
+      shutdown_cond_(new ConditionVariable("Runtime shutdown")),
       shutting_down_(false),
+      shutting_down_started_(false),
       started_(false),
       finished_starting_(false),
       vfprintf_(NULL),
@@ -111,14 +114,22 @@
 }
 
 Runtime::~Runtime() {
-  shutting_down_ = true;
+  Thread* self = Thread::Current();
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    shutting_down_started_ = true;
+    while (threads_being_born_ > 0) {
+      shutdown_cond_->Wait(self, *Locks::runtime_shutdown_lock_);
+    }
+    shutting_down_ = true;
+  }
 
   if (IsMethodTracingActive()) {
     Trace::Shutdown();
   }
 
   // Make sure to let the GC complete if it is running.
-  heap_->WaitForConcurrentGcToComplete(Thread::Current());
+  heap_->WaitForConcurrentGcToComplete(self);
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
   Dbg::StopJdwp();
@@ -619,6 +630,14 @@
   finished_starting_ = true;
 }
 
+void Runtime::EndThreadBirth() EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_) {
+  DCHECK_GT(threads_being_born_, 0U);
+  threads_being_born_--;
+  if (shutting_down_started_ && threads_being_born_ == 0) {
+    shutdown_cond_->Broadcast();
+  }
+}
+
 void Runtime::DidForkFromZygote() {
   is_zygote_ = false;
 
@@ -705,10 +724,11 @@
 
   // ClassLinker needs an attached thread, but we can't fully attach a thread
   // without creating objects. We can't supply a thread group yet; it will be fixed later.
-  Thread::Attach("main", false, NULL);
+  Thread* self = Thread::Attach("main", false, NULL);
+  CHECK(self != NULL);
 
   // Set us to runnable so tools using a runtime can allocate and GC by default
-  Thread::Current()->TransitionFromSuspendedToRunnable();
+  self->TransitionFromSuspendedToRunnable();
 
   // Now we're attached, we can take the heap lock and validate the heap.
   GetHeap()->EnableObjectValidation();
@@ -903,11 +923,12 @@
   signals.Block();
 }
 
-void Runtime::AttachCurrentThread(const char* thread_name, bool as_daemon, jobject thread_group) {
-  Thread::Attach(thread_name, as_daemon, thread_group);
+bool Runtime::AttachCurrentThread(const char* thread_name, bool as_daemon, jobject thread_group) {
+  bool success = Thread::Attach(thread_name, as_daemon, thread_group) != NULL;
   if (thread_name == NULL) {
     LOG(WARNING) << *Thread::Current() << " attached without supplying a name";
   }
+  return success;
 }
 
 void Runtime::DetachCurrentThread() {
diff --git a/src/runtime.h b/src/runtime.h
index 5f277a1..ba37f40 100644
--- a/src/runtime.h
+++ b/src/runtime.h
@@ -30,6 +30,7 @@
 #include "instruction_set.h"
 #include "jobject_comparator.h"
 #include "macros.h"
+#include "locks.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
 #include "stringpiece.h"
@@ -119,10 +120,20 @@
   // Starts a runtime, which may cause threads to be started and code to run.
   void Start() UNLOCK_FUNCTION(Locks::mutator_lock_);
 
-  bool IsShuttingDown() const {
+  bool IsShuttingDown() const EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_) {
     return shutting_down_;
   }
 
+  size_t NumberOfThreadsBeingBorn() const EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_) {
+    return threads_being_born_;
+  }
+
+  void StartThreadBirth() EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_) {
+    threads_being_born_++;
+  }
+
+  void EndThreadBirth() EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_);
+
   bool IsStarted() const {
     return started_;
   }
@@ -149,7 +160,7 @@
   jobject GetSystemThreadGroup() const;
 
   // Attaches the calling native thread to the runtime.
-  void AttachCurrentThread(const char* thread_name, bool as_daemon, jobject thread_group);
+  bool AttachCurrentThread(const char* thread_name, bool as_daemon, jobject thread_group);
 
   void CallExitHook(jint status);
 
@@ -405,7 +416,19 @@
   // As returned by ClassLoader.getSystemClassLoader()
   ClassLoader* system_class_loader_;
 
-  bool shutting_down_;
+  // A non-zero value indicates that a thread has been created but not yet initialized. Guarded by
+  // the shutdown lock so that threads aren't born while we're shutting down.
+  size_t threads_being_born_ GUARDED_BY(Locks::runtime_shutdown_lock_);
+
+  // Waited upon until no threads are being born.
+  UniquePtr<ConditionVariable> shutdown_cond_ GUARDED_BY(Locks::runtime_shutdown_lock_);
+
+  // Set when runtime shutdown is past the point that new threads may attach.
+  bool shutting_down_ GUARDED_BY(Locks::runtime_shutdown_lock_);
+
+  // The runtime is starting to shut down but is blocked waiting on shutdown_cond_.
+  bool shutting_down_started_ GUARDED_BY(Locks::runtime_shutdown_lock_);
+
   bool started_;
 
   // New flag added which tells us if the runtime has finished starting. If
diff --git a/src/scoped_thread_state_change.h b/src/scoped_thread_state_change.h
index b36922e..9da41e0 100644
--- a/src/scoped_thread_state_change.h
+++ b/src/scoped_thread_state_change.h
@@ -35,7 +35,9 @@
     if (self_ == NULL) {
       // Value chosen arbitrarily and won't be used in the destructor since thread_ == NULL.
       old_thread_state_ = kTerminated;
-      CHECK(!Runtime::Current()->IsStarted() || Runtime::Current()->IsShuttingDown());
+      MutexLock mu(*Locks::runtime_shutdown_lock_);
+      Runtime* runtime = Runtime::Current();
+      CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown());
     } else {
       bool runnable_transition;
       DCHECK_EQ(self, Thread::Current());
@@ -61,7 +63,10 @@
   ~ScopedThreadStateChange() LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) {
     if (self_ == NULL) {
       if (!expected_has_no_thread_) {
-        CHECK(Runtime::Current()->IsShuttingDown());
+        MutexLock mu(*Locks::runtime_shutdown_lock_);
+        Runtime* runtime = Runtime::Current();
+        bool shutting_down = (runtime == NULL) || runtime->IsShuttingDown();
+        CHECK(shutting_down);
       }
     } else {
       if (old_thread_state_ != thread_state_) {
diff --git a/src/signal_catcher.cc b/src/signal_catcher.cc
index c1ac688..8376bb6 100644
--- a/src/signal_catcher.cc
+++ b/src/signal_catcher.cc
@@ -119,23 +119,14 @@
   Runtime* runtime = Runtime::Current();
   ThreadList* thread_list = runtime->GetThreadList();
 
+  // Grab exclusively the mutator lock, set state to Runnable without checking for a pending
+  // suspend request as we're going to suspend soon anyway. We set the state to Runnable to avoid
+  // giving away the mutator lock.
   thread_list->SuspendAll();
-
-  // We should exclusively hold the mutator lock, set state to Runnable without a pending
-  // suspension to avoid giving away or trying to re-acquire the mutator lock.
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertExclusiveHeld(self);
-  ThreadState old_state;
-  int suspend_count;
-  {
-    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
-    suspend_count = self->GetSuspendCount();
-    if (suspend_count != 0) {
-      CHECK_EQ(suspend_count, 1);
-      self->ModifySuspendCount(self, -1, false);
-    }
-    old_state = self->SetStateUnsafe(kRunnable);
-  }
+  const char* old_cause = self->StartAssertNoThreadSuspension("Handling sigquit");
+  ThreadState old_state = self->SetStateUnsafe(kRunnable);
 
   std::ostringstream os;
   os << "\n"
@@ -153,15 +144,10 @@
       os << "/proc/self/maps:\n" << maps;
     }
   }
-
   os << "----- end " << getpid() << " -----\n";
-  {
-    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
-    self->SetState(old_state);
-    if (suspend_count != 0) {
-      self->ModifySuspendCount(self, +1, false);
-    }
-  }
+
+  CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
+  self->EndAssertNoThreadSuspension(old_cause);
   thread_list->ResumeAll();
 
   Output(os.str());
@@ -197,7 +183,7 @@
   CHECK(signal_catcher != NULL);
 
   Runtime* runtime = Runtime::Current();
-  runtime->AttachCurrentThread("Signal Catcher", true, runtime->GetSystemThreadGroup());
+  CHECK(runtime->AttachCurrentThread("Signal Catcher", true, runtime->GetSystemThreadGroup()));
 
   Thread* self = Thread::Current();
 
diff --git a/src/thread.cc b/src/thread.cc
index 5d1afe1..bd58e6a 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -103,15 +103,25 @@
 
 void* Thread::CreateCallback(void* arg) {
   Thread* self = reinterpret_cast<Thread*>(arg);
-  self->Init();
-
+  Runtime* runtime = Runtime::Current();
+  if (runtime == NULL) {
+    LOG(ERROR) << "Thread attaching to non-existent runtime: " << *self;
+    return NULL;
+  }
+  {
+    MutexLock mu(*Locks::runtime_shutdown_lock_);
+    // Check that if we got here we cannot be shutting down (as shutdown should never have started
+    // while threads are being born).
+    CHECK(!runtime->IsShuttingDown());
+    self->Init(runtime->GetThreadList(), runtime->GetJavaVM());
+    Runtime::Current()->EndThreadBirth();
+  }
   {
     ScopedObjectAccess soa(self);
     {
       SirtRef<String> thread_name(self->GetThreadName(soa));
       self->SetThreadName(thread_name->ToModifiedUtf8().c_str());
     }
-
     Dbg::PostThreadStart(self);
 
     // Invoke the 'run' method of our java.lang.Thread.
@@ -121,20 +131,12 @@
     AbstractMethod* m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
     m->Invoke(self, receiver, NULL, NULL);
   }
-
   // Detach and delete self.
   Runtime::Current()->GetThreadList()->Unregister(self);
 
   return NULL;
 }
 
-static void SetVmData(const ScopedObjectAccess& soa, Object* managed_thread,
-                      Thread* native_thread)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Field* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_vmData);
-  f->SetInt(managed_thread, reinterpret_cast<uintptr_t>(native_thread));
-}
-
 Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa, Object* thread_peer) {
   Field* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_vmData);
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetInt(thread_peer)));
@@ -216,51 +218,68 @@
   delete[] allocated_signal_stack;
 }
 
-void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool daemon) {
+void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
   CHECK(java_peer != NULL);
+  Thread* self = static_cast<JNIEnvExt*>(env)->self;
+  Runtime* runtime = Runtime::Current();
 
-  Thread* native_thread = new Thread(daemon);
+  // Atomically start the birth of the thread ensuring the runtime isn't shutting down.
+  bool thread_start_during_shutdown = false;
   {
-    ScopedObjectAccess soa(env);
-    // Use global JNI ref to hold peer live whilst child thread starts.
-    native_thread->peer_ = env->NewGlobalRef(java_peer);
-    stack_size = FixStackSize(stack_size);
-
-    // Thread.start is synchronized, so we know that vmData is 0, and know that we're not racing to
-    // assign it.
-    Object* peer = soa.Decode<Object*>(native_thread->peer_);
-    CHECK(peer != NULL);
-    SetVmData(soa, peer, native_thread);
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    if (runtime->IsShuttingDown()) {
+      thread_start_during_shutdown = true;
+    } else {
+      runtime->StartThreadBirth();
+    }
   }
+  if (thread_start_during_shutdown) {
+    ScopedLocalRef<jclass> error_class(env, env->FindClass("java/lang/InternalError"));
+    env->ThrowNew(error_class.get(), "Thread starting during runtime shutdown");
+    return;
+  }
+
+  Thread* child_thread = new Thread(is_daemon);
+  // Use global JNI ref to hold peer live while child thread starts.
+  child_thread->peer_ = env->NewGlobalRef(java_peer);
+  stack_size = FixStackSize(stack_size);
+
+  // Thread.start is synchronized, so we know that vmData is 0, and know that we're not racing to
+  // assign it.
+  env->SetIntField(java_peer, WellKnownClasses::java_lang_Thread_vmData,
+                   reinterpret_cast<jint>(child_thread));
 
   pthread_t new_pthread;
   pthread_attr_t attr;
   CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), "new thread");
   CHECK_PTHREAD_CALL(pthread_attr_setdetachstate, (&attr, PTHREAD_CREATE_DETACHED), "PTHREAD_CREATE_DETACHED");
   CHECK_PTHREAD_CALL(pthread_attr_setstacksize, (&attr, stack_size), stack_size);
-  int pthread_create_result = pthread_create(&new_pthread, &attr, Thread::CreateCallback, native_thread);
+  int pthread_create_result = pthread_create(&new_pthread, &attr, Thread::CreateCallback, child_thread);
   CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attr), "new thread");
 
-  if (UNLIKELY(pthread_create_result != 0)) {
+  if (pthread_create_result != 0) {
     // pthread_create(3) failed, so clean up.
     {
-      ScopedObjectAccess soa(env);
-      Object* peer = soa.Decode<Object*>(java_peer);
-      SetVmData(soa, peer, 0);
-
+      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+      runtime->EndThreadBirth();
+    }
+    // Manually delete the global reference since Thread::Init will not have been run.
+    env->DeleteGlobalRef(child_thread->peer_);
+    child_thread->peer_ = NULL;
+    delete child_thread;
+    child_thread = NULL;
+    // TODO: remove from thread group?
+    env->SetIntField(java_peer, WellKnownClasses::java_lang_Thread_vmData, 0);
+    {
       std::string msg(StringPrintf("pthread_create (%s stack) failed: %s",
                                    PrettySize(stack_size).c_str(), strerror(pthread_create_result)));
-      Thread::Current()->ThrowOutOfMemoryError(msg.c_str());
+      ScopedObjectAccess soa(env);
+      soa.Self()->ThrowOutOfMemoryError(msg.c_str());
     }
-    // If we failed, manually delete the global reference since Thread::Init will not have been run.
-    env->DeleteGlobalRef(native_thread->peer_);
-    native_thread->peer_ = NULL;
-    delete native_thread;
-    return;
   }
 }
 
-void Thread::Init() {
+void Thread::Init(ThreadList* thread_list, JavaVMExt* java_vm) {
   // This function does all the initialization that must be run by the native thread it applies to.
   // (When we create a new thread from managed code, we allocate the Thread* in Thread::Create so
   // we can handshake with the corresponding native thread when it's ready.) Check this native
@@ -276,26 +295,38 @@
   InitCardTable();
   InitTid();
 
-  Runtime* runtime = Runtime::Current();
-  CHECK(runtime != NULL);
-  if (runtime->IsShuttingDown()) {
-    UNIMPLEMENTED(WARNING) << "Thread attaching whilst runtime is shutting down";
-  }
-  thin_lock_id_ = runtime->GetThreadList()->AllocThreadId();
+  // Set pthread_self_ ahead of pthread_setspecific, which makes Thread::Current() work. This
+  // ensures pthread_self_ is never invalid when this thread is discovered via Thread::Current().
   pthread_self_ = pthread_self();
+  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
+  DCHECK_EQ(Thread::Current(), this);
 
+  thin_lock_id_ = thread_list->AllocThreadId();
   InitStackHwm();
 
-  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, this), "attach self");
-
-  jni_env_ = new JNIEnvExt(this, runtime->GetJavaVM());
-
-  runtime->GetThreadList()->Register(this);
+  jni_env_ = new JNIEnvExt(this, java_vm);
+  thread_list->Register(this);
 }
 
 Thread* Thread::Attach(const char* thread_name, bool as_daemon, jobject thread_group) {
-  Thread* self = new Thread(as_daemon);
-  self->Init();
+  Thread* self;
+  Runtime* runtime = Runtime::Current();
+  if (runtime == NULL) {
+    LOG(ERROR) << "Thread attaching to non-existent runtime: " << thread_name;
+    return NULL;
+  }
+  {
+    MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
+    if (runtime->IsShuttingDown()) {
+      LOG(ERROR) << "Thread attaching while runtime is shutting down: " << thread_name;
+      return NULL;
+    } else {
+      Runtime::Current()->StartThreadBirth();
+      self = new Thread(as_daemon);
+      self->Init(runtime->GetThreadList(), runtime->GetJavaVM());
+      Runtime::Current()->EndThreadBirth();
+    }
+  }
 
   CHECK_NE(self->GetState(), kRunnable);
   self->SetState(kNative);
@@ -341,11 +372,15 @@
                                 thread_group, thread_name.get(), thread_priority, thread_is_daemon);
   AssertNoPendingException();
 
-  ScopedObjectAccess soa(this);
-  Object* native_peer = soa.Decode<Object*>(peer.get());
-  SetVmData(soa, native_peer, Thread::Current());
+  Thread* self = this;
+  DCHECK_EQ(self, Thread::Current());
+  jni_env_->SetIntField(peer.get(), WellKnownClasses::java_lang_Thread_vmData,
+                        reinterpret_cast<jint>(self));
+
+  ScopedObjectAccess soa(self);
   SirtRef<String> peer_thread_name(GetThreadName(soa));
   if (peer_thread_name.get() == NULL) {
+    Object* native_peer = soa.Decode<Object*>(peer.get());
     // The Thread constructor should have set the Thread.name to a
     // non-null value. However, because we can run without code
     // available (in the compiler, in tests), we manually assign the
@@ -375,7 +410,7 @@
 void Thread::InitStackHwm() {
   void* stack_base;
   size_t stack_size;
-  GetThreadStack(stack_base, stack_size);
+  GetThreadStack(pthread_self_, stack_base, stack_size);
 
   // TODO: include this in the thread dumps; potentially useful in SIGQUIT output?
   VLOG(threads) << StringPrintf("Native stack is at %p (%s)", stack_base, PrettySize(stack_size).c_str());
@@ -392,7 +427,8 @@
   // If we're the main thread, check whether we were run with an unlimited stack. In that case,
   // glibc will have reported a 2GB stack for our 32-bit process, and our stack overflow detection
   // will be broken because we'll die long before we get close to 2GB.
-  if (thin_lock_id_ == 1) {
+  bool is_main_thread = (::art::GetTid() == getpid());
+  if (is_main_thread) {
     rlimit stack_limit;
     if (getrlimit(RLIMIT_STACK, &stack_limit) == -1) {
       PLOG(FATAL) << "getrlimit(RLIMIT_STACK) failed";
@@ -474,6 +510,7 @@
 
 // Attempt to rectify locks so that we dump thread list with required locks before exiting.
 static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
+  LOG(ERROR) << *thread << " suspend count already zero.";
   Locks::thread_suspend_count_lock_->Unlock(self);
   if (!Locks::mutator_lock_->IsSharedHeld(self)) {
     Locks::mutator_lock_->SharedTryLock(self);
@@ -489,7 +526,7 @@
   }
   std::ostringstream ss;
   Runtime::Current()->GetThreadList()->DumpLocked(ss);
-  LOG(FATAL) << *thread << " suspend count already zero.\n" << ss.str();
+  LOG(FATAL) << ss.str();
 }
 
 void Thread::ModifySuspendCount(Thread* self, int delta, bool for_debugger) {
@@ -659,7 +696,9 @@
     priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)->GetInt(native_peer);
     is_daemon = soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->GetBoolean(native_peer);
 
-    Object* thread_group = thread->GetThreadGroup(soa);
+    Object* thread_group =
+        soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(native_peer);
+
     if (thread_group != NULL) {
       Field* group_name_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
       String* group_name_string = reinterpret_cast<String*>(group_name_field->GetObject(thread_group));
@@ -932,36 +971,39 @@
 }
 
 void Thread::Destroy() {
-  // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
-  if (jni_env_ != NULL) {
-    jni_env_->monitors.VisitRoots(MonitorExitVisitor, Thread::Current());
-  }
+  Thread* self = this;
+  DCHECK_EQ(self, Thread::Current());
 
   if (peer_ != NULL) {
-    Thread* self = this;
-
-    // We may need to call user-supplied managed code.
-    ScopedObjectAccess soa(this);
-
-    HandleUncaughtExceptions(soa);
-    RemoveFromThreadGroup(soa);
+    // We may need to call user-supplied managed code, do this before final clean-up.
+    HandleUncaughtExceptions();
+    RemoveFromThreadGroup();
 
     // this.vmData = 0;
-    SetVmData(soa, soa.Decode<Object*>(peer_), NULL);
+    jni_env_->SetIntField(peer_, WellKnownClasses::java_lang_Thread_vmData, 0);
 
-    Dbg::PostThreadDeath(self);
+    {
+      ScopedObjectAccess soa(self);
+      Dbg::PostThreadDeath(self);
+    }
 
     // Thread.join() is implemented as an Object.wait() on the Thread.lock
     // object. Signal anyone who is waiting.
-    Object* lock = soa.DecodeField(WellKnownClasses::java_lang_Thread_lock)->
-        GetObject(soa.Decode<Object*>(peer_));
+    ScopedLocalRef<jobject> lock(jni_env_,
+                                 jni_env_->GetObjectField(peer_,
+                                                          WellKnownClasses::java_lang_Thread_lock));
     // (This conditional is only needed for tests, where Thread.lock won't have been set.)
-    if (lock != NULL) {
-      lock->MonitorEnter(self);
-      lock->NotifyAll();
-      lock->MonitorExit(self);
+    if (lock.get() != NULL) {
+      jni_env_->MonitorEnter(lock.get());
+      jni_env_->CallVoidMethod(lock.get(), WellKnownClasses::java_lang_Object_notify);
+      jni_env_->MonitorExit(lock.get());
     }
   }
+
+  // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
+  if (jni_env_ != NULL) {
+    jni_env_->monitors.VisitRoots(MonitorExitVisitor, self);
+  }
 }
 
 Thread::~Thread() {
@@ -992,50 +1034,42 @@
   TearDownAlternateSignalStack();
 }
 
-void Thread::HandleUncaughtExceptions(const ScopedObjectAccess& soa) {
+void Thread::HandleUncaughtExceptions() {
   if (!IsExceptionPending()) {
     return;
   }
+
   // Get and clear the exception.
-  Object* exception = GetException();
-  ClearException();
+  ScopedLocalRef<jthrowable> exception(jni_env_, jni_env_->ExceptionOccurred());
+  jni_env_->ExceptionClear();
 
   // If the thread has its own handler, use that.
-  Object* handler =
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_uncaughtHandler)->
-          GetObject(soa.Decode<Object*>(peer_));
-  if (handler == NULL) {
+  ScopedLocalRef<jobject> handler(jni_env_,
+                                  jni_env_->GetObjectField(peer_,
+                                                           WellKnownClasses::java_lang_Thread_uncaughtHandler));
+  if (handler.get() == NULL) {
     // Otherwise use the thread group's default handler.
-    handler = GetThreadGroup(soa);
+    handler.reset(jni_env_->GetObjectField(peer_, WellKnownClasses::java_lang_Thread_group));
   }
 
   // Call the handler.
-  jmethodID mid = WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler_uncaughtException;
-  AbstractMethod* m = handler->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
-  JValue args[2];
-  args[0].SetL(soa.Decode<Object*>(peer_));
-  args[1].SetL(exception);
-  m->Invoke(this, handler, args, NULL);
+  jni_env_->CallVoidMethod(handler.get(),
+                           WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler_uncaughtException,
+                           peer_, exception.get());
 
   // If the handler threw, clear that exception too.
-  ClearException();
+  jni_env_->ExceptionClear();
 }
 
-Object* Thread::GetThreadGroup(const ScopedObjectAccessUnchecked& soa) const {
-  return soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->
-      GetObject(soa.Decode<Object*>(peer_));
-}
-
-void Thread::RemoveFromThreadGroup(const ScopedObjectAccess& soa) {
+void Thread::RemoveFromThreadGroup() {
   // this.group.removeThread(this);
   // group can be null if we're in the compiler or a test.
-  Object* group = GetThreadGroup(soa);
-  if (group != NULL) {
-    jmethodID mid = WellKnownClasses::java_lang_ThreadGroup_removeThread;
-    AbstractMethod* m = group->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
-    JValue args[1];
-    args[0].SetL(soa.Decode<Object*>(peer_));
-    m->Invoke(this, group, args, NULL);
+  ScopedLocalRef<jobject> group(jni_env_,
+                                jni_env_->GetObjectField(peer_,
+                                                         WellKnownClasses::java_lang_Thread_group));
+  if (group.get() != NULL) {
+    jni_env_->CallVoidMethod(group.get(), WellKnownClasses::java_lang_ThreadGroup_removeThread,
+                             peer_);
   }
 }
 
diff --git a/src/thread.h b/src/thread.h
index 68172e5..1e80a80 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -49,6 +49,7 @@
 class Context;
 struct DebugInvokeReq;
 class DexFile;
+struct JavaVMExt;
 struct JNIEnvExt;
 class Monitor;
 class Object;
@@ -284,9 +285,6 @@
     return peer_ != NULL;
   }
 
-  Object* GetThreadGroup(const ScopedObjectAccessUnchecked& ts) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   RuntimeStats* GetStats() {
     return &stats_;
   }
@@ -359,8 +357,7 @@
 
   // OutOfMemoryError is special, because we need to pre-allocate an instance.
   // Only the GC should call this.
-  void ThrowOutOfMemoryError(const char* msg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ThrowOutOfMemoryError(const char* msg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   //QuickFrameIterator FindExceptionHandler(void* throw_pc, void** handler_pc);
 
@@ -456,6 +453,10 @@
     stack_end_ = stack_begin_ + kStackOverflowReservedBytes;
   }
 
+  bool IsHandlingStackOverflow() const {
+    return stack_end_ == stack_begin_;
+  }
+
   static ThreadOffset StackEndOffset() {
     return ThreadOffset(OFFSETOF_MEMBER(Thread, stack_end_));
   }
@@ -590,12 +591,10 @@
 
   static void* CreateCallback(void* arg);
 
-  void HandleUncaughtExceptions(const ScopedObjectAccess& soa)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void RemoveFromThreadGroup(const ScopedObjectAccess& soa)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void HandleUncaughtExceptions();
+  void RemoveFromThreadGroup();
 
-  void Init();
+  void Init(ThreadList*, JavaVMExt*) EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_);
   void InitCardTable();
   void InitCpu();
   void InitFunctionPointers();
diff --git a/src/thread_list.cc b/src/thread_list.cc
index 56912ac..83f2658 100644
--- a/src/thread_list.cc
+++ b/src/thread_list.cc
@@ -374,10 +374,16 @@
 void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotHeld(self);
-  MutexLock mu(self, *Locks::thread_list_lock_);
   bool all_threads_are_daemons;
   do {
+    {
+      // No more threads can be born after we start to shutdown.
+      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+      CHECK(Runtime::Current()->IsShuttingDown());
+      CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
+    }
     all_threads_are_daemons = true;
+    MutexLock mu(self, *Locks::thread_list_lock_);
     for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
       // TODO: there's a race here with thread exit that's being worked around by checking if the
       // thread has a peer.
diff --git a/src/thread_list.h b/src/thread_list.h
index f1c8a44..b5546e3 100644
--- a/src/thread_list.h
+++ b/src/thread_list.h
@@ -74,11 +74,9 @@
 
   // Add/remove current thread from list.
   void Register(Thread* self)
-      LOCKS_EXCLUDED(Locks::mutator_lock_,
-                     Locks::thread_list_lock_);
-  void Unregister(Thread* self)
-      LOCKS_EXCLUDED(Locks::mutator_lock_,
-                     Locks::thread_list_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_)
+      LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
+  void Unregister(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_, Locks::thread_list_lock_);
 
   void VisitRoots(Heap::RootVisitor* visitor, void* arg) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/src/utils.cc b/src/utils.cc
index 534b28e..cbe07a2 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -73,10 +73,10 @@
   return result;
 }
 
-void GetThreadStack(void*& stack_base, size_t& stack_size) {
+void GetThreadStack(pthread_t thread, void*& stack_base, size_t& stack_size) {
 #if defined(__APPLE__)
-  stack_size = pthread_get_stacksize_np(pthread_self());
-  void* stack_addr = pthread_get_stackaddr_np(pthread_self());
+  stack_size = pthread_get_stacksize_np(thread);
+  void* stack_addr = pthread_get_stackaddr_np(thread);
 
   // Check whether stack_addr is the base or end of the stack.
   // (On Mac OS 10.7, it's the end.)
@@ -88,7 +88,7 @@
   }
 #else
   pthread_attr_t attributes;
-  CHECK_PTHREAD_CALL(pthread_getattr_np, (pthread_self(), &attributes), __FUNCTION__);
+  CHECK_PTHREAD_CALL(pthread_getattr_np, (thread, &attributes), __FUNCTION__);
   CHECK_PTHREAD_CALL(pthread_attr_getstack, (&attributes, &stack_base, &stack_size), __FUNCTION__);
   CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);
 #endif
diff --git a/src/utils.h b/src/utils.h
index 7daf61c..719ce57 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -298,8 +298,8 @@
 // Returns the given thread's name.
 std::string GetThreadName(pid_t tid);
 
-// Returns details of the calling thread's stack.
-void GetThreadStack(void*& stack_base, size_t& stack_size);
+// Returns details of the given thread's stack.
+void GetThreadStack(pthread_t thread, void*& stack_base, size_t& stack_size);
 
 // Reads data from "/proc/self/task/${tid}/stat".
 void GetTaskStats(pid_t tid, char& state, int& utime, int& stime, int& task_cpu);
diff --git a/src/well_known_classes.cc b/src/well_known_classes.cc
index 1413f0b..03b9cb2 100644
--- a/src/well_known_classes.cc
+++ b/src/well_known_classes.cc
@@ -34,6 +34,7 @@
 jclass WellKnownClasses::java_lang_reflect_AbstractMethod;
 jclass WellKnownClasses::java_lang_reflect_Proxy;
 jclass WellKnownClasses::java_lang_RuntimeException;
+jclass WellKnownClasses::java_lang_StackOverflowError;
 jclass WellKnownClasses::java_lang_Thread;
 jclass WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler;
 jclass WellKnownClasses::java_lang_ThreadGroup;
@@ -56,6 +57,7 @@
 jmethodID WellKnownClasses::java_lang_Float_valueOf;
 jmethodID WellKnownClasses::java_lang_Integer_valueOf;
 jmethodID WellKnownClasses::java_lang_Long_valueOf;
+jmethodID WellKnownClasses::java_lang_Object_notify;
 jmethodID WellKnownClasses::java_lang_ref_FinalizerReference_add;
 jmethodID WellKnownClasses::java_lang_ref_ReferenceQueue_add;
 jmethodID WellKnownClasses::java_lang_reflect_InvocationHandler_invoke;
@@ -128,6 +130,7 @@
   java_lang_reflect_AbstractMethod = CacheClass(env, "java/lang/reflect/AbstractMethod");
   java_lang_reflect_Proxy = CacheClass(env, "java/lang/reflect/Proxy");
   java_lang_RuntimeException = CacheClass(env, "java/lang/RuntimeException");
+  java_lang_StackOverflowError = CacheClass(env, "java/lang/StackOverflowError");
   java_lang_Thread = CacheClass(env, "java/lang/Thread");
   java_lang_Thread$UncaughtExceptionHandler = CacheClass(env, "java/lang/Thread$UncaughtExceptionHandler");
   java_lang_ThreadGroup = CacheClass(env, "java/lang/ThreadGroup");
@@ -149,6 +152,9 @@
   java_lang_Daemons_requestHeapTrim = CacheMethod(env, java_lang_Daemons, true, "requestHeapTrim", "()V");
   java_lang_Daemons_start = CacheMethod(env, java_lang_Daemons, true, "start", "()V");
 
+  ScopedLocalRef<jclass> java_lang_Object(env, env->FindClass("java/lang/Object"));
+  java_lang_Object_notify = CacheMethod(env, java_lang_Object.get(), false, "notify", "()V");
+
   ScopedLocalRef<jclass> java_lang_ref_FinalizerReference(env, env->FindClass("java/lang/ref/FinalizerReference"));
   java_lang_ref_FinalizerReference_add = CacheMethod(env, java_lang_ref_FinalizerReference.get(), true, "add", "(Ljava/lang/Object;)V");
   ScopedLocalRef<jclass> java_lang_ref_ReferenceQueue(env, env->FindClass("java/lang/ref/ReferenceQueue"));
diff --git a/src/well_known_classes.h b/src/well_known_classes.h
index 15a204f..1f4217d 100644
--- a/src/well_known_classes.h
+++ b/src/well_known_classes.h
@@ -45,6 +45,7 @@
   static jclass java_lang_reflect_AbstractMethod;
   static jclass java_lang_reflect_Proxy;
   static jclass java_lang_RuntimeException;
+  static jclass java_lang_StackOverflowError;
   static jclass java_lang_Thread;
   static jclass java_lang_ThreadGroup;
   static jclass java_lang_ThreadLock;
@@ -67,6 +68,7 @@
   static jmethodID java_lang_Float_valueOf;
   static jmethodID java_lang_Integer_valueOf;
   static jmethodID java_lang_Long_valueOf;
+  static jmethodID java_lang_Object_notify;
   static jmethodID java_lang_ref_FinalizerReference_add;
   static jmethodID java_lang_ref_ReferenceQueue_add;
   static jmethodID java_lang_reflect_InvocationHandler_invoke;