Fix a deadlock caused by my big threading change yesterday.
Also add even more locking diagnostics that catch this class of error and
explain what you've done in clear terms. In this case:
03-02 16:42:46.040 20768 20785 E art : holding ThreadListLock while doing condition variable wait on ThreadSuspendCountLock
...
03-02 16:42:46.493 23421 23421 I DEBUG : #01 pc 00149e5f /system/lib/libartd.so (art::Runtime::Abort(char const*, int)+338)
03-02 16:42:46.493 23421 23421 I DEBUG : #02 pc 00113c0b /system/lib/libartd.so (art::LogMessage::~LogMessage()+1026)
03-02 16:42:46.493 23421 23421 I DEBUG : #03 pc 0015a723 /system/lib/libartd.so (art::Thread::CheckSafeToWait(art::MutexRank)+290)
03-02 16:42:46.493 23421 23421 I DEBUG : #04 pc 0012481f /system/lib/libartd.so (art::ConditionVariable::Wait(art::Mutex&)+26)
03-02 16:42:46.493 23421 23421 I DEBUG : #05 pc 001631fb /system/lib/libartd.so (art::ThreadList::FullSuspendCheck(art::Thread*)+154)
03-02 16:42:46.493 23421 23421 I DEBUG : #06 pc 00159ffd /system/lib/libartd.so (art::Thread::SetState(art::Thread::State)+128)
03-02 16:42:46.493 23421 23421 I DEBUG : #07 pc 00161bb5 /system/lib/libartd.so (art::ScopedThreadListLock::ScopedThreadListLock()+64)
03-02 16:42:46.493 23421 23421 I DEBUG : #08 pc 00164a31 /system/lib/libartd.so (art::ThreadList::Unregister()+92)
03-02 16:42:46.493 23421 23421 I DEBUG : #09 pc 0015e537 /system/lib/libartd.so (art::Thread::CreateCallback(void*)+230)
03-02 16:42:46.493 23421 23421 I DEBUG : #10 pc 00013b08 /system/lib/libc.so (__thread_entry+48)
03-02 16:42:46.493 23421 23421 I DEBUG : #11 pc 0001363c /system/lib/libc.so (pthread_create+180)
Change-Id: I9cdb770e766f63359ab7d11ee1993dd7a6fc1c90
diff --git a/src/mutex.cc b/src/mutex.cc
index 963ac99..f7c3143 100644
--- a/src/mutex.cc
+++ b/src/mutex.cc
@@ -27,14 +27,23 @@
namespace art {
-static inline void CheckRank(MutexRank rank, bool is_locking) {
+static inline void CheckSafeToLockOrUnlock(MutexRank rank, bool is_locking) {
#ifndef NDEBUG
if (rank == -1) {
return;
}
Thread* self = Thread::Current();
if (self != NULL) {
- self->CheckRank(rank, is_locking);
+ self->CheckSafeToLockOrUnlock(rank, is_locking);
+ }
+#endif
+}
+
+static inline void CheckSafeToWait(MutexRank rank) {
+#ifndef NDEBUG
+ Thread* self = Thread::Current();
+ if (self != NULL) {
+ self->CheckSafeToWait(rank);
}
#endif
}
@@ -58,8 +67,8 @@
}
void Mutex::Lock() {
+ CheckSafeToLockOrUnlock(rank_, true);
CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
- CheckRank(rank_, true);
AssertHeld();
}
@@ -72,15 +81,15 @@
errno = result;
PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
}
- CheckRank(rank_, true);
+ CheckSafeToLockOrUnlock(rank_, true);
AssertHeld();
return true;
}
void Mutex::Unlock() {
AssertHeld();
+ CheckSafeToLockOrUnlock(rank_, false);
CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
- CheckRank(rank_, false);
}
pid_t Mutex::GetOwner() {
@@ -151,6 +160,7 @@
}
void ConditionVariable::Wait(Mutex& mutex) {
+ CheckSafeToWait(mutex.GetRank());
CHECK_MUTEX_CALL(pthread_cond_wait, (&cond_, mutex.GetImpl()));
}
@@ -160,6 +170,7 @@
#else
#define TIMEDWAIT pthread_cond_timedwait
#endif
+ CheckSafeToWait(mutex.GetRank());
int rc = TIMEDWAIT(&cond_, mutex.GetImpl(), &ts);
if (rc != 0 && rc != ETIMEDOUT) {
errno = rc;
diff --git a/src/mutex.h b/src/mutex.h
index b4774d7..475c7dd 100644
--- a/src/mutex.h
+++ b/src/mutex.h
@@ -56,6 +56,10 @@
return &mutex_;
}
+ MutexRank GetRank() const {
+ return rank_;
+ }
+
void AssertHeld() {
#if !defined(__APPLE__)
DCHECK_EQ(GetOwner(), GetTid());
diff --git a/src/thread.cc b/src/thread.cc
index 1c521c1..dcaf01f 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -599,6 +599,12 @@
WalkStack(&dumper);
}
+void Thread::SetStateWithoutSuspendCheck(Thread::State new_state) {
+ volatile void* raw = reinterpret_cast<volatile void*>(&state_);
+ volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw);
+ android_atomic_release_store(new_state, addr);
+}
+
Thread::State Thread::SetState(Thread::State new_state) {
Thread::State old_state = state_;
if (old_state == new_state) {
@@ -1674,7 +1680,7 @@
return os;
}
-void Thread::CheckRank(MutexRank rank, bool is_locking) {
+void Thread::CheckSafeToLockOrUnlock(MutexRank rank, bool is_locking) {
if (is_locking) {
if (held_mutexes_[rank] == 0) {
bool bad_mutexes_held = false;
@@ -1693,4 +1699,19 @@
}
}
+void Thread::CheckSafeToWait(MutexRank rank) {
+ bool bad_mutexes_held = false;
+ for (int i = kMaxMutexRank; i >= 0; --i) {
+ if (i != rank && held_mutexes_[i] != 0) {
+ LOG(ERROR) << "holding " << static_cast<MutexRank>(i) << " while doing condition variable wait on " << rank;
+ bad_mutexes_held = true;
+ }
+ }
+ if (held_mutexes_[rank] == 0) {
+ LOG(ERROR) << "*not* holding " << rank << " while doing condition variable wait on it";
+ bad_mutexes_held = true;
+ }
+ CHECK(!bad_mutexes_held);
+}
+
} // namespace art
diff --git a/src/thread.h b/src/thread.h
index 02fa87c..7ad433c 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -122,6 +122,7 @@
}
State SetState(State new_state);
+ void SetStateWithoutSuspendCheck(State new_state);
bool IsDaemon();
bool IsSuspended();
@@ -422,7 +423,8 @@
return frame;
}
- void CheckRank(MutexRank rank, bool is_locking);
+ void CheckSafeToLockOrUnlock(MutexRank rank, bool is_locking);
+ void CheckSafeToWait(MutexRank rank);
private:
Thread();
diff --git a/src/thread_list.cc b/src/thread_list.cc
index 3f8789a..e780e2c 100644
--- a/src/thread_list.cc
+++ b/src/thread_list.cc
@@ -33,8 +33,13 @@
// Self may be null during shutdown, but in that case there's no point going to kVmWait.
thread_list->thread_list_lock_.Lock();
} else {
- ScopedThreadStateChange tsc(self, Thread::kVmWait);
+ Thread::State old_thread_state = self->SetState(Thread::kVmWait);
thread_list->thread_list_lock_.Lock();
+ // If we have the lock, by definition there's no GC in progress (though we
+ // might be taking the lock in order to start one). We avoid the suspend
+ // check here so we don't risk going to sleep on the thread suspend count lock
+ // while holding the thread list lock.
+ self->SetStateWithoutSuspendCheck(old_thread_state);
}
}
}