Merge "Remove patchoat."
diff --git a/build/Android.bp b/build/Android.bp
index 3eb4aaf..78fd21a 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -27,6 +27,7 @@
     "performance-faster-string-find",
     "performance-for-range-copy",
     "performance-implicit-conversion-in-loop",
+    "performance-noexcept-move-constructor",
     "performance-unnecessary-copy-initialization",
     "performance-unnecessary-value-param",
     "misc-unused-using-decls",
@@ -42,6 +43,7 @@
         + ",performance-faster-string-find"
         + ",performance-for-range-copy"
         + ",performance-implicit-conversion-in-loop"
+        + ",performance-noexcept-move-constructor"
         + ",performance-unnecessary-copy-initialization"
         + ",performance-unnecessary-value-param"
         + ",misc-unused-using-decls"
@@ -55,9 +57,6 @@
     // We have lots of C-style variadic functions, and are OK with them. JNI ensures
     // that working around this warning would be extra-painful.
     "-cert-dcl50-cpp",
-    // No exceptions.
-    "-misc-noexcept-move-constructor",
-    "-performance-noexcept-move-constructor",
     // "Modernization" we don't agree with.
     "-modernize-use-auto",
     "-modernize-return-braced-init-list",
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d5149b3..17d9736 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2677,6 +2677,18 @@
   const Location first = locations->InAt(0);
   const Location out = locations->Out();
   const Location second = locations->InAt(1);
+
+  // In the unlucky case where the output of this instruction overlaps
+  // with an input of an "emitted-at-use-site" condition, and the output
+  // of this instruction is not one of its inputs, we need to fall back
+  // to branches instead of conditional ARM instructions.
+  bool output_overlaps_with_condition_inputs =
+      !IsBooleanValueOrMaterializedCondition(condition) &&
+      !out.Equals(first) &&
+      !out.Equals(second) &&
+      (condition->GetLocations()->InAt(0).Equals(out) ||
+       condition->GetLocations()->InAt(1).Equals(out));
+  DCHECK(!output_overlaps_with_condition_inputs || condition->IsCondition());
   Location src;
 
   if (condition->IsIntConstant()) {
@@ -2690,7 +2702,7 @@
     return;
   }
 
-  if (!DataType::IsFloatingPointType(type)) {
+  if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
     bool invert = false;
 
     if (out.Equals(second)) {
@@ -2762,6 +2774,7 @@
   vixl32::Label* false_target = nullptr;
   vixl32::Label* true_target = nullptr;
   vixl32::Label select_end;
+  vixl32::Label other_case;
   vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
 
   if (out.Equals(second)) {
@@ -2772,12 +2785,21 @@
     src = second;
 
     if (!out.Equals(first)) {
-      codegen_->MoveLocation(out, first, type);
+      if (output_overlaps_with_condition_inputs) {
+        false_target = &other_case;
+      } else {
+        codegen_->MoveLocation(out, first, type);
+      }
     }
   }
 
   GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false);
   codegen_->MoveLocation(out, src, type);
+  if (output_overlaps_with_condition_inputs) {
+    __ B(target);
+    __ Bind(&other_case);
+    codegen_->MoveLocation(out, first, type);
+  }
 
   if (select_end.IsReferenced()) {
     __ Bind(&select_end);
@@ -2876,31 +2898,16 @@
 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
-  // Handle the long/FP comparisons made in instruction simplification.
-  switch (cond->InputAt(0)->GetType()) {
-    case DataType::Type::kInt64:
-      locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (!cond->IsEmittedAtUseSite()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
-      break;
-
-    case DataType::Type::kFloat32:
-    case DataType::Type::kFloat64:
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
-      if (!cond->IsEmittedAtUseSite()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
-      break;
-
-    default:
-      locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (!cond->IsEmittedAtUseSite()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
+  const DataType::Type type = cond->InputAt(0)->GetType();
+  if (DataType::IsFloatingPointType(type)) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+  }
+  if (!cond->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
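For the overlap test added in VisitSelect above, a small host-side model of the predicate may make the hazard easier to see. Location here is a simplified stand-in for art::Location (the real one also encodes stack slots and constants), so this is only a sketch:

    #include <cassert>

    struct Location {
      int reg;  // simplified: registers only
      bool Equals(Location other) const { return reg == other.reg; }
    };

    // True when writing `out` before evaluating the condition would clobber one
    // of the condition's own inputs, forcing the branch-based lowering above.
    bool OutputOverlapsConditionInputs(Location out, Location first, Location second,
                                       Location cond_in0, Location cond_in1) {
      return !out.Equals(first) && !out.Equals(second) &&
             (cond_in0.Equals(out) || cond_in1.Equals(out));
    }

    int main() {
      // out aliases a condition input and differs from both select inputs: hazard.
      assert(OutputOverlapsConditionInputs({0}, {1}, {2}, {0}, {3}));
      // out == first: no eager "move first into out" is needed, so no hazard.
      assert(!OutputOverlapsConditionInputs({1}, {1}, {2}, {1}, {3}));
      return 0;
    }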
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index daf86fd..218b447 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -6140,6 +6140,9 @@
 
  private:
   static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits;
+  static constexpr size_t kNumberOfBoundsCheckPackedBits = kFlagIsStringCharAt + 1;
+  static_assert(kNumberOfBoundsCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+                "Too many packed fields.");
 };
 
 class HSuspendCheck final : public HExpression<0> {
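The static_assert added above follows the packed-bits convention used throughout nodes.h: each subclass allocates its flag bits starting at its superclass's bit count and asserts the total stays within the packed field. A freestanding sketch of the pattern, with illustrative constants rather than ART's actual values:

    #include <cstddef>
    #include <cstdint>

    static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * 8;  // assumed 32-bit field
    static constexpr size_t kNumberOfGenericPackedBits = 20;                // assumed base usage

    // The subclass claims one flag bit above the generic ones...
    static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits;
    static constexpr size_t kNumberOfBoundsCheckPackedBits = kFlagIsStringCharAt + 1;

    // ...and the assert catches the day a new flag would no longer fit.
    static_assert(kNumberOfBoundsCheckPackedBits <= kMaxNumberOfPackedBits,
                  "Too many packed fields.");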
diff --git a/libartbase/base/common_art_test.cc b/libartbase/base/common_art_test.cc
index 6dd2381..b65710b 100644
--- a/libartbase/base/common_art_test.cc
+++ b/libartbase/base/common_art_test.cc
@@ -73,11 +73,11 @@
   file_.reset(file);
 }
 
-ScratchFile::ScratchFile(ScratchFile&& other) {
+ScratchFile::ScratchFile(ScratchFile&& other) noexcept {
   *this = std::move(other);
 }
 
-ScratchFile& ScratchFile::operator=(ScratchFile&& other) {
+ScratchFile& ScratchFile::operator=(ScratchFile&& other) noexcept {
   if (GetFile() != other.GetFile()) {
     std::swap(filename_, other.filename_);
     std::swap(file_, other.file_);
diff --git a/libartbase/base/common_art_test.h b/libartbase/base/common_art_test.h
index d645fa1..32a2628 100644
--- a/libartbase/base/common_art_test.h
+++ b/libartbase/base/common_art_test.h
@@ -54,9 +54,9 @@
 
   ScratchFile(const ScratchFile& other, const char* suffix);
 
-  ScratchFile(ScratchFile&& other);
+  ScratchFile(ScratchFile&& other) noexcept;
 
-  ScratchFile& operator=(ScratchFile&& other);
+  ScratchFile& operator=(ScratchFile&& other) noexcept;
 
   explicit ScratchFile(File* file);
 
diff --git a/libartbase/base/mem_map.cc b/libartbase/base/mem_map.cc
index 1bf553d..06a168d 100644
--- a/libartbase/base/mem_map.cc
+++ b/libartbase/base/mem_map.cc
@@ -585,7 +585,7 @@
                 redzone_size);
 }
 
-MemMap::MemMap(MemMap&& other)
+MemMap::MemMap(MemMap&& other) noexcept
     : MemMap() {
   swap(other);
 }
@@ -692,6 +692,24 @@
                           int tail_prot,
                           std::string* error_msg,
                           bool use_debug_name) {
+  return RemapAtEnd(new_end,
+                    tail_name,
+                    tail_prot,
+                    MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS,
+                    /* fd */ -1,
+                    /* offset */ 0,
+                    error_msg,
+                    use_debug_name);
+}
+
+MemMap MemMap::RemapAtEnd(uint8_t* new_end,
+                          const char* tail_name,
+                          int tail_prot,
+                          int flags,
+                          int fd,
+                          off_t offset,
+                          std::string* error_msg,
+                          bool use_debug_name) {
   DCHECK_GE(new_end, Begin());
   DCHECK_LE(new_end, End());
   DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_);
@@ -715,9 +733,6 @@
   DCHECK_EQ(tail_base_begin + tail_base_size, old_base_end);
   DCHECK_ALIGNED(tail_base_size, kPageSize);
 
-  unique_fd fd;
-  int flags = MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS;
-
   MEMORY_TOOL_MAKE_UNDEFINED(tail_base_begin, tail_base_size);
   // Note: Do not explicitly unmap the tail region, mmap() with MAP_FIXED automatically
   // removes old mappings for the overlapping region. This makes the operation atomic
@@ -726,13 +741,13 @@
                                                           tail_base_size,
                                                           tail_prot,
                                                           flags,
-                                                          fd.get(),
-                                                          0));
+                                                          fd,
+                                                          offset));
   if (actual == MAP_FAILED) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    *error_msg = StringPrintf("anonymous mmap(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process "
+    *error_msg = StringPrintf("map(%p, %zd, 0x%x, 0x%x, %d, 0) failed. See process "
                               "maps in the log.", tail_base_begin, tail_base_size, tail_prot, flags,
-                              fd.get());
+                              fd);
     return Invalid();
   }
   // Update *this.
diff --git a/libartbase/base/mem_map.h b/libartbase/base/mem_map.h
index 20eda32..4f92492 100644
--- a/libartbase/base/mem_map.h
+++ b/libartbase/base/mem_map.h
@@ -68,8 +68,8 @@
     return MemMap();
   }
 
-  MemMap(MemMap&& other) REQUIRES(!MemMap::mem_maps_lock_);
-  MemMap& operator=(MemMap&& other) REQUIRES(!MemMap::mem_maps_lock_) {
+  MemMap(MemMap&& other) noexcept REQUIRES(!MemMap::mem_maps_lock_);
+  MemMap& operator=(MemMap&& other) noexcept REQUIRES(!MemMap::mem_maps_lock_) {
     Reset();
     swap(other);
     return *this;
@@ -261,6 +261,16 @@
                     std::string* error_msg,
                     bool use_debug_name = true);
 
+  // Unmap the pages of a file at end and remap them to create another memory map.
+  MemMap RemapAtEnd(uint8_t* new_end,
+                    const char* tail_name,
+                    int tail_prot,
+                    int tail_flags,
+                    int fd,
+                    off_t offset,
+                    std::string* error_msg,
+                    bool use_debug_name = true);
+
   // Take ownership of pages at the beginning of the mapping. The mapping must be an
   // anonymous reservation mapping, owning entire pages. The `byte_count` must not
   // exceed the size of this reservation.
diff --git a/libartbase/base/mem_map_test.cc b/libartbase/base/mem_map_test.cc
index ab3d18f..bf143d4 100644
--- a/libartbase/base/mem_map_test.cc
+++ b/libartbase/base/mem_map_test.cc
@@ -455,6 +455,52 @@
 }
 #endif
 
+TEST_F(MemMapTest, RemapFileViewAtEnd) {
+  CommonInit();
+  std::string error_msg;
+  ScratchFile scratch_file;
+
+  // Create a scratch file 3 pages large.
+  constexpr size_t kMapSize = 3 * kPageSize;
+  std::unique_ptr<uint8_t[]> data(new uint8_t[kMapSize]());
+  memset(&data[0], 0x55, kPageSize);
+  memset(&data[kPageSize], 0x5a, kPageSize);
+  memset(&data[2 * kPageSize], 0xaa, kPageSize);
+  ASSERT_TRUE(scratch_file.GetFile()->WriteFully(&data[0], kMapSize));
+
+  MemMap map = MemMap::MapFile(/*byte_count*/kMapSize,
+                               PROT_READ,
+                               MAP_PRIVATE,
+                               scratch_file.GetFd(),
+                               /*start*/0,
+                               /*low_4gb*/true,
+                               scratch_file.GetFilename().c_str(),
+                               &error_msg);
+  ASSERT_TRUE(map.IsValid()) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  ASSERT_EQ(map.Size(), kMapSize);
+  ASSERT_LT(reinterpret_cast<uintptr_t>(map.BaseBegin()), 1ULL << 32);
+  ASSERT_EQ(data[0], *map.Begin());
+  ASSERT_EQ(data[kPageSize], *(map.Begin() + kPageSize));
+  ASSERT_EQ(data[2 * kPageSize], *(map.Begin() + 2 * kPageSize));
+
+  for (size_t offset = 2 * kPageSize; offset > 0; offset -= kPageSize) {
+    MemMap tail = map.RemapAtEnd(map.Begin() + offset,
+                                 "bad_offset_map",
+                                 PROT_READ,
+                                 MAP_PRIVATE | MAP_FIXED,
+                                 scratch_file.GetFd(),
+                                 offset,
+                                 &error_msg);
+    ASSERT_TRUE(tail.IsValid()) << error_msg;
+    ASSERT_TRUE(error_msg.empty());
+    ASSERT_EQ(offset, map.Size());
+    ASSERT_EQ(static_cast<size_t>(kPageSize), tail.Size());
+    ASSERT_EQ(tail.Begin(), map.Begin() + map.Size());
+    ASSERT_EQ(data[offset], *tail.Begin());
+  }
+}
+
 TEST_F(MemMapTest, MapAnonymousExactAddr32bitHighAddr) {
   // Some MIPS32 hardware (namely the Creator Ci20 development board)
   // cannot allocate in the 2GB-4GB region.
diff --git a/libartbase/base/scoped_arena_allocator.cc b/libartbase/base/scoped_arena_allocator.cc
index ab05c60..a54f350 100644
--- a/libartbase/base/scoped_arena_allocator.cc
+++ b/libartbase/base/scoped_arena_allocator.cc
@@ -106,7 +106,7 @@
   return ptr;
 }
 
-ScopedArenaAllocator::ScopedArenaAllocator(ScopedArenaAllocator&& other)
+ScopedArenaAllocator::ScopedArenaAllocator(ScopedArenaAllocator&& other) noexcept
     : DebugStackReference(std::move(other)),
       DebugStackRefCounter(),
       ArenaAllocatorStats(other),
diff --git a/libartbase/base/scoped_arena_allocator.h b/libartbase/base/scoped_arena_allocator.h
index 7eaec5e..52d0361 100644
--- a/libartbase/base/scoped_arena_allocator.h
+++ b/libartbase/base/scoped_arena_allocator.h
@@ -138,7 +138,7 @@
 class ScopedArenaAllocator
     : private DebugStackReference, private DebugStackRefCounter, private ArenaAllocatorStats {
  public:
-  ScopedArenaAllocator(ScopedArenaAllocator&& other);
+  ScopedArenaAllocator(ScopedArenaAllocator&& other) noexcept;
   explicit ScopedArenaAllocator(ArenaStack* arena_stack);
   ~ScopedArenaAllocator();
 
diff --git a/libartbase/base/unix_file/fd_file.cc b/libartbase/base/unix_file/fd_file.cc
index d715670..de60277 100644
--- a/libartbase/base/unix_file/fd_file.cc
+++ b/libartbase/base/unix_file/fd_file.cc
@@ -91,7 +91,7 @@
   }
 }
 
-FdFile::FdFile(FdFile&& other)
+FdFile::FdFile(FdFile&& other) noexcept
     : guard_state_(other.guard_state_),
       fd_(other.fd_),
       file_path_(std::move(other.file_path_)),
@@ -105,7 +105,7 @@
   other.fd_ = -1;
 }
 
-FdFile& FdFile::operator=(FdFile&& other) {
+FdFile& FdFile::operator=(FdFile&& other) noexcept {
   if (this == &other) {
     return *this;
   }
diff --git a/libartbase/base/unix_file/fd_file.h b/libartbase/base/unix_file/fd_file.h
index e362ed1..54a16a2 100644
--- a/libartbase/base/unix_file/fd_file.h
+++ b/libartbase/base/unix_file/fd_file.h
@@ -46,10 +46,10 @@
   FdFile(const std::string& path, int flags, mode_t mode, bool checkUsage);
 
   // Move constructor.
-  FdFile(FdFile&& other);
+  FdFile(FdFile&& other) noexcept;
 
   // Move assignment operator.
-  FdFile& operator=(FdFile&& other);
+  FdFile& operator=(FdFile&& other) noexcept;
 
   // Release the file descriptor. This will make further accesses to this FdFile invalid. Disables
   // all further state checking.
diff --git a/openjdkjvmti/ti_class.cc b/openjdkjvmti/ti_class.cc
index 1ed615b..f6113df 100644
--- a/openjdkjvmti/ti_class.cc
+++ b/openjdkjvmti/ti_class.cc
@@ -145,7 +145,7 @@
   FakeJvmtiDeleter() {}
 
   FakeJvmtiDeleter(FakeJvmtiDeleter&) = default;
-  FakeJvmtiDeleter(FakeJvmtiDeleter&&) = default;
+  FakeJvmtiDeleter(FakeJvmtiDeleter&&) noexcept = default;
   FakeJvmtiDeleter& operator=(const FakeJvmtiDeleter&) = default;
 
   template <typename U> void operator()(const U* ptr) const {
diff --git a/openjdkjvmti/ti_stack.cc b/openjdkjvmti/ti_stack.cc
index e2b98b3..220ad22 100644
--- a/openjdkjvmti/ti_stack.cc
+++ b/openjdkjvmti/ti_stack.cc
@@ -77,7 +77,7 @@
         start(start_),
         stop(stop_) {}
   GetStackTraceVisitor(const GetStackTraceVisitor&) = default;
-  GetStackTraceVisitor(GetStackTraceVisitor&&) = default;
+  GetStackTraceVisitor(GetStackTraceVisitor&&) noexcept = default;
 
   bool VisitFrame() override REQUIRES_SHARED(art::Locks::mutator_lock_) {
     art::ArtMethod* m = GetMethod();
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index 41ef6c2..b54c77d 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -818,37 +818,6 @@
   return ERR(NONE);
 }
 
-class ScopedSuspendByPeer {
- public:
-  explicit ScopedSuspendByPeer(jthread jtarget)
-      : thread_list_(art::Runtime::Current()->GetThreadList()),
-        timeout_(false),
-        target_(thread_list_->SuspendThreadByPeer(jtarget,
-                                                  /* suspend_thread */ true,
-                                                  art::SuspendReason::kInternal,
-                                                  &timeout_)) { }
-  ~ScopedSuspendByPeer() {
-    if (target_ != nullptr) {
-      if (!thread_list_->Resume(target_, art::SuspendReason::kInternal)) {
-        LOG(ERROR) << "Failed to resume " << target_ << "!";
-      }
-    }
-  }
-
-  art::Thread* GetTargetThread() const {
-    return target_;
-  }
-
-  bool TimedOut() const {
-    return timeout_;
-  }
-
- private:
-  art::ThreadList* thread_list_;
-  bool timeout_;
-  art::Thread* target_;
-};
-
 jvmtiError ThreadUtil::SuspendOther(art::Thread* self,
                                     jthread target_jthread) {
   // Loop since we need to bail out and try again if we would end up getting suspended while holding
@@ -876,27 +845,29 @@
       if (!GetAliveNativeThread(target_jthread, soa, &target, &err)) {
         return err;
       }
+      art::ThreadState state = target->GetState();
+      if (state == art::ThreadState::kStarting || target->IsStillStarting()) {
+        return ERR(THREAD_NOT_ALIVE);
+      } else {
+        art::MutexLock thread_suspend_count_mu(self, *art::Locks::thread_suspend_count_lock_);
+        if (target->GetUserCodeSuspendCount() != 0) {
+          return ERR(THREAD_SUSPENDED);
+        }
+      }
     }
-    // Get the actual thread in a suspended state so we can change the user-code suspend count.
-    ScopedSuspendByPeer ssbp(target_jthread);
-    if (ssbp.GetTargetThread() == nullptr && !ssbp.TimedOut()) {
+    bool timeout = true;
+    art::Thread* ret_target = art::Runtime::Current()->GetThreadList()->SuspendThreadByPeer(
+        target_jthread,
+        /* request_suspension */ true,
+        art::SuspendReason::kForUserCode,
+        &timeout);
+    if (ret_target == nullptr && !timeout) {
       // TODO It would be good to get more information about why exactly the thread failed to
       // suspend.
       return ERR(INTERNAL);
-    } else if (!ssbp.TimedOut()) {
-      art::ThreadState state = ssbp.GetTargetThread()->GetState();
-      if (state == art::ThreadState::kStarting || ssbp.GetTargetThread()->IsStillStarting()) {
-        return ERR(THREAD_NOT_ALIVE);
-      }
-      // we didn't time out and got a result. Suspend the thread by usercode and return. It's
-      // already suspended internal so we don't need to do anything but increment the count.
-      art::MutexLock thread_suspend_count_mu(self, *art::Locks::thread_suspend_count_lock_);
-      if (ssbp.GetTargetThread()->GetUserCodeSuspendCount() != 0) {
-        return ERR(THREAD_SUSPENDED);
-      }
-      bool res = ssbp.GetTargetThread()->ModifySuspendCount(
-          self, +1, nullptr, art::SuspendReason::kForUserCode);
-      return res ? OK : ERR(INTERNAL);
+    } else if (!timeout) {
+      // We didn't time out and got a result.
+      return OK;
     }
     // We timed out. Just go around and try again.
   } while (true);
@@ -905,17 +876,6 @@
 
 jvmtiError ThreadUtil::SuspendSelf(art::Thread* self) {
   CHECK(self == art::Thread::Current());
-  if (!self->CanBeSuspendedByUserCode()) {
-    // TODO This is really undesirable. As far as I can tell this is can only come about because of
-    // class-loads in the jit-threads (through either VMObjectAlloc or the ClassLoad/ClassPrepare
-    // events that we send). It's unlikely that anyone would be suspending themselves there since
-    // it's almost guaranteed to cause a deadlock but it is technically allowed. Ideally we'd want
-    // to put a CHECK here (or in the event-dispatch code) that we are only in this situation when
-    // sending the GC callbacks but the jit causing events means we cannot do this.
-    LOG(WARNING) << "Attempt to self-suspend on a thread without suspension enabled. Thread is "
-                 << *self;
-    return ERR(INTERNAL);
-  }
   {
     art::MutexLock mu(self, *art::Locks::user_code_suspension_lock_);
     art::MutexLock thread_list_mu(self, *art::Locks::thread_suspend_count_lock_);
@@ -963,6 +923,7 @@
     return ERR(NULL_POINTER);
   }
   art::Thread* self = art::Thread::Current();
+  art::Thread* target;
   // Retry until we know we won't get suspended by user code while resuming something.
   do {
     SuspendCheck(self);
@@ -973,37 +934,36 @@
       continue;
     }
     // From now on we know we cannot get suspended by user-code.
-    // NB This does a SuspendCheck (during thread state change) so we need to make sure we don't
-    // have the 'suspend_lock' locked here.
-    art::ScopedObjectAccess soa(self);
-    if (thread == nullptr) {
-      // The thread is the current thread.
-      return ERR(THREAD_NOT_SUSPENDED);
-    } else if (!soa.Env()->IsInstanceOf(thread, art::WellKnownClasses::java_lang_Thread)) {
-      // Not a thread object.
-      return ERR(INVALID_THREAD);
-    } else if (self->GetPeer() == soa.Decode<art::mirror::Object>(thread)) {
-      // The thread is the current thread.
-      return ERR(THREAD_NOT_SUSPENDED);
+    {
+      // NB This does a SuspendCheck (during thread state change) so we need to make sure we don't
+      // have the 'suspend_lock' locked here.
+      art::ScopedObjectAccess soa(self);
+      art::MutexLock tll_mu(self, *art::Locks::thread_list_lock_);
+      jvmtiError err = ERR(INTERNAL);
+      if (!GetAliveNativeThread(thread, soa, &target, &err)) {
+        return err;
+      } else if (target == self) {
+        // We would have paused until we aren't suspended anymore due to the ScopedObjectAccess so
+        // we can just return THREAD_NOT_SUSPENDED. Unfortunately we cannot do any real DCHECKs
+        // about current state since it's all concurrent.
+        return ERR(THREAD_NOT_SUSPENDED);
+      }
+      // The JVMTI spec requires us to return THREAD_NOT_SUSPENDED if it is alive but we really
+      // cannot tell why resume failed.
+      {
+        art::MutexLock thread_suspend_count_mu(self, *art::Locks::thread_suspend_count_lock_);
+        if (target->GetUserCodeSuspendCount() == 0) {
+          return ERR(THREAD_NOT_SUSPENDED);
+        }
+      }
     }
-    ScopedSuspendByPeer ssbp(thread);
-    if (ssbp.TimedOut()) {
-      // Unknown error. Couldn't suspend thread!
-      return ERR(INTERNAL);
-    } else if (ssbp.GetTargetThread() == nullptr) {
-      // Thread must not be alive.
-      return ERR(THREAD_NOT_ALIVE);
-    }
-    // We didn't time out and got a result. Check the thread is suspended by usercode, unsuspend it
-    // and return. It's already suspended internal so we don't need to do anything but decrement the
-    // count.
-    art::MutexLock thread_list_mu(self, *art::Locks::thread_suspend_count_lock_);
-    if (ssbp.GetTargetThread()->GetUserCodeSuspendCount() == 0) {
-      return ERR(THREAD_NOT_SUSPENDED);
-    } else if (!ssbp.GetTargetThread()->ModifySuspendCount(
-        self, -1, nullptr, art::SuspendReason::kForUserCode)) {
+    // It is okay that we don't hold the thread_list_lock here, since the thread cannot
+    // die while it is held suspended by a SuspendReason::kForUserCode suspend.
+    DCHECK(target != self);
+    if (!art::Runtime::Current()->GetThreadList()->Resume(target,
+                                                          art::SuspendReason::kForUserCode)) {
       // TODO Give a better error.
-      // This should not really be possible and is probably some race.
+      // This is most likely THREAD_NOT_SUSPENDED but we cannot really be sure.
       return ERR(INTERNAL);
     } else {
       return OK;
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 00c9360..a9c743a 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -27,6 +27,9 @@
 #define ADD_TEST_EQ(x, y)
 #endif
 
+// Rounds the value n up to the nearest multiple of sz. sz must be a power of two.
+#define ALIGN_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
+
 #if defined(__LP64__)
 #define POINTER_SIZE_SHIFT 3
 #define POINTER_SIZE art::PointerSize::k64
@@ -96,8 +99,10 @@
 #define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 17 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
             art::Thread::ThreadLocalAllocStackEndOffset<POINTER_SIZE>().Int32Value())
-// Offset of field Thread::interpreter_cache_.
-#define THREAD_INTERPRETER_CACHE_OFFSET (144 + 312 * __SIZEOF_POINTER__)
+// Offset of field Thread::interpreter_cache_. This is aligned on a 16 byte boundary so we need to
+// round up depending on the size of tlsPtr_.
+#define THREAD_INTERPRETER_CACHE_OFFSET \
+  (ALIGN_UP((THREAD_CARD_TABLE_OFFSET + 301 * __SIZEOF_POINTER__), 16))
 ADD_TEST_EQ(THREAD_INTERPRETER_CACHE_OFFSET,
             art::Thread::InterpreterCacheOffset<POINTER_SIZE>().Int32Value())
 
@@ -227,4 +232,6 @@
 #undef DEFINED_ADD_TEST_EQ
 #endif
 
+#undef ALIGN_UP
+
 #endif  // ART_RUNTIME_ASM_SUPPORT_H_
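
A few compile-time sanity checks for the arithmetic, assuming the ALIGN_UP macro as defined above (it is #undef'd at the end of the header, so checks like these would have to sit before that):

    // With sz = 16: (n + 15) & ~15 clears the low four bits after rounding up.
    static_assert(ALIGN_UP(0, 16) == 0, "already aligned");
    static_assert(ALIGN_UP(1, 16) == 16, "rounds up");
    static_assert(ALIGN_UP(16, 16) == 16, "multiples are unchanged");
    static_assert(ALIGN_UP(17, 16) == 32, "rounds past a multiple");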
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d95f71a..c18f46b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -2501,7 +2501,7 @@
       // the Java-side could still succeed for racy programs if another thread is actively
       // modifying the class loader's path list.
 
-      if (!self->CanCallIntoJava()) {
+      if (self->IsRuntimeThread()) {
         // Oops, we can't call into java so we can't run actual class-loader code.
         // This is true for e.g. for the compiler (jit or aot).
         ObjPtr<mirror::Throwable> pre_allocated =
@@ -2634,6 +2634,17 @@
     }
   }
 
+  // This is to prevent the calls to ClassLoad and ClassPrepare which can cause java/user-supplied
+  // code to be executed. We put it up here so we can avoid all the allocations associated with
+  // creating the class. This can happen with, e.g., JIT threads.
+  if (!self->CanLoadClasses()) {
+    // Make sure we don't try to load anything, potentially causing an infinite loop.
+    ObjPtr<mirror::Throwable> pre_allocated =
+        Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
+    self->SetException(pre_allocated);
+    return nullptr;
+  }
+
   if (klass == nullptr) {
     // Allocate a class with the status of not ready.
     // Interface object should get the right size here. Regular class will
@@ -3622,6 +3633,18 @@
   // Identify the underlying component type
   CHECK_EQ('[', descriptor[0]);
   StackHandleScope<2> hs(self);
+
+  // This is to prevent the calls to ClassLoad and ClassPrepare which can cause java/user-supplied
+  // code to be executed. We put it up here so we can avoid all the allocations associated with
+  // creating the class. This can happen with, e.g., JIT threads.
+  if (!self->CanLoadClasses()) {
+    // Make sure we don't try to load anything, potentially causing an infinite loop.
+    ObjPtr<mirror::Throwable> pre_allocated =
+        Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
+    self->SetException(pre_allocated);
+    return nullptr;
+  }
+
   MutableHandle<mirror::Class> component_type(hs.NewHandle(FindClass(self, descriptor + 1,
                                                                      class_loader)));
   if (component_type == nullptr) {
@@ -3809,6 +3832,7 @@
 ObjPtr<mirror::Class> ClassLinker::InsertClass(const char* descriptor,
                                                ObjPtr<mirror::Class> klass,
                                                size_t hash) {
+  DCHECK(Thread::Current()->CanLoadClasses());
   if (VLOG_IS_ON(class_linker)) {
     ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
     std::string source;
@@ -4333,6 +4357,18 @@
                                                     jobjectArray methods,
                                                     jobjectArray throws) {
   Thread* self = soa.Self();
+
+  // This is to prevent the calls to ClassLoad and ClassPrepare which can cause java/user-supplied
+  // code to be executed. We put it up here so we can avoid all the allocations associated with
+  // creating the class. This can happen with, e.g., JIT threads.
+  if (!self->CanLoadClasses()) {
+    // Make sure we don't try to load anything, potentially causing an infinite loop.
+    ObjPtr<mirror::Throwable> pre_allocated =
+        Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
+    self->SetException(pre_allocated);
+    return nullptr;
+  }
+
   StackHandleScope<10> hs(self);
   MutableHandle<mirror::Class> temp_klass(hs.NewHandle(
       AllocClass(self, GetClassRoot<mirror::Class>(this), sizeof(mirror::Class))));
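The same CanLoadClasses guard now appears at each class-creation entry point (DefineClass, CreateArrayClass and CreateProxyClass). A hypothetical helper capturing the repeated pattern; this is not part of the change, which deliberately inlines the check at every site:

    // Hypothetical. Returns false and raises the pre-allocated
    // NoClassDefFoundError if class loading is forbidden on this thread
    // (e.g. JIT threads, where loading could trigger events and recurse).
    inline bool EnsureCanLoadClasses(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
      if (LIKELY(self->CanLoadClasses())) {
        return true;
      }
      ObjPtr<mirror::Throwable> pre_allocated =
          Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
      self->SetException(pre_allocated);
      return false;
    }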
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 657a78b..7199d5e 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -771,13 +771,19 @@
 
   // Avoid running Java code for exception initialization.
   // TODO: Checks to make this a bit less brittle.
+  //
+  // Note: this lambda ensures that the destruction of the ScopedLocalRefs will run in the extended
+  //       stack, which is important for modes with larger stack sizes (e.g., ASAN). Using a lambda
+  //       instead of a block simplifies the control flow.
+  auto create_and_throw = [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
+    // Allocate an uninitialized object.
+    ScopedLocalRef<jobject> exc(env,
+                                env->AllocObject(WellKnownClasses::java_lang_StackOverflowError));
+    if (exc == nullptr) {
+      LOG(WARNING) << "Could not allocate StackOverflowError object.";
+      return;
+    }
 
-  std::string error_msg;
-
-  // Allocate an uninitialized object.
-  ScopedLocalRef<jobject> exc(env,
-                              env->AllocObject(WellKnownClasses::java_lang_StackOverflowError));
-  if (exc.get() != nullptr) {
     // "Initialize".
     // StackOverflowError -> VirtualMachineError -> Error -> Throwable -> Object.
     // Only Throwable has "custom" fields:
@@ -793,57 +799,54 @@
     // detailMessage.
     // TODO: Use String::FromModifiedUTF...?
     ScopedLocalRef<jstring> s(env, env->NewStringUTF(msg.c_str()));
-    if (s.get() != nullptr) {
-      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_detailMessage, s.get());
-
-      // cause.
-      env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_cause, exc.get());
-
-      // suppressedExceptions.
-      ScopedLocalRef<jobject> emptylist(env, env->GetStaticObjectField(
-          WellKnownClasses::java_util_Collections,
-          WellKnownClasses::java_util_Collections_EMPTY_LIST));
-      CHECK(emptylist.get() != nullptr);
-      env->SetObjectField(exc.get(),
-                          WellKnownClasses::java_lang_Throwable_suppressedExceptions,
-                          emptylist.get());
-
-      // stackState is set as result of fillInStackTrace. fillInStackTrace calls
-      // nativeFillInStackTrace.
-      ScopedLocalRef<jobject> stack_state_val(env, nullptr);
-      {
-        ScopedObjectAccessUnchecked soa(env);
-        stack_state_val.reset(soa.Self()->CreateInternalStackTrace<false>(soa));
-      }
-      if (stack_state_val.get() != nullptr) {
-        env->SetObjectField(exc.get(),
-                            WellKnownClasses::java_lang_Throwable_stackState,
-                            stack_state_val.get());
-
-        // stackTrace.
-        ScopedLocalRef<jobject> stack_trace_elem(env, env->GetStaticObjectField(
-            WellKnownClasses::libcore_util_EmptyArray,
-            WellKnownClasses::libcore_util_EmptyArray_STACK_TRACE_ELEMENT));
-        env->SetObjectField(exc.get(),
-                            WellKnownClasses::java_lang_Throwable_stackTrace,
-                            stack_trace_elem.get());
-      } else {
-        error_msg = "Could not create stack trace.";
-      }
-      // Throw the exception.
-      self->SetException(self->DecodeJObject(exc.get())->AsThrowable());
-    } else {
-      // Could not allocate a string object.
-      error_msg = "Couldn't throw new StackOverflowError because JNI NewStringUTF failed.";
+    if (s == nullptr) {
+      LOG(WARNING) << "Could not throw new StackOverflowError because JNI NewStringUTF failed.";
+      return;
     }
-  } else {
-    error_msg = "Could not allocate StackOverflowError object.";
-  }
 
-  if (!error_msg.empty()) {
-    LOG(WARNING) << error_msg;
-    CHECK(self->IsExceptionPending());
-  }
+    env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_detailMessage, s.get());
+
+    // cause.
+    env->SetObjectField(exc.get(), WellKnownClasses::java_lang_Throwable_cause, exc.get());
+
+    // suppressedExceptions.
+    ScopedLocalRef<jobject> emptylist(env, env->GetStaticObjectField(
+        WellKnownClasses::java_util_Collections,
+        WellKnownClasses::java_util_Collections_EMPTY_LIST));
+    CHECK(emptylist != nullptr);
+    env->SetObjectField(exc.get(),
+                        WellKnownClasses::java_lang_Throwable_suppressedExceptions,
+                        emptylist.get());
+
+    // stackState is set as result of fillInStackTrace. fillInStackTrace calls
+    // nativeFillInStackTrace.
+    ScopedLocalRef<jobject> stack_state_val(env, nullptr);
+    {
+      ScopedObjectAccessUnchecked soa(env);  // TODO: Is this necessary?
+      stack_state_val.reset(soa.Self()->CreateInternalStackTrace<false>(soa));
+    }
+    if (stack_state_val != nullptr) {
+      env->SetObjectField(exc.get(),
+                          WellKnownClasses::java_lang_Throwable_stackState,
+                          stack_state_val.get());
+
+      // stackTrace.
+      ScopedLocalRef<jobject> stack_trace_elem(env, env->GetStaticObjectField(
+          WellKnownClasses::libcore_util_EmptyArray,
+          WellKnownClasses::libcore_util_EmptyArray_STACK_TRACE_ELEMENT));
+      env->SetObjectField(exc.get(),
+                          WellKnownClasses::java_lang_Throwable_stackTrace,
+                          stack_trace_elem.get());
+    } else {
+      LOG(WARNING) << "Could not create stack trace.";
+      // Note: we'll create an exception without stack state, which is valid.
+    }
+
+    // Throw the exception.
+    self->SetException(self->DecodeJObject(exc.get())->AsThrowable());
+  };
+  create_and_throw();
+  CHECK(self->IsExceptionPending());
 
   bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
   self->ResetDefaultStackEnd();  // Return to default stack size.
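
The point of the create_and_throw lambda above is ordering: the ScopedLocalRefs must be destroyed while the extended stack is still in effect, before ResetDefaultStackEnd() shrinks it. A standalone analogue of the idiom, with Guard standing in for ScopedLocalRef and nothing here being ART API:

    #include <cstdio>

    struct Guard {  // stand-in for ScopedLocalRef
      ~Guard() { std::puts("cleanup runs while the extended stack is still in effect"); }
    };

    void HandleOverflow() {
      auto create_and_throw = [&]() {
        Guard exc;  // destroyed when the lambda returns...
        std::puts("allocate and initialize the exception");
      };
      create_and_throw();  // ...so no destructor is pending past this point
      std::puts("now it is safe to shrink the stack again");
    }

    int main() { HandleOverflow(); return 0; }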
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index cb85804..50c65ea 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -140,8 +140,11 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, flip_function, method_verifier, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, method_verifier, thread_local_mark_stack, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_mark_stack, async_exception, sizeof(void*));
-    EXPECT_OFFSET_DIFF(Thread, tlsPtr_.async_exception, Thread, wait_mutex_, sizeof(void*),
-                       thread_tlsptr_end);
+    // The first field after tlsPtr_ is forced to a 16 byte alignment, so there may be padding after tlsPtr_.
+    auto offset_tlsptr_end = OFFSETOF_MEMBER(Thread, tlsPtr_) +
+        sizeof(decltype(reinterpret_cast<Thread*>(16)->tlsPtr_));
+    CHECKED(offset_tlsptr_end - OFFSETOF_MEMBER(Thread, tlsPtr_.async_exception) == sizeof(void*),
+            "async_exception last field");
   }
 
   void CheckJniEntryPoints() {
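
The sizeof(decltype(reinterpret_cast<Thread*>(16)->tlsPtr_)) expression above reads the member's type in an unevaluated context, so no Thread instance is needed and the bogus pointer is never dereferenced. A minimal standalone analogue (Widget and its layout are invented for illustration):

    #include <cstddef>

    struct Widget {
      char header[16];
      void* slots[4];
    };

    // sizeof's operand is unevaluated; the fake pointer only supplies a type.
    constexpr size_t kSlotsEnd =
        offsetof(Widget, slots) + sizeof(decltype(reinterpret_cast<Widget*>(16)->slots));

    static_assert(kSlotsEnd == sizeof(Widget), "slots is the last member, no tail padding");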
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 8af5d55..e48365b 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -389,7 +389,7 @@
                                                   /*inout*/MemMap* oat_reservation,
                                                   /*out*/std::string* error_msg)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    TimingLogger logger(__PRETTY_FUNCTION__, true, VLOG_IS_ON(image));
+    TimingLogger logger(__PRETTY_FUNCTION__, /* precise= */ true, VLOG_IS_ON(image));
     std::unique_ptr<ImageSpace> space = Init(image_filename,
                                              image_location,
                                              validate_oat_file,
@@ -1323,7 +1323,7 @@
                       /*out*/std::vector<std::unique_ptr<space::ImageSpace>>* boot_image_spaces,
                       /*out*/MemMap* extra_reservation,
                       /*out*/std::string* error_msg) REQUIRES_SHARED(Locks::mutator_lock_) {
-    TimingLogger logger(__PRETTY_FUNCTION__, true, VLOG_IS_ON(image));
+    TimingLogger logger(__PRETTY_FUNCTION__, /* precise= */ true, VLOG_IS_ON(image));
     std::string filename = GetSystemImageFilename(image_location_.c_str(), image_isa_);
     std::vector<std::string> locations;
     if (!GetBootClassPathImageLocations(image_location_, filename, &locations, error_msg)) {
@@ -1393,7 +1393,7 @@
       /*out*/std::vector<std::unique_ptr<space::ImageSpace>>* boot_image_spaces,
       /*out*/MemMap* extra_reservation,
       /*out*/std::string* error_msg) REQUIRES_SHARED(Locks::mutator_lock_) {
-    TimingLogger logger(__PRETTY_FUNCTION__, true, VLOG_IS_ON(image));
+    TimingLogger logger(__PRETTY_FUNCTION__, /* precise= */ true, VLOG_IS_ON(image));
     DCHECK(DalvikCacheExists());
     std::vector<std::string> locations;
     if (!GetBootClassPathImageLocations(image_location_, cache_filename_, &locations, error_msg)) {
@@ -1597,11 +1597,25 @@
         : diff_(diff) {}
 
     void VisitClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+      // A mirror::Class object consists of
+      //  - instance fields inherited from j.l.Object,
+      //  - instance fields inherited from j.l.Class,
+      //  - embedded tables (vtable, interface method table),
+      //  - static fields of the class itself.
+      // The reference fields are at the start of each field section (this is how the
+      // ClassLinker orders fields; except when that would create a gap between superclass
+      // fields and the first reference of the subclass due to alignment, it can be filled
+      // with smaller fields - but that's not the case for j.l.Object and j.l.Class).
+
+      DCHECK_ALIGNED(klass, kObjectAlignment);
+      static_assert(IsAligned<kHeapReferenceSize>(kObjectAlignment), "Object alignment check.");
       // First, patch the `klass->klass_`, known to be a reference to the j.l.Class.class.
       // This should be the only reference field in j.l.Object and we assert that below.
       PatchReferenceField</* kMayBeNull */ false>(klass, mirror::Object::ClassOffset());
       // Then patch the reference instance fields described by j.l.Class.class.
-      // Use the sizeof(Object) to determine where these reference fields start.
+      // Use the sizeof(Object) to determine where these reference fields start;
+      // this is the same as `class_class->GetFirstReferenceInstanceFieldOffset()`
+      // after patching but the j.l.Class may not have been patched yet.
       mirror::Class* class_class = klass->GetClass<kVerifyNone, kWithoutReadBarrier>();
       size_t num_reference_instance_fields = class_class->NumReferenceInstanceFields<kVerifyNone>();
       DCHECK_NE(num_reference_instance_fields, 0u);
@@ -1609,8 +1623,10 @@
       MemberOffset instance_field_offset(sizeof(mirror::Object));
       for (size_t i = 0; i != num_reference_instance_fields; ++i) {
         PatchReferenceField(klass, instance_field_offset);
-        instance_field_offset = MemberOffset(
-            instance_field_offset.Uint32Value() + sizeof(mirror::HeapReference<mirror::Object>));
+        static_assert(sizeof(mirror::HeapReference<mirror::Object>) == kHeapReferenceSize,
+                      "Heap reference sizes equality check.");
+        instance_field_offset =
+            MemberOffset(instance_field_offset.Uint32Value() + kHeapReferenceSize);
       }
       // Now that we have patched the `super_class_`, if this is the j.l.Class.class,
       // we can get a reference to j.l.Object.class and assert that it has only one
@@ -1626,8 +1642,10 @@
             klass->GetFirstReferenceStaticFieldOffset<kVerifyNone>(kPointerSize);
         for (size_t i = 0; i != num_reference_static_fields; ++i) {
           PatchReferenceField(klass, static_field_offset);
-          static_field_offset = MemberOffset(
-              static_field_offset.Uint32Value() + sizeof(mirror::HeapReference<mirror::Object>));
+          static_assert(sizeof(mirror::HeapReference<mirror::Object>) == kHeapReferenceSize,
+                        "Heap reference sizes equality check.");
+          static_field_offset =
+              MemberOffset(static_field_offset.Uint32Value() + kHeapReferenceSize);
         }
       }
       // Then patch native pointers.
@@ -1774,7 +1792,7 @@
     PatchObjectVisitor<kPointerSize> patch_object_visitor(diff);
 
     mirror::Class* dcheck_class_class = nullptr;  // Used only for a DCHECK().
-    for (size_t s = 0, size = spaces.size(); s != size; ++s) {
+    for (size_t s = 0u, size = spaces.size(); s != size; ++s) {
       const ImageSpace* space = spaces[s].get();
 
       // First patch the image header. The `diff` is OK for patching 32-bit fields but
@@ -1876,7 +1894,7 @@
       constructor_class = GetClassRoot<mirror::Constructor, kWithoutReadBarrier>(class_roots);
     }
 
-    for (size_t s = 0, size = spaces.size(); s != size; ++s) {
+    for (size_t s = 0u, size = spaces.size(); s != size; ++s) {
       const ImageSpace* space = spaces[s].get();
       const ImageHeader& image_header = space->GetImageHeader();
 
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 608b48e..171af6f 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -31,7 +31,8 @@
 static constexpr uint kEvacuateLivePercentThreshold = 75U;
 
 // Whether we protect the unused and cleared regions.
-static constexpr bool kProtectClearedRegions = true;
+// Only protect for target builds to prevent flaky test failures (b/63131961).
+static constexpr bool kProtectClearedRegions = kIsTargetBuild;
 
 // Whether we poison memory areas occupied by dead objects in unevacuated regions.
 static constexpr bool kPoisonDeadObjectsInUnevacuatedRegions = true;
diff --git a/runtime/image-inl.h b/runtime/image-inl.h
index 9fde669..2082064 100644
--- a/runtime/image-inl.h
+++ b/runtime/image-inl.h
@@ -51,9 +51,9 @@
 
 inline void ImageHeader::VisitPackedArtFields(ArtFieldVisitor* visitor, uint8_t* base) const {
   const ImageSection& fields = GetFieldsSection();
-  for (size_t pos = 0; pos < fields.Size(); ) {
+  for (size_t pos = 0u; pos < fields.Size(); ) {
     auto* array = reinterpret_cast<LengthPrefixedArray<ArtField>*>(base + fields.Offset() + pos);
-    for (size_t i = 0; i < array->size(); ++i) {
+    for (size_t i = 0u; i < array->size(); ++i) {
       visitor->Visit(&array->At(i, sizeof(ArtField)));
     }
     pos += array->ComputeSize(array->size());
@@ -66,15 +66,15 @@
   const size_t method_alignment = ArtMethod::Alignment(pointer_size);
   const size_t method_size = ArtMethod::Size(pointer_size);
   const ImageSection& methods = GetMethodsSection();
-  for (size_t pos = 0; pos < methods.Size(); ) {
+  for (size_t pos = 0u; pos < methods.Size(); ) {
     auto* array = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(base + methods.Offset() + pos);
-    for (size_t i = 0; i < array->size(); ++i) {
+    for (size_t i = 0u; i < array->size(); ++i) {
       visitor->Visit(&array->At(i, method_size, method_alignment));
     }
     pos += array->ComputeSize(array->size(), method_size, method_alignment);
   }
   const ImageSection& runtime_methods = GetRuntimeMethodsSection();
-  for (size_t pos = 0; pos < runtime_methods.Size(); ) {
+  for (size_t pos = 0u; pos < runtime_methods.Size(); ) {
     auto* method = reinterpret_cast<ArtMethod*>(base + runtime_methods.Offset() + pos);
     visitor->Visit(method);
     pos += method_size;
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 33d228f..63cb6a4 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -18,12 +18,15 @@
 
 #include <sstream>
 
+#include "android-base/unique_fd.h"
+
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/histogram-inl.h"
 #include "base/logging.h"  // For VLOG.
 #include "base/membarrier.h"
+#include "base/memfd.h"
 #include "base/mem_map.h"
 #include "base/quasi_atomic.h"
 #include "base/stl_util.h"
@@ -52,16 +55,32 @@
 #include "thread-current-inl.h"
 #include "thread_list.h"
 
+using android::base::unique_fd;
+
 namespace art {
 namespace jit {
 
-static constexpr int kProtCode = PROT_READ | PROT_EXEC;
-static constexpr int kProtData = PROT_READ | PROT_WRITE;
-static constexpr int kProtProfile = PROT_READ;
-
 static constexpr size_t kCodeSizeLogThreshold = 50 * KB;
 static constexpr size_t kStackMapSizeLogThreshold = 50 * KB;
 
+static constexpr int kProtR = PROT_READ;
+static constexpr int kProtRW = PROT_READ | PROT_WRITE;
+static constexpr int kProtRWX = PROT_READ | PROT_WRITE | PROT_EXEC;
+static constexpr int kProtRX = PROT_READ | PROT_EXEC;
+
+namespace {
+
+// Translate an address belonging to one memory map into an address in a second. This is useful
+// when there are two virtual memory ranges for the same physical memory range.
+template <typename T>
+T* TranslateAddress(T* src_ptr, const MemMap& src, const MemMap& dst) {
+  CHECK(src.HasAddress(src_ptr));
+  uint8_t* const raw_src_ptr = reinterpret_cast<uint8_t*>(src_ptr);
+  return reinterpret_cast<T*>(raw_src_ptr - src.Begin() + dst.Begin());
+}
+
+}  // namespace
+
 class JitCodeCache::JniStubKey {
  public:
   explicit JniStubKey(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_)
@@ -190,17 +209,41 @@
 
   // Register for membarrier expedited sync core if JIT will be generating code.
   if (!used_only_for_profile_data) {
-    art::membarrier(art::MembarrierCommand::kRegisterPrivateExpeditedSyncCore);
+    if (art::membarrier(art::MembarrierCommand::kRegisterPrivateExpeditedSyncCore) != 0) {
+      // MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE ensures that CPU instruction pipelines are
+      // flushed and it's used when adding code to the JIT. The memory used by the new code may
+      // have just been released and, in theory, the old code could still be in a pipeline.
+      VLOG(jit) << "Kernel does not support membarrier sync-core";
+    }
   }
 
-  // Decide how we should map the code and data sections.
-  // If we use the code cache just for profiling we do not need to map the code section as
-  // executable.
-  // NOTE 1: this is yet another workaround to bypass strict SElinux policies in order to be able
-  //         to profile system server.
-  // NOTE 2: We could just not create the code section at all but we will need to
-  //         special case too many cases.
-  int memmap_flags_prot_code = used_only_for_profile_data ? kProtProfile : kProtCode;
+  // File descriptor enabling dual-view mapping of code section.
+  unique_fd mem_fd;
+
+  // Bionic supports memfd_create, but the call may fail on older kernels.
+  mem_fd = unique_fd(art::memfd_create("/jit-cache", /* flags */ 0));
+  if (mem_fd.get() < 0) {
+    VLOG(jit) << "Failed to initialize dual view JIT. memfd_create() error: "
+              << strerror(errno);
+  }
+
+  if (mem_fd.get() >= 0 && ftruncate(mem_fd, max_capacity) != 0) {
+    std::ostringstream oss;
+    oss << "Failed to initialize memory file: " << strerror(errno);
+    *error_msg = oss.str();
+    return nullptr;
+  }
+
+  // Data cache will be half of the initial allocation.
+  // Code cache will be the other half of the initial allocation.
+  // TODO: Make this variable?
+
+  // Align both capacities to page size, as that's the unit mspaces use.
+  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
+  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
+  const size_t data_capacity = max_capacity / 2;
+  const size_t exec_capacity = used_only_for_profile_data ? 0 : max_capacity - data_capacity;
+  DCHECK_LE(data_capacity + exec_capacity, max_capacity);
 
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
@@ -208,71 +251,147 @@
   // We could do PC-relative addressing to avoid this problem, but that
   // would require reserving code and data area before submitting, which
   // means more windows for the code memory to be RWX.
-  MemMap data_map = MemMap::MapAnonymous(
-      "data-code-cache",
-      /* addr */ nullptr,
-      max_capacity,
-      kProtData,
-      /* low_4gb */ true,
-      /* reuse */ false,
-      /* reservation */ nullptr,
-      &error_str);
-  if (!data_map.IsValid()) {
+  int base_flags;
+  MemMap data_pages;
+  if (mem_fd.get() >= 0) {
+    // Dual view of JIT code cache case. Create an initial mapping of data pages large enough
+    // for data and non-writable view of JIT code pages. We use the memory file descriptor to
+    // enable dual mapping - we'll create a second mapping using the descriptor below. The
+    // mappings will look like:
+    //
+    //       VA                  PA
+    //
+    //       +---------------+
+    //       | non exec code |\
+    //       +---------------+ \
+    //       :               :\ \
+    //       +---------------+.\.+---------------+
+    //       |  exec code    |  \|     code      |
+    //       +---------------+...+---------------+
+    //       |      data     |   |     data      |
+    //       +---------------+...+---------------+
+    //
+    // In this configuration code updates are written to the non-executable view of the code
+    // cache, and the executable view of the code cache has fixed RX memory protections.
+    //
+    // This memory needs to be mapped shared as the code portions will have two mappings.
+    base_flags = MAP_SHARED;
+    data_pages = MemMap::MapFile(
+        data_capacity + exec_capacity,
+        kProtRW,
+        base_flags,
+        mem_fd,
+        /* start */ 0,
+        /* low_4gb */ true,
+        "data-code-cache",
+        &error_str);
+  } else {
+    // Single view of JIT code cache case. Create an initial mapping of data pages large enough
+    // for data and JIT code pages. The mappings will look like:
+    //
+    //       VA                  PA
+    //
+    //       +---------------+...+---------------+
+    //       |  exec code    |   |     code      |
+    //       +---------------+...+---------------+
+    //       |      data     |   |     data      |
+    //       +---------------+...+---------------+
+    //
+    // In this configuration code updates are written to the executable view of the code cache,
+    // and the executable view of the code cache transitions RX to RWX for the update and then
+    // back to RX after the update.
+    base_flags = MAP_PRIVATE | MAP_ANON;
+    data_pages = MemMap::MapAnonymous(
+        "data-code-cache",
+        /* addr */ nullptr,
+        data_capacity + exec_capacity,
+        kProtRW,
+        /* low_4gb */ true,
+        /* reuse */ false,
+        /* reservation */ nullptr,
+        &error_str);
+  }
+
+  if (!data_pages.IsValid()) {
     std::ostringstream oss;
     oss << "Failed to create read write cache: " << error_str << " size=" << max_capacity;
     *error_msg = oss.str();
     return nullptr;
   }
 
-  // Align both capacities to page size, as that's the unit mspaces use.
-  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
-  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
+  MemMap exec_pages;
+  MemMap non_exec_pages;
+  if (exec_capacity > 0) {
+    uint8_t* const divider = data_pages.Begin() + data_capacity;
+    // Set initial permission for executable view to catch any SELinux permission problems early
+    // (for processes that cannot map WX pages). Otherwise, this region does not need to be
+    // executable as there is no code in the cache yet.
+    exec_pages = data_pages.RemapAtEnd(divider,
+                                       "jit-code-cache",
+                                       kProtRX,
+                                       base_flags | MAP_FIXED,
+                                       mem_fd.get(),
+                                       (mem_fd.get() >= 0) ? data_capacity : 0,
+                                       &error_str);
+    if (!exec_pages.IsValid()) {
+      std::ostringstream oss;
+      oss << "Failed to create read execute code cache: " << error_str << " size=" << max_capacity;
+      *error_msg = oss.str();
+      return nullptr;
+    }
 
-  // Data cache is 1 / 2 of the map.
-  // TODO: Make this variable?
-  size_t data_size = max_capacity / 2;
-  size_t code_size = max_capacity - data_size;
-  DCHECK_EQ(code_size + data_size, max_capacity);
-  uint8_t* divider = data_map.Begin() + data_size;
-
-  MemMap code_map = data_map.RemapAtEnd(
-      divider, "jit-code-cache", memmap_flags_prot_code | PROT_WRITE, &error_str);
-  if (!code_map.IsValid()) {
-    std::ostringstream oss;
-    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
-    *error_msg = oss.str();
-    return nullptr;
+    if (mem_fd.get() >= 0) {
+      // For dual view, create the secondary view of code memory used for updating code. This view
+      // is never executable.
+      non_exec_pages = MemMap::MapFile(exec_capacity,
+                                       kProtR,
+                                       base_flags,
+                                       mem_fd,
+                                       /* start */ data_capacity,
+                                       /* low_4GB */ false,
+                                       "jit-code-cache-rw",
+                                       &error_str);
+      if (!non_exec_pages.IsValid()) {
+        // Log and continue as single view JIT.
+        VLOG(jit) << "Failed to map non-executable view of JIT code cache";
+      }
+    }
+  } else {
+    // Profiling only. No memory for code required.
+    DCHECK(used_only_for_profile_data);
   }
-  DCHECK_EQ(code_map.Begin(), divider);
-  data_size = initial_capacity / 2;
-  code_size = initial_capacity - data_size;
-  DCHECK_EQ(code_size + data_size, initial_capacity);
+
+  const size_t initial_data_capacity = initial_capacity / 2;
+  const size_t initial_exec_capacity =
+      (exec_capacity == 0) ? 0 : (initial_capacity - initial_data_capacity);
+
   return new JitCodeCache(
-      std::move(code_map),
-      std::move(data_map),
-      code_size,
-      data_size,
+      std::move(data_pages),
+      std::move(exec_pages),
+      std::move(non_exec_pages),
+      initial_data_capacity,
+      initial_exec_capacity,
       max_capacity,
-      garbage_collect_code,
-      memmap_flags_prot_code);
+      garbage_collect_code);
 }
 
-JitCodeCache::JitCodeCache(MemMap&& code_map,
-                           MemMap&& data_map,
-                           size_t initial_code_capacity,
+JitCodeCache::JitCodeCache(MemMap&& data_pages,
+                           MemMap&& exec_pages,
+                           MemMap&& non_exec_pages,
                            size_t initial_data_capacity,
+                           size_t initial_exec_capacity,
                            size_t max_capacity,
-                           bool garbage_collect_code,
-                           int memmap_flags_prot_code)
+                           bool garbage_collect_code)
     : lock_("Jit code cache", kJitCodeCacheLock),
       lock_cond_("Jit code cache condition variable", lock_),
       collection_in_progress_(false),
-      code_map_(std::move(code_map)),
-      data_map_(std::move(data_map)),
+      data_pages_(std::move(data_pages)),
+      exec_pages_(std::move(exec_pages)),
+      non_exec_pages_(std::move(non_exec_pages)),
       max_capacity_(max_capacity),
-      current_capacity_(initial_code_capacity + initial_data_capacity),
-      code_end_(initial_code_capacity),
+      current_capacity_(initial_exec_capacity + initial_data_capacity),
       data_end_(initial_data_capacity),
+      exec_end_(initial_exec_capacity),
       last_collection_increased_code_cache_(false),
       garbage_collect_code_(garbage_collect_code),
       used_memory_for_data_(0),
@@ -284,40 +403,46 @@
       histogram_code_memory_use_("Memory used for compiled code", 16),
       histogram_profiling_info_memory_use_("Memory used for profiling info", 16),
       is_weak_access_enabled_(true),
-      inline_cache_cond_("Jit inline cache condition variable", lock_),
-      memmap_flags_prot_code_(memmap_flags_prot_code) {
+      inline_cache_cond_("Jit inline cache condition variable", lock_) {
 
-  DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity);
-  code_mspace_ = create_mspace_with_base(code_map_.Begin(), code_end_, false /*locked*/);
-  data_mspace_ = create_mspace_with_base(data_map_.Begin(), data_end_, false /*locked*/);
+  DCHECK_GE(max_capacity, initial_exec_capacity + initial_data_capacity);
 
-  if (code_mspace_ == nullptr || data_mspace_ == nullptr) {
-    PLOG(FATAL) << "create_mspace_with_base failed";
+  // Initialize the data heap.
+  data_mspace_ = create_mspace_with_base(data_pages_.Begin(), data_end_, false /*locked*/);
+  CHECK(data_mspace_ != nullptr) << "create_mspace_with_base (data) failed";
+
+  // Initialize the code heap.
+  MemMap* code_heap = nullptr;
+  if (non_exec_pages_.IsValid()) {
+    code_heap = &non_exec_pages_;
+  } else if (exec_pages_.IsValid()) {
+    code_heap = &exec_pages_;
   }
-
-  SetFootprintLimit(current_capacity_);
-
-  CheckedCall(mprotect,
-              "mprotect jit code cache",
-              code_map_.Begin(),
-              code_map_.Size(),
-              memmap_flags_prot_code_);
-  CheckedCall(mprotect,
-              "mprotect jit data cache",
-              data_map_.Begin(),
-              data_map_.Size(),
-              kProtData);
+  if (code_heap != nullptr) {
+    // Make all pages reserved for the code heap writable. The mspace allocator, which manages
+    // the heap, takes and initializes pages in create_mspace_with_base().
+    CheckedCall(mprotect, "create code heap", code_heap->Begin(), code_heap->Size(), kProtRW);
+    exec_mspace_ = create_mspace_with_base(code_heap->Begin(), exec_end_, false /*locked*/);
+    CHECK(exec_mspace_ != nullptr) << "create_mspace_with_base (exec) failed";
+    SetFootprintLimit(current_capacity_);
+    // Protect the pages containing the heap metadata. Updates to the code heap toggle write
+    // permission only for the duration of the update; write access is not required at any other
+    // time.
+    CheckedCall(mprotect, "protect code heap", code_heap->Begin(), code_heap->Size(), kProtR);
+  } else {
+    exec_mspace_ = nullptr;
+    SetFootprintLimit(current_capacity_);
+  }
 
   VLOG(jit) << "Created jit code cache: initial data size="
             << PrettySize(initial_data_capacity)
             << ", initial code size="
-            << PrettySize(initial_code_capacity);
+            << PrettySize(initial_exec_capacity);
 }
 
 JitCodeCache::~JitCodeCache() {}
 
 bool JitCodeCache::ContainsPc(const void* ptr) const {
-  return code_map_.Begin() <= ptr && ptr < code_map_.End();
+  return exec_pages_.Begin() <= ptr && ptr < exec_pages_.End();
 }
 
 bool JitCodeCache::WillExecuteJitCode(ArtMethod* method) {
@@ -385,22 +510,20 @@
       : ScopedTrace("ScopedCodeCacheWrite"),
         code_cache_(code_cache) {
     ScopedTrace trace("mprotect all");
-    CheckedCall(
-        mprotect,
-        "make code writable",
-        code_cache_->code_map_.Begin(),
-        code_cache_->code_map_.Size(),
-        code_cache_->memmap_flags_prot_code_ | PROT_WRITE);
+    const MemMap* const updatable_pages = code_cache_->GetUpdatableCodeMapping();
+    if (updatable_pages != nullptr) {
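+      // With a dual mapping, code updates are written through the non-executable view, so RW
+      // suffices and the executable view never needs to become writable; with a single mapping,
+      // the only view must become RWX.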
+      int prot = code_cache_->HasDualCodeMapping() ? kProtRW : kProtRWX;
+      CheckedCall(mprotect, "Cache +W", updatable_pages->Begin(), updatable_pages->Size(), prot);
+    }
   }
 
   ~ScopedCodeCacheWrite() {
     ScopedTrace trace("mprotect code");
-    CheckedCall(
-        mprotect,
-        "make code protected",
-        code_cache_->code_map_.Begin(),
-        code_cache_->code_map_.Size(),
-        code_cache_->memmap_flags_prot_code_);
+    const MemMap* const updatable_pages = code_cache_->GetUpdatableCodeMapping();
+    if (updatable_pages != nullptr) {
+      int prot = code_cache_->HasDualCodeMapping() ? kProtR : kProtRX;
+      CheckedCall(mprotect, "Cache -W", updatable_pages->Begin(), updatable_pages->Size(), prot);
+    }
   }
 
  private:
@@ -602,7 +725,13 @@
   if (OatQuickMethodHeader::FromCodePointer(code_ptr)->IsOptimized()) {
     FreeData(GetRootTable(code_ptr));
   }  // else this is a JNI stub without any data.
-  FreeCode(reinterpret_cast<uint8_t*>(allocation));
+
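+  // The mspace tracks allocations in the updatable view, so with a dual mapping translate the
+  // executable address back to its writable alias before freeing.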
+  uint8_t* code_allocation = reinterpret_cast<uint8_t*>(allocation);
+  if (HasDualCodeMapping()) {
+    code_allocation = TranslateAddress(code_allocation, exec_pages_, non_exec_pages_);
+  }
+
+  FreeCode(code_allocation);
 }
 
 void JitCodeCache::FreeAllMethodHeaders(
@@ -753,6 +882,16 @@
   }
 }
 
+const MemMap* JitCodeCache::GetUpdatableCodeMapping() const {
+  if (HasDualCodeMapping()) {
+    return &non_exec_pages_;
+  } else if (HasCodeMapping()) {
+    return &exec_pages_;
+  } else {
+    return nullptr;
+  }
+}
+
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           uint8_t* stack_map,
@@ -773,38 +912,73 @@
     DCheckRootsAreValid(roots);
   }
 
-  size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
-  // Ensure the header ends up at expected instruction alignment.
-  size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
-  size_t total_size = header_size + code_size;
-
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
-  uint8_t* memory = nullptr;
+
   MutexLock mu(self, lock_);
   // We need to make sure that there will be no jit-gcs going on and wait for any ongoing one to
   // finish.
   WaitForPotentialCollectionToCompleteRunnable(self);
   {
     ScopedCodeCacheWrite scc(this);
-    memory = AllocateCode(total_size);
-    if (memory == nullptr) {
+
+    size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
+    // Ensure the header ends up at expected instruction alignment.
+    size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
+    size_t total_size = header_size + code_size;
+
+    // AllocateCode allocates memory in the non-executable region for the alignment header and
+    // the code. The header size may include alignment padding.
+    uint8_t* nox_memory = AllocateCode(total_size);
+    if (nox_memory == nullptr) {
       return nullptr;
     }
-    code_ptr = memory + header_size;
 
+    // code_ptr points to non-executable code.
+    code_ptr = nox_memory + header_size;
     std::copy(code, code + code_size, code_ptr);
     method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+
+    // From here code_ptr points to executable code.
+    if (HasDualCodeMapping()) {
+      code_ptr = TranslateAddress(code_ptr, non_exec_pages_, exec_pages_);
+    }
+
     new (method_header) OatQuickMethodHeader(
         (stack_map != nullptr) ? code_ptr - stack_map : 0u,
         code_size);
-    // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may
-    // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.
-    // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and
-    // 6P) stop being supported or their kernels are fixed.
+
+    DCHECK(!Runtime::Current()->IsAotCompiler());
+    if (has_should_deoptimize_flag) {
+      method_header->SetHasShouldDeoptimizeFlag();
+    }
+
+    // Update method_header pointer to executable code region.
+    if (HasDualCodeMapping()) {
+      method_header = TranslateAddress(method_header, non_exec_pages_, exec_pages_);
+    }
+
+    // Both instruction and data caches need flushing to the point of unification where both share
+    // a common view of memory. Flushing the data cache ensures the dirty cachelines from the
+    // newly added code are written out to the point of unification. Flushing the instruction
+    // cache ensures the newly written code will be fetched from the point of unification before
+    // use. Memory in the code cache is re-cycled as code is added and removed. The flushes
+    // prevent stale code from residing in the instruction cache.
+    //
+    // Caches are flushed before write permission is removed because some ARMv8 Qualcomm kernels
+    // may trigger a segfault if a page fault occurs when requesting a cache maintenance
+    // operation. This is a kernel bug that we need to work around until affected devices
+    // (e.g. Nexus 5X and 6P) stop being supported or their kernels are fixed.
     //
     // For reference, this behavior is caused by this commit:
     // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c
+    //
+    if (HasDualCodeMapping()) {
+      // Flush the data cache lines associated with the non-executable copy of the code just added.
+      FlushDataCache(nox_memory, nox_memory + total_size);
+    }
+    // FlushInstructionCache() flushes both data and instruction cache lines. The cacheline range
+    // flushed covers the executable mapping of the code just added.
     FlushInstructionCache(code_ptr, code_ptr + code_size);
 
     // Ensure CPU instruction pipelines are flushed for all cores. This is necessary for
@@ -813,16 +987,14 @@
     // shootdown (incidentally invalidating the CPU pipelines by sending an IPI to all cores to
     // notify them of the TLB invalidation). Some architectures, notably ARM and ARM64, have
     // hardware support that broadcasts TLB invalidations and so their kernels have no software
-    // based TLB shootdown.
+    // based TLB shootdown. The sync-core flavor of membarrier was introduced in Linux 4.16 to
+    // address this (see membarrier(2)). The membarrier here will fail on prior kernels and on
+    // platforms lacking the appropriate support.
     art::membarrier(art::MembarrierCommand::kPrivateExpeditedSyncCore);
 
-    DCHECK(!Runtime::Current()->IsAotCompiler());
-    if (has_should_deoptimize_flag) {
-      method_header->SetHasShouldDeoptimizeFlag();
-    }
-
     number_of_compilations_++;
   }
+
   // We need to update the entry point in the runnable state for the instrumentation.
   {
     // The following needs to be guarded by cha_lock_ also. Otherwise it's possible that the
@@ -1167,9 +1339,9 @@
   DCHECK(IsAlignedParam(per_space_footprint, kPageSize));
   DCHECK_EQ(per_space_footprint * 2, new_footprint);
   mspace_set_footprint_limit(data_mspace_, per_space_footprint);
-  {
+  if (HasCodeMapping()) {
     ScopedCodeCacheWrite scc(this);
-    mspace_set_footprint_limit(code_mspace_, per_space_footprint);
+    mspace_set_footprint_limit(exec_mspace_, per_space_footprint);
   }
 }
 
@@ -1244,8 +1416,8 @@
       number_of_collections_++;
       live_bitmap_.reset(CodeCacheBitmap::Create(
           "code-cache-bitmap",
-          reinterpret_cast<uintptr_t>(code_map_.Begin()),
-          reinterpret_cast<uintptr_t>(code_map_.Begin() + current_capacity_ / 2)));
+          reinterpret_cast<uintptr_t>(exec_pages_.Begin()),
+          reinterpret_cast<uintptr_t>(exec_pages_.Begin() + current_capacity_ / 2)));
       collection_in_progress_ = true;
     }
   }
@@ -1614,15 +1786,17 @@
 // NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock
 // is already held.
 void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS {
-  if (code_mspace_ == mspace) {
-    size_t result = code_end_;
-    code_end_ += increment;
-    return reinterpret_cast<void*>(result + code_map_.Begin());
+  if (mspace == exec_mspace_) {
+    DCHECK(exec_mspace_ != nullptr);
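+    // The exec mspace is backed by the updatable view (the non-executable mapping when dual view
+    // is in use), so hand out addresses in that mapping.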
+    const MemMap* const code_pages = GetUpdatableCodeMapping();
+    void* result = code_pages->Begin() + exec_end_;
+    exec_end_ += increment;
+    return result;
   } else {
     DCHECK_EQ(data_mspace_, mspace);
-    size_t result = data_end_;
+    void* result = data_pages_.Begin() + data_end_;
     data_end_ += increment;
-    return reinterpret_cast<void*>(result + data_map_.Begin());
+    return result;
   }
 }
 
@@ -1849,7 +2023,7 @@
 uint8_t* JitCodeCache::AllocateCode(size_t code_size) {
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   uint8_t* result = reinterpret_cast<uint8_t*>(
-      mspace_memalign(code_mspace_, alignment, code_size));
+      mspace_memalign(exec_mspace_, alignment, code_size));
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
   // Ensure the header ends up at expected instruction alignment.
   DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment);
@@ -1859,7 +2033,7 @@
 
 void JitCodeCache::FreeCode(uint8_t* code) {
   used_memory_for_code_ -= mspace_usable_size(code);
-  mspace_free(code_mspace_, code);
+  mspace_free(exec_mspace_, code);
 }
 
 uint8_t* JitCodeCache::AllocateData(size_t data_size) {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index e2aa01c..76ad8db 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -223,7 +223,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS {
-    return mspace == code_mspace_ || mspace == data_mspace_;
+    return mspace == data_mspace_ || mspace == exec_mspace_;
   }
 
   void* MoreCore(const void* mspace, intptr_t increment);
@@ -279,13 +279,13 @@
 
  private:
   // Take ownership of maps.
-  JitCodeCache(MemMap&& code_map,
-               MemMap&& data_map,
-               size_t initial_code_capacity,
+  JitCodeCache(MemMap&& data_pages,
+               MemMap&& exec_pages,
+               MemMap&& non_exec_pages,
                size_t initial_data_capacity,
+               size_t initial_exec_capacity,
                size_t max_capacity,
-               bool garbage_collect_code,
-               int memmap_flags_prot_code);
+               bool garbage_collect_code);
 
   // Internal version of 'CommitCode' that will not retry if the
   // allocation fails. Return null if the allocation fails.
@@ -381,6 +381,16 @@
   uint8_t* AllocateData(size_t data_size) REQUIRES(lock_);
   void FreeData(uint8_t* data) REQUIRES(lock_);
 
+  bool HasDualCodeMapping() const {
+    return non_exec_pages_.IsValid();
+  }
+
+  bool HasCodeMapping() const {
+    return exec_pages_.IsValid();
+  }
+
+  const MemMap* GetUpdatableCodeMapping() const;
+
   bool IsWeakAccessEnabled(Thread* self) const;
   void WaitUntilInlineCacheAccessible(Thread* self)
       REQUIRES(!lock_)
@@ -395,14 +405,17 @@
   ConditionVariable lock_cond_ GUARDED_BY(lock_);
   // Whether there is a code cache collection in progress.
   bool collection_in_progress_ GUARDED_BY(lock_);
-  // Mem map which holds code.
-  MemMap code_map_;
   // Mem map which holds data (stack maps and profiling info).
-  MemMap data_map_;
-  // The opaque mspace for allocating code.
-  void* code_mspace_ GUARDED_BY(lock_);
+  MemMap data_pages_;
+  // Mem map which holds code and has executable permission.
+  MemMap exec_pages_;
+  // Mem map which holds code with non executable permission. Only valid for dual view JIT when
+  // this is the non-executable view of code used to write updates.
+  MemMap non_exec_pages_;
   // The opaque mspace for allocating data.
   void* data_mspace_ GUARDED_BY(lock_);
+  // The opaque mspace for allocating code.
+  void* exec_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
   // Holds compiled code associated with the shorty for a JNI stub.
@@ -420,12 +433,12 @@
   // The current capacity in bytes of the code cache.
   size_t current_capacity_ GUARDED_BY(lock_);
 
-  // The current footprint in bytes of the code portion of the code cache.
-  size_t code_end_ GUARDED_BY(lock_);
-
   // The current footprint in bytes of the data portion of the code cache.
   size_t data_end_ GUARDED_BY(lock_);
 
+  // The current footprint in bytes of the code portion of the code cache.
+  size_t exec_end_ GUARDED_BY(lock_);
+
   // Whether the last collection round increased the code cache.
   bool last_collection_increased_code_cache_ GUARDED_BY(lock_);
 
@@ -464,9 +477,6 @@
   // Condition to wait on for accessing inline caches.
   ConditionVariable inline_cache_cond_ GUARDED_BY(lock_);
 
-  // Mapping flags for the code section.
-  const int memmap_flags_prot_code_;
-
   friend class art::JitJniStubTestHelper;
   friend class ScopedCodeCacheWrite;
 
diff --git a/runtime/jni/check_jni.cc b/runtime/jni/check_jni.cc
index c5e8830..6f61f5e 100644
--- a/runtime/jni/check_jni.cc
+++ b/runtime/jni/check_jni.cc
@@ -181,7 +181,7 @@
     }
   }
 
-  VarArgs(VarArgs&& other) {
+  VarArgs(VarArgs&& other) noexcept {
     m_ = other.m_;
     cnt_ = other.cnt_;
     type_ = other.type_;
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index d62cbdb..45f5633 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -277,7 +277,7 @@
     size_t identical;
 
     SummaryElement() : equiv(0), identical(0) {}
-    SummaryElement(SummaryElement&& ref) {
+    SummaryElement(SummaryElement&& ref) noexcept {
       root = ref.root;
       equiv = ref.equiv;
       identical = ref.identical;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 8418d2b..a48f1fe 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1448,7 +1448,7 @@
   CHECK_EQ(self->GetThreadId(), ThreadList::kMainThreadId);
   CHECK(self != nullptr);
 
-  self->SetCanCallIntoJava(!IsAotCompiler());
+  self->SetIsRuntimeThread(IsAotCompiler());
 
   // Set us to runnable so tools using a runtime can allocate and GC by default
   self->TransitionFromSuspendedToRunnable();
diff --git a/runtime/suspend_reason.h b/runtime/suspend_reason.h
index 4e75a4f..289a1a4 100644
--- a/runtime/suspend_reason.h
+++ b/runtime/suspend_reason.h
@@ -22,8 +22,6 @@
 namespace art {
 
 // The various reasons that we might be suspending a thread.
-// TODO Once kForDebugger is removed by removing the old debugger we should make the kForUserCode
-//      just a basic count for bookkeeping instead of linking it as directly with internal suspends.
 enum class SuspendReason {
   // Suspending for internal reasons (e.g. GC, stack trace, etc.).
   // TODO Split this into more descriptive sections.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b6f0965..4a3d8cb 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1236,34 +1236,6 @@
   LOG(FATAL) << ss.str();
 }
 
-void Thread::SetCanBeSuspendedByUserCode(bool can_be_suspended_by_user_code) {
-  CHECK_EQ(this, Thread::Current()) << "This function may only be called on the current thread. "
-                                    << *Thread::Current() << " tried to modify the suspendability "
-                                    << "of " << *this;
-  // NB This checks the new value! This ensures that we can only set can_be_suspended_by_user_code
-  // to false if !CanCallIntoJava().
-  DCHECK(!CanCallIntoJava() || can_be_suspended_by_user_code)
-      << "Threads able to call into java may not be marked as unsuspendable!";
-  if (can_be_suspended_by_user_code == CanBeSuspendedByUserCode()) {
-    // Don't need to do anything if nothing is changing.
-    return;
-  }
-  art::MutexLock mu(this, *Locks::user_code_suspension_lock_);
-  art::MutexLock thread_list_mu(this, *Locks::thread_suspend_count_lock_);
-
-  // We want to add the user-code suspend count if we are newly allowing user-code suspends and
-  // remove them if we are disabling them.
-  int adj = can_be_suspended_by_user_code ? GetUserCodeSuspendCount() : -GetUserCodeSuspendCount();
-  // Adjust the global suspend count appropriately. Use kInternal to not change the ForUserCode
-  // count.
-  if (adj != 0) {
-    bool suspend = ModifySuspendCountInternal(this, adj, nullptr, SuspendReason::kInternal);
-    CHECK(suspend) << this << " was unable to modify it's own suspend count!";
-  }
-  // Mark thread as accepting user-code suspensions.
-  can_be_suspended_by_user_code_ = can_be_suspended_by_user_code;
-}
-
 bool Thread::ModifySuspendCountInternal(Thread* self,
                                         int delta,
                                         AtomicInteger* suspend_barrier,
@@ -1285,17 +1257,6 @@
       LOG(ERROR) << "attempting to modify suspend count in an illegal way.";
       return false;
     }
-    DCHECK(this == self || this->IsSuspended())
-        << "Only self kForUserCode suspension on an unsuspended thread is allowed: " << this;
-    if (UNLIKELY(!CanBeSuspendedByUserCode())) {
-      VLOG(threads) << this << " is being requested to suspend for user code but that is disabled "
-                    << "the thread will not actually go to sleep.";
-      // Having the user_code_suspend_count still be around is useful but we don't need to actually
-      // do anything since we aren't going to 'really' suspend. Just adjust the
-      // user_code_suspend_count and return.
-      tls32_.user_code_suspend_count += delta;
-      return true;
-    }
   }
   if (UNLIKELY(delta < 0 && tls32_.suspend_count <= 0)) {
     UnsafeLogFatalForSuspendCount(self, this);
@@ -2156,8 +2117,7 @@
 Thread::Thread(bool daemon)
     : tls32_(daemon),
       wait_monitor_(nullptr),
-      can_call_into_java_(true),
-      can_be_suspended_by_user_code_(true) {
+      is_runtime_thread_(false) {
   wait_mutex_ = new Mutex("a thread wait mutex");
   wait_cond_ = new ConditionVariable("a thread wait condition variable", *wait_mutex_);
   tlsPtr_.instrumentation_stack = new std::deque<instrumentation::InstrumentationStackFrame>;
@@ -2181,6 +2141,10 @@
   tls32_.is_transitioning_to_runnable = false;
 }
 
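+// A runtime thread (e.g. the JIT thread) may not load classes when the runtime is
+// java-debuggable; tools/jit-load can be used to verify that none are loaded.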
+bool Thread::CanLoadClasses() const {
+  return !IsRuntimeThread() || !Runtime::Current()->IsJavaDebuggable();
+}
+
 bool Thread::IsStillStarting() const {
   // You might think you can check whether the state is kStarting, but for much of thread startup,
   // the thread is in kNative; it might also be in kVmWait.
diff --git a/runtime/thread.h b/runtime/thread.h
index 3c85b80..a915cd8 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -990,26 +990,18 @@
     --tls32_.disable_thread_flip_count;
   }
 
-  // Returns true if the thread is subject to user_code_suspensions.
-  bool CanBeSuspendedByUserCode() const {
-    return can_be_suspended_by_user_code_;
-  }
-
-  // Sets CanBeSuspenededByUserCode and adjusts the suspend-count as needed. This may only be called
-  // when running on the current thread. It is **absolutely required** that this be called only on
-  // the Thread::Current() thread.
-  void SetCanBeSuspendedByUserCode(bool can_be_suspended_by_user_code)
-      REQUIRES(!Locks::thread_suspend_count_lock_, !Locks::user_code_suspension_lock_);
-
   // Returns true if the thread is allowed to call into java.
-  bool CanCallIntoJava() const {
-    return can_call_into_java_;
+  bool IsRuntimeThread() const {
+    return is_runtime_thread_;
   }
 
-  void SetCanCallIntoJava(bool can_call_into_java) {
-    can_call_into_java_ = can_call_into_java;
+  void SetIsRuntimeThread(bool is_runtime_thread) {
+    is_runtime_thread_ = is_runtime_thread;
   }
 
+  // Returns true if the thread is allowed to load java classes.
+  bool CanLoadClasses() const;
+
   // Activates single step control for debugging. The thread takes the
   // ownership of the given SingleStepControl*. It is deleted by a call
   // to DeactivateSingleStepControl or upon thread destruction.
@@ -1587,9 +1579,8 @@
     // critical section enter.
     uint32_t disable_thread_flip_count;
 
-    // If CanBeSuspendedByUserCode, how much of 'suspend_count_' is by request of user code, used to
-    // distinguish threads suspended by the runtime from those suspended by user code. Otherwise
-    // this is just a count of how many user-code suspends have been attempted (but were ignored).
+    // How much of 'suspend_count_' is by request of user code, used to distinguish threads
+    // suspended by the runtime from those suspended by user code.
     // This should have GUARDED_BY(Locks::user_code_suspension_lock_) but auto analysis cannot be
     // told that AssertHeld should be good enough.
     int user_code_suspend_count GUARDED_BY(Locks::thread_suspend_count_lock_);
@@ -1783,6 +1774,14 @@
     mirror::Throwable* async_exception;
   } tlsPtr_;
 
+  // Small thread-local cache to be used from the interpreter.
+  // It is keyed by dex instruction pointer.
+  // The value is opcode-dependent (e.g. field offset).
+  InterpreterCache interpreter_cache_;
+
+  // All fields below this line should not be accessed by native code. This means these fields can
+  // be modified, rearranged, added, or removed without having to modify asm_support.h.
+
   // Guards the 'wait_monitor_' members.
   Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
@@ -1804,18 +1803,8 @@
   // compiled code or entrypoints.
   SafeMap<std::string, std::unique_ptr<TLSData>> custom_tls_ GUARDED_BY(Locks::custom_tls_lock_);
 
-  // True if the thread is allowed to call back into java (for e.g. during class resolution).
-  // By default this is true.
-  bool can_call_into_java_;
-
-  // True if the thread is subject to user-code suspension. By default this is true. This can only
-  // be false for threads where '!can_call_into_java_'.
-  bool can_be_suspended_by_user_code_;
-
-  // Small thread-local cache to be used from the interpreter.
-  // It is keyed by dex instruction pointer.
-  // The value is opcode-depended (e.g. field offset).
-  InterpreterCache interpreter_cache_;
+  // True if the thread is some form of runtime thread (e.g. GC or JIT).
+  bool is_runtime_thread_;
 
   friend class Dbg;  // For SetStateUnsafe.
   friend class gc::collector::SemiSpace;  // For getting stack traces.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index cddc275..ec40716 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -902,8 +902,6 @@
                                         bool request_suspension,
                                         SuspendReason reason,
                                         bool* timed_out) {
-  CHECK_NE(reason, SuspendReason::kForUserCode) << "Cannot suspend for user-code by peer. Must be "
-                                                << "done directly on the thread.";
   const uint64_t start_time = NanoTime();
   useconds_t sleep_us = kThreadSuspendInitialSleepUs;
   *timed_out = false;
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index 28fc59c..f1c808b 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -102,15 +102,10 @@
                                      nullptr,
                                      worker->thread_pool_->create_peers_));
   worker->thread_ = Thread::Current();
-  // Thread pool workers cannot call into java.
-  worker->thread_->SetCanCallIntoJava(false);
-  // Thread pool workers should not be getting paused by user-code.
-  worker->thread_->SetCanBeSuspendedByUserCode(false);
+  // Mark thread pool workers as runtime-threads.
+  worker->thread_->SetIsRuntimeThread(true);
   // Do work until its time to shut down.
   worker->Run();
-  // Thread pool worker is finished. We want to allow suspension during shutdown.
-  worker->thread_->SetCanBeSuspendedByUserCode(true);
-  // Thread shuts down.
   runtime->DetachCurrentThread();
   return nullptr;
 }
diff --git a/runtime/ti/agent.cc b/runtime/ti/agent.cc
index fc51567..97c39bb 100644
--- a/runtime/ti/agent.cc
+++ b/runtime/ti/agent.cc
@@ -176,7 +176,7 @@
   }
 }
 
-Agent::Agent(Agent&& other)
+Agent::Agent(Agent&& other) noexcept
     : dlopen_handle_(nullptr),
       onload_(nullptr),
       onattach_(nullptr),
@@ -184,7 +184,7 @@
   *this = std::move(other);
 }
 
-Agent& Agent::operator=(Agent&& other) {
+Agent& Agent::operator=(Agent&& other) noexcept {
   if (this != &other) {
     if (dlopen_handle_ != nullptr) {
       Unload();
diff --git a/runtime/ti/agent.h b/runtime/ti/agent.h
index 24a6f1c..faf76a1 100644
--- a/runtime/ti/agent.h
+++ b/runtime/ti/agent.h
@@ -105,8 +105,8 @@
   // TODO We need to acquire some locks probably.
   void Unload();
 
-  Agent(Agent&& other);
-  Agent& operator=(Agent&& other);
+  Agent(Agent&& other) noexcept;
+  Agent& operator=(Agent&& other) noexcept;
 
   ~Agent();
 
diff --git a/test/684-select-condition/expected.txt b/test/684-select-condition/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/684-select-condition/expected.txt
diff --git a/test/684-select-condition/info.txt b/test/684-select-condition/info.txt
new file mode 100644
index 0000000..f9d4acd
--- /dev/null
+++ b/test/684-select-condition/info.txt
@@ -0,0 +1 @@
+Regression test for a bug in ARM's code generator for HSelect.
diff --git a/test/684-select-condition/src/Main.java b/test/684-select-condition/src/Main.java
new file mode 100644
index 0000000..196ff1a
--- /dev/null
+++ b/test/684-select-condition/src/Main.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    doFloatingPointTest("1", "1.0");
+    doFloatingPointTest("4", "2.0");
+    checkValue(String.valueOf(doIntegerTest1(4)), "0");
+    checkValue(String.valueOf(doIntegerTest2(4)), "4");
+
+    // Another variant of the floating point test, but less brittle.
+    staticField = 1;
+    checkValue(String.valueOf($noinline$test()), "1.0");
+    staticField = 4;
+    checkValue(String.valueOf($noinline$test()), "2.0");
+  }
+
+  // This code is a reduced version of the original reproducer. The ARM
+  // code generator used to generate incorrect code for it. Note that this
+  // test is very brittle: a simple change could keep the compiler from
+  // tripping over the bug.
+  public static void doFloatingPointTest(String s, String expected) {
+    float a = (float)Integer.valueOf(s);
+    a = a < 2.0f ? a : 2.0f;
+    checkValue("" + a, expected);
+  }
+
+  // The compiler used to trip on the following two methods. These tests are
+  // very brittle and require that constant folding not be run after
+  // load/store elimination.
+  public static int doIntegerTest1(int param) {
+    Main main = new Main();
+    main.field = 0;
+    return (main.field == 0) ? 0 : param;
+  }
+
+  public static int doIntegerTest2(int param) {
+    Main main = new Main();
+    main.field = 0;
+    return (main.field != 0) ? 0 : param;
+  }
+
+  public static void checkValue(String actual, String expected) {
+    if (!expected.equals(actual)) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  static void $noinline$nothing() {}
+  static int $noinline$getField() { return staticField; }
+
+  static float $noinline$test() {
+    // The 2.0f shall be materialized for GreaterThanOrEqual at the beginning of the method;
+    // since the following call clobbers caller-saves, it is allocated to s16.
+    // r0(field) = InvokeStaticOrDirect[]
+    int one = $noinline$getField();
+    // s0(a_1) = TypeConversion[r0(one)]
+    float a = (float)one;
+    // s16(a_2) = Select[s0(a_1), C(2.0f), GreaterThanOrEqual[s0(a_1), s16(2.0f)]]
+    a = a < 2.0f ? a : 2.0f;
+    // The following call is added to clobber caller-saves, forcing the output of the Select
+    // to be allocated to s16.
+    $noinline$nothing();
+    return a;
+  }
+
+  int field;
+  static int staticField;
+}
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index 660f4c9..2fa7d2a 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -117,6 +117,7 @@
 gdb = False
 gdb_arg = ''
 runtime_option = ''
+with_agent = []
 run_test_option = []
 stop_testrunner = False
 dex2oat_jobs = -1   # -1 corresponds to default threads for dex2oat
@@ -333,6 +334,9 @@
   if runtime_option:
     for opt in runtime_option:
       options_all += ' --runtime-option ' + opt
+  if with_agent:
+    for opt in with_agent:
+      options_all += ' --with-agent ' + opt
 
   if dex2oat_jobs != -1:
     options_all += ' --dex2oat-jobs ' + str(dex2oat_jobs)
@@ -909,6 +913,7 @@
   global timeout
   global dex2oat_jobs
   global run_all_configs
+  global with_agent
 
   parser = argparse.ArgumentParser(description="Runs all or a subset of the ART test suite.")
   parser.add_argument('-t', '--test', action='append', dest='tests', help='name(s) of the test(s)')
@@ -941,6 +946,8 @@
                             This should be enclosed in single-quotes to allow for spaces. The option
                             will be split using shlex.split() prior to invoking run-test.
                             Example \"--run-test-option='--with-agent libtifast.so=MethodExit'\"""")
+  global_group.add_argument('--with-agent', action='append', dest='with_agent',
+                            help="""Pass an agent to be attached to the runtime""")
   global_group.add_argument('--runtime-option', action='append', dest='runtime_option',
                             help="""Pass an option to the runtime. Runtime options
                             starting with a '-' must be separated by a '=', for
@@ -989,6 +996,7 @@
     if options['gdb_arg']:
       gdb_arg = options['gdb_arg']
   runtime_option = options['runtime_option'];
+  with_agent = options['with_agent'];
   run_test_option = sum(map(shlex.split, options['run_test_option']), [])
 
   timeout = options['timeout']
diff --git a/tools/jit-load/Android.bp b/tools/jit-load/Android.bp
new file mode 100644
index 0000000..a57a408
--- /dev/null
+++ b/tools/jit-load/Android.bp
@@ -0,0 +1,85 @@
+//
+// Copyright (C) 2017 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Build variants {target,host} x {debug,ndebug} x {32,64}
+
+cc_defaults {
+    name: "jitload-defaults",
+    host_supported: true,
+    srcs: [
+        "jitload.cc",
+    ],
+    defaults: ["art_defaults"],
+
+    // Note that this tool needs to be built for both 32-bit and 64-bit since it must be the
+    // same ISA as the runtime it is attached to.
+    compile_multilib: "both",
+
+    shared_libs: [
+        "libbase",
+    ],
+    target: {
+        android: {
+        },
+        host: {
+        },
+    },
+    header_libs: [
+        "libopenjdkjvmti_headers",
+    ],
+    multilib: {
+        lib32: {
+            suffix: "32",
+        },
+        lib64: {
+            suffix: "64",
+        },
+    },
+    symlink_preferred_arch: true,
+}
+
+art_cc_library {
+    name: "libjitload",
+    defaults: ["jitload-defaults"],
+    shared_libs: [
+        "libart",
+        "libdexfile",
+        "libprofile",
+        "libartbase",
+    ],
+}
+
+art_cc_library {
+    name: "libjitloadd",
+    defaults: [
+        "art_debug_defaults",
+        "jitload-defaults",
+    ],
+    shared_libs: [
+        "libartd",
+        "libdexfiled",
+        "libprofiled",
+        "libartbased",
+    ],
+}
+
+//art_cc_test {
+//    name: "art_titrace_tests",
+//    defaults: [
+//        "art_gtest_defaults",
+//    ],
+//    srcs: ["titrace_test.cc"],
+//}
diff --git a/tools/jit-load/README.md b/tools/jit-load/README.md
new file mode 100644
index 0000000..8aa4513
--- /dev/null
+++ b/tools/jit-load/README.md
@@ -0,0 +1,35 @@
+# jitload
+
+Jitload is an ART-specific agent that counts the number of classes loaded on
+the JIT thread, or verifies that none were.
+
+# Usage
+### Build
+>    `make libjitload`  # or 'make libjitloadd' with debugging checks enabled
+
+The libraries will be built for 32-bit, 64-bit, host, and target. The examples below assume you want the 64-bit version.
+### Command Line
+
+>    `art -Xplugin:$ANDROID_HOST_OUT/lib64/libopenjdkjvmti.so -agentpath:$ANDROID_HOST_OUT/lib64/libjitload.so -cp tmp/java/helloworld.dex -Xint helloworld`
+
+* Both `-Xplugin` and `-agentpath` need to be passed, otherwise the jitload agent will fail during init.
+* If using `libartd.so`, make sure to use the debug versions of jvmti and the agent.
+* Pass the '=fatal' option to the agent to make it abort if any classes are
+  loaded on the JIT thread. Otherwise a warning will be printed.
+
+>    `art -d -Xplugin:$ANDROID_HOST_OUT/lib64/libopenjdkjvmtid.so -agentpath:$ANDROID_HOST_OUT/lib64/libjitloadd.so=fatal -cp tmp/java/helloworld.dex -Xint helloworld`
+
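+Without `=fatal`, each class loaded on the JIT thread is logged as a warning of
+the form `Loaded <class signature> on jit thread!` (see jitload.cc).
+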
+* To use with run-test or testrunner.py use the --with-agent argument.
+
+>    `./test/run-test --host --with-agent libjitloadd.so=fatal 001-HelloWorld`
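+
+The same flag works with testrunner.py, e.g. (test name and flags illustrative):
+
+>    `./test/testrunner/testrunner.py --host --with-agent libjitloadd.so=fatal -t 001-HelloWorld`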
+
+
+### Printing the Results
+All statistics gathered by the agent are printed automatically when the
+program exits normally. Android applications, however, are typically killed
+rather than allowed to exit, so the results must be dumped manually.
+
+>    `kill -SIGQUIT $(pidof com.example.android.displayingbitmaps)`
+
+This will initiate a dump of the counts (to logcat).
+
diff --git a/tools/jit-load/jitload.cc b/tools/jit-load/jitload.cc
new file mode 100644
index 0000000..d67eef0
--- /dev/null
+++ b/tools/jit-load/jitload.cc
@@ -0,0 +1,144 @@
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <android-base/logging.h>
+#include <jni.h>
+#include <jvmti.h>
+
+#include "base/runtime_debug.h"
+#include "jit/jit.h"
+#include "runtime-inl.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+namespace jitload {
+
+// Special env version that allows JVMTI-like access on userdebug builds.
+static constexpr jint kArtTiVersion = JVMTI_VERSION_1_2 | 0x40000000;
+
+#define CHECK_CALL_SUCCESS(c) \
+  do { \
+    auto vc = (c); \
+    CHECK(vc == JNI_OK || vc == JVMTI_ERROR_NONE) << "call " << #c  << " did not succeed\n"; \
+  } while (false)
+
+static jthread GetJitThread() {
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  auto* jit = art::Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    return nullptr;
+  }
+  auto* thread_pool = jit->GetThreadPool();
+  if (thread_pool == nullptr) {
+    return nullptr;
+  }
+  // Currently we only have a single jit thread so we only look at that one.
+  return soa.AddLocalReference<jthread>(
+          thread_pool->GetWorkers()[0]->GetThread()->GetPeerFromOtherThread());
+}
+
+JNICALL void VmInitCb(jvmtiEnv* jvmti,
+                      JNIEnv* env ATTRIBUTE_UNUSED,
+                      jthread curthread ATTRIBUTE_UNUSED) {
+  jthread jit_thread = GetJitThread();
+  if (jit_thread != nullptr) {
+    CHECK_EQ(jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_CLASS_PREPARE, jit_thread),
+             JVMTI_ERROR_NONE);
+  }
+}
+
+struct AgentOptions {
+  bool fatal;
+  uint64_t cnt;
+};
+
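+// Logs how many classes have been loaded on the JIT thread so far. Called for data dump
+// requests (e.g. SIGQUIT) and at VM death.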
+JNICALL static void DataDumpRequestCb(jvmtiEnv* jvmti) {
+  AgentOptions* ops;
+  CHECK_CALL_SUCCESS(jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&ops)));
+  LOG(WARNING) << "Jit thread has loaded " << ops->cnt << " classes";
+}
+
+JNICALL void ClassPrepareJit(jvmtiEnv* jvmti,
+                             JNIEnv* jni_env ATTRIBUTE_UNUSED,
+                             jthread thr ATTRIBUTE_UNUSED,
+                             jclass klass) {
+  AgentOptions* ops;
+  CHECK_CALL_SUCCESS(jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&ops)));
+  char* klass_name;
+  CHECK_CALL_SUCCESS(jvmti->GetClassSignature(klass, &klass_name, nullptr));
+  (ops->fatal ? LOG_STREAM(FATAL)
+              : LOG_STREAM(WARNING)) << "Loaded " << klass_name << " on jit thread!";
+  ops->cnt++;
+  CHECK_CALL_SUCCESS(jvmti->Deallocate(reinterpret_cast<unsigned char*>(klass_name)));
+}
+
+JNICALL void VMDeathCb(jvmtiEnv* jvmti, JNIEnv* env ATTRIBUTE_UNUSED) {
+  DataDumpRequestCb(jvmti);
+}
+
+static jvmtiEnv* SetupJvmti(JavaVM* vm, const char* options) {
+  android::base::InitLogging(/* argv */nullptr);
+
+  jvmtiEnv* jvmti = nullptr;
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti), JVMTI_VERSION_1_0) != JNI_OK &&
+      vm->GetEnv(reinterpret_cast<void**>(&jvmti), kArtTiVersion) != JNI_OK) {
+    LOG(FATAL) << "Unable to setup JVMTI environment!";
+  }
+  jvmtiEventCallbacks cb {
+        .VMInit = VmInitCb,
+        .ClassPrepare = ClassPrepareJit,
+        .DataDumpRequest = DataDumpRequestCb,
+        .VMDeath = VMDeathCb,
+  };
+  AgentOptions* ops;
+  CHECK_CALL_SUCCESS(
+      jvmti->Allocate(sizeof(AgentOptions), reinterpret_cast<unsigned char**>(&ops)));
+  ops->fatal = (strcmp(options, "fatal") == 0);
+  ops->cnt = 0;
+  CHECK_CALL_SUCCESS(jvmti->SetEnvironmentLocalStorage(ops));
+  CHECK_CALL_SUCCESS(jvmti->SetEventCallbacks(&cb, sizeof(cb)));
+  CHECK_CALL_SUCCESS(jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_VM_INIT, nullptr));
+  CHECK_CALL_SUCCESS(
+      jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_DATA_DUMP_REQUEST, nullptr));
+  return jvmti;
+}
+
+// Early attachment (e.g. 'java -agent[lib|path]:filename.so').
+extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* /* reserved */) {
+  SetupJvmti(vm, options);
+  return JNI_OK;
+}
+
+// Late attachment (e.g. 'am attach-agent').
+extern "C" JNIEXPORT jint JNICALL Agent_OnAttach(JavaVM *vm, char* options, void* /* reserved */) {
+  jvmtiEnv* jvmti = SetupJvmti(vm, options);
+
+  JNIEnv* jni = nullptr;
+  jthread thr = nullptr;
+  CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&jni), JNI_VERSION_1_6));
+  CHECK_CALL_SUCCESS(jvmti->GetCurrentThread(&thr));
+
+  // Final setup is done in the VmInitCb.
+  VmInitCb(jvmti, jni, thr);
+
+  jni->DeleteLocalRef(thr);
+  return JNI_OK;
+}
+
+#undef CHECK_CALL_SUCCESS
+
+}  // namespace jitload
+
diff --git a/tools/titrace/titrace.cc b/tools/titrace/titrace.cc
index 981ad56..ca568d7 100644
--- a/tools/titrace/titrace.cc
+++ b/tools/titrace/titrace.cc
@@ -54,7 +54,7 @@
   }
 
   TiMemory(const TiMemory& other) = delete;
-  TiMemory(TiMemory&& other) {
+  TiMemory(TiMemory&& other) noexcept {
     env_ = other.env_;
     mem_ = other.mem_;
     size_ = other.size_;
@@ -66,7 +66,7 @@
     }
   }
 
-  TiMemory& operator=(TiMemory&& other) {
+  TiMemory& operator=(TiMemory&& other) noexcept {
     if (mem_ != other.mem_) {
       TiMemory::~TiMemory();
     }