Merge "Let classloader provide correct LD_LIBRARY_PATH"
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index ac7a4a7..6d48598 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -98,7 +98,7 @@
   }
   size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte;
   // There are 2 bytes to encode the number of entries.
-  if (num_entries >= 65536) {
+  if (num_entries > std::numeric_limits<uint16_t>::max()) {
     LOG(WARNING) << "Cannot encode GC map for method with " << num_entries << " entries: "
                  << PrettyMethod(method_verifier->GetMethodReference().dex_method_index,
                                  *method_verifier->GetMethodReference().dex_file);
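
The new bound is the same numeric limit as before, just self-documenting: a two-byte count can hold at most 65535 entries, so rejecting anything above std::numeric_limits<uint16_t>::max() covers exactly the cases the old ">= 65536" test did. A minimal standalone sketch of the equivalence (not part of the patch):

    #include <cstdint>
    #include <limits>

    // Two bytes encode entry counts 0..65535; anything larger cannot be represented.
    static_assert(std::numeric_limits<uint16_t>::max() == 65535, "two-byte entry count");
    static_assert(65536 > std::numeric_limits<uint16_t>::max(), "old and new checks agree");
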
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 242e3df..07f9a9b 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -120,7 +120,7 @@
   DequickenMap dequicken_map_;
   SafeCastSet safe_cast_set_;
 
-  bool has_verification_failures_;
+  bool has_verification_failures_ = false;
 
   // Copy of mapping generated by verifier of dex PCs of string init invocations
   // to the set of other registers that the receiver has been copied into.
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 32bde8e..73e121f 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -110,10 +110,6 @@
     CheckNoDexObjects();
   }
 
-  if (!AllocMemory()) {
-    return false;
-  }
-
   if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
     CheckNonImageClassesRemoved();
@@ -123,6 +119,12 @@
   CalculateNewObjectOffsets();
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
 
+  // This needs to happen after CalculateNewObjectOffsets since it relies on intern_table_bytes_ and
+  // bin size sums being calculated.
+  if (!AllocMemory()) {
+    return false;
+  }
+
   return true;
 }
 
@@ -205,7 +207,7 @@
   }
 
   // Write out the image bitmap at the page aligned start of the image end.
-  const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap);
+  const ImageSection& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap);
   CHECK_ALIGNED(bitmap_section.Offset(), kPageSize);
   if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()),
                          bitmap_section.Size(), bitmap_section.Offset())) {
@@ -222,26 +224,10 @@
   return true;
 }
 
-void ImageWriter::SetImageOffset(mirror::Object* object,
-                                 ImageWriter::BinSlot bin_slot,
-                                 size_t offset) {
+void ImageWriter::SetImageOffset(mirror::Object* object, size_t offset) {
   DCHECK(object != nullptr);
   DCHECK_NE(offset, 0U);
-  mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset);
-  DCHECK_ALIGNED(obj, kObjectAlignment);
 
-  static size_t max_offset = 0;
-  max_offset = std::max(max_offset, offset);
-  image_bitmap_->Set(obj);  // Mark the obj as mutated, since we will end up changing it.
-  {
-    // Remember the object-inside-of-the-image's hash code so we can restore it after the copy.
-    auto hash_it = saved_hashes_map_.find(bin_slot);
-    if (hash_it != saved_hashes_map_.end()) {
-      std::pair<BinSlot, uint32_t> slot_hash = *hash_it;
-      saved_hashes_.push_back(std::make_pair(obj, slot_hash.second));
-      saved_hashes_map_.erase(hash_it);
-    }
-  }
   // The object is already deflated from when we set the bin slot. Just overwrite the lock word.
   object->SetLockWord(LockWord::FromForwardingAddress(offset), false);
   DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u);
@@ -262,7 +248,7 @@
   size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex();
   DCHECK_ALIGNED(new_offset, kObjectAlignment);
 
-  SetImageOffset(object, bin_slot, new_offset);
+  SetImageOffset(object, new_offset);
   DCHECK_LT(new_offset, image_end_);
 }
 
@@ -302,14 +288,14 @@
       // No hash, don't need to save it.
       break;
     case LockWord::kHashCode:
-      saved_hashes_map_[bin_slot] = lw.GetHashCode();
+      DCHECK(saved_hashcode_map_.find(object) == saved_hashcode_map_.end());
+      saved_hashcode_map_.emplace(object, lw.GetHashCode());
       break;
     default:
       LOG(FATAL) << "Unreachable.";
       UNREACHABLE();
   }
-  object->SetLockWord(LockWord::FromForwardingAddress(static_cast<uint32_t>(bin_slot)),
-                      false);
+  object->SetLockWord(LockWord::FromForwardingAddress(bin_slot.Uint32Value()), false);
   DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u);
   DCHECK(IsImageBinSlotAssigned(object));
 }
@@ -487,11 +473,8 @@
 
   ++bin_slot_count_[bin];
 
-  DCHECK_LT(GetBinSizeSum(), image_->Size());
-
   // Grow the image closer to the end by the object we just assigned.
   image_end_ += offset_delta;
-  DCHECK_LT(image_end_, image_->Size());
 }
 
 bool ImageWriter::WillMethodBeDirty(ArtMethod* m) const {
@@ -535,10 +518,8 @@
 }
 
 bool ImageWriter::AllocMemory() {
-  auto* runtime = Runtime::Current();
-  const size_t heap_size = runtime->GetHeap()->GetTotalMemory();
-  // Add linear alloc usage since we need to have room for the ArtFields.
-  const size_t length = RoundUp(heap_size + runtime->GetLinearAlloc()->GetUsedMemory(), kPageSize);
+  const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_,
+                                kPageSize);
   std::string error_msg;
   image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE,
                                     false, false, &error_msg));
@@ -547,9 +528,10 @@
     return false;
   }
 
-  // Create the image bitmap.
-  image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create("image bitmap", image_->Begin(),
-                                                                    RoundUp(length, kPageSize)));
+  // Create the image bitmap; it only needs to cover the mirror object section, which ends at image_end_.
+  CHECK_LE(image_end_, length);
+  image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create(
+      "image bitmap", image_->Begin(), RoundUp(image_end_, kPageSize)));
   if (image_bitmap_.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate memory for image bitmap";
     return false;
@@ -569,42 +551,6 @@
   return true;
 }
 
-// Collect all the java.lang.String in the heap and put them in the output strings_ array.
-class StringCollector {
- public:
-  StringCollector(Handle<mirror::ObjectArray<mirror::String>> strings, size_t index)
-      : strings_(strings), index_(index) {
-  }
-  static void Callback(Object* obj, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    auto* collector = reinterpret_cast<StringCollector*>(arg);
-    if (obj->GetClass()->IsStringClass()) {
-      collector->strings_->SetWithoutChecks<false>(collector->index_++, obj->AsString());
-    }
-  }
-  size_t GetIndex() const {
-    return index_;
-  }
-
- private:
-  Handle<mirror::ObjectArray<mirror::String>> strings_;
-  size_t index_;
-};
-
-// Compare strings based on length, used for sorting strings by length / reverse length.
-class LexicographicalStringComparator {
- public:
-  bool operator()(const mirror::HeapReference<mirror::String>& lhs,
-                  const mirror::HeapReference<mirror::String>& rhs) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::String* lhs_s = lhs.AsMirrorPtr();
-    mirror::String* rhs_s = rhs.AsMirrorPtr();
-    uint16_t* lhs_begin = lhs_s->GetValue();
-    uint16_t* rhs_begin = rhs_s->GetValue();
-    return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(),
-                                        rhs_begin, rhs_begin + rhs_s->GetLength());
-  }
-};
-
 void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) {
   if (!obj->GetClass()->IsStringClass()) {
     return;
@@ -769,7 +715,8 @@
       DCHECK_EQ(obj, obj->AsString()->Intern());
       return;
     }
-    mirror::String* const interned = obj->AsString()->Intern();
+    mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrong(
+        obj->AsString()->Intern());
     if (obj != interned) {
       if (!IsImageBinSlotAssigned(interned)) {
         // interned obj is after us, allocate its location early
@@ -965,7 +912,6 @@
   // know where image_roots is going to end up
   image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
 
-  DCHECK_LT(image_end_, image_->Size());
   image_objects_offset_begin_ = image_end_;
   // Prepare bin slots for dex cache arrays.
   PrepareDexCacheArraySlots();
@@ -997,7 +943,6 @@
 
   // Transform each object's bin slot into an offset which will be used to do the final copy.
   heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
-  DCHECK(saved_hashes_map_.empty());  // All binslot hashes should've been put into vector by now.
 
   DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
 
@@ -1010,6 +955,11 @@
         bin_slot_previous_sizes_[native_reloc.bin_type];
   }
 
+  // Calculate how big the intern table will be after being serialized.
+  auto* const intern_table = Runtime::Current()->GetInternTable();
+  CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strongly interned all the strings";
+  intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
+
   // Note that image_end_ is left at the end of the used mirror object section.
 }
 
@@ -1039,6 +989,10 @@
   CHECK_EQ(image_objects_offset_begin_ + bin_slot_previous_sizes_[kBinArtMethodClean],
            methods_section->Offset());
   cur_pos = methods_section->End();
+  // Calculate the size of the interned strings.
+  auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
+  *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
+  cur_pos = interned_strings_section->End();
   // Finally bitmap section.
   const size_t bitmap_bytes = image_bitmap_->Size();
   auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
@@ -1046,16 +1000,19 @@
   cur_pos = bitmap_section->End();
   if (kIsDebugBuild) {
     size_t idx = 0;
-    for (auto& section : sections) {
+    for (const ImageSection& section : sections) {
       LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section;
       ++idx;
     }
     LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_;
   }
+  const size_t image_end = static_cast<uint32_t>(interned_strings_section->End());
+  CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) <<
+      "Oat file should be right after the image.";
   // Create the header.
   new (image_->Begin()) ImageHeader(
-      PointerToLowMemUInt32(image_begin_), static_cast<uint32_t>(methods_section->End()), sections,
-      image_roots_address_, oat_file_->GetOatHeader().GetChecksum(),
+      PointerToLowMemUInt32(image_begin_), image_end,
+      sections, image_roots_address_, oat_file_->GetOatHeader().GetChecksum(),
       PointerToLowMemUInt32(oat_file_begin), PointerToLowMemUInt32(oat_data_begin_),
       PointerToLowMemUInt32(oat_data_end), PointerToLowMemUInt32(oat_file_end), target_ptr_size_,
       compile_pic_);
@@ -1068,6 +1025,37 @@
   return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
 }
 
+class FixupRootVisitor : public RootVisitor {
+ public:
+  explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {
+  }
+
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      *roots[i] = ImageAddress(*roots[i]);
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr()));
+    }
+  }
+
+ private:
+  ImageWriter* const image_writer_;
+
+  mirror::Object* ImageAddress(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const size_t offset = image_writer_->GetImageOffset(obj);
+    auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset);
+    VLOG(compiler) << "Update root from " << obj << " to " << dest;
+    return dest;
+  }
+};
+
 void ImageWriter::CopyAndFixupNativeData() {
   // Copy ArtFields and methods to their locations and update the array for convenience.
   for (auto& pair : native_object_reloc_) {
@@ -1088,7 +1076,7 @@
   }
   // Fixup the image method roots.
   auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
-  const auto& methods_section = image_header->GetMethodsSection();
+  const ImageSection& methods_section = image_header->GetMethodsSection();
   for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) {
     auto* m = image_methods_[i];
     CHECK(m != nullptr);
@@ -1101,18 +1089,35 @@
     auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
     image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest);
   }
+  // Write the intern table into the image.
+  const ImageSection& intern_table_section = image_header->GetImageSection(
+      ImageHeader::kSectionInternedStrings);
+  InternTable* const intern_table = Runtime::Current()->GetInternTable();
+  uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset();
+  const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr);
+  // Fixup the pointers in the newly written intern table to contain image addresses.
+  InternTable temp_table;
+  // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
+  // the VisitRoots() will update the memory directly rather than the copies.
+  // This also relies on visit roots not doing any verification which could fail after we update
+  // the roots to be the image addresses.
+  temp_table.ReadFromMemory(memory_ptr);
+  CHECK_EQ(temp_table.Size(), intern_table->Size());
+  FixupRootVisitor visitor(this);
+  temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
+  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
 }
 
 void ImageWriter::CopyAndFixupObjects() {
   gc::Heap* heap = Runtime::Current()->GetHeap();
   heap->VisitObjects(CopyAndFixupObjectsCallback, this);
   // Fix up the objects that previously had hash codes.
-  for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) {
+  for (const auto& hash_pair : saved_hashcode_map_) {
     Object* obj = hash_pair.first;
     DCHECK_EQ(obj->GetLockWord<kVerifyNone>(false).ReadBarrierState(), 0U);
     obj->SetLockWord<kVerifyNone>(LockWord::FromHashCode(hash_pair.second, 0U), false);
   }
-  saved_hashes_.clear();
+  saved_hashcode_map_.clear();
 }
 
 void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) {
@@ -1155,18 +1160,22 @@
 }
 
 void ImageWriter::CopyAndFixupObject(Object* obj) {
-  // see GetLocalAddress for similar computation
   size_t offset = GetImageOffset(obj);
   auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset);
-  const uint8_t* src = reinterpret_cast<const uint8_t*>(obj);
+  DCHECK_LT(offset, image_end_);
+  const auto* src = reinterpret_cast<const uint8_t*>(obj);
 
-  size_t n = obj->SizeOf();
+  image_bitmap_->Set(dst);  // Mark the obj as live.
+
+  const size_t n = obj->SizeOf();
   DCHECK_LE(offset + n, image_->Size());
   memcpy(dst, src, n);
 
   // Write in a hash code of objects which have inflated monitors or a hash code in their monitor
   // word.
-  dst->SetLockWord(LockWord::Default(), false);
+  const auto it = saved_hashcode_map_.find(obj);
+  dst->SetLockWord(it != saved_hashcode_map_.end() ?
+      LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false);
   FixupObject(obj, dst);
 }
 
@@ -1176,7 +1185,7 @@
   FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) {
   }
 
-  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
+  void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset);
     // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
@@ -1186,7 +1195,7 @@
   }
 
   // java.lang.ref.Reference visitor.
-  void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const
+  void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
@@ -1490,4 +1499,11 @@
   return lockword_ & ~kBinMask;
 }
 
+uint8_t* ImageWriter::GetOatFileBegin() const {
+  DCHECK_GT(intern_table_bytes_, 0u);
+  return image_begin_ + RoundUp(
+      image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] +
+      bin_slot_sizes_[kBinArtMethodClean] + intern_table_bytes_, kPageSize);
+}
+
 }  // namespace art
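
The intern table plumbing above follows a size-then-write pattern: CalculateNewObjectOffsets calls WriteToMemory(nullptr) to measure the serialized table, the result is cached in intern_table_bytes_ so AllocMemory and the section layout can reserve space, and CopyAndFixupNativeData later writes the table for real and checks the byte count matches. A rough sketch of that contract, with the helper name and surrounding scaffolding invented for illustration:

    // Sketch only: serialize an InternTable into a pre-sized image section,
    // relying on the two-pass contract shown in the patch (null target = measure).
    size_t SerializeInternTable(InternTable* table, uint8_t* section_begin) {
      const size_t expected = table->WriteToMemory(nullptr);        // pass 1: size only
      const size_t written = table->WriteToMemory(section_begin);   // pass 2: real write
      CHECK_EQ(written, expected);  // layout must not change between the two passes
      return written;
    }
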
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index a35d6ad..9d45ce2 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -54,7 +54,7 @@
         quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic),
         target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
         bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(),
-        dirty_methods_(0u), clean_methods_(0u) {
+        intern_table_bytes_(0u), dirty_methods_(0u), clean_methods_(0u) {
     CHECK_NE(image_begin, 0U);
     std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr);
   }
@@ -84,11 +84,7 @@
         image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset);
   }
 
-  uint8_t* GetOatFileBegin() const {
-    return image_begin_ + RoundUp(
-        image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] +
-        bin_slot_sizes_[kBinArtMethodClean], kPageSize);
-  }
+  uint8_t* GetOatFileBegin() const;
 
   bool Write(const std::string& image_filename, const std::string& oat_filename,
              const std::string& oat_location)
@@ -158,7 +154,7 @@
     // The offset in bytes from the beginning of the bin. Aligned to object size.
     uint32_t GetIndex() const;
     // Pack into a single uint32_t, for storing into a lock word.
-    explicit operator uint32_t() const { return lockword_; }
+    uint32_t Uint32Value() const { return lockword_; }
     // Comparison operator for map support
     bool operator<(const BinSlot& other) const  { return lockword_ < other.lockword_; }
 
@@ -170,7 +166,7 @@
   // We use the lock word to store the offset of the object in the image.
   void AssignImageOffset(mirror::Object* object, BinSlot bin_slot)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetImageOffset(mirror::Object* object, BinSlot bin_slot, size_t offset)
+  void SetImageOffset(mirror::Object* object, size_t offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsImageOffsetAssigned(mirror::Object* object) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -330,11 +326,9 @@
   // The start offsets of the dex cache arrays.
   SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
 
-  // Saved hashes (objects are inside of the image so that they don't move).
-  std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_;
-
-  // Saved hashes (objects are bin slots to inside of the image, not yet allocated an address).
-  std::map<BinSlot, uint32_t> saved_hashes_map_;
+  // Saved hash codes. We use these to restore lock words that were temporarily repurposed to
+  // hold forwarding addresses, and to copy the hash codes over to the image copies.
+  std::unordered_map<mirror::Object*, uint32_t> saved_hashcode_map_;
 
   // Beginning target oat address for the pointers from the output image to its oat file.
   const uint8_t* oat_data_begin_;
@@ -360,6 +354,9 @@
   size_t bin_slot_previous_sizes_[kBinSize];  // Number of bytes in previous bins.
   size_t bin_slot_count_[kBinSize];  // Number of objects in a bin
 
+  // Cached size of the intern table for when we allocate memory.
+  size_t intern_table_bytes_;
+
   // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to
   // have one entry per art field for convenience. ArtFields are placed right after the end of the
   // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields.
@@ -376,8 +373,9 @@
   uint64_t dirty_methods_;
   uint64_t clean_methods_;
 
-  friend class FixupVisitor;
   friend class FixupClassVisitor;
+  friend class FixupRootVisitor;
+  friend class FixupVisitor;
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
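
The single saved_hashcode_map_ works because the lock word is borrowed twice, first for the bin slot and then for the forwarding address, so a real hash code has to be parked in the map and reinstated on the destination copy. A minimal sketch of the restore step (the helper and its parameters are illustrative, not from the patch):

    // Sketch only: give the copied object back its hash code, or the default
    // lock word if the source object never had one stashed.
    void RestoreLockWord(mirror::Object* src, mirror::Object* dst,
                         const std::unordered_map<mirror::Object*, uint32_t>& saved_hashcodes) {
      const auto it = saved_hashcodes.find(src);
      dst->SetLockWord(it != saved_hashcodes.end()
                           ? LockWord::FromHashCode(it->second, 0u)
                           : LockWord::Default(),
                       false);  // not volatile; nothing else can see the copy yet
    }
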
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 049b3e3..130f0e9 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -508,19 +508,14 @@
       dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
   verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
 
-  uint32_t max_native_offset = 0;
-  for (size_t i = 0; i < pc_infos_.Size(); i++) {
-    uint32_t native_offset = pc_infos_.Get(i).native_pc;
-    if (native_offset > max_native_offset) {
-      max_native_offset = native_offset;
-    }
-  }
+  uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
 
-  GcMapBuilder builder(data, pc_infos_.Size(), max_native_offset, dex_gc_map.RegWidth());
-  for (size_t i = 0; i < pc_infos_.Size(); i++) {
-    struct PcInfo pc_info = pc_infos_.Get(i);
-    uint32_t native_offset = pc_info.native_pc;
-    uint32_t dex_pc = pc_info.dex_pc;
+  size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+  GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
+  for (size_t i = 0; i != num_stack_maps; ++i) {
+    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+    uint32_t native_offset = stack_map_entry.native_pc_offset;
+    uint32_t dex_pc = stack_map_entry.dex_pc;
     const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
     CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
     builder.AddEntry(native_offset, references);
@@ -528,17 +523,17 @@
 }
 
 void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const {
-  for (size_t i = 0; i < pc_infos_.Size(); i++) {
-    struct PcInfo pc_info = pc_infos_.Get(i);
-    uint32_t pc2dex_offset = pc_info.native_pc;
-    int32_t pc2dex_dalvik_offset = pc_info.dex_pc;
+  for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
+    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+    uint32_t pc2dex_offset = stack_map_entry.native_pc_offset;
+    int32_t pc2dex_dalvik_offset = stack_map_entry.dex_pc;
     src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
   }
 }
 
 void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
   uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = pc_infos_.Size();
+  uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
   uint32_t pc2dex_offset = 0u;
   int32_t pc2dex_dalvik_offset = 0;
   uint32_t dex2pc_data_size = 0u;
@@ -547,11 +542,11 @@
   int32_t dex2pc_dalvik_offset = 0;
 
   for (size_t i = 0; i < pc2dex_entries; i++) {
-    struct PcInfo pc_info = pc_infos_.Get(i);
-    pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset);
-    pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = pc_info.native_pc;
-    pc2dex_dalvik_offset = pc_info.dex_pc;
+    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+    pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
+    pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
+    pc2dex_offset = stack_map_entry.native_pc_offset;
+    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
   }
 
   // Walk over the blocks and find which ones correspond to catch block entries.
@@ -586,12 +581,12 @@
   dex2pc_dalvik_offset = 0u;
 
   for (size_t i = 0; i < pc2dex_entries; i++) {
-    struct PcInfo pc_info = pc_infos_.Get(i);
-    DCHECK(pc2dex_offset <= pc_info.native_pc);
-    write_pos = EncodeUnsignedLeb128(write_pos, pc_info.native_pc - pc2dex_offset);
-    write_pos = EncodeSignedLeb128(write_pos, pc_info.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = pc_info.native_pc;
-    pc2dex_dalvik_offset = pc_info.dex_pc;
+    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+    DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
+    write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
+    write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
+    pc2dex_offset = stack_map_entry.native_pc_offset;
+    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
   }
 
   for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) {
@@ -617,9 +612,9 @@
     auto it = table.PcToDexBegin();
     auto it2 = table.DexToPcBegin();
     for (size_t i = 0; i < pc2dex_entries; i++) {
-      struct PcInfo pc_info = pc_infos_.Get(i);
-      CHECK_EQ(pc_info.native_pc, it.NativePcOffset());
-      CHECK_EQ(pc_info.dex_pc, it.DexPc());
+      const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+      CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
+      CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
       ++it;
     }
     for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) {
@@ -695,14 +690,11 @@
   }
 
   // Collect PC infos for the mapping table.
-  struct PcInfo pc_info;
-  pc_info.dex_pc = outer_dex_pc;
-  pc_info.native_pc = GetAssembler()->CodeSize();
-  pc_infos_.Add(pc_info);
+  uint32_t native_pc = GetAssembler()->CodeSize();
 
   if (instruction == nullptr) {
     // For stack overflow checks.
-    stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, pc_info.native_pc, 0, 0, 0, 0);
+    stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0);
     stack_map_stream_.EndStackMapEntry();
     return;
   }
@@ -719,8 +711,8 @@
   }
   // The register mask must be a subset of callee-save registers.
   DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
-  stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc,
-                                       pc_info.native_pc,
+  stack_map_stream_.BeginStackMapEntry(outer_dex_pc,
+                                       native_pc,
                                        register_mask,
                                        locations->GetStackMask(),
                                        outer_environment_size,
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index c6ebf6d..e6b1f7c 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -64,11 +64,6 @@
   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
 };
 
-struct PcInfo {
-  uint32_t dex_pc;
-  uintptr_t native_pc;
-};
-
 class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
  public:
   SlowPathCode() {
@@ -366,7 +361,6 @@
         is_baseline_(false),
         graph_(graph),
         compiler_options_(compiler_options),
-        pc_infos_(graph->GetArena(), 32),
         slow_paths_(graph->GetArena(), 8),
         block_order_(nullptr),
         current_block_index_(0),
@@ -455,7 +449,6 @@
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;
 
-  GrowableArray<PcInfo> pc_infos_;
   GrowableArray<SlowPathCode*> slow_paths_;
 
   // The order to use for code generation.
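
With the PcInfo struct and pc_infos_ array gone, every consumer in code_generator.cc walks the stack map stream directly; each entry already carries the dex_pc and native_pc_offset the mapping tables need. A hedged sketch of the iteration pattern (entry field and accessor names follow the patch, the helper itself is illustrative):

    // Sketch only: collect (native pc, dex pc) pairs straight from the stream.
    std::vector<std::pair<uint32_t, uint32_t>> CollectPcPairs(const StackMapStream& stream) {
      std::vector<std::pair<uint32_t, uint32_t>> pairs;
      for (size_t i = 0, n = stream.GetNumberOfStackMaps(); i != n; ++i) {
        const StackMapStream::StackMapEntry& entry = stream.GetStackMap(i);
        pairs.emplace_back(entry.native_pc_offset, entry.dex_pc);
      }
      return pairs;
    }
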
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index 0244620..668956a 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -36,6 +36,9 @@
   static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
 
   void Run() OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
 };
 
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 126b3b9..7ef6955 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2483,7 +2483,7 @@
     intrinsic_ = intrinsic;
   }
 
-  bool IsInlined() const {
+  bool IsFromInlinedInvoke() const {
     return GetEnvironment()->GetParent() != nullptr;
   }
 
@@ -3603,7 +3603,7 @@
   bool CanThrow() const OVERRIDE {
     // May call runtime and therefore can throw.
     // TODO: finer grain decision.
-    return !is_referrers_class_;
+    return CanCallRuntime();
   }
 
   ReferenceTypeInfo GetLoadedClassRTI() {
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index ccf8de9..2d1c0ba 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
 
+#include "base/arena_object.h"
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
 
@@ -25,7 +26,7 @@
 /**
  * Abstraction to implement an optimization pass.
  */
-class HOptimization : public ValueObject {
+class HOptimization : public ArenaObject<kArenaAllocMisc> {
  public:
   HOptimization(HGraph* graph,
                 bool is_in_ssa_form,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bf0b9fa..303a7cb 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -318,49 +318,55 @@
                              const DexCompilationUnit& dex_compilation_unit,
                              PassInfoPrinter* pass_info_printer,
                              StackHandleScopeCollection* handles) {
-  HDeadCodeElimination dce1(graph, stats,
-                            HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
-  HDeadCodeElimination dce2(graph, stats,
-                            HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
-  HConstantFolding fold1(graph);
-  InstructionSimplifier simplify1(graph, stats);
-  HBooleanSimplifier boolean_simplify(graph);
+  ArenaAllocator* arena = graph->GetArena();
+  HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
+      graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+  HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
+      graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
+  HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
+  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
+  HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
 
-  HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+  HInliner* inliner = new (arena) HInliner(
+      graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
 
-  HConstantFolding fold2(graph, "constant_folding_after_inlining");
-  SideEffectsAnalysis side_effects(graph);
-  GVNOptimization gvn(graph, side_effects);
-  LICM licm(graph, side_effects);
-  BoundsCheckElimination bce(graph);
-  ReferenceTypePropagation type_propagation(graph, handles);
-  InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types");
-  InstructionSimplifier simplify3(graph, stats, "last_instruction_simplifier");
-  ReferenceTypePropagation type_propagation2(graph, handles);
+  HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+  SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+  GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
+  LICM* licm = new (arena) LICM(graph, *side_effects);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph);
+  ReferenceTypePropagation* type_propagation =
+      new (arena) ReferenceTypePropagation(graph, handles);
+  InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
+      graph, stats, "instruction_simplifier_after_types");
+  InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
+      graph, stats, "last_instruction_simplifier");
+  ReferenceTypePropagation* type_propagation2 =
+      new (arena) ReferenceTypePropagation(graph, handles);
 
-  IntrinsicsRecognizer intrinsics(graph, driver);
+  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
 
   HOptimization* optimizations[] = {
-    &intrinsics,
-    &dce1,
-    &fold1,
-    &simplify1,
-    &type_propagation,
-    &simplify2,
-    &inliner,
+    intrinsics,
+    dce1,
+    fold1,
+    simplify1,
+    type_propagation,
+    simplify2,
+    inliner,
     // Run another type propagation phase: inlining will open up more opportunities
     // to remove checkcast/instanceof and null checks.
-    &type_propagation2,
+    type_propagation2,
     // BooleanSimplifier depends on the InstructionSimplifier removing redundant
     // suspend checks to recognize empty blocks.
-    &boolean_simplify,
-    &fold2,
-    &side_effects,
-    &gvn,
-    &licm,
-    &bce,
-    &simplify3,
-    &dce2,
+    boolean_simplify,
+    fold2,
+    side_effects,
+    gvn,
+    licm,
+    bce,
+    simplify3,
+    dce2,
   };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer);
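
The switch to arena allocation here leans on the optimization.h change above: HOptimization now derives from ArenaObject<kArenaAllocMisc>, whose placement operator new takes the graph's ArenaAllocator, so "new (arena) Pass(...)" is well-formed and no pass is ever deleted explicitly; the arena reclaims everything at once. A simplified model of that mechanism (paraphrased for the sketch, not quoted from the tree):

    // Sketch only: the shape of the arena placement-new that ArenaObject provides.
    class ArenaObjectSketch {
     public:
      void* operator new(size_t size, ArenaAllocator* arena) {
        return arena->Alloc(size);  // arena-owned memory, freed when the arena is
      }
      void operator delete(void*, size_t) {}  // never called; the arena owns the memory
    };
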
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index a249aa9..ca928ae 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -86,16 +86,6 @@
     DCHECK(last_input != nullptr)
         << "Last input is not HLoadClass. It is " << last_input->DebugName();
 
-    // The static call will initialize the class so there's no need for a clinit check if
-    // it's the first user.
-    // There is one special case where we still need the clinit check, when inlining. Because
-    // currently the callee is responsible for reporting parameters to the GC, the code
-    // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC.
-    // Therefore we cannot allocate any object in that code, including loading a new class.
-    if (last_input == invoke->GetPrevious() && !invoke->IsInlined()) {
-      last_input->SetMustGenerateClinitCheck(false);
-    }
-
     // Remove a load class instruction as last input of a static
     // invoke, which has been added (along with a clinit check,
     // removed by PrepareForRegisterAllocation::VisitClinitCheck
@@ -104,10 +94,20 @@
     // stage (i.e., after inlining has been performed).
     invoke->RemoveLoadClassAsLastInput();
 
-    // If the load class instruction is no longer used, remove it from
-    // the graph.
-    if (!last_input->HasUses() && !(last_input->MustGenerateClinitCheck() && invoke->IsInlined())) {
-      last_input->GetBlock()->RemoveInstruction(last_input);
+    // The static call will initialize the class so there's no need for a clinit check if
+    // it's the first user.
+    // There is one special case where we still need the clinit check, when inlining. Because
+    // currently the callee is responsible for reporting parameters to the GC, the code
+    // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC.
+    // Therefore we cannot allocate any object in that code, including loading a new class.
+    if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) {
+      last_input->SetMustGenerateClinitCheck(false);
+
+      // If the load class instruction is no longer used, remove it from
+      // the graph.
+      if (!last_input->HasUses()) {
+        last_input->GetBlock()->RemoveInstruction(last_input);
+      }
     }
   }
 }
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 42b9182..65610d5 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -49,7 +49,6 @@
   }
 
   dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
-  native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
   register_mask_max_ = std::max(register_mask_max_, register_mask);
   current_dex_register_ = 0;
 }
@@ -128,16 +127,25 @@
   current_inline_info_ = InlineInfoEntry();
 }
 
+uint32_t StackMapStream::ComputeMaxNativePcOffset() const {
+  uint32_t max_native_pc_offset = 0u;
+  for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) {
+    max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset);
+  }
+  return max_native_pc_offset;
+}
+
 size_t StackMapStream::PrepareForFillIn() {
   int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
   stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte;
   inline_info_size_ = ComputeInlineInfoSize();
   dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+  uint32_t max_native_pc_offset = ComputeMaxNativePcOffset();
   stack_map_encoding_ = StackMapEncoding::CreateFromSizes(stack_mask_size_,
                                                           inline_info_size_,
                                                           dex_register_maps_size_,
                                                           dex_pc_max_,
-                                                          native_pc_offset_max_,
+                                                          max_native_pc_offset,
                                                           register_mask_max_);
   stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize();
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 274d573..bc3653d 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -67,7 +67,6 @@
         inline_infos_(allocator, 2),
         stack_mask_max_(-1),
         dex_pc_max_(0),
-        native_pc_offset_max_(0),
         register_mask_max_(0),
         number_of_stack_maps_with_inline_info_(0),
         dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()),
@@ -126,6 +125,17 @@
                             uint32_t num_dex_registers);
   void EndInlineInfoEntry();
 
+  size_t GetNumberOfStackMaps() const {
+    return stack_maps_.Size();
+  }
+
+  const StackMapEntry& GetStackMap(size_t i) const {
+    DCHECK_LT(i, stack_maps_.Size());
+    return stack_maps_.GetRawStorage()[i];
+  }
+
+  uint32_t ComputeMaxNativePcOffset() const;
+
   // Prepares the stream to fill in a memory region. Must be called before FillIn.
   // Returns the size (in bytes) needed to store this stream.
   size_t PrepareForFillIn();
@@ -163,7 +173,6 @@
   GrowableArray<InlineInfoEntry> inline_infos_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
-  uint32_t native_pc_offset_max_;
   uint32_t register_mask_max_;
   size_t number_of_stack_maps_with_inline_info_;
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 96d5654..9e9dea6 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1610,6 +1610,8 @@
     const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap);
     const auto& field_section = image_header_.GetImageSection(ImageHeader::kSectionArtFields);
     const auto& method_section = image_header_.GetMethodsSection();
+    const auto& intern_section = image_header_.GetImageSection(
+        ImageHeader::kSectionInternedStrings);
     stats_.header_bytes = header_bytes;
     size_t alignment_bytes = RoundUp(header_bytes, kObjectAlignment) - header_bytes;
     stats_.alignment_bytes += alignment_bytes;
@@ -1617,6 +1619,7 @@
     stats_.bitmap_bytes += bitmap_section.Size();
     stats_.art_field_bytes += field_section.Size();
     stats_.art_method_bytes += method_section.Size();
+    stats_.interned_strings_bytes += intern_section.Size();
     stats_.Dump(os);
     os << "\n";
 
@@ -1945,6 +1948,7 @@
     size_t object_bytes;
     size_t art_field_bytes;
     size_t art_method_bytes;
+    size_t interned_strings_bytes;
     size_t bitmap_bytes;
     size_t alignment_bytes;
 
@@ -1974,6 +1978,7 @@
           object_bytes(0),
           art_field_bytes(0),
           art_method_bytes(0),
+          interned_strings_bytes(0),
           bitmap_bytes(0),
           alignment_bytes(0),
           managed_code_bytes(0),
@@ -2131,21 +2136,24 @@
            << "art_file_bytes = header_bytes + object_bytes + alignment_bytes\n";
         Indenter indent_filter(os.rdbuf(), kIndentChar, kIndentBy1Count);
         std::ostream indent_os(&indent_filter);
-        indent_os << StringPrintf("header_bytes     =  %8zd (%2.0f%% of art file bytes)\n"
-                                  "object_bytes     =  %8zd (%2.0f%% of art file bytes)\n"
-                                  "art_field_bytes  =  %8zd (%2.0f%% of art file bytes)\n"
-                                  "art_method_bytes =  %8zd (%2.0f%% of art file bytes)\n"
-                                  "bitmap_bytes     =  %8zd (%2.0f%% of art file bytes)\n"
-                                  "alignment_bytes  =  %8zd (%2.0f%% of art file bytes)\n\n",
+        indent_os << StringPrintf("header_bytes          =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "object_bytes          =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "art_field_bytes       =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "art_method_bytes      =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "interned_string_bytes =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "bitmap_bytes          =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "alignment_bytes       =  %8zd (%2.0f%% of art file bytes)\n\n",
                                   header_bytes, PercentOfFileBytes(header_bytes),
                                   object_bytes, PercentOfFileBytes(object_bytes),
                                   art_field_bytes, PercentOfFileBytes(art_field_bytes),
                                   art_method_bytes, PercentOfFileBytes(art_method_bytes),
+                                  interned_strings_bytes,
+                                  PercentOfFileBytes(interned_strings_bytes),
                                   bitmap_bytes, PercentOfFileBytes(bitmap_bytes),
                                   alignment_bytes, PercentOfFileBytes(alignment_bytes))
             << std::flush;
         CHECK_EQ(file_bytes, header_bytes + object_bytes + art_field_bytes + art_method_bytes +
-            bitmap_bytes + alignment_bytes);
+                 interned_strings_bytes + bitmap_bytes + alignment_bytes);
       }
 
       os << "object_bytes breakdown:\n";
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 007125c..0401727 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -437,6 +437,41 @@
   }
 }
 
+class FixupRootVisitor : public RootVisitor {
+ public:
+  explicit FixupRootVisitor(const PatchOat* patch_oat) : patch_oat_(patch_oat) {
+  }
+
+  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      *roots[i] = patch_oat_->RelocatedAddressOfPointer(*roots[i]);
+    }
+  }
+
+  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (size_t i = 0; i < count; ++i) {
+      roots[i]->Assign(patch_oat_->RelocatedAddressOfPointer(roots[i]->AsMirrorPtr()));
+    }
+  }
+
+ private:
+  const PatchOat* const patch_oat_;
+};
+
+void PatchOat::PatchInternedStrings(const ImageHeader* image_header) {
+  const auto& section = image_header->GetImageSection(ImageHeader::kSectionInternedStrings);
+  InternTable temp_table;
+  // Note that we require that ReadFromMemory does not make an internal copy of the elements.
+  // This also relies on visit roots not doing any verification which could fail after we update
+  // the roots to be the image addresses.
+  temp_table.ReadFromMemory(image_->Begin() + section.Offset());
+  FixupRootVisitor visitor(this);
+  temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
+}
+
 void PatchOat::PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) {
   auto* dex_caches = down_cast<mirror::ObjectArray<mirror::DexCache>*>(
       img_roots->Get(ImageHeader::kDexCaches));
@@ -483,12 +518,9 @@
   auto* img_roots = image_header->GetImageRoots();
   image_header->RelocateImage(delta_);
 
-  // Patch and update ArtFields.
   PatchArtFields(image_header);
-
-  // Patch and update ArtMethods.
   PatchArtMethods(image_header);
-
+  PatchInternedStrings(image_header);
   // Patch dex file int/long arrays which point to ArtFields.
   PatchDexFileArrays(img_roots);
 
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 7b9c8bd..23abca8 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -116,6 +116,8 @@
   bool PatchImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void PatchArtFields(const ImageHeader* image_header) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void PatchArtMethods(const ImageHeader* image_header) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PatchInternedStrings(const ImageHeader* image_header)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -123,7 +125,7 @@
   bool WriteImage(File* out);
 
   template <typename T>
-  T* RelocatedCopyOf(T* obj) {
+  T* RelocatedCopyOf(T* obj) const {
     if (obj == nullptr) {
       return nullptr;
     }
@@ -136,7 +138,7 @@
   }
 
   template <typename T>
-  T* RelocatedAddressOfPointer(T* obj) {
+  T* RelocatedAddressOfPointer(T* obj) const {
     if (obj == nullptr) {
       return obj;
     }
@@ -149,7 +151,7 @@
   }
 
   template <typename T>
-  T RelocatedAddressOfIntPointer(T obj) {
+  T RelocatedAddressOfIntPointer(T obj) const {
     if (obj == 0) {
       return obj;
     }
@@ -199,6 +201,7 @@
 
   TimingLogger* timings_;
 
+  friend class FixupRootVisitor;
   DISALLOW_IMPLICIT_CONSTRUCTORS(PatchOat);
 };
 
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index ab63ddd..8daf6d4 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -22,6 +22,7 @@
 #include <stdint.h>
 #include <utility>
 
+#include "bit_utils.h"
 #include "logging.h"
 
 namespace art {
@@ -121,6 +122,7 @@
   typedef BaseIterator<T, HashSet> Iterator;
   typedef BaseIterator<const T, const HashSet> ConstIterator;
 
+  // If we don't own the data, this will create a new array which owns the data.
   void Clear() {
     DeallocateStorage();
     AllocateStorage(1);
@@ -128,19 +130,70 @@
     elements_until_expand_ = 0;
   }
 
-  HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr),
+  HashSet() : num_elements_(0), num_buckets_(0), owns_data_(false), data_(nullptr),
       min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
     Clear();
   }
 
-  HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
+  HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), owns_data_(false),
+      data_(nullptr) {
     *this = other;
   }
 
-  HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) {
+  HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), owns_data_(false),
+      data_(nullptr) {
     *this = std::move(other);
   }
 
+  // Construct from existing data.
+  // Read from a block of memory; if make_copy_of_data is false, then data_ points into the
+  // passed-in ptr.
+  HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) {
+    uint64_t temp;
+    size_t offset = 0;
+    offset = ReadFromBytes(ptr, offset, &temp);
+    num_elements_ = static_cast<uint64_t>(temp);
+    offset = ReadFromBytes(ptr, offset, &temp);
+    num_buckets_ = static_cast<uint64_t>(temp);
+    CHECK_LE(num_elements_, num_buckets_);
+    offset = ReadFromBytes(ptr, offset, &temp);
+    elements_until_expand_ = static_cast<uint64_t>(temp);
+    offset = ReadFromBytes(ptr, offset, &min_load_factor_);
+    offset = ReadFromBytes(ptr, offset, &max_load_factor_);
+    if (!make_copy_of_data) {
+      owns_data_ = false;
+      data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset));
+      offset += sizeof(*data_) * num_buckets_;
+    } else {
+      AllocateStorage(num_buckets_);
+      // Read the elements; note that this may not be safe for cross compilation if the elements
+      // are pointer sized.
+      for (size_t i = 0; i < num_buckets_; ++i) {
+        offset = ReadFromBytes(ptr, offset, &data_[i]);
+      }
+    }
+    // Caller responsible for aligning.
+    *read_count = offset;
+  }
+
+  // Returns how large the table is after being written. If ptr is null, no writing happens but
+  // the size is still returned. ptr must be 8 byte aligned.
+  size_t WriteToMemory(uint8_t* ptr) {
+    size_t offset = 0;
+    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
+    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
+    offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_));
+    offset = WriteToBytes(ptr, offset, min_load_factor_);
+    offset = WriteToBytes(ptr, offset, max_load_factor_);
+    // Write the elements; note that this may not be safe for cross compilation if the elements
+    // are pointer sized.
+    for (size_t i = 0; i < num_buckets_; ++i) {
+      offset = WriteToBytes(ptr, offset, data_[i]);
+    }
+    // Caller responsible for aligning.
+    return offset;
+  }
+
   ~HashSet() {
     DeallocateStorage();
   }
@@ -152,6 +205,7 @@
     std::swap(elements_until_expand_, other.elements_until_expand_);
     std::swap(min_load_factor_, other.min_load_factor_);
     std::swap(max_load_factor_, other.max_load_factor_);
+    std::swap(owns_data_, other.owns_data_);
     return *this;
   }
 
@@ -386,6 +440,7 @@
   void AllocateStorage(size_t num_buckets) {
     num_buckets_ = num_buckets;
     data_ = allocfn_.allocate(num_buckets_);
+    owns_data_ = true;
     for (size_t i = 0; i < num_buckets_; ++i) {
       allocfn_.construct(allocfn_.address(data_[i]));
       emptyfn_.MakeEmpty(data_[i]);
@@ -394,10 +449,13 @@
 
   void DeallocateStorage() {
     if (num_buckets_ != 0) {
-      for (size_t i = 0; i < NumBuckets(); ++i) {
-        allocfn_.destroy(allocfn_.address(data_[i]));
+      if (owns_data_) {
+        for (size_t i = 0; i < NumBuckets(); ++i) {
+          allocfn_.destroy(allocfn_.address(data_[i]));
+        }
+        allocfn_.deallocate(data_, NumBuckets());
+        owns_data_ = false;
       }
-      allocfn_.deallocate(data_, NumBuckets());
       data_ = nullptr;
       num_buckets_ = 0;
     }
@@ -418,18 +476,23 @@
   // Expand / shrink the table to the new specified size.
   void Resize(size_t new_size) {
     DCHECK_GE(new_size, Size());
-    T* old_data = data_;
+    T* const old_data = data_;
     size_t old_num_buckets = num_buckets_;
     // Reinsert all of the old elements.
+    const bool owned_data = owns_data_;
     AllocateStorage(new_size);
     for (size_t i = 0; i < old_num_buckets; ++i) {
       T& element = old_data[i];
       if (!emptyfn_.IsEmpty(element)) {
         data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element);
       }
-      allocfn_.destroy(allocfn_.address(element));
+      if (owned_data) {
+        allocfn_.destroy(allocfn_.address(element));
+      }
     }
-    allocfn_.deallocate(old_data, old_num_buckets);
+    if (owned_data) {
+      allocfn_.deallocate(old_data, old_num_buckets);
+    }
   }
 
   ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
@@ -439,6 +502,24 @@
     return index;
   }
 
+  // Return new offset.
+  template <typename Elem>
+  static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
+    DCHECK_ALIGNED(ptr + offset, sizeof(n));
+    if (ptr != nullptr) {
+      *reinterpret_cast<Elem*>(ptr + offset) = n;
+    }
+    return offset + sizeof(n);
+  }
+
+  template <typename Elem>
+  static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
+    DCHECK(ptr != nullptr);
+    DCHECK_ALIGNED(ptr + offset, sizeof(*out));
+    *out = *reinterpret_cast<const Elem*>(ptr + offset);
+    return offset + sizeof(*out);
+  }
+
   Alloc allocfn_;  // Allocator function.
   HashFn hashfn_;  // Hashing function.
   EmptyFn emptyfn_;  // IsEmpty/SetEmpty function.
@@ -446,6 +527,7 @@
   size_t num_elements_;  // Number of inserted elements.
   size_t num_buckets_;  // Number of hash table buckets.
   size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
+  bool owns_data_;  // If we own data_ and are responsible for freeing it.
   T* data_;  // Backing storage.
   double min_load_factor_;
   double max_load_factor_;
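
The serialized HashSet layout that the intern table writing builds on is: five 8-byte header fields (element count, bucket count, expand threshold, then the two load factors) followed by the raw bucket array, and the new constructor can either copy that data or alias it in place via owns_data_. A round-trip sketch under the assumption that the element type is trivially copyable and that MyEmptyFn is a suitable (hypothetical) EmptyFn for it:

    // Sketch only: measure, serialize, then build a view that aliases the buffer.
    std::vector<uint8_t> Serialize(HashSet<uint64_t, MyEmptyFn>& set) {
      const size_t bytes = set.WriteToMemory(nullptr);    // pass 1: size only
      std::vector<uint8_t> buffer(bytes);                 // heap allocation is 8-byte aligned
      CHECK_EQ(set.WriteToMemory(buffer.data()), bytes);  // pass 2: real write
      return buffer;
    }

    void ReadBack(const uint8_t* buffer) {
      size_t read_count = 0;
      // make_copy_of_data == false: data_ points into buffer, so the buffer must
      // outlive (and never move under) the reconstructed set.
      HashSet<uint64_t, MyEmptyFn> view(buffer, /*make_copy_of_data*/ false, &read_count);
    }
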
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index f2be85e..0ab148e 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -94,7 +94,6 @@
   kMonitorListLock,
   kJniLoadLibraryLock,
   kThreadListLock,
-  kBreakpointInvokeLock,
   kAllocTrackerLock,
   kDeoptimizationLock,
   kProfilerLock,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 429fa5b..dc8a3d1 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1055,7 +1055,7 @@
 static void SanityCheckObjectsCallback(mirror::Object* obj, void* arg ATTRIBUTE_UNUSED)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(obj != nullptr);
-  CHECK(obj->GetClass() != nullptr) << "Null class " << obj;
+  CHECK(obj->GetClass() != nullptr) << "Null class in object " << obj;
   CHECK(obj->GetClass()->GetClass() != nullptr) << "Null class class " << obj;
   if (obj->IsClass()) {
     auto klass = obj->AsClass();
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 3c75daf..5918c10 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -33,6 +33,7 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "handle_scope.h"
+#include "jdwp/jdwp_priv.h"
 #include "jdwp/object_registry.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
@@ -1658,6 +1659,51 @@
   return BasicTagFromDescriptor(FromFieldId(field_id)->GetTypeDescriptor());
 }
 
+static JValue GetArtFieldValue(ArtField* f, mirror::Object* o)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Primitive::Type fieldType = f->GetTypeAsPrimitiveType();
+  JValue field_value;
+  switch (fieldType) {
+    case Primitive::kPrimBoolean:
+      field_value.SetZ(f->GetBoolean(o));
+      return field_value;
+
+    case Primitive::kPrimByte:
+      field_value.SetB(f->GetByte(o));
+      return field_value;
+
+    case Primitive::kPrimChar:
+      field_value.SetC(f->GetChar(o));
+      return field_value;
+
+    case Primitive::kPrimShort:
+      field_value.SetS(f->GetShort(o));
+      return field_value;
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      // Int and Float must be treated as 32-bit values in JDWP.
+      field_value.SetI(f->GetInt(o));
+      return field_value;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      // Long and Double must be treated as 64-bit values in JDWP.
+      field_value.SetJ(f->GetLong(o));
+      return field_value;
+
+    case Primitive::kPrimNot:
+      field_value.SetL(f->GetObject(o));
+      return field_value;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Attempt to read from field of type 'void'";
+      UNREACHABLE();
+  }
+  LOG(FATAL) << "Attempt to read from field of unknown type";
+  UNREACHABLE();
+}
+
 static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId ref_type_id, JDWP::ObjectId object_id,
                                          JDWP::FieldId field_id, JDWP::ExpandBuf* pReply,
                                          bool is_static)
@@ -1692,27 +1738,17 @@
     }
   } else {
     if (f->IsStatic()) {
-      LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues on static field "
-          << PrettyField(f);
+      LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.GetValues"
+                   << " on static field " << PrettyField(f);
     }
   }
   if (f->IsStatic()) {
     o = f->GetDeclaringClass();
   }
 
+  JValue field_value(GetArtFieldValue(f, o));
   JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
-  JValue field_value;
-  if (tag == JDWP::JT_VOID) {
-    LOG(FATAL) << "Unknown tag: " << tag;
-  } else if (!IsPrimitiveTag(tag)) {
-    field_value.SetL(f->GetObject(o));
-  } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
-    field_value.SetJ(f->Get64(o));
-  } else {
-    field_value.SetI(f->Get32(o));
-  }
   Dbg::OutputJValue(tag, &field_value, pReply);
-
   return JDWP::ERR_NONE;
 }
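[Editor's note] GetArtFieldValue() above collapses int/float into one 32-bit read and long/double into one 64-bit read because JDWP transports these values as raw bit patterns of those widths. A small standalone illustration of that idea (not ART code): copying a float's bits through a 32-bit integer preserves them exactly, so no numeric conversion is involved.

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float f = 1.5f;
  int32_t as_int = 0;
  std::memcpy(&as_int, &f, sizeof(f));      // SetI(GetInt(o)) keeps these bits intact.

  float back = 0.0f;
  std::memcpy(&back, &as_int, sizeof(back));
  assert(back == 1.5f);                     // Round-trips exactly; no int<->float conversion.

  double d = 2.25;
  int64_t as_long = 0;
  std::memcpy(&as_long, &d, sizeof(d));     // SetJ(GetLong(o)) is the 64-bit analogue.
  double dback = 0.0;
  std::memcpy(&dback, &as_long, sizeof(dback));
  assert(dback == 2.25);
  return 0;
}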
 
@@ -1726,6 +1762,76 @@
   return GetFieldValueImpl(ref_type_id, 0, field_id, pReply, true);
 }
 
+static JDWP::JdwpError SetArtFieldValue(ArtField* f, mirror::Object* o, uint64_t value, int width)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Primitive::Type fieldType = f->GetTypeAsPrimitiveType();
+  // Debugging only happens at runtime so we know we are not running in a transaction.
+  static constexpr bool kNoTransactionMode = false;
+  switch (fieldType) {
+    case Primitive::kPrimBoolean:
+      CHECK_EQ(width, 1);
+      f->SetBoolean<kNoTransactionMode>(o, static_cast<uint8_t>(value));
+      return JDWP::ERR_NONE;
+
+    case Primitive::kPrimByte:
+      CHECK_EQ(width, 1);
+      f->SetByte<kNoTransactionMode>(o, static_cast<uint8_t>(value));
+      return JDWP::ERR_NONE;
+
+    case Primitive::kPrimChar:
+      CHECK_EQ(width, 2);
+      f->SetChar<kNoTransactionMode>(o, static_cast<uint16_t>(value));
+      return JDWP::ERR_NONE;
+
+    case Primitive::kPrimShort:
+      CHECK_EQ(width, 2);
+      f->SetShort<kNoTransactionMode>(o, static_cast<int16_t>(value));
+      return JDWP::ERR_NONE;
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      CHECK_EQ(width, 4);
+      // Int and Float must be treated as 32-bit values in JDWP.
+      f->SetInt<kNoTransactionMode>(o, static_cast<int32_t>(value));
+      return JDWP::ERR_NONE;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      CHECK_EQ(width, 8);
+      // Long and Double must be treated as 64-bit values in JDWP.
+      f->SetLong<kNoTransactionMode>(o, value);
+      return JDWP::ERR_NONE;
+
+    case Primitive::kPrimNot: {
+      JDWP::JdwpError error;
+      mirror::Object* v = Dbg::GetObjectRegistry()->Get<mirror::Object*>(value, &error);
+      if (error != JDWP::ERR_NONE) {
+        return JDWP::ERR_INVALID_OBJECT;
+      }
+      if (v != nullptr) {
+        mirror::Class* field_type;
+        {
+          StackHandleScope<2> hs(Thread::Current());
+          HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v));
+          HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
+          field_type = f->GetType<true>();
+        }
+        if (!field_type->IsAssignableFrom(v->GetClass())) {
+          return JDWP::ERR_INVALID_OBJECT;
+        }
+      }
+      f->SetObject<kNoTransactionMode>(o, v);
+      return JDWP::ERR_NONE;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Attempt to write to field of type 'void'";
+      UNREACHABLE();
+  }
+  LOG(FATAL) << "Attempt to write to field of unknown type";
+  UNREACHABLE();
+}
+
 static JDWP::JdwpError SetFieldValueImpl(JDWP::ObjectId object_id, JDWP::FieldId field_id,
                                          uint64_t value, int width, bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1744,47 +1850,14 @@
     }
   } else {
     if (f->IsStatic()) {
-      LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues on static field " << PrettyField(f);
+      LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues"
+                   << " on static field " << PrettyField(f);
     }
   }
   if (f->IsStatic()) {
     o = f->GetDeclaringClass();
   }
-
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
-
-  if (IsPrimitiveTag(tag)) {
-    if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
-      CHECK_EQ(width, 8);
-      // Debugging can't use transactional mode (runtime only).
-      f->Set64<false>(o, value);
-    } else {
-      CHECK_LE(width, 4);
-      // Debugging can't use transactional mode (runtime only).
-      f->Set32<false>(o, value);
-    }
-  } else {
-    mirror::Object* v = Dbg::GetObjectRegistry()->Get<mirror::Object*>(value, &error);
-    if (error != JDWP::ERR_NONE) {
-      return JDWP::ERR_INVALID_OBJECT;
-    }
-    if (v != nullptr) {
-      mirror::Class* field_type;
-      {
-        StackHandleScope<2> hs(Thread::Current());
-        HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v));
-        HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
-        field_type = f->GetType<true>();
-      }
-      if (!field_type->IsAssignableFrom(v->GetClass())) {
-        return JDWP::ERR_INVALID_OBJECT;
-      }
-    }
-    // Debugging can't use transactional mode (runtime only).
-    f->SetObject<false>(o, v);
-  }
-
-  return JDWP::ERR_NONE;
+  return SetArtFieldValue(f, o, value, width);
 }
 
 JDWP::JdwpError Dbg::SetFieldValue(JDWP::ObjectId object_id, JDWP::FieldId field_id, uint64_t value,
@@ -3660,17 +3733,16 @@
   }
 }
 
-JDWP::JdwpError Dbg::InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId object_id,
-                                  JDWP::RefTypeId class_id, JDWP::MethodId method_id,
-                                  uint32_t arg_count, uint64_t* arg_values,
-                                  JDWP::JdwpTag* arg_types, uint32_t options,
-                                  JDWP::JdwpTag* pResultTag, uint64_t* pResultValue,
-                                  JDWP::ObjectId* pExceptionId) {
-  ThreadList* thread_list = Runtime::Current()->GetThreadList();
+JDWP::JdwpError Dbg::PrepareInvokeMethod(uint32_t request_id, JDWP::ObjectId thread_id,
+                                         JDWP::ObjectId object_id, JDWP::RefTypeId class_id,
+                                         JDWP::MethodId method_id, uint32_t arg_count,
+                                         uint64_t arg_values[], JDWP::JdwpTag* arg_types,
+                                         uint32_t options) {
+  Thread* const self = Thread::Current();
+  CHECK_EQ(self, GetDebugThread()) << "This must be called by the JDWP thread";
 
+  ThreadList* thread_list = Runtime::Current()->GetThreadList();
   Thread* targetThread = nullptr;
-  std::unique_ptr<DebugInvokeReq> req;
-  Thread* self = Thread::Current();
   {
     ScopedObjectAccessUnchecked soa(self);
     JDWP::JdwpError error;
@@ -3780,99 +3852,82 @@
     }
 
     // Allocates a DebugInvokeReq.
-    req.reset(new (std::nothrow) DebugInvokeReq(receiver, c, m, options, arg_values, arg_count));
-    if (req.get() == nullptr) {
+    DebugInvokeReq* req = new (std::nothrow) DebugInvokeReq(request_id, thread_id, receiver, c, m,
+                                                            options, arg_values, arg_count);
+    if (req == nullptr) {
       LOG(ERROR) << "Failed to allocate DebugInvokeReq";
       return JDWP::ERR_OUT_OF_MEMORY;
     }
 
-    // Attach the DebugInvokeReq to the target thread so it executes the method when
-    // it is resumed. Once the invocation completes, it will detach it and signal us
-    // before suspending itself.
-    targetThread->SetDebugInvokeReq(req.get());
+    // Attaches the DebugInvokeReq to the target thread so it executes the method when
+    // it is resumed. Once the invocation completes, the target thread will delete it before
+    // suspending itself (see ThreadList::SuspendSelfForDebugger).
+    targetThread->SetDebugInvokeReq(req);
   }
 
   // The fact that we've released the thread list lock is a bit risky --- if the thread goes
-  // away we're sitting high and dry -- but we must release this before the ResumeAllThreads
-  // call, and it's unwise to hold it during WaitForSuspend.
+  // away we're sitting high and dry -- but we must release this before the UndoDebuggerSuspensions
+  // call.
 
-  {
-    /*
-     * We change our (JDWP thread) status, which should be THREAD_RUNNING,
-     * so we can suspend for a GC if the invoke request causes us to
-     * run out of memory.  It's also a good idea to change it before locking
-     * the invokeReq mutex, although that should never be held for long.
-     */
-    self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSend);
-
-    VLOG(jdwp) << "    Transferring control to event thread";
-    {
-      MutexLock mu(self, req->lock);
-
-      if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
-        VLOG(jdwp) << "      Resuming all threads";
-        thread_list->UndoDebuggerSuspensions();
-      } else {
-        VLOG(jdwp) << "      Resuming event thread only";
-        thread_list->Resume(targetThread, true);
-      }
-
-      // The target thread is resumed but needs the JDWP token we're holding.
-      // We release it now and will acquire it again when the invocation is
-      // complete and the target thread suspends itself.
-      gJdwpState->ReleaseJdwpTokenForCommand();
-
-      // Wait for the request to finish executing.
-      while (targetThread->GetInvokeReq() != nullptr) {
-        req->cond.Wait(self);
-      }
-    }
-    VLOG(jdwp) << "    Control has returned from event thread";
-
-    /* wait for thread to re-suspend itself */
-    SuspendThread(thread_id, false /* request_suspension */);
-
-    // Now the thread is suspended again, we can re-acquire the JDWP token.
-    gJdwpState->AcquireJdwpTokenForCommand();
-
-    self->TransitionFromSuspendedToRunnable();
-  }
-
-  /*
-   * Suspend the threads.  We waited for the target thread to suspend
-   * itself, so all we need to do is suspend the others.
-   *
-   * The SuspendAllForDebugger() call will double-suspend the event thread,
-   * so we want to resume the target thread once to keep the books straight.
-   */
   if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
-    self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
-    VLOG(jdwp) << "      Suspending all threads";
-    thread_list->SuspendAllForDebugger();
-    self->TransitionFromSuspendedToRunnable();
-    VLOG(jdwp) << "      Resuming event thread to balance the count";
+    VLOG(jdwp) << "      Resuming all threads";
+    thread_list->UndoDebuggerSuspensions();
+  } else {
+    VLOG(jdwp) << "      Resuming event thread only";
     thread_list->Resume(targetThread, true);
   }
 
-  // Copy the result.
-  *pResultTag = req->result_tag;
-  *pResultValue = req->result_value;
-  *pExceptionId = req->exception;
-  return req->error;
+  return JDWP::ERR_NONE;
 }
 
 void Dbg::ExecuteMethod(DebugInvokeReq* pReq) {
-  ScopedObjectAccess soa(Thread::Current());
+  Thread* const self = Thread::Current();
+  CHECK_NE(self, GetDebugThread()) << "This must be called by the event thread";
+
+  ScopedObjectAccess soa(self);
 
   // We can be called while an exception is pending. We need
   // to preserve that across the method invocation.
-  StackHandleScope<3> hs(soa.Self());
-  auto old_exception = hs.NewHandle<mirror::Throwable>(soa.Self()->GetException());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::Throwable> old_exception = hs.NewHandle(soa.Self()->GetException());
   soa.Self()->ClearException();
 
+  // Execute the method, then send the reply to the debugger.
+  ExecuteMethodWithoutPendingException(soa, pReq);
+
+  // If an exception was pending before the invoke, restore it now.
+  if (old_exception.Get() != nullptr) {
+    soa.Self()->SetException(old_exception.Get());
+  }
+}
+
+// Helper function: write a variable-width value into the output buffer.
+static void WriteValue(JDWP::ExpandBuf* pReply, int width, uint64_t value) {
+  switch (width) {
+    case 1:
+      expandBufAdd1(pReply, value);
+      break;
+    case 2:
+      expandBufAdd2BE(pReply, value);
+      break;
+    case 4:
+      expandBufAdd4BE(pReply, value);
+      break;
+    case 8:
+      expandBufAdd8BE(pReply, value);
+      break;
+    default:
+      LOG(FATAL) << width;
+      UNREACHABLE();
+  }
+}
+
+void Dbg::ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInvokeReq* pReq) {
+  soa.Self()->AssertNoPendingException();
+
   // Translate the method through the vtable, unless the debugger wants to suppress it.
-  auto* m = pReq->method;
-  auto image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  ArtMethod* m = pReq->method;
+  size_t image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   if ((pReq->options & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver.Read() != nullptr) {
     ArtMethod* actual_method =
         pReq->klass.Read()->FindVirtualMethodForVirtualOrInterface(m, image_pointer_size);
@@ -3889,39 +3944,133 @@
 
   CHECK_EQ(sizeof(jvalue), sizeof(uint64_t));
 
+  // Invoke the method.
   ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(pReq->receiver.Read()));
   JValue result = InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(m),
-                                    reinterpret_cast<jvalue*>(pReq->arg_values));
+                                    reinterpret_cast<jvalue*>(pReq->arg_values.get()));
 
-  pReq->result_tag = BasicTagFromDescriptor(m->GetShorty());
-  const bool is_object_result = (pReq->result_tag == JDWP::JT_OBJECT);
+  // Prepare JDWP ids for the reply.
+  JDWP::JdwpTag result_tag = BasicTagFromDescriptor(m->GetShorty());
+  const bool is_object_result = (result_tag == JDWP::JT_OBJECT);
+  StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Object> object_result = hs.NewHandle(is_object_result ? result.GetL() : nullptr);
   Handle<mirror::Throwable> exception = hs.NewHandle(soa.Self()->GetException());
   soa.Self()->ClearException();
-  pReq->exception = gRegistry->Add(exception);
-  if (pReq->exception != 0) {
+
+  if (!IsDebuggerActive()) {
+    // The debugger detached: we must not re-suspend threads. We also don't need to fill the reply
+    // because it won't be sent either.
+    return;
+  }
+
+  JDWP::ObjectId exceptionObjectId = gRegistry->Add(exception);
+  uint64_t result_value = 0;
+  if (exceptionObjectId != 0) {
     VLOG(jdwp) << "  JDWP invocation returning with exception=" << exception.Get()
                << " " << exception->Dump();
-    pReq->result_value = 0;
+    result_value = 0;
   } else if (is_object_result) {
-    /* if no exception thrown, examine object result more closely */
+    /* if no exception was thrown, examine object result more closely */
     JDWP::JdwpTag new_tag = TagFromObject(soa, object_result.Get());
-    if (new_tag != pReq->result_tag) {
-      VLOG(jdwp) << "  JDWP promoted result from " << pReq->result_tag << " to " << new_tag;
-      pReq->result_tag = new_tag;
+    if (new_tag != result_tag) {
+      VLOG(jdwp) << "  JDWP promoted result from " << result_tag << " to " << new_tag;
+      result_tag = new_tag;
     }
 
     // Register the object in the registry and reference its ObjectId. This ensures
     // GC safety and prevents from accessing stale reference if the object is moved.
-    pReq->result_value = gRegistry->Add(object_result.Get());
+    result_value = gRegistry->Add(object_result.Get());
   } else {
     // Primitive result.
-    DCHECK(IsPrimitiveTag(pReq->result_tag));
-    pReq->result_value = result.GetJ();
+    DCHECK(IsPrimitiveTag(result_tag));
+    result_value = result.GetJ();
+  }
+  const bool is_constructor = m->IsConstructor() && !m->IsStatic();
+  if (is_constructor) {
+    // If we invoked a constructor (which actually returns void), return the receiver,
+    // unless we threw, in which case we return null.
+    result_tag = JDWP::JT_OBJECT;
+    if (exceptionObjectId == 0) {
+      // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
+      // object registry.
+      result_value = GetObjectRegistry()->Add(pReq->receiver.Read());
+    } else {
+      result_value = 0;
+    }
   }
 
-  if (old_exception.Get() != nullptr) {
-    soa.Self()->SetException(old_exception.Get());
+  // Suspend other threads if the invoke is not single-threaded.
+  if ((pReq->options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
+    soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
+    VLOG(jdwp) << "      Suspending all threads";
+    Runtime::Current()->GetThreadList()->SuspendAllForDebugger();
+    soa.Self()->TransitionFromSuspendedToRunnable();
+  }
+
+  VLOG(jdwp) << "  --> returned " << result_tag
+             << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", result_value,
+                             exceptionObjectId);
+
+  // Show detailed debug output.
+  if (result_tag == JDWP::JT_STRING && exceptionObjectId == 0) {
+    if (result_value != 0) {
+      if (VLOG_IS_ON(jdwp)) {
+        std::string result_string;
+        JDWP::JdwpError error = Dbg::StringToUtf8(result_value, &result_string);
+        CHECK_EQ(error, JDWP::ERR_NONE);
+        VLOG(jdwp) << "      string '" << result_string << "'";
+      }
+    } else {
+      VLOG(jdwp) << "      string (null)";
+    }
+  }
+
+  // Attach the reply to DebugInvokeReq so it can be sent to the debugger when the event thread
+  // is ready to suspend.
+  BuildInvokeReply(pReq->reply, pReq->request_id, result_tag, result_value, exceptionObjectId);
+}
+
+void Dbg::BuildInvokeReply(JDWP::ExpandBuf* pReply, uint32_t request_id, JDWP::JdwpTag result_tag,
+                           uint64_t result_value, JDWP::ObjectId exception) {
+  // Make room for the JDWP header since we do not know the size of the reply yet.
+  JDWP::expandBufAddSpace(pReply, kJDWPHeaderLen);
+
+  size_t width = GetTagWidth(result_tag);
+  JDWP::expandBufAdd1(pReply, result_tag);
+  if (width != 0) {
+    WriteValue(pReply, width, result_value);
+  }
+  JDWP::expandBufAdd1(pReply, JDWP::JT_OBJECT);
+  JDWP::expandBufAddObjectId(pReply, exception);
+
+  // Now we know the size, we can complete the JDWP header.
+  uint8_t* buf = expandBufGetBuffer(pReply);
+  JDWP::Set4BE(buf + kJDWPHeaderSizeOffset, expandBufGetLength(pReply));
+  JDWP::Set4BE(buf + kJDWPHeaderIdOffset, request_id);
+  JDWP::Set1(buf + kJDWPHeaderFlagsOffset, kJDWPFlagReply);  // flags
+  JDWP::Set2BE(buf + kJDWPHeaderErrorCodeOffset, JDWP::ERR_NONE);
+}
+
+void Dbg::FinishInvokeMethod(DebugInvokeReq* pReq) {
+  CHECK_NE(Thread::Current(), GetDebugThread()) << "This must be called by the event thread";
+
+  JDWP::ExpandBuf* const pReply = pReq->reply;
+  CHECK(pReply != nullptr) << "No reply attached to DebugInvokeReq";
+
+  // We need to prevent other threads (including the JDWP thread) from interacting with the
+  // debugger while we send the reply but are not yet suspended. The JDWP token will be released
+  // just before we suspend ourselves again (see ThreadList::SuspendSelfForDebugger).
+  gJdwpState->AcquireJdwpTokenForEvent(pReq->thread_id);
+
+  // Send the reply unless the debugger detached before the completion of the method.
+  if (IsDebuggerActive()) {
+    const size_t replyDataLength = expandBufGetLength(pReply) - kJDWPHeaderLen;
+    VLOG(jdwp) << StringPrintf("REPLY INVOKE id=0x%06x (length=%zu)",
+                               pReq->request_id, replyDataLength);
+
+    gJdwpState->SendRequest(pReply);
+  } else {
+    VLOG(jdwp) << "Not sending invoke reply because debugger detached";
   }
 }
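[Editor's note] BuildInvokeReply() above appends a fixed-shape body after the 11-byte header: the result tag, the variable-width result value written by WriteValue(), then a JT_OBJECT tag followed by the exception ObjectId. A minimal sketch of that body layout, assuming 8-byte object ids and using a std::vector as a stand-in for ExpandBuf; the tag characters come from the JDWP specification.

#include <cassert>
#include <cstdint>
#include <vector>

// Append `width` bytes of `value` in big-endian order, like the WriteValue() helper.
static void AddBE(std::vector<uint8_t>* out, uint64_t value, int width) {
  for (int shift = (width - 1) * 8; shift >= 0; shift -= 8) {
    out->push_back(static_cast<uint8_t>(value >> shift));
  }
}

int main() {
  constexpr uint8_t kTagInt = 'I';     // JT_INT
  constexpr uint8_t kTagObject = 'L';  // JT_OBJECT
  std::vector<uint8_t> body;

  body.push_back(kTagInt);
  AddBE(&body, 42, 4);                 // 4-byte result value for an int result.
  body.push_back(kTagObject);
  AddBE(&body, 0, 8);                  // Exception ObjectId; 0 means nothing was thrown.

  assert(body.size() == 1 + 4 + 1 + 8);  // 14 payload bytes follow the 11-byte header.
  return 0;
}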
 
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 3b92d36..fd7d46c 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -42,6 +42,7 @@
 class ArtField;
 class ArtMethod;
 class ObjectRegistry;
+class ScopedObjectAccess;
 class ScopedObjectAccessUnchecked;
 class StackVisitor;
 class Thread;
@@ -50,33 +51,32 @@
  * Invoke-during-breakpoint support.
  */
 struct DebugInvokeReq {
-  DebugInvokeReq(mirror::Object* invoke_receiver, mirror::Class* invoke_class,
+  DebugInvokeReq(uint32_t invoke_request_id, JDWP::ObjectId invoke_thread_id,
+                 mirror::Object* invoke_receiver, mirror::Class* invoke_class,
                  ArtMethod* invoke_method, uint32_t invoke_options,
-                 uint64_t* args, uint32_t args_count)
-      : receiver(invoke_receiver), klass(invoke_class), method(invoke_method),
-        arg_count(args_count), arg_values(args), options(invoke_options),
-        error(JDWP::ERR_NONE), result_tag(JDWP::JT_VOID), result_value(0), exception(0),
-        lock("a DebugInvokeReq lock", kBreakpointInvokeLock),
-        cond("a DebugInvokeReq condition variable", lock) {
+                 uint64_t args[], uint32_t args_count)
+      : request_id(invoke_request_id), thread_id(invoke_thread_id), receiver(invoke_receiver),
+        klass(invoke_class), method(invoke_method), arg_count(args_count), arg_values(args),
+        options(invoke_options), reply(JDWP::expandBufAlloc()) {
   }
 
-  /* request */
-  GcRoot<mirror::Object> receiver;      // not used for ClassType.InvokeMethod
+  ~DebugInvokeReq() {
+    JDWP::expandBufFree(reply);
+  }
+
+  // Request
+  const uint32_t request_id;
+  const JDWP::ObjectId thread_id;
+  GcRoot<mirror::Object> receiver;      // not used for ClassType.InvokeMethod.
   GcRoot<mirror::Class> klass;
-  ArtMethod* method;
+  ArtMethod* const method;
   const uint32_t arg_count;
-  uint64_t* const arg_values;   // will be null if arg_count_ == 0
+  std::unique_ptr<uint64_t[]> arg_values;   // will be null if arg_count_ == 0. We take ownership
+                                            // of this array so we must delete it upon destruction.
   const uint32_t options;
 
-  /* result */
-  JDWP::JdwpError error;
-  JDWP::JdwpTag result_tag;
-  uint64_t result_value;        // either a primitive value or an ObjectId
-  JDWP::ObjectId exception;
-
-  /* condition variable to wait on while the method executes */
-  Mutex lock DEFAULT_MUTEX_ACQUIRED_AFTER;
-  ConditionVariable cond GUARDED_BY(lock);
+  // Reply
+  JDWP::ExpandBuf* const reply;
 
   void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -605,19 +605,39 @@
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Invoke support for commands ClassType.InvokeMethod, ClassType.NewInstance and
-  // ObjectReference.InvokeMethod.
-  static JDWP::JdwpError InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId object_id,
-                                      JDWP::RefTypeId class_id, JDWP::MethodId method_id,
-                                      uint32_t arg_count, uint64_t* arg_values,
-                                      JDWP::JdwpTag* arg_types, uint32_t options,
-                                      JDWP::JdwpTag* pResultTag, uint64_t* pResultValue,
-                                      JDWP::ObjectId* pExceptObj)
+  /*
+   * Invoke support
+   */
+
+  // Called by the JDWP thread to prepare invocation in the event thread (suspended on an event).
+  // If the information sent by the debugger is incorrect, it will send a reply with the
+  // appropriate error code. Otherwise, it will attach a DebugInvokeReq object to the event thread
+  // and resume it (and possibly other threads depending on the invoke options).
+  // Unlike other commands, the JDWP thread will not send the reply to the debugger (see
+  // JdwpState::ProcessRequest). The reply will be sent by the event thread itself after method
+  // invocation completes (see FinishInvokeMethod). This is required to allow the JDWP thread to
+  // process incoming commands from the debugger while the invocation is still in progress in the
+  // event thread, especially if it gets suspended by a debug event occurring in another thread.
+  static JDWP::JdwpError PrepareInvokeMethod(uint32_t request_id, JDWP::ObjectId thread_id,
+                                             JDWP::ObjectId object_id, JDWP::RefTypeId class_id,
+                                             JDWP::MethodId method_id, uint32_t arg_count,
+                                             uint64_t arg_values[], JDWP::JdwpTag* arg_types,
+                                             uint32_t options)
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Called by the event thread to execute a method prepared by the JDWP thread in the given
+  // DebugInvokeReq object. Once the invocation completes, the event thread attaches a reply
+  // to that DebugInvokeReq object so it can be sent to the debugger only when the event thread
+  // is ready to suspend (see FinishInvokeMethod).
   static void ExecuteMethod(DebugInvokeReq* pReq);
 
+  // Called by the event thread to send the reply of the invoke (created in ExecuteMethod)
+  // before suspending itself. This is to ensure the thread is ready to suspend before the
+  // debugger receives the reply.
+  static void FinishInvokeMethod(DebugInvokeReq* pReq);
+
   /*
    * DDM support.
    */
@@ -694,6 +714,14 @@
   }
 
  private:
+  static void ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInvokeReq* pReq)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static void BuildInvokeReply(JDWP::ExpandBuf* pReply, uint32_t request_id,
+                               JDWP::JdwpTag result_tag, uint64_t result_value,
+                               JDWP::ObjectId exception)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static JDWP::JdwpError GetLocalValue(const StackVisitor& visitor,
                                        ScopedObjectAccessUnchecked& soa, int slot,
                                        JDWP::JdwpTag tag, uint8_t* buf, size_t width)
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index d017601..7ac264a 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -264,13 +264,18 @@
 
   // Raw code_item.
   struct CodeItem {
-    uint16_t registers_size_;
-    uint16_t ins_size_;
-    uint16_t outs_size_;
-    uint16_t tries_size_;
-    uint32_t debug_info_off_;  // file offset to debug info stream
+    uint16_t registers_size_;            // the number of registers used by this code
+                                         //   (locals + parameters)
+    uint16_t ins_size_;                  // the number of words of incoming arguments to the method
+                                         //   that this code is for
+    uint16_t outs_size_;                 // the number of words of outgoing argument space required
+                                         //   by this code for method invocation
+    uint16_t tries_size_;                // the number of try_items for this instance. If non-zero,
+                                         //   then these appear as the tries array just after the
+                                         //   insns in this instance.
+    uint32_t debug_info_off_;            // file offset to debug info stream
     uint32_t insns_size_in_code_units_;  // size of the insns array, in 2 byte code units
-    uint16_t insns_[1];
+    uint16_t insns_[1];                  // actual array of bytecode.
 
    private:
     DISALLOW_COPY_AND_ASSIGN(CodeItem);
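[Editor's note] The new field comments describe the raw dex code_item layout. A small illustration (a local copy of the struct, not the runtime definition) of the arithmetic those comments imply: code units are two bytes each, and the incoming arguments occupy the last ins_size_ registers of the frame.

#include <cstddef>
#include <cstdint>

struct CodeItemExample {
  uint16_t registers_size_;
  uint16_t ins_size_;
  uint16_t outs_size_;
  uint16_t tries_size_;
  uint32_t debug_info_off_;
  uint32_t insns_size_in_code_units_;
  // uint16_t insns_[]; follows in the real layout.
};

size_t InsnsSizeInBytes(const CodeItemExample& item) {
  return item.insns_size_in_code_units_ * sizeof(uint16_t);  // 2 bytes per code unit.
}

uint16_t NumLocalRegisters(const CodeItemExample& item) {
  return item.registers_size_ - item.ins_size_;  // Locals precede the incoming arguments.
}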
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index c16f5d3..006d2c7 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -159,6 +159,7 @@
 inline bool SpaceBitmap<kAlignment>::Modify(const mirror::Object* obj) {
   uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
   DCHECK_GE(addr, heap_begin_);
+  DCHECK(HasAddress(obj)) << obj;
   const uintptr_t offset = addr - heap_begin_;
   const size_t index = OffsetToIndex(offset);
   const uintptr_t mask = OffsetToMask(offset);
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index fe2b284..6546eb4 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -35,6 +35,11 @@
 }
 
 template<size_t kAlignment>
+size_t SpaceBitmap<kAlignment>::ComputeHeapSize(uint64_t bitmap_bytes) {
+  return bitmap_bytes * kBitsPerByte * kAlignment;
+}
+
+template<size_t kAlignment>
 SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap(
     const std::string& name, MemMap* mem_map, uint8_t* heap_begin, size_t heap_capacity) {
   CHECK(mem_map != nullptr);
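[Editor's note] ComputeHeapSize() above is the inverse of the bitmap-size computation: each bitmap bit covers one alignment-sized granule of heap. A worked example assuming kBitsPerByte == 8 and an object alignment of 8 bytes (both assumptions stated here, not taken from this hunk); this is what lets the image_space.cc change further down derive the heap capacity from the size of the image's bitmap section.

#include <cstddef>
#include <cstdint>

static constexpr size_t kBitsPerByte = 8;
static constexpr size_t kAlignment = 8;  // Assumed object alignment.

constexpr size_t ComputeHeapSize(uint64_t bitmap_bytes) {
  return bitmap_bytes * kBitsPerByte * kAlignment;
}

// One bitmap byte covers 8 * 8 = 64 heap bytes, so a 4 KiB bitmap section
// describes a 256 KiB heap range.
static_assert(ComputeHeapSize(4096) == 256 * 1024, "4 KiB of bitmap covers 256 KiB of heap");

int main() { return 0; }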
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index d6b3ed4..35faff3 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -188,15 +188,16 @@
 
   std::string Dump() const;
 
+  // Helper function for computing bitmap size based on a 64 bit capacity.
+  static size_t ComputeBitmapSize(uint64_t capacity);
+  static size_t ComputeHeapSize(uint64_t bitmap_bytes);
+
  private:
   // TODO: heap_end_ is initialized so that the heap bitmap is empty, this doesn't require the -1,
   // however, we document that this is expected on heap_end_
   SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, size_t bitmap_size,
               const void* heap_begin);
 
-  // Helper function for computing bitmap size based on a 64 bit capacity.
-  static size_t ComputeBitmapSize(uint64_t capacity);
-
   template<bool kSetBit>
   bool Modify(const mirror::Object* obj);
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 14fa740..22207ee 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1001,6 +1001,27 @@
   BaseMutex::DumpAll(os);
 }
 
+void Heap::ResetGcPerformanceInfo() {
+  for (auto& collector : garbage_collectors_) {
+    collector->ResetMeasurements();
+  }
+  total_allocation_time_.StoreRelaxed(0);
+  total_bytes_freed_ever_ = 0;
+  total_objects_freed_ever_ = 0;
+  total_wait_time_ = 0;
+  blocking_gc_count_ = 0;
+  blocking_gc_time_ = 0;
+  gc_count_last_window_ = 0;
+  blocking_gc_count_last_window_ = 0;
+  last_update_time_gc_count_rate_histograms_ =  // Round down by the window duration.
+      (NanoTime() / kGcCountRateHistogramWindowDuration) * kGcCountRateHistogramWindowDuration;
+  {
+    MutexLock mu(Thread::Current(), *gc_complete_lock_);
+    gc_count_rate_histogram_.Reset();
+    blocking_gc_count_rate_histogram_.Reset();
+  }
+}
+
 uint64_t Heap::GetGcCount() const {
   uint64_t gc_count = 0U;
   for (auto& collector : garbage_collectors_) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 09bd370..18244c8 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -598,6 +598,7 @@
 
   // GC performance measuring
   void DumpGcPerformanceInfo(std::ostream& os);
+  void ResetGcPerformanceInfo();
 
   // Returns true if we currently care about pause times.
   bool CareAboutPauseTimes() const {
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 437fd8c..f7ceb84 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -694,7 +694,7 @@
       const auto section_idx = static_cast<ImageHeader::ImageSections>(i);
       auto& section = image_header.GetImageSection(section_idx);
       LOG(INFO) << section_idx << " start="
-          << reinterpret_cast<void*>(image_header.GetImageBegin() + section.Offset())
+          << reinterpret_cast<void*>(image_header.GetImageBegin() + section.Offset()) << " "
           << section;
     }
   }
@@ -730,9 +730,9 @@
   std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename,
                                        bitmap_index));
   std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap(
-      accounting::ContinuousSpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(),
-                                                          reinterpret_cast<uint8_t*>(map->Begin()),
-                                                          map->Size()));
+      accounting::ContinuousSpaceBitmap::CreateFromMemMap(
+          bitmap_name, image_map.release(), reinterpret_cast<uint8_t*>(map->Begin()),
+          accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_section.Size())));
   if (bitmap.get() == nullptr) {
     *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
     return nullptr;
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 85eefb0..f32d5a1 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -75,7 +75,7 @@
 static constexpr size_t kMaxBytesPerSegment = 4096;
 
 // The static field-name for the synthetic object generated to account for class static overhead.
-static constexpr const char* kStaticOverheadName = "$staticOverhead";
+static constexpr const char* kClassOverheadName = "$classOverhead";
 
 enum HprofTag {
   HPROF_TAG_STRING = 0x01,
@@ -1092,17 +1092,23 @@
     // Class is allocated but not yet loaded: we cannot access its fields or super class.
     return;
   }
-  size_t sFieldCount = klass->NumStaticFields();
-  if (sFieldCount != 0) {
-    int byteLength = sFieldCount * sizeof(JValue);  // TODO bogus; fields are packed
+  const size_t num_static_fields = klass->NumStaticFields();
+  // Total class size including embedded IMT, embedded vtable, and static fields.
+  const size_t class_size = klass->GetClassSize();
+  // Class size excluding static fields (relies on reference fields being the first static fields).
+  const size_t class_size_without_overhead = sizeof(mirror::Class);
+  CHECK_LE(class_size_without_overhead, class_size);
+  const size_t overhead_size = class_size - class_size_without_overhead;
+
+  if (overhead_size != 0) {
     // Create a byte array to reflect the allocation of the
     // StaticField array at the end of this class.
     __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
     __ AddClassStaticsId(klass);
     __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass));
-    __ AddU4(byteLength);
+    __ AddU4(overhead_size);
     __ AddU1(hprof_basic_byte);
-    for (int i = 0; i < byteLength; ++i) {
+    for (size_t i = 0; i < overhead_size; ++i) {
       __ AddU1(0);
     }
   }
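[Editor's note] Replacing num_static_fields * sizeof(JValue) with GetClassSize() - sizeof(mirror::Class) means the synthetic byte array now accounts for the embedded vtable/IMT as well as the packed static fields. Purely hypothetical numbers to show the shape of the computation; the real values come from the runtime.

#include <cstddef>

constexpr size_t kAssumedClassSize = 720;          // Assumed Class::GetClassSize() result.
constexpr size_t kAssumedSizeofMirrorClass = 480;  // Assumed sizeof(mirror::Class).
constexpr size_t kOverheadSize = kAssumedClassSize - kAssumedSizeofMirrorClass;
static_assert(kOverheadSize == 240, "a 240-byte $classOverhead array would be emitted");

int main() { return 0; }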
@@ -1119,7 +1125,7 @@
   if (klass->IsClassClass()) {
     // ClassObjects have their static fields appended, so aren't all the same size.
     // But they're at least this size.
-    __ AddU4(sizeof(mirror::Class));  // instance size
+    __ AddU4(class_size_without_overhead);  // instance size
   } else if (klass->IsStringClass()) {
     // Strings are variable length with character data at the end like arrays.
     // This outputs the size of an empty string.
@@ -1133,15 +1139,15 @@
   __ AddU2(0);  // empty const pool
 
   // Static fields
-  if (sFieldCount == 0) {
-    __ AddU2((uint16_t)0);
+  if (overhead_size == 0) {
+    __ AddU2(static_cast<uint16_t>(0));
   } else {
-    __ AddU2((uint16_t)(sFieldCount+1));
-    __ AddStringId(LookupStringId(kStaticOverheadName));
+    __ AddU2(static_cast<uint16_t>(num_static_fields + 1));
+    __ AddStringId(LookupStringId(kClassOverheadName));
     __ AddU1(hprof_basic_object);
     __ AddClassStaticsId(klass);
 
-    for (size_t i = 0; i < sFieldCount; ++i) {
+    for (size_t i = 0; i < num_static_fields; ++i) {
       ArtField* f = klass->GetStaticField(i);
 
       size_t size;
diff --git a/runtime/image.cc b/runtime/image.cc
index 947c914..44193da 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '6', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '7', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/image.h b/runtime/image.h
index c6be7ef..d856f21 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -142,6 +142,7 @@
     kSectionObjects,
     kSectionArtFields,
     kSectionArtMethods,
+    kSectionInternedStrings,
     kSectionImageBitmap,
     kSectionCount,  // Number of elements in enum.
   };
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 9abbca8..2a96278 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -152,20 +152,28 @@
   CHECK(image_space != nullptr);
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
   if (!image_added_to_intern_table_) {
-    mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-    mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
-    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
-      mirror::DexCache* dex_cache = dex_caches->Get(i);
-      const DexFile* dex_file = dex_cache->GetDexFile();
-      const size_t num_strings = dex_file->NumStringIds();
-      for (size_t j = 0; j < num_strings; ++j) {
-        mirror::String* image_string = dex_cache->GetResolvedString(j);
-        if (image_string != nullptr) {
-          mirror::String* found = LookupStrong(image_string);
-          if (found == nullptr) {
-            InsertStrong(image_string);
-          } else {
-            DCHECK_EQ(found, image_string);
+    const ImageHeader* const header = &image_space->GetImageHeader();
+    // Check if we have the interned strings section.
+    const ImageSection& section = header->GetImageSection(ImageHeader::kSectionInternedStrings);
+    if (section.Size() > 0) {
+      ReadFromMemoryLocked(image_space->Begin() + section.Offset());
+    } else {
+      // TODO: Delete this logic?
+      mirror::Object* root = header->GetImageRoot(ImageHeader::kDexCaches);
+      mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
+      for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
+        mirror::DexCache* dex_cache = dex_caches->Get(i);
+        const DexFile* dex_file = dex_cache->GetDexFile();
+        const size_t num_strings = dex_file->NumStringIds();
+        for (size_t j = 0; j < num_strings; ++j) {
+          mirror::String* image_string = dex_cache->GetResolvedString(j);
+          if (image_string != nullptr) {
+            mirror::String* found = LookupStrong(image_string);
+            if (found == nullptr) {
+              InsertStrong(image_string);
+            } else {
+              DCHECK_EQ(found, image_string);
+            }
           }
         }
       }
@@ -285,6 +293,29 @@
   weak_interns_.SweepWeaks(callback, arg);
 }
 
+void InternTable::AddImageInternTable(gc::space::ImageSpace* image_space) {
+  const ImageSection& intern_section = image_space->GetImageHeader().GetImageSection(
+      ImageHeader::kSectionInternedStrings);
+  // Read the string tables from the image.
+  const uint8_t* ptr = image_space->Begin() + intern_section.Offset();
+  const size_t offset = ReadFromMemory(ptr);
+  CHECK_LE(offset, intern_section.Size());
+}
+
+size_t InternTable::ReadFromMemory(const uint8_t* ptr) {
+  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
+  return ReadFromMemoryLocked(ptr);
+}
+
+size_t InternTable::ReadFromMemoryLocked(const uint8_t* ptr) {
+  return strong_interns_.ReadIntoPreZygoteTable(ptr);
+}
+
+size_t InternTable::WriteToMemory(uint8_t* ptr) {
+  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
+  return strong_interns_.WriteFromPostZygoteTable(ptr);
+}
+
 std::size_t InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& root) const {
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
@@ -300,6 +331,17 @@
   return a.Read()->Equals(b.Read());
 }
 
+size_t InternTable::Table::ReadIntoPreZygoteTable(const uint8_t* ptr) {
+  CHECK_EQ(pre_zygote_table_.Size(), 0u);
+  size_t read_count = 0;
+  pre_zygote_table_ = UnorderedSet(ptr, false /* make copy */, &read_count);
+  return read_count;
+}
+
+size_t InternTable::Table::WriteFromPostZygoteTable(uint8_t* ptr) {
+  return post_zygote_table_.WriteToMemory(ptr);
+}
+
 void InternTable::Table::Remove(mirror::String* s) {
   auto it = post_zygote_table_.Find(GcRoot<mirror::String>(s));
   if (it != post_zygote_table_.end()) {
@@ -325,9 +367,13 @@
 }
 
 void InternTable::Table::SwapPostZygoteWithPreZygote() {
-  CHECK(pre_zygote_table_.Empty());
-  std::swap(pre_zygote_table_, post_zygote_table_);
-  VLOG(heap) << "Swapping " << pre_zygote_table_.Size() << " interns to the pre zygote table";
+  if (pre_zygote_table_.Empty()) {
+    std::swap(pre_zygote_table_, post_zygote_table_);
+    VLOG(heap) << "Swapping " << pre_zygote_table_.Size() << " interns to the pre zygote table";
+  } else {
+    // This case happens if we read the intern table from the image.
+    VLOG(heap) << "Not swapping due to non-empty pre_zygote_table_";
+  }
 }
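[Editor's note] The guard above exists because a pre-zygote table can now be populated directly from the image's interned-strings section (ReadIntoPreZygoteTable); a later zygote-space creation must not clobber it. A minimal model of that behaviour using std::unordered_set, illustrative only and not the runtime types.

#include <cassert>
#include <string>
#include <unordered_set>
#include <utility>

struct TableModel {
  std::unordered_set<std::string> pre_zygote_table_;
  std::unordered_set<std::string> post_zygote_table_;

  void SwapPostZygoteWithPreZygote() {
    if (pre_zygote_table_.empty()) {
      std::swap(pre_zygote_table_, post_zygote_table_);
    }
    // Otherwise the pre-zygote table came from the image; keep it as-is.
  }
};

int main() {
  TableModel t;
  t.pre_zygote_table_.insert("from-image");      // Loaded from the image section.
  t.post_zygote_table_.insert("runtime-intern");
  t.SwapPostZygoteWithPreZygote();
  assert(t.pre_zygote_table_.count("from-image") == 1);  // Not clobbered by the swap.
  return 0;
}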
 
 void InternTable::Table::Insert(mirror::String* s) {
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 1e5d3c2..97ce73c 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -97,6 +97,20 @@
   void SwapPostZygoteWithPreZygote()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::intern_table_lock_);
 
+  // Add an intern table which was serialized to the image.
+  void AddImageInternTable(gc::space::ImageSpace* image_space)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::intern_table_lock_);
+
+  // Read the intern table from memory. The elements aren't copied; the intern hash set data will
+  // point to somewhere within ptr. Only reads the strong interns.
+  size_t ReadFromMemory(const uint8_t* ptr) LOCKS_EXCLUDED(Locks::intern_table_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Write the post zygote intern table to a pointer. Only writes the strong interns since no weak
+  // interns are expected when this is called from the image writer.
+  size_t WriteToMemory(uint8_t* ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::intern_table_lock_);
+
  private:
   class StringHashEquals {
    public:
@@ -133,6 +147,16 @@
         EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
     void SwapPostZygoteWithPreZygote() EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
     size_t Size() const EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+    // ReadIntoPreZygoteTable is called from ReadFromMemory, which happens during runtime creation
+    // when we load the image intern table. Returns how many bytes were read.
+    size_t ReadIntoPreZygoteTable(const uint8_t* ptr)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+    // The image writer calls WriteFromPostZygoteTable through WriteToMemory; it writes the interns
+    // in the post zygote table. Returns how many bytes were written.
+    size_t WriteFromPostZygoteTable(uint8_t* ptr)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
    private:
     typedef HashSet<GcRoot<mirror::String>, GcRootEmptyFn, StringHashEquals, StringHashEquals,
@@ -192,6 +216,10 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   friend class Transaction;
 
+  size_t ReadFromMemoryLocked(const uint8_t* ptr)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   bool image_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_);
   bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_);
   bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_);
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 86a79ce..0f6f788 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -450,10 +450,13 @@
 static inline void AssignRegister(ShadowFrame* new_shadow_frame, const ShadowFrame& shadow_frame,
                                   size_t dest_reg, size_t src_reg)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  // If both register locations contains the same value, the register probably holds a reference.
   // Uint required, so that sign extension does not make this wrong on 64b systems
   uint32_t src_value = shadow_frame.GetVReg(src_reg);
   mirror::Object* o = shadow_frame.GetVRegReference<kVerifyNone>(src_reg);
+
+  // If both register locations contain the same value, the register probably holds a reference.
+  // Note: As an optimization, non-moving collectors leave a stale reference value
+  // in the references array even after the original vreg was overwritten to a non-reference.
   if (src_value == reinterpret_cast<uintptr_t>(o)) {
     new_shadow_frame->SetVRegReference(dest_reg, o);
   } else {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index dd7aa40..fcf083c 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -56,7 +56,7 @@
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
                          ShadowFrame& shadow_frame, JValue result_register) {
-  bool do_assignability_check = do_access_check;
+  constexpr bool do_assignability_check = do_access_check;
   if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
     LOG(FATAL) << "Invalid shadow frame for interpreter use";
     return JValue();
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index e18d10f..7c48985 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -297,7 +297,7 @@
 
  private:
   explicit JdwpState(const JdwpOptions* options);
-  size_t ProcessRequest(Request* request, ExpandBuf* pReply);
+  size_t ProcessRequest(Request* request, ExpandBuf* pReply, bool* skip_reply);
   bool InvokeInProgress();
   bool IsConnected();
   void SuspendByPolicy(JdwpSuspendPolicy suspend_policy, JDWP::ObjectId thread_self_id)
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 612af8b..14f097f 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -99,10 +99,6 @@
 don't allow anyone else to interfere with us.
 */
 
-
-#define kJdwpEventCommandSet    64
-#define kJdwpCompositeCommand   100
-
 namespace art {
 
 namespace JDWP {
@@ -612,13 +608,10 @@
      */
     DebugInvokeReq* const pReq = Dbg::GetInvokeReq();
     if (pReq == nullptr) {
-      /*LOGD("SuspendByPolicy: no invoke needed");*/
       break;
     }
 
-    /* grab this before posting/suspending again */
-    AcquireJdwpTokenForEvent(thread_self_id);
-
+    // Execute method.
     Dbg::ExecuteMethod(pReq);
   }
 }
@@ -749,11 +742,11 @@
 void JdwpState::EventFinish(ExpandBuf* pReq) {
   uint8_t* buf = expandBufGetBuffer(pReq);
 
-  Set4BE(buf, expandBufGetLength(pReq));
-  Set4BE(buf + 4, NextRequestSerial());
-  Set1(buf + 8, 0);     /* flags */
-  Set1(buf + 9, kJdwpEventCommandSet);
-  Set1(buf + 10, kJdwpCompositeCommand);
+  Set4BE(buf + kJDWPHeaderSizeOffset, expandBufGetLength(pReq));
+  Set4BE(buf + kJDWPHeaderIdOffset, NextRequestSerial());
+  Set1(buf + kJDWPHeaderFlagsOffset, 0);     /* flags */
+  Set1(buf + kJDWPHeaderCmdSetOffset, kJDWPEventCmdSet);
+  Set1(buf + kJDWPHeaderCmdOffset, kJDWPEventCompositeCmd);
 
   SendRequest(pReq);
 
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index f7f70f6..d4e2656 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -52,17 +52,6 @@
   return StringPrintf("%#" PRIx64 " (%s)", ref_type_id, signature.c_str());
 }
 
-// Helper function: write a variable-width value into the output input buffer.
-static void WriteValue(ExpandBuf* pReply, int width, uint64_t value) {
-  switch (width) {
-  case 1:     expandBufAdd1(pReply, value); break;
-  case 2:     expandBufAdd2BE(pReply, value); break;
-  case 4:     expandBufAdd4BE(pReply, value); break;
-  case 8:     expandBufAdd8BE(pReply, value); break;
-  default:    LOG(FATAL) << width; break;
-  }
-}
-
 static JdwpError WriteTaggedObject(ExpandBuf* reply, ObjectId object_id)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   uint8_t tag;
@@ -92,7 +81,7 @@
  * If "is_constructor" is set, this returns "object_id" rather than the
  * expected-to-be-void return value of the called function.
  */
-static JdwpError RequestInvoke(JdwpState*, Request* request, ExpandBuf* pReply,
+static JdwpError RequestInvoke(JdwpState*, Request* request,
                                ObjectId thread_id, ObjectId object_id,
                                RefTypeId class_id, MethodId method_id, bool is_constructor)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -122,49 +111,15 @@
                              (options & INVOKE_SINGLE_THREADED) ? " (SINGLE_THREADED)" : "",
                              (options & INVOKE_NONVIRTUAL) ? " (NONVIRTUAL)" : "");
 
-  JdwpTag resultTag;
-  uint64_t resultValue;
-  ObjectId exceptObjId;
-  JdwpError err = Dbg::InvokeMethod(thread_id, object_id, class_id, method_id, arg_count,
-                                    argValues.get(), argTypes.get(), options, &resultTag,
-                                    &resultValue, &exceptObjId);
-  if (err != ERR_NONE) {
-    return err;
+  JDWP::JdwpError error = Dbg::PrepareInvokeMethod(request->GetId(), thread_id, object_id,
+                                                   class_id, method_id, arg_count,
+                                                   argValues.get(), argTypes.get(), options);
+  if (error == JDWP::ERR_NONE) {
+    // We successfully requested the invoke. The event thread now owns the arguments array in its
+    // DebugInvokeReq mailbox.
+    argValues.release();
   }
-
-  if (is_constructor) {
-    // If we invoked a constructor (which actually returns void), return the receiver,
-    // unless we threw, in which case we return null.
-    resultTag = JT_OBJECT;
-    resultValue = (exceptObjId == 0) ? object_id : 0;
-  }
-
-  size_t width = Dbg::GetTagWidth(resultTag);
-  expandBufAdd1(pReply, resultTag);
-  if (width != 0) {
-    WriteValue(pReply, width, resultValue);
-  }
-  expandBufAdd1(pReply, JT_OBJECT);
-  expandBufAddObjectId(pReply, exceptObjId);
-
-  VLOG(jdwp) << "  --> returned " << resultTag
-      << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", resultValue, exceptObjId);
-
-  /* show detailed debug output */
-  if (resultTag == JT_STRING && exceptObjId == 0) {
-    if (resultValue != 0) {
-      if (VLOG_IS_ON(jdwp)) {
-        std::string result_string;
-        JDWP::JdwpError error = Dbg::StringToUtf8(resultValue, &result_string);
-        CHECK_EQ(error, JDWP::ERR_NONE);
-        VLOG(jdwp) << "      string '" << result_string << "'";
-      }
-    } else {
-      VLOG(jdwp) << "      string (null)";
-    }
-  }
-
-  return err;
+  return error;
 }
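[Editor's note] RequestInvoke() now releases the argument array only when PrepareInvokeMethod() accepted the request, because DebugInvokeReq (see the debugger.h hunk above) owns the array from then on. A small standalone model of that hand-off; Mailbox and TryHandOff are hypothetical names standing in for the real types.

#include <cstdint>
#include <memory>

struct Mailbox {
  std::unique_ptr<uint64_t[]> arg_values;  // Mirrors DebugInvokeReq::arg_values.
};

static bool TryHandOff(std::unique_ptr<uint64_t[]>& args, Mailbox* box, bool accepted) {
  if (accepted) {
    box->arg_values.reset(args.release());  // Ownership moves to the mailbox.
  }
  return accepted;  // On failure the caller's unique_ptr still frees the array.
}

int main() {
  std::unique_ptr<uint64_t[]> args(new uint64_t[2]{1, 2});
  Mailbox box;
  TryHandOff(args, &box, /* accepted= */ true);
  return (box.arg_values != nullptr && args == nullptr) ? 0 : 1;
}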
 
 static JdwpError VM_Version(JdwpState*, Request*, ExpandBuf* pReply)
@@ -684,13 +639,14 @@
  * Example: Eclipse sometimes uses java/lang/Class.forName(String s) on
  * values in the "variables" display.
  */
-static JdwpError CT_InvokeMethod(JdwpState* state, Request* request, ExpandBuf* pReply)
+static JdwpError CT_InvokeMethod(JdwpState* state, Request* request,
+                                 ExpandBuf* pReply ATTRIBUTE_UNUSED)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   RefTypeId class_id = request->ReadRefTypeId();
   ObjectId thread_id = request->ReadThreadId();
   MethodId method_id = request->ReadMethodId();
 
-  return RequestInvoke(state, request, pReply, thread_id, 0, class_id, method_id, false);
+  return RequestInvoke(state, request, thread_id, 0, class_id, method_id, false);
 }
 
 /*
@@ -700,7 +656,8 @@
  * Example: in IntelliJ, create a watch on "new String(myByteArray)" to
  * see the contents of a byte[] as a string.
  */
-static JdwpError CT_NewInstance(JdwpState* state, Request* request, ExpandBuf* pReply)
+static JdwpError CT_NewInstance(JdwpState* state, Request* request,
+                                ExpandBuf* pReply ATTRIBUTE_UNUSED)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   RefTypeId class_id = request->ReadRefTypeId();
   ObjectId thread_id = request->ReadThreadId();
@@ -711,7 +668,7 @@
   if (status != ERR_NONE) {
     return status;
   }
-  return RequestInvoke(state, request, pReply, thread_id, object_id, class_id, method_id, true);
+  return RequestInvoke(state, request, thread_id, object_id, class_id, method_id, true);
 }
 
 /*
@@ -863,14 +820,15 @@
  * object), it will try to invoke the object's toString() function.  This
  * feature becomes crucial when examining ArrayLists with Eclipse.
  */
-static JdwpError OR_InvokeMethod(JdwpState* state, Request* request, ExpandBuf* pReply)
+static JdwpError OR_InvokeMethod(JdwpState* state, Request* request,
+                                 ExpandBuf* pReply ATTRIBUTE_UNUSED)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectId object_id = request->ReadObjectId();
   ObjectId thread_id = request->ReadThreadId();
   RefTypeId class_id = request->ReadRefTypeId();
   MethodId method_id = request->ReadMethodId();
 
-  return RequestInvoke(state, request, pReply, thread_id, object_id, class_id, method_id, false);
+  return RequestInvoke(state, request, thread_id, object_id, class_id, method_id, false);
 }
 
 static JdwpError OR_DisableCollection(JdwpState*, Request* request, ExpandBuf*)
@@ -1602,13 +1560,27 @@
   return result;
 }
 
+// Returns true if the given command_set and command identify an "invoke" command.
+static bool IsInvokeCommand(uint8_t command_set, uint8_t command) {
+  if (command_set == kJDWPClassTypeCmdSet) {
+    return command == kJDWPClassTypeInvokeMethodCmd || command == kJDWPClassTypeNewInstanceCmd;
+  } else if (command_set == kJDWPObjectReferenceCmdSet) {
+    return command == kJDWPObjectReferenceInvokeCmd;
+  } else {
+    return false;
+  }
+}
+
 /*
- * Process a request from the debugger.
+ * Process a request from the debugger. The skip_reply flag is set to true to indicate to the
+ * caller that the reply must not be sent to the debugger. This is used for invoke commands where
+ * the reply is sent by the event thread after completing the invoke.
  *
  * On entry, the JDWP thread is in VMWAIT.
  */
-size_t JdwpState::ProcessRequest(Request* request, ExpandBuf* pReply) {
+size_t JdwpState::ProcessRequest(Request* request, ExpandBuf* pReply, bool* skip_reply) {
   JdwpError result = ERR_NONE;
+  *skip_reply = false;
 
   if (request->GetCommandSet() != kJDWPDdmCmdSet) {
     /*
@@ -1661,24 +1633,31 @@
     result = ERR_NOT_IMPLEMENTED;
   }
 
-  /*
-   * Set up the reply header.
-   *
-   * If we encountered an error, only send the header back.
-   */
-  uint8_t* replyBuf = expandBufGetBuffer(pReply);
-  size_t replyLength = (result == ERR_NONE) ? expandBufGetLength(pReply) : kJDWPHeaderLen;
-  Set4BE(replyBuf + 0, replyLength);
-  Set4BE(replyBuf + 4, request->GetId());
-  Set1(replyBuf + 8, kJDWPFlagReply);
-  Set2BE(replyBuf + 9, result);
+  size_t replyLength = 0U;
+  if (result == ERR_NONE && IsInvokeCommand(request->GetCommandSet(), request->GetCommand())) {
+    // We successfully requested an invoke in the event thread. It will send the reply once the
+    // invoke completes, so we must not send it now.
+    *skip_reply = true;
+  } else {
+    /*
+     * Set up the reply header.
+     *
+     * If we encountered an error, only send the header back.
+     */
+    uint8_t* replyBuf = expandBufGetBuffer(pReply);
+    replyLength = (result == ERR_NONE) ? expandBufGetLength(pReply) : kJDWPHeaderLen;
+    Set4BE(replyBuf + kJDWPHeaderSizeOffset, replyLength);
+    Set4BE(replyBuf + kJDWPHeaderIdOffset, request->GetId());
+    Set1(replyBuf + kJDWPHeaderFlagsOffset, kJDWPFlagReply);
+    Set2BE(replyBuf + kJDWPHeaderErrorCodeOffset, result);
 
-  CHECK_GT(expandBufGetLength(pReply), 0U) << GetCommandName(request) << " " << request->GetId();
+    CHECK_GT(expandBufGetLength(pReply), 0U) << GetCommandName(request) << " " << request->GetId();
 
-  size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen;
-  VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")";
-  if (false) {
-    VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, "");
+    size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen;
+    VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")";
+    if (false) {
+      VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, "");
+    }
   }
 
   VLOG(jdwp) << "----------";
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index e6b97a2..6bc5e27 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -395,8 +395,15 @@
   JDWP::Request request(netStateBase->input_buffer_, netStateBase->input_count_);
 
   ExpandBuf* pReply = expandBufAlloc();
-  size_t replyLength = ProcessRequest(&request, pReply);
-  ssize_t cc = netStateBase->WritePacket(pReply, replyLength);
+  bool skip_reply = false;
+  size_t replyLength = ProcessRequest(&request, pReply, &skip_reply);
+  ssize_t cc = 0;
+  if (!skip_reply) {
+    cc = netStateBase->WritePacket(pReply, replyLength);
+  } else {
+    DCHECK_EQ(replyLength, 0U);
+  }
+  expandBufFree(pReply);
 
   /*
    * We processed this request and sent its reply so we can release the JDWP token.
@@ -405,10 +412,8 @@
 
   if (cc != static_cast<ssize_t>(replyLength)) {
     PLOG(ERROR) << "Failed sending reply to debugger";
-    expandBufFree(pReply);
     return false;
   }
-  expandBufFree(pReply);
   netStateBase->ConsumeBytes(request.GetLength());
   {
     MutexLock mu(self, shutdown_lock_);
diff --git a/runtime/jdwp/jdwp_priv.h b/runtime/jdwp/jdwp_priv.h
index f290be0..d58467d 100644
--- a/runtime/jdwp/jdwp_priv.h
+++ b/runtime/jdwp/jdwp_priv.h
@@ -29,15 +29,32 @@
 /*
  * JDWP constants.
  */
-#define kJDWPHeaderLen  11
-#define kJDWPFlagReply  0x80
+static constexpr size_t kJDWPHeaderSizeOffset = 0U;
+static constexpr size_t kJDWPHeaderIdOffset = 4U;
+static constexpr size_t kJDWPHeaderFlagsOffset = 8U;
+static constexpr size_t kJDWPHeaderErrorCodeOffset = 9U;
+static constexpr size_t kJDWPHeaderCmdSetOffset = 9U;
+static constexpr size_t kJDWPHeaderCmdOffset = 10U;
+static constexpr size_t kJDWPHeaderLen = 11U;
+static constexpr uint8_t kJDWPFlagReply = 0x80;
 
-#define kMagicHandshake     "JDWP-Handshake"
-#define kMagicHandshakeLen  (sizeof(kMagicHandshake)-1)
+static constexpr const char kMagicHandshake[] = "JDWP-Handshake";
+static constexpr size_t kMagicHandshakeLen = sizeof(kMagicHandshake) - 1;
+
+/* Invoke commands */
+static constexpr uint8_t kJDWPClassTypeCmdSet = 3U;
+static constexpr uint8_t kJDWPClassTypeInvokeMethodCmd = 3U;
+static constexpr uint8_t kJDWPClassTypeNewInstanceCmd = 4U;
+static constexpr uint8_t kJDWPObjectReferenceCmdSet = 9U;
+static constexpr uint8_t kJDWPObjectReferenceInvokeCmd = 6U;
+
+/* Event command */
+static constexpr uint8_t kJDWPEventCmdSet = 64U;
+static constexpr uint8_t kJDWPEventCompositeCmd = 100U;
 
 /* DDM support */
-#define kJDWPDdmCmdSet  199     /* 0xc7, or 'G'+128 */
-#define kJDWPDdmCmd     1
+static constexpr uint8_t kJDWPDdmCmdSet = 199U;  // 0xc7, or 'G'+128
+static constexpr uint8_t kJDWPDdmCmd = 1U;
 
 namespace art {
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 189306d..fa041a9 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -439,6 +439,14 @@
                     page_aligned_byte_count, prot, false);
 }
 
+MemMap* MemMap::MapDummy(const char* name, uint8_t* addr, size_t byte_count) {
+  if (byte_count == 0) {
+    return new MemMap(name, nullptr, 0, nullptr, 0, 0, false);
+  }
+  const size_t page_aligned_byte_count = RoundUp(byte_count, kPageSize);
+  return new MemMap(name, addr, byte_count, addr, page_aligned_byte_count, 0, true /* reuse */);
+}
+
 MemMap* MemMap::MapFileAtAddress(uint8_t* expected_ptr, size_t byte_count, int prot, int flags,
                                  int fd, off_t start, bool reuse, const char* filename,
                                  std::string* error_msg) {
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 6023a70..14387ee 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -64,6 +64,12 @@
   static MemMap* MapAnonymous(const char* ashmem_name, uint8_t* addr, size_t byte_count, int prot,
                               bool low_4gb, bool reuse, std::string* error_msg);
 
+  // Create a placeholder for a region allocated by a direct call to mmap.
+  // This is useful when we do not have control over the code calling mmap,
+  // but still want to keep track of the region in the list.
+  // The region is not considered to be owned and will not be unmapped.
+  static MemMap* MapDummy(const char* name, uint8_t* addr, size_t byte_count);
+
   // Map part of a file, taking care of non-page aligned offsets.  The
   // "start" offset is absolute, not relative.
   //
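
As a usage illustration for the MapDummy declaration above: the region comes from an mmap call outside ART's control, and MapDummy merely records it in the MemMap bookkeeping list; it neither owns nor unmaps the memory. A minimal sketch under that assumption, with illustrative names:

    #include <sys/mman.h>
    #include "mem_map.h"

    // Illustrative only: register a region that was mmapped by code outside ART's control.
    static void RegisterExternalMapping() {
      const size_t size = 4096;
      void* raw = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (raw == MAP_FAILED) {
        return;
      }
      // MapDummy only records the region; it does not take ownership and will not unmap it.
      art::MemMap::MapDummy("externally-mapped-region",
                            reinterpret_cast<uint8_t*>(raw), size);
    }
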
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 835b94a..8c9222f 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -666,7 +666,7 @@
 inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
   VisitInstanceFieldsReferences<kVisitClass>(klass, visitor);
   // Right after a class is allocated, but not yet loaded
-  // (kStatusNotReady, see ClassLinkder::LoadClass()), GC may find it
+  // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it
   // and scan it. IsTemp() may call Class::GetAccessFlags() but may
   // fail in the DCHECK in Class::GetAccessFlags() because the class
   // status is kStatusNotReady. To avoid it, rely on IsResolved()
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 9f6cd11..8d9c08d 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -176,11 +176,13 @@
 }
 
 template <bool kIsInstrumented>
-inline String* String::AllocFromCharArray(Thread* self, int32_t array_length,
+inline String* String::AllocFromCharArray(Thread* self, int32_t count,
                                           Handle<CharArray> array, int32_t offset,
                                           gc::AllocatorType allocator_type) {
-  SetStringCountAndValueVisitorFromCharArray visitor(array_length, array, offset);
-  String* new_string = Alloc<kIsInstrumented>(self, array_length, allocator_type, visitor);
+  // It is a caller error to pass a count greater than the actual array's length.
+  DCHECK_GE(array->GetLength(), count);
+  SetStringCountAndValueVisitorFromCharArray visitor(count, array, offset);
+  String* new_string = Alloc<kIsInstrumented>(self, count, allocator_type, visitor);
   return new_string;
 }
 
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index a8f16d7..af06385 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -95,7 +95,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template <bool kIsInstrumented>
-  ALWAYS_INLINE static String* AllocFromCharArray(Thread* self, int32_t array_length,
+  ALWAYS_INLINE static String* AllocFromCharArray(Thread* self, int32_t count,
                                                   Handle<CharArray> array, int32_t offset,
                                                   gc::AllocatorType allocator_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 94024ef..67dcc9c 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -380,8 +380,8 @@
 static jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaThis,
                                                       jboolean publicOnly) {
   ScopedFastNativeObjectAccess soa(env);
-  StackHandleScope<3> hs(soa.Self());
-  auto* klass = DecodeClass(soa, javaThis);
+  StackHandleScope<2> hs(soa.Self());
+  auto klass = hs.NewHandle(DecodeClass(soa, javaThis));
   size_t num_methods = 0;
   for (auto& m : klass->GetVirtualMethods(sizeof(void*))) {
     auto modifiers = m.GetAccessFlags();
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 6fda790..55a2fbb 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -21,6 +21,9 @@
 #include <unistd.h>
 
 #include <cstdlib>
+#ifndef __APPLE__
+#include <link.h>  // for dl_iterate_phdr.
+#endif
 #include <sstream>
 
 // dlopen_ext support from bionic.
@@ -35,6 +38,7 @@
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "oat.h"
+#include "mem_map.h"
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "os.h"
@@ -45,13 +49,13 @@
 namespace art {
 
 // Whether OatFile::Open will try DlOpen() first. Fallback is our own ELF loader.
-static constexpr bool kUseDlopen = false;
+static constexpr bool kUseDlopen = true;
 
 // Whether OatFile::Open will try DlOpen() on the host. On the host we're not linking against
 // bionic, so cannot take advantage of the support for changed semantics (loading the same soname
 // multiple times). However, if/when we switch the above, we likely want to switch this, too,
 // to get test coverage of the code paths.
-static constexpr bool kUseDlopenOnHost = false;
+static constexpr bool kUseDlopenOnHost = true;
 
 // For debugging, Open will print DlOpen error message if set to true.
 static constexpr bool kPrintDlOpenErrorMessage = false;
@@ -210,6 +214,15 @@
 
 bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
                      const char* abs_dex_location, std::string* error_msg) {
+#ifdef __APPLE__
+  // dl_iterate_phdr is not available on Mac OS.  There is a similar API there,
+  // but let's fall back to the custom loading code for the time being.
+  UNUSED(elf_filename);
+  UNUSED(requested_base);
+  UNUSED(abs_dex_location);
+  UNUSED(error_msg);
+  return false;
+#else
   std::unique_ptr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
   if (absolute_path == nullptr) {
     *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
@@ -217,7 +230,7 @@
   }
 #ifdef HAVE_ANDROID_OS
   android_dlextinfo extinfo;
-  extinfo.flags = ANDROID_DLEXT_FORCE_LOAD;
+  extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
   dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
 #else
   dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
@@ -264,7 +277,27 @@
     bss_end_ += sizeof(uint32_t);
   }
 
+  // Ask the linker where it mmapped the file and notify our mmap wrapper of the regions.
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info* info, size_t /* size */, void* data) {
+      auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      if (info->dlpi_name != nullptr && info->dlpi_name == context->so_name) {
+        for (int i = 0; i < info->dlpi_phnum; i++) {
+          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+            auto vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
+            MemMap::MapDummy(info->dlpi_name, vaddr, info->dlpi_phdr[i].p_memsz);
+          }
+        }
+      }
+      return 0;
+    }
+    std::string so_name;
+  } context;
+  context.so_name = elf_filename;
+  dl_iterate_phdr(dl_iterate_context::callback, &context);
+
   return Setup(abs_dex_location, error_msg);
+#endif  // __APPLE__
 }
 
 bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
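
For background on the dl_iterate_phdr callback used above: the linker invokes the callback once per loaded object, passing its name, load bias (dlpi_addr) and program headers; the runtime start of a PT_LOAD segment is dlpi_addr + p_vaddr and it spans p_memsz bytes. A standalone sketch of that pattern, independent of ART and purely illustrative:

    #ifndef __APPLE__
    #include <link.h>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Print the PT_LOAD segments of the loaded object whose name matches 'data' (a C string).
    static int PrintLoadSegments(struct dl_phdr_info* info, size_t /* size */, void* data) {
      const char* wanted = static_cast<const char*>(data);
      if (info->dlpi_name != nullptr && strcmp(info->dlpi_name, wanted) == 0) {
        for (int i = 0; i < info->dlpi_phnum; ++i) {
          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
            // dlpi_addr is the load bias; adding the segment's p_vaddr gives its runtime start.
            uintptr_t start = info->dlpi_addr + info->dlpi_phdr[i].p_vaddr;
            printf("%s: segment at %#lx, %zu bytes\n", wanted,
                   static_cast<unsigned long>(start),
                   static_cast<size_t>(info->dlpi_phdr[i].p_memsz));
          }
        }
      }
      return 0;  // Keep iterating over the remaining loaded objects.
    }

    // Usage: dl_iterate_phdr(PrintLoadSegments, const_cast<char*>("/path/to/libfoo.so"));
    #endif
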
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index d341ee1..8d84c35 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -31,7 +31,7 @@
 template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup>
 inline MirrorType* ReadBarrier::Barrier(
     mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) {
-  const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
+  constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier;
   if (with_read_barrier && kUseBakerReadBarrier) {
     // The higher bits of the rb ptr, rb_ptr_high_bits (must be zero)
     // is used to create artificial data dependency from the is_gray
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index cb7f5f3..66ec7cc 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -646,6 +646,10 @@
 
   // Create the thread pools.
   heap_->CreateThreadPool();
+  // Reset the gc performance data at zygote fork so that the GCs
+  // before fork aren't attributed to an app.
+  heap_->ResetGcPerformanceInfo();
+
   if (jit_.get() == nullptr && jit_options_->UseJIT()) {
     // Create the JIT if the flag is set and we haven't already create it (happens for run-tests).
     CreateJit();
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 922334e..4a307d5 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -30,6 +30,8 @@
 // If a default value is omitted here, T{} is used as the default value, which is
 // almost-always the value of the type as if it was memset to all 0.
 //
+// Please keep the columns aligned if possible when adding new rows.
+//
 
 // Parse-able keys from the command line.
 RUNTIME_OPTIONS_KEY (Unit,                Zygote)
@@ -64,9 +66,9 @@
 RUNTIME_OPTIONS_KEY (Unit,                LowMemoryMode)
 RUNTIME_OPTIONS_KEY (bool,                UseTLAB,                        (kUseTlab || kUseReadBarrier))
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
-RUNTIME_OPTIONS_KEY (bool,                UseJIT,      false)
-RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold, jit::Jit::kDefaultCompileThreshold)
-RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheCapacity, jit::JitCodeCache::kDefaultCapacity)
+RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
+RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheCapacity,           jit::JitCodeCache::kDefaultCapacity)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
@@ -105,9 +107,12 @@
                                           ImageCompilerOptions)  // -Ximage-compiler-option ...
 RUNTIME_OPTIONS_KEY (bool,                Verify,                         true)
 RUNTIME_OPTIONS_KEY (std::string,         NativeBridge)
+RUNTIME_OPTIONS_KEY (unsigned int,        ZygoteMaxFailedBoots,           10)
+RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 
 // Not parse-able from command line, but can be provided explicitly.
+// (Do not add anything here that is defined in ParsedOptions::MakeParser)
 RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \
                                           BootClassPathDexList)  // TODO: make unique_ptr
 RUNTIME_OPTIONS_KEY (InstructionSet,      ImageInstructionSet,            kRuntimeISA)
@@ -120,7 +125,5 @@
                                                                           // We don't call abort(3) by default; see
                                                                           // Runtime::Abort.
 RUNTIME_OPTIONS_KEY (void (*)(),          HookAbort,                      nullptr)
-RUNTIME_OPTIONS_KEY (unsigned int,        ZygoteMaxFailedBoots,           10)
-RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 
 #undef RUNTIME_OPTIONS_KEY
diff --git a/runtime/stack.h b/runtime/stack.h
index 79d2f40..d60714f 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -95,6 +95,8 @@
   }
   ~ShadowFrame() {}
 
+  // TODO(iam): Clean up the references array since it is always there;
+  // we don't need the conditionals.
   bool HasReferenceArray() const {
     return true;
   }
@@ -149,6 +151,9 @@
     return *reinterpret_cast<unaligned_double*>(vreg);
   }
 
+  // Look up the reference given its virtual register number.
+  // On non-moving collectors, a non-null result does not guarantee that the vreg currently
+  // holds a reference. If not certain, check that the raw value from GetVReg is equal to this.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   mirror::Object* GetVRegReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, NumberOfVRegs());
@@ -283,6 +288,8 @@
   ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method,
               uint32_t dex_pc, bool has_reference_array)
       : number_of_vregs_(num_vregs), link_(link), method_(method), dex_pc_(dex_pc) {
+    // TODO(iam): Remove this parameter, it's an artifact of portable removal.
+    DCHECK(has_reference_array);
     if (has_reference_array) {
       memset(vregs_, 0, num_vregs * (sizeof(uint32_t) + sizeof(StackReference<mirror::Object>)));
     } else {
@@ -306,6 +313,15 @@
   ShadowFrame* link_;
   ArtMethod* method_;
   uint32_t dex_pc_;
+
+  // This is a two-part array:
+  //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
+  //    bytes.
+  //  - [number_of_vregs..number_of_vregs*2) holds only reference registers. Each element here is
+  //    ptr-sized.
+  // In other words, when a primitive is stored in vX, the second (reference) part of the array will
+  // be null. When a reference is stored in vX, the second (reference) part of the array will be a
+  // copy of vX.
   uint32_t vregs_[0];
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(ShadowFrame);
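
To make the layout comment above concrete: the reference slots begin immediately after the number_of_vregs_ 32-bit primitive slots, so reference slot i sits at that boundary plus i. The sketch below shows how GetVRegReference can locate it, assuming the layout described in the comment; it mirrors existing behaviour and is not part of the patch.

    // Sketch only: compute the address of reference slot 'i' in the flexible vregs_ array.
    static const StackReference<mirror::Object>* ReferenceSlot(const uint32_t* vregs,
                                                               uint32_t number_of_vregs,
                                                               size_t i) {
      // The reference part starts right after the 32-bit primitive part.
      const uint32_t* vreg_end = vregs + number_of_vregs;
      return reinterpret_cast<const StackReference<mirror::Object>*>(vreg_end) + i;
    }
    // GetVRegReference(i) then reads ReferenceSlot(vregs_, number_of_vregs_, i)->AsMirrorPtr().
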
diff --git a/runtime/thread.cc b/runtime/thread.cc
index fe98b0a..fe8b0d8 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2578,12 +2578,11 @@
 }
 
 void Thread::ClearDebugInvokeReq() {
-  CHECK(Dbg::IsDebuggerActive());
   CHECK(GetInvokeReq() != nullptr) << "Debug invoke req not active in thread " << *this;
   CHECK(Thread::Current() == this) << "Debug invoke must be finished by the thread itself";
-  // We do not own the DebugInvokeReq* so we must not delete it, it is the responsibility of
-  // the owner (the JDWP thread).
+  DebugInvokeReq* req = tlsPtr_.debug_invoke_req;
   tlsPtr_.debug_invoke_req = nullptr;
+  delete req;
 }
 
 void Thread::PushVerifier(verifier::MethodVerifier* verifier) {
diff --git a/runtime/thread.h b/runtime/thread.h
index 9311bef..0e71c08 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -781,15 +781,14 @@
   void DeactivateSingleStepControl();
 
   // Sets debug invoke request for debugging. When the thread is resumed,
-  // it executes the method described by this request then suspends itself.
-  // The thread does not take ownership of the given DebugInvokeReq*, it is
-  // owned by the JDWP thread which is waiting for the execution of the
-  // method.
+  // it executes the method described by this request then sends the reply
+  // before suspending itself. The thread takes ownership of the given
+  // DebugInvokeReq*. It is deleted by a call to ClearDebugInvokeReq.
   void SetDebugInvokeReq(DebugInvokeReq* req);
 
   // Clears debug invoke request for debugging. When the thread completes
-  // method invocation, it clears its debug invoke request, signals the
-  // JDWP thread and suspends itself.
+  // method invocation, it deletes its debug invoke request and suspends
+  // itself.
   void ClearDebugInvokeReq();
 
   // Returns the fake exception used to activate deoptimization.
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index af9ba68..b697b43 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -875,31 +875,36 @@
 
   // The debugger thread must not suspend itself due to debugger activity!
   Thread* debug_thread = Dbg::GetDebugThread();
-  CHECK(debug_thread != nullptr);
   CHECK(self != debug_thread);
   CHECK_NE(self->GetState(), kRunnable);
   Locks::mutator_lock_->AssertNotHeld(self);
 
-  {
+  // The debugger may have detached while we were executing an invoke request. In that case, we
+  // must not suspend ourselves.
+  DebugInvokeReq* pReq = self->GetInvokeReq();
+  const bool skip_thread_suspension = (pReq != nullptr && !Dbg::IsDebuggerActive());
+  if (!skip_thread_suspension) {
     // Collisions with other suspends aren't really interesting. We want
     // to ensure that we're the only one fiddling with the suspend count
     // though.
     MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     self->ModifySuspendCount(self, +1, true);
     CHECK_GT(self->GetSuspendCount(), 0);
+
+    VLOG(threads) << *self << " self-suspending (debugger)";
+  } else {
+    // We must no longer be subject to debugger suspension.
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+    CHECK_EQ(self->GetDebugSuspendCount(), 0) << "Debugger detached without resuming us";
+
+    VLOG(threads) << *self << " not self-suspending because debugger detached during invoke";
   }
 
-  VLOG(threads) << *self << " self-suspending (debugger)";
-
-  // Tell JDWP we've completed invocation and are ready to suspend.
-  DebugInvokeReq* const pReq = self->GetInvokeReq();
+  // If the debugger requested an invoke, we need to send the reply and clear the request.
   if (pReq != nullptr) {
-    // Clear debug invoke request before signaling.
+    Dbg::FinishInvokeMethod(pReq);
     self->ClearDebugInvokeReq();
-
-    VLOG(jdwp) << "invoke complete, signaling";
-    MutexLock mu(self, pReq->lock);
-    pReq->cond.Signal(self);
+    pReq = nullptr;  // The object has been deleted; clear the pointer for safety.
   }
 
   // Tell JDWP that we've completed suspension. The JDWP thread can't
diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java
index f74a17e..4914ba2 100644
--- a/test/098-ddmc/src/Main.java
+++ b/test/098-ddmc/src/Main.java
@@ -43,7 +43,9 @@
 
         System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248");
         final int overflowAllocations = 64 * 1024;  // Won't fit in unsigned 16-bit value.
-        // Keep the new objects live so they are not garbage collected.
+        // TODO: Temporary fix. Keep the new objects live so they are not garbage collected.
+        // This causes an OOM exception in GC stress tests. The root cause is the changed behaviour
+        // of getRecentAllocations(); we are working on restoring its old behaviour. b/20037135
         Object[] objects = new Object[overflowAllocations];
         for (int i = 0; i < overflowAllocations; i++) {
             objects[i] = new Object();
@@ -53,6 +55,12 @@
         System.out.println("after > before=" + (after.numberOfEntries > before.numberOfEntries));
         System.out.println("after.numberOfEntries=" + after.numberOfEntries);
 
+        // TODO: Temporary fix as above. b/20037135
+        objects = null;
+        Runtime.getRuntime().gc();
+        final int fillerStrings = 16 * 1024;
+        String[] strings = new String[fillerStrings];
+
         System.out.println("Disable and confirm back to empty");
         DdmVmInternal.enableRecentAllocations(false);
         System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus());
@@ -68,8 +76,8 @@
         System.out.println("Confirm we can reenable twice in a row without losing allocations");
         DdmVmInternal.enableRecentAllocations(true);
         System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus());
-        for (int i = 0; i < 16 * 1024; i++) {
-            objects[i] = new String("fnord");
+        for (int i = 0; i < fillerStrings; i++) {
+            strings[i] = new String("fnord");
         }
         Allocations first = new Allocations(DdmVmInternal.getRecentAllocations());
         DdmVmInternal.enableRecentAllocations(true);
diff --git a/test/137-cfi/run b/test/137-cfi/run
new file mode 100755
index 0000000..78cf2aa
--- /dev/null
+++ b/test/137-cfi/run
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Temporarily disable address space layout randomization (ASLR).
+# This is needed on the host so that the linker loads core.oat at a fixed address.
+export LD_USE_LOAD_BIAS=1
+
+exec ${RUN} "$@"
diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java
index 658ba53..a88adc1 100644
--- a/test/137-cfi/src/Main.java
+++ b/test/137-cfi/src/Main.java
@@ -26,7 +26,7 @@
 public class Main implements Comparator<Main> {
   // Whether to test local unwinding. Libunwind uses linker info to find executables. As we do
   // not dlopen at the moment, this doesn't work, so keep it off for now.
-  public final static boolean TEST_LOCAL_UNWINDING = false;
+  public final static boolean TEST_LOCAL_UNWINDING = true;
 
   // Unwinding another process, modelling debuggerd. This doesn't use the linker, so should work
   // no matter whether we're using dlopen or not.
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java
index 5754723..a4280de 100644
--- a/test/482-checker-loop-back-edge-use/src/Main.java
+++ b/test/482-checker-loop-back-edge-use/src/Main.java
@@ -36,8 +36,8 @@
   }
 
   /// CHECK-START: void Main.loop3(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,62)} uses:[58,62]
-  /// CHECK:         Goto            liveness:60
+  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,64)} uses:[60,64]
+  /// CHECK:         Goto            liveness:62
 
   /// CHECK-START: void Main.loop3(boolean) liveness (after)
   /// CHECK-NOT:     Goto liveness:56
@@ -63,9 +63,9 @@
   }
 
   /// CHECK-START: void Main.loop5(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,52)} uses:[35,44,48,52]
-  /// CHECK:         Goto            liveness:46
-  /// CHECK:         Goto            liveness:50
+  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,54)} uses:[37,46,50,54]
+  /// CHECK:         Goto            liveness:48
+  /// CHECK:         Goto            liveness:52
   public static void loop5(boolean incoming) {
     // 'incoming' must have a use at both back edges.
     while (Runtime.getRuntime() != null) {
@@ -76,8 +76,8 @@
   }
 
   /// CHECK-START: void Main.loop6(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,48)} uses:[26,48]
-  /// CHECK:         Goto            liveness:46
+  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,50)} uses:[26,50]
+  /// CHECK:         Goto            liveness:48
 
   /// CHECK-START: void Main.loop6(boolean) liveness (after)
   /// CHECK-NOT:     Goto            liveness:24
@@ -90,9 +90,9 @@
   }
 
   /// CHECK-START: void Main.loop7(boolean) liveness (after)
-  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,52)} uses:[34,43,48,52]
-  /// CHECK:         Goto            liveness:46
-  /// CHECK:         Goto            liveness:50
+  /// CHECK:         ParameterValue  liveness:4  ranges:{[4,54)} uses:[36,45,50,54]
+  /// CHECK:         Goto            liveness:48
+  /// CHECK:         Goto            liveness:52
   public static void loop7(boolean incoming) {
     // 'incoming' must have a use at both back edges.
     while (Runtime.getRuntime() != null) {
@@ -102,9 +102,9 @@
   }
 
   /// CHECK-START: void Main.loop8() liveness (after)
-  /// CHECK:         StaticFieldGet  liveness:14 ranges:{[14,46)} uses:[37,42,46]
-  /// CHECK:         Goto            liveness:40
-  /// CHECK:         Goto            liveness:44
+  /// CHECK:         StaticFieldGet  liveness:14 ranges:{[14,48)} uses:[39,44,48]
+  /// CHECK:         Goto            liveness:42
+  /// CHECK:         Goto            liveness:46
   public static void loop8() {
     // 'incoming' must have a use at both back edges.
     boolean incoming = field;
@@ -114,8 +114,8 @@
   }
 
   /// CHECK-START: void Main.loop9() liveness (after)
-  /// CHECK:         StaticFieldGet  liveness:24 ranges:{[24,38)} uses:[33,38]
-  /// CHECK:         Goto            liveness:40
+  /// CHECK:         StaticFieldGet  liveness:26 ranges:{[26,40)} uses:[35,40]
+  /// CHECK:         Goto            liveness:42
   public static void loop9() {
     while (Runtime.getRuntime() != null) {
       // 'incoming' must only have a use in the inner loop.
diff --git a/test/496-checker-inlining-and-class-loader/expected.txt b/test/496-checker-inlining-and-class-loader/expected.txt
new file mode 100644
index 0000000..c6fcb51
--- /dev/null
+++ b/test/496-checker-inlining-and-class-loader/expected.txt
@@ -0,0 +1,4 @@
+Request for LoadedByMyClassLoader
+Request for Main
+In between the two calls.
+In $noinline$bar
diff --git a/test/496-checker-inlining-and-class-loader/info.txt b/test/496-checker-inlining-and-class-loader/info.txt
new file mode 100644
index 0000000..aa4b256
--- /dev/null
+++ b/test/496-checker-inlining-and-class-loader/info.txt
@@ -0,0 +1,2 @@
+Regression test to ensure compilers preserve JLS
+semantics of class loading.
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
new file mode 100644
index 0000000..f6d0b41
--- /dev/null
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.List;
+
+class MyClassLoader extends ClassLoader {
+  MyClassLoader() throws Exception {
+    super(MyClassLoader.class.getClassLoader());
+
+    // Some magic to get access to the pathList field of BaseDexClassLoader.
+    ClassLoader loader = getClass().getClassLoader();
+    Class<?> baseDexClassLoader = loader.getClass().getSuperclass();
+    Field f = baseDexClassLoader.getDeclaredField("pathList");
+    f.setAccessible(true);
+    Object pathList = f.get(loader);
+
+    // Some magic to get access to the dexField field of pathList.
+    f = pathList.getClass().getDeclaredField("dexElements");
+    f.setAccessible(true);
+    dexElements = (Object[]) f.get(pathList);
+    dexFileField = dexElements[0].getClass().getDeclaredField("dexFile");
+    dexFileField.setAccessible(true);
+  }
+
+  Object[] dexElements;
+  Field dexFileField;
+
+  protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException {
+    System.out.println("Request for " + className);
+
+    // We're only going to handle LoadedByMyClassLoader.
+    if (!className.equals("LoadedByMyClassLoader")) {
+      return getParent().loadClass(className);
+    }
+
+    // Mimic what DexPathList.findClass is doing.
+    try {
+      for (Object element : dexElements) {
+        Object dex = dexFileField.get(element);
+
+        if (dex != null) {
+          Method method = dex.getClass().getDeclaredMethod(
+              "loadClassBinaryName", String.class, ClassLoader.class, List.class);
+          Class clazz = (Class)method.invoke(dex, className, this, null);
+          if (clazz != null) {
+            return clazz;
+          }
+        }
+      }
+    } catch (Exception e) { /* Ignore */ }
+    return null;
+  }
+}
+
+class LoadedByMyClassLoader {
+  /// CHECK-START: void LoadedByMyClassLoader.bar() inliner (before)
+  /// CHECK:      LoadClass
+  /// CHECK-NEXT: ClinitCheck
+  /// CHECK-NEXT: InvokeStaticOrDirect
+  /// CHECK-NEXT: LoadClass
+  /// CHECK-NEXT: ClinitCheck
+  /// CHECK-NEXT: StaticFieldGet
+  /// CHECK-NEXT: LoadString
+  /// CHECK-NEXT: NullCheck
+  /// CHECK-NEXT: InvokeVirtual
+
+  /// CHECK-START: void LoadedByMyClassLoader.bar() inliner (after)
+  /// CHECK:      LoadClass
+  /// CHECK-NEXT: ClinitCheck
+                /* We inlined Main.$inline$bar */
+  /// CHECK-NEXT: LoadClass
+  /// CHECK-NEXT: ClinitCheck
+  /// CHECK-NEXT: StaticFieldGet
+  /// CHECK-NEXT: LoadString
+  /// CHECK-NEXT: NullCheck
+  /// CHECK-NEXT: InvokeVirtual
+
+  /// CHECK-START: void LoadedByMyClassLoader.bar() register (before)
+                /* Load and initialize Main */
+  /// CHECK:      LoadClass gen_clinit_check:true
+                /* Load and initialize System */
+  /// CHECK-NEXT: LoadClass gen_clinit_check:true
+  /// CHECK-NEXT: StaticFieldGet
+  /// CHECK-NEXT: LoadString
+  /// CHECK-NEXT: NullCheck
+  /// CHECK-NEXT: InvokeVirtual
+  public static void bar() {
+    Main.$inline$bar();
+    System.out.println("In between the two calls.");
+    Main.$noinline$bar();
+  }
+}
+
+class Main {
+  public static void main(String[] args) throws Exception {
+    MyClassLoader o = new MyClassLoader();
+    Class foo = o.loadClass("LoadedByMyClassLoader");
+    Method m = foo.getDeclaredMethod("bar");
+    m.invoke(null);
+  }
+
+  public static void $inline$bar() {
+  }
+
+  public static void $noinline$bar() {
+    try {
+      System.out.println("In $noinline$bar");
+    } catch (Throwable t) { /* Ignore */ }
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index e95f147..82a6295 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -229,16 +229,10 @@
 
 TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
 
-# Tests that are broken with GC stress.
-TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
-
-ifneq (,$(filter gcstress,$(GC_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \
-      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DBEUGGABLE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
+# 098-ddmc is broken until we restore the old behavior of getRecentAllocations() of DDMS. b/20037135
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+    $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), 098-ddmc, $(ALL_ADDRESS_SIZES))
 
 # 115-native-bridge setup is complicated. Need to implement it correctly for the target.
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \
@@ -382,7 +376,8 @@
 
 # Known broken tests for the default compiler (Quick).
 TEST_ART_BROKEN_DEFAULT_RUN_TESTS := \
-  457-regs
+  457-regs \
+  496-checker-inlining-and-class-loader
 
 ifneq (,$(filter default,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 77e6b1a..da7131c 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -19,7 +19,7 @@
   exit 1
 fi
 
-common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex out/host/linux-x86/bin/adb jsr166-tests"
+common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex out/host/linux-x86/bin/adb jsr166-tests conscrypt-tests"
 android_root="/data/local/tmp/system"
 linker="linker"
 mode="target"
@@ -60,7 +60,7 @@
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg build-art-host-tests $common_targets"
+  make_command="make $j_arg build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
   echo "Executing $make_command"
   $make_command
 elif [[ $mode == "target" ]]; then
@@ -70,7 +70,7 @@
   # Use '-e' to force the override of TARGET_GLOBAL_LDFLAGS.
   # Also, we build extra tools that will be used by tests, so that
   # they are compiled with our own linker.
-  make_command="make -e $j_arg build-art-target-tests $common_targets libjavacrypto linker toybox toolbox sh"
+  make_command="make -e $j_arg build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh"
   echo "Executing env $env $make_command"
   env $env $make_command
 fi
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index b053f0d..5d58c29 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -132,5 +132,42 @@
   result: EXEC_FAILED,
   names: ["libcore.javax.crypto.CipherTest#testCipher_ShortBlock_Failure",
           "libcore.javax.crypto.CipherTest#testCipher_Success"]
+},
+{
+  description: "Tests newly added to the build bot (b/21722374)",
+  result: EXEC_FAILED,
+  names: ["com.android.org.conscrypt.NativeCryptoTest#test_SSL_do_handshake_clientCertificateRequested_throws_after_renegotiate",
+          "com.android.org.conscrypt.NativeCryptoTest#test_SSL_new",
+          "com.android.org.conscrypt.NativeCryptoTest#test_SSL_renegotiate",
+          "com.android.org.conscrypt.NativeCryptoTest#test_SSL_set_session_creation_enabled",
+          "libcore.java.security.KeyPairGeneratorTest#test_getInstance_provider1",
+          "libcore.java.security.KeyStoreTest",
+          "libcore.java.security.ProviderTest#test_Provider_getServices",
+          "libcore.java.security.SignatureTest#test18566_AlgorithmOid_MissingNull_Failure",
+          "libcore.java.security.SignatureTest#testVerify_NONEwithRSA_Key_SignatureTooLarge_Failure",
+          "libcore.java.security.SignatureTest#testVerify_SHA1withRSA_Key_WrongExpectedSignature_Failure",
+          "libcore.java.security.SignatureTest#test_getInstance",
+          "libcore.javax.net.ssl.KeyManagerFactoryTest#test_KeyManagerFactory_getDefaultAlgorithm",
+          "libcore.javax.net.ssl.KeyManagerFactoryTest#test_KeyManagerFactory_getInstance",
+          "libcore.javax.net.ssl.SSLContextTest#test_SSLContext_defaultConfiguration",
+          "libcore.javax.net.ssl.SSLSocketTest#test_SSLSocket_NoEnabledCipherSuites_Failure",
+          "libcore.javax.net.ssl.SSLSocketTest#test_SSLSocket_sendsTlsFallbackScsv_InappropriateFallback_Failure"]
+},
+{
+  description: "Tests newly added to the build bot (b/21722374). Not failing with adb sync; ./run-libcore-tests.sh",
+  result: EXEC_FAILED,
+  names: ["com.android.org.conscrypt.NativeCryptoTest#test_SSL_set_cipher_lists",
+          "libcore.java.security.KeyStoreTest#test_KeyStore_load_InputStream",
+          "libcore.java.security.KeyStoreTest#test_KeyStore_load_LoadStoreParameter",
+          "libcore.java.security.KeyStoreTest#test_KeyStore_size",
+          "libcore.java.security.MessageDigestTest#test_getInstance",
+          "libcore.java.security.cert.X509CertificateTest#test_Provider",
+          "libcore.javax.net.ssl.TrustManagerFactoryTest#test_TrustManagerFactory_getDefaultAlgorithm",
+          "libcore.javax.net.ssl.TrustManagerFactoryTest#test_TrustManagerFactory_getInstance"]
+},
+{
+  description: "Test failing because of wrong dates in build bot devices",
+  result: EXEC_FAILED,
+  names: ["libcore.java.security.cert.X509CertificateNistPkitsTest"]
 }
 ]
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 344d2ded..03016fa 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -22,6 +22,9 @@
 # Jar containing jsr166 tests.
 jsr166_test_jar=out/target/common/obj/JAVA_LIBRARIES/jsr166-tests_intermediates/javalib.jar
 
+# Jar containing conscrypt tests.
+conscrypt_test_jar=out/target/common/obj/JAVA_LIBRARIES/conscrypt-tests_intermediates/javalib.jar
+
 # Jar containing all the other tests.
 test_jar=out/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/javalib.jar
 
@@ -33,13 +36,17 @@
 fi
 
 # Packages that currently work correctly with the expectation files.
-working_packages=("libcore.icu"
+working_packages=("com.android.org.conscrypt"
+                  "dalvik.system"
+                  "libcore.icu"
                   "libcore.io"
                   "libcore.java.lang"
                   "libcore.java.math"
+                  "libcore.java.security"
                   "libcore.java.text"
                   "libcore.java.util"
                   "libcore.javax.crypto"
+                  "libcore.javax.net"
                   "libcore.javax.security"
                   "libcore.javax.sql"
                   "libcore.javax.xml"
@@ -65,4 +72,4 @@
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"
 echo ${working_packages[@]} | tr " " "\n"
-vogar $@ --expectations art/tools/libcore_failures.txt --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}
+vogar $@ --expectations art/tools/libcore_failures.txt --classpath $conscrypt_test_jar --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]}