Merge "ART: A workaround for a wrongly-read caller's frame"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 5b83056..ef5819d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -177,7 +177,7 @@
     LOCAL_CLANG := $(ART_TARGET_CLANG)
     LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) $(ART_TARGET_DEBUG_CFLAGS)
     LOCAL_CFLAGS_x86 := $(ART_TARGET_CFLAGS_x86)
-    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils
+    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
     LOCAL_STATIC_LIBRARIES += libgtest
     LOCAL_MODULE_PATH_32 := $(ART_BASE_NATIVETEST_OUT)
     LOCAL_MODULE_PATH_64 := $(ART_BASE_NATIVETEST_OUT)64
@@ -200,7 +200,7 @@
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
     LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libz-host
-    LOCAL_STATIC_LIBRARIES += libcutils
+    LOCAL_STATIC_LIBRARIES += libcutils libvixl
     ifneq ($(WITHOUT_HOST_CLANG),true)
         # GCC host compiled tests fail with this linked, presumably due to destructors that run.
         LOCAL_STATIC_LIBRARIES += libgtest_host
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 6c8c85d..38d37b0 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -324,7 +324,6 @@
 
 enum ThrowKind {
   kThrowNullPointer,
-  kThrowDivZero,
   kThrowArrayBounds,
   kThrowConstantArrayBounds,
   kThrowNoSuchMethod,
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 1abb91d..8b4171d 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -90,8 +90,7 @@
  *     neg   rX
  * done:
  */
-void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
   LIR* target1;
   LIR* target2;
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
@@ -101,7 +100,7 @@
   OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
   LIR* branch1 = OpCondBranch(kCondLt, NULL);
   LIR* branch2 = OpCondBranch(kCondGt, NULL);
-  OpRegRegReg(kOpSub, t_reg, rl_src1.reg, rl_src2.reg);
+  OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
   LIR* branch3 = OpCondBranch(kCondEq, NULL);
 
   LIR* it = OpIT(kCondHi, "E");
@@ -917,7 +916,7 @@
   RegStorage t_reg = AllocTemp();
   NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
   FreeTemp(t_reg);
-  GenCheck(kCondEq, kThrowDivZero);
+  AddDivZeroSlowPath(kCondEq);
 }
 
 // Test suspend flag, return target of taken suspend branch
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 80a88b8..8806e68 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -412,7 +412,8 @@
       result = GenInlineIGet(mir_graph, bb, invoke, move_result, method, method_idx);
       break;
     case kInlineOpIPut:
-      result = GenInlineIPut(mir_graph, bb, invoke, method, method_idx);
+      move_result = mir_graph->FindMoveResult(bb, invoke);
+      result = GenInlineIPut(mir_graph, bb, invoke, move_result, method, method_idx);
       break;
     default:
       LOG(FATAL) << "Unexpected inline op: " << method.opcode;
@@ -646,7 +647,11 @@
   bool object_is_this = (data.method_is_static == 0u && data.object_arg == 0u);
   if (!object_is_this) {
     // TODO: Implement inlining of IGET on non-"this" registers (needs correct stack trace for NPE).
-    return false;
+    // Allow synthetic accessors. We don't care about losing their stack frame in NPE.
+    if (!InlineMethodAnalyser::IsSyntheticAccessor(
+        mir_graph->GetMethodLoweringInfo(invoke).GetTargetMethod())) {
+      return false;
+    }
   }
 
   if (object_is_this) {
@@ -672,7 +677,8 @@
 }
 
 bool DexFileMethodInliner::GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                                         const InlineMethod& method, uint32_t method_idx) {
+                                         MIR* move_result, const InlineMethod& method,
+                                         uint32_t method_idx) {
   CompilationUnit* cu = mir_graph->GetCurrentDexCompilationUnit()->GetCompilationUnit();
   if (cu->enable_debug & (1 << kDebugSlowFieldPath)) {
     return false;
@@ -683,19 +689,32 @@
   DCHECK_EQ(InlineMethodAnalyser::IPutVariant(opcode), data.op_variant);
   uint32_t object_reg = GetInvokeReg(invoke, data.object_arg);
   uint32_t src_reg = GetInvokeReg(invoke, data.src_arg);
+  uint32_t return_reg =
+      data.return_arg_plus1 != 0u ? GetInvokeReg(invoke, data.return_arg_plus1 - 1u) : 0u;
 
   if (opcode == Instruction::IPUT_WIDE && !WideArgIsInConsecutiveDalvikRegs(invoke, data.src_arg)) {
     // The two halfs of the source value are not in consecutive dalvik registers in INVOKE.
     return false;
   }
 
+  DCHECK(move_result == nullptr || data.return_arg_plus1 != 0u);
+  if (move_result != nullptr && move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_WIDE &&
+      !WideArgIsInConsecutiveDalvikRegs(invoke, data.return_arg_plus1 - 1u)) {
+    // The two halfs of the return value are not in consecutive dalvik registers in INVOKE.
+    return false;
+  }
+
   DCHECK_EQ(data.method_is_static != 0u,
             invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC ||
             invoke->dalvikInsn.opcode == Instruction::INVOKE_STATIC_RANGE);
   bool object_is_this = (data.method_is_static == 0u && data.object_arg == 0u);
   if (!object_is_this) {
     // TODO: Implement inlining of IPUT on non-"this" registers (needs correct stack trace for NPE).
-    return false;
+    // Allow synthetic accessors. We don't care about losing their stack frame in NPE.
+    if (!InlineMethodAnalyser::IsSyntheticAccessor(
+        mir_graph->GetMethodLoweringInfo(invoke).GetTargetMethod())) {
+      return false;
+    }
   }
 
   if (object_is_this) {
@@ -703,7 +722,7 @@
     invoke->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
   }
 
-  MIR* insn = AllocReplacementMIR(mir_graph, invoke, nullptr);
+  MIR* insn = AllocReplacementMIR(mir_graph, invoke, move_result);
   insn->dalvikInsn.opcode = opcode;
   insn->dalvikInsn.vA = src_reg;
   insn->dalvikInsn.vB = object_reg;
@@ -715,6 +734,24 @@
   DCHECK_EQ(data.is_volatile, mir_graph->GetIFieldLoweringInfo(insn).IsVolatile() ? 1u : 0u);
 
   bb->InsertMIRAfter(invoke, insn);
+
+  if (move_result != nullptr) {
+    MIR* move = AllocReplacementMIR(mir_graph, invoke, move_result);
+    insn->width = invoke->width;
+    move->offset = move_result->offset;
+    move->width = move_result->width;
+    if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT) {
+      move->dalvikInsn.opcode = Instruction::MOVE_FROM16;
+    } else if (move_result->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) {
+      move->dalvikInsn.opcode = Instruction::MOVE_OBJECT_FROM16;
+    } else {
+      DCHECK_EQ(move_result->dalvikInsn.opcode, Instruction::MOVE_RESULT_WIDE);
+      move->dalvikInsn.opcode = Instruction::MOVE_WIDE_FROM16;
+    }
+    move->dalvikInsn.vA = move_result->dalvikInsn.vA;
+    move->dalvikInsn.vB = return_reg;
+    bb->InsertMIRAfter(insn, move);
+  }
   return true;
 }
 
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index b4e190a..c03f89c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -302,7 +302,7 @@
     static bool GenInlineIGet(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
                               MIR* move_result, const InlineMethod& method, uint32_t method_idx);
     static bool GenInlineIPut(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
-                              const InlineMethod& method, uint32_t method_idx);
+                              MIR* move_result, const InlineMethod& method, uint32_t method_idx);
 
     ReaderWriterMutex lock_;
     /*
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index a3fb420..4522379 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -42,17 +42,6 @@
   barrier->u.m.def_mask = ENCODE_ALL;
 }
 
-// TODO: need to do some work to split out targets with
-// condition codes and those without
-LIR* Mir2Lir::GenCheck(ConditionCode c_code, ThrowKind kind) {
-  DCHECK_NE(cu_->instruction_set, kMips);
-  LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, current_dalvik_offset_);
-  LIR* branch = OpCondBranch(c_code, tgt);
-  // Remember branch target - will process later
-  throw_launchpads_.Insert(tgt);
-  return branch;
-}
-
 LIR* Mir2Lir::GenImmedCheck(ConditionCode c_code, RegStorage reg, int imm_val, ThrowKind kind) {
   LIR* tgt;
   LIR* branch;
@@ -69,6 +58,38 @@
   return branch;
 }
 
+void Mir2Lir::AddDivZeroSlowPath(ConditionCode c_code) {
+  LIR* branch = OpCondBranch(c_code, nullptr);
+  AddDivZeroCheckSlowPath(branch);
+}
+
+void Mir2Lir::AddDivZeroSlowPath(ConditionCode c_code, RegStorage reg, int imm_val) {
+  LIR* branch;
+  if (c_code == kCondAl) {
+    branch = OpUnconditionalBranch(nullptr);
+  } else {
+    branch = OpCmpImmBranch(c_code, reg, imm_val, nullptr);
+  }
+  AddDivZeroCheckSlowPath(branch);
+}
+
+void Mir2Lir::AddDivZeroCheckSlowPath(LIR* branch) {
+  class DivZeroCheckSlowPath : public Mir2Lir::LIRSlowPath {
+   public:
+    DivZeroCheckSlowPath(Mir2Lir* m2l, LIR* branch)
+        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch) {
+    }
+
+    void Compile() {
+      m2l_->ResetRegPool();
+      m2l_->ResetDefTracking();
+      GenerateTargetLabel();
+      m2l_->CallRuntimeHelper(QUICK_ENTRYPOINT_OFFSET(4, pThrowDivZero), true);
+    }
+  };
+
+  AddSlowPath(new (arena_) DivZeroCheckSlowPath(this, branch));
+}
 
 /* Perform null-check on a register.  */
 LIR* Mir2Lir::GenNullCheck(RegStorage m_reg, int opt_flags) {
@@ -689,9 +710,6 @@
         }
         func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds);
         break;
-      case kThrowDivZero:
-        func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowDivZero);
-        break;
       case kThrowNoSuchMethod:
         OpRegCopy(TargetReg(kArg0), RegStorage::Solo32(v1));
         func_offset =
@@ -1533,7 +1551,7 @@
       rl_src1 = LoadValue(rl_src1, kCoreReg);
       rl_src2 = LoadValue(rl_src2, kCoreReg);
       if (check_zero) {
-          GenImmedCheck(kCondEq, rl_src2.reg, 0, kThrowDivZero);
+          AddDivZeroSlowPath(kCondEq, rl_src2.reg, 0);
       }
       rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, op == kOpDiv);
       done = true;
@@ -1544,7 +1562,7 @@
         rl_src1 = LoadValue(rl_src1, kCoreReg);
         rl_src2 = LoadValue(rl_src2, kCoreReg);
         if (check_zero) {
-            GenImmedCheck(kCondEq, rl_src2.reg, 0, kThrowDivZero);
+            AddDivZeroSlowPath(kCondEq, rl_src2.reg, 0);
         }
         rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, op == kOpDiv);
         done = true;
@@ -1559,7 +1577,7 @@
       RegStorage r_tgt = CallHelperSetup(func_offset);
       LoadValueDirectFixed(rl_src1, TargetReg(kArg0));
       if (check_zero) {
-        GenImmedCheck(kCondEq, TargetReg(kArg1), 0, kThrowDivZero);
+        AddDivZeroSlowPath(kCondEq, TargetReg(kArg1), 0);
       }
       // NOTE: callout here is not a safepoint.
       CallHelper(r_tgt, func_offset, false /* not a safepoint */);
@@ -1784,7 +1802,7 @@
     case Instruction::REM_INT_LIT8:
     case Instruction::REM_INT_LIT16: {
       if (lit == 0) {
-        GenImmedCheck(kCondAl, RegStorage::InvalidReg(), 0, kThrowDivZero);
+        AddDivZeroSlowPath(kCondAl, RegStorage::InvalidReg(), 0);
         return;
       }
       if ((opcode == Instruction::DIV_INT) ||
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 396a709..d827568 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -87,6 +87,12 @@
   return call_inst;
 }
 
+void Mir2Lir::CallRuntimeHelper(ThreadOffset<4> helper_offset, bool safepoint_pc) {
+  RegStorage r_tgt = CallHelperSetup(helper_offset);
+  ClobberCallerSave();
+  CallHelper(r_tgt, helper_offset, safepoint_pc);
+}
+
 void Mir2Lir::CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc) {
   RegStorage r_tgt = CallHelperSetup(helper_offset);
   LoadConstant(TargetReg(kArg0), arg0);
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 5fe96d2..0492fdb 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -346,7 +346,7 @@
   DCHECK(reg.IsPair());   // TODO: support k64BitSolo.
   RegStorage t_reg = AllocTemp();
   OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
-  GenImmedCheck(kCondEq, t_reg, 0, kThrowDivZero);
+  AddDivZeroSlowPath(kCondEq, t_reg, 0);
   FreeTemp(t_reg);
 }
 
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 73fdc82..6fcdf70 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -120,7 +120,7 @@
 bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
   // FastInstance() already checked by DexFileMethodInliner.
   const InlineIGetIPutData& data = special.d.ifield_data;
-  if (data.method_is_static || data.object_arg != 0) {
+  if (data.method_is_static != 0u || data.object_arg != 0u) {
     // The object is not "this" and has to be null-checked.
     return false;
   }
@@ -151,10 +151,14 @@
 bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
   // FastInstance() already checked by DexFileMethodInliner.
   const InlineIGetIPutData& data = special.d.ifield_data;
-  if (data.method_is_static || data.object_arg != 0) {
+  if (data.method_is_static != 0u || data.object_arg != 0u) {
     // The object is not "this" and has to be null-checked.
     return false;
   }
+  if (data.return_arg_plus1 != 0u) {
+    // The setter returns a method argument which we don't support here.
+    return false;
+  }
 
   bool wide = (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE));
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 35f948e..6dbeb34 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -562,7 +562,8 @@
     void HandleThrowLaunchPads();
     void HandleSlowPaths();
     void GenBarrier();
-    LIR* GenCheck(ConditionCode c_code, ThrowKind kind);
+    void AddDivZeroSlowPath(ConditionCode c_code);
+    void AddDivZeroSlowPath(ConditionCode c_code, RegStorage reg, int imm_val);
     void MarkPossibleNullPointerException(int opt_flags);
     void MarkPossibleStackOverflowException();
     void ForceImplicitNullCheck(RegStorage reg, int opt_flags);
@@ -619,6 +620,7 @@
     LIR* CallHelper(RegStorage r_tgt, ThreadOffset<4> helper_offset, bool safepoint_pc,
                     bool use_link = true);
     RegStorage CallHelperSetup(ThreadOffset<4> helper_offset);
+    void CallRuntimeHelper(ThreadOffset<4> helper_offset, bool safepoint_pc);
     void CallRuntimeHelperImm(ThreadOffset<4> helper_offset, int arg0, bool safepoint_pc);
     void CallRuntimeHelperReg(ThreadOffset<4> helper_offset, RegStorage arg0, bool safepoint_pc);
     void CallRuntimeHelperRegLocation(ThreadOffset<4> helper_offset, RegLocation arg0,
@@ -1220,6 +1222,7 @@
      */
     bool GenSpecialIdentity(MIR* mir, const InlineMethod& special);
 
+    void AddDivZeroCheckSlowPath(LIR* branch);
 
   public:
     // TODO: add accessors for these.
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index c1d1e01..a5f3b61 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -629,7 +629,7 @@
 
   if (check_zero) {
     // Handle division by zero case.
-    GenImmedCheck(kCondEq, rs_r1, 0, kThrowDivZero);
+    AddDivZeroSlowPath(kCondEq, rs_r1, 0);
   }
 
   // Have to catch 0x80000000/-1 case, or we will get an exception!
@@ -885,7 +885,7 @@
   OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
 
   // In case of zero, throw ArithmeticException.
-  GenCheck(kCondEq, kThrowDivZero);
+  AddDivZeroSlowPath(kCondEq);
 
   // The temp is no longer needed so free it at this time.
   FreeTemp(t_reg);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a241d51..b66082d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -19,10 +19,8 @@
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 #include <utils/Trace.h>
 
-#include <fstream>
 #include <vector>
 #include <unistd.h>
-#include <utility>
 
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
@@ -372,7 +370,7 @@
 
   // Read the profile file if one is provided.
   if (profile_file != "") {
-    profile_ok_ = ReadProfile(profile_file);
+    profile_ok_ = ProfileHelper::LoadProfileMap(profile_map_, profile_file);
   }
 
   dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX);
@@ -2036,83 +2034,6 @@
     }
   }
 
-bool CompilerDriver::ReadProfile(const std::string& filename) {
-  VLOG(compiler) << "reading profile file " << filename;
-  struct stat st;
-  int err = stat(filename.c_str(), &st);
-  if (err == -1) {
-    VLOG(compiler) << "not found";
-    return false;
-  }
-  std::ifstream in(filename.c_str());
-  if (!in) {
-    VLOG(compiler) << "profile file " << filename << " exists but can't be opened";
-    VLOG(compiler) << "file owner: " << st.st_uid << ":" << st.st_gid;
-    VLOG(compiler) << "me: " << getuid() << ":" << getgid();
-    VLOG(compiler) << "file permissions: " << std::oct << st.st_mode;
-    VLOG(compiler) << "errno: " << errno;
-    return false;
-  }
-  // The first line contains summary information.
-  std::string line;
-  std::getline(in, line);
-  if (in.eof()) {
-    return false;
-  }
-  std::vector<std::string> summary_info;
-  Split(line, '/', summary_info);
-  if (summary_info.size() != 3) {
-    // Bad summary info.  It should be count/total/bootpath.
-    return false;
-  }
-  // This is the number of hits in all methods.
-  uint32_t total_count = 0;
-  for (int i = 0 ; i < 3; ++i) {
-    total_count += atoi(summary_info[i].c_str());
-  }
-
-  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
-  // Store the info in descending order given by the most used methods.
-  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
-  ProfileSet countSet;
-  while (!in.eof()) {
-    std::getline(in, line);
-    if (in.eof()) {
-      break;
-    }
-    std::vector<std::string> info;
-    Split(line, '/', info);
-    if (info.size() != 3) {
-      // Malformed.
-      break;
-    }
-    int count = atoi(info[1].c_str());
-    countSet.insert(std::make_pair(-count, info));
-  }
-
-  uint32_t curTotalCount = 0;
-  ProfileSet::iterator end = countSet.end();
-  const ProfileData* prevData = nullptr;
-  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
-    const std::string& methodname = it->second[0];
-    uint32_t count = -it->first;
-    uint32_t size = atoi(it->second[2].c_str());
-    double usedPercent = (count * 100.0) / total_count;
-
-    curTotalCount += count;
-    // Methods with the same count should be part of the same top K percentage bucket.
-    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
-      ? prevData->GetTopKUsedPercentage()
-      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
-
-    // Add it to the profile map.
-    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
-    profile_map_[methodname] = curData;
-    prevData = &curData;
-  }
-  return true;
-}
-
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
   if (!profile_ok_) {
     return true;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 802f859..d49523a 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -32,6 +32,7 @@
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "os.h"
+#include "profiler.h"
 #include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -594,43 +595,9 @@
     return cfi_info_.get();
   }
 
-  // Profile data.  This is generated from previous runs of the program and stored
-  // in a file.  It is used to determine whether to compile a particular method or not.
-  class ProfileData {
-   public:
-    ProfileData() : count_(0), method_size_(0), usedPercent_(0) {}
-    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
-      double usedPercent, double topKUsedPercentage) :
-      method_name_(method_name), count_(count), method_size_(method_size),
-      usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) {
-      // TODO: currently method_size_ and count_ are unused.
-      UNUSED(method_size_);
-      UNUSED(count_);
-    }
-
-    bool IsAbove(double v) const { return usedPercent_ >= v; }
-    double GetUsedPercent() const { return usedPercent_; }
-    uint32_t GetCount() const { return count_; }
-    double GetTopKUsedPercentage() const { return topKUsedPercentage_; }
-
-   private:
-    std::string method_name_;    // Method name.
-    uint32_t count_;             // Number of times it has been called.
-    uint32_t method_size_;       // Size of the method on dex instructions.
-    double usedPercent_;         // Percentage of how many times this method was called.
-    double topKUsedPercentage_;  // The percentage of the group that comprise K% of the total used
-                                 // methods this methods belongs to.
-  };
-
-  // Profile data is stored in a map, indexed by the full method name.
-  typedef std::map<const std::string, ProfileData> ProfileMap;
   ProfileMap profile_map_;
   bool profile_ok_;
 
-  // Read the profile data from the given file.  Calculates the percentage for each method.
-  // Returns false if there was no profile file or it was malformed.
-  bool ReadProfile(const std::string& filename);
-
   // Should the compiler run on this method given profile information?
   bool SkipCompilation(const std::string& method_name);
 
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f89583d..beccf01 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -44,15 +44,14 @@
   graph_->SetNumberOfInVRegs(number_of_parameters);
   const char* shorty = dex_compilation_unit_->GetShorty();
   int locals_index = locals_.Size() - number_of_parameters;
-  HBasicBlock* first_block = entry_block_->GetSuccessors()->Get(0);
   int parameter_index = 0;
 
   if (!dex_compilation_unit_->IsStatic()) {
     // Add the implicit 'this' argument, not expressed in the signature.
     HParameterValue* parameter = new (arena_) HParameterValue(parameter_index++);
-    first_block->AddInstruction(parameter);
+    entry_block_->AddInstruction(parameter);
     HLocal* local = GetLocalAt(locals_index++);
-    first_block->AddInstruction(new (arena_) HStoreLocal(local, parameter));
+    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter));
     number_of_parameters--;
   }
 
@@ -68,11 +67,11 @@
       default: {
         // integer and reference parameters.
         HParameterValue* parameter = new (arena_) HParameterValue(parameter_index++);
-        first_block->AddInstruction(parameter);
+        entry_block_->AddInstruction(parameter);
         HLocal* local = GetLocalAt(locals_index++);
         // Store the parameter value in the local that the dex code will use
         // to reference that parameter.
-        first_block->AddInstruction(new (arena_) HStoreLocal(local, parameter));
+        entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter));
         break;
       }
     }
@@ -87,6 +86,24 @@
   return true;
 }
 
+template<typename T>
+void HGraphBuilder::If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not) {
+  HInstruction* first = LoadLocal(instruction.VRegA());
+  HInstruction* second = LoadLocal(instruction.VRegB());
+  current_block_->AddInstruction(new (arena_) T(first, second));
+  if (is_not) {
+    current_block_->AddInstruction(new (arena_) HNot(current_block_->GetLastInstruction()));
+  }
+  current_block_->AddInstruction(new (arena_) HIf(current_block_->GetLastInstruction()));
+  HBasicBlock* target = FindBlockStartingAt(instruction.GetTargetOffset() + dex_offset);
+  DCHECK(target != nullptr);
+  current_block_->AddSuccessor(target);
+  target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
+  DCHECK(target != nullptr);
+  current_block_->AddSuccessor(target);
+  current_block_ = nullptr;
+}
+
 HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
   if (!CanHandleCodeItem(code_item)) {
     return nullptr;
@@ -238,6 +255,19 @@
       break;
     }
 
+    case Instruction::CONST_16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = GetConstant(instruction.VRegB_21s());
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::MOVE: {
+      HInstruction* value = LoadLocal(instruction.VRegB());
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
     case Instruction::RETURN_VOID: {
       current_block_->AddInstruction(new (arena_) HReturnVoid());
       current_block_->AddSuccessor(exit_block_);
@@ -246,17 +276,12 @@
     }
 
     case Instruction::IF_EQ: {
-      HInstruction* first = LoadLocal(instruction.VRegA());
-      HInstruction* second = LoadLocal(instruction.VRegB());
-      current_block_->AddInstruction(new (arena_) HEqual(first, second));
-      current_block_->AddInstruction(new (arena_) HIf(current_block_->GetLastInstruction()));
-      HBasicBlock* target = FindBlockStartingAt(instruction.GetTargetOffset() + dex_offset);
-      DCHECK(target != nullptr);
-      current_block_->AddSuccessor(target);
-      target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
-      DCHECK(target != nullptr);
-      current_block_->AddSuccessor(target);
-      current_block_ = nullptr;
+      If_22t<HEqual>(instruction, dex_offset, false);
+      break;
+    }
+
+    case Instruction::IF_NE: {
+      If_22t<HEqual>(instruction, dex_offset, true);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index df64d71..60d9982 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -79,6 +79,7 @@
   template<typename T> void Binop_12x(const Instruction& instruction);
   template<typename T> void Binop_22b(const Instruction& instruction, bool reverse);
   template<typename T> void Binop_22s(const Instruction& instruction, bool reverse);
+  template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset, bool is_not);
 
   ArenaAllocator* const arena_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 40a7b6f..7e63c69 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -33,8 +33,8 @@
   const GrowableArray<HBasicBlock*>* blocks = GetGraph()->GetBlocks();
   DCHECK(blocks->Get(0) == GetGraph()->GetEntryBlock());
   DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks->Get(1)));
-  CompileEntryBlock();
-  for (size_t i = 1; i < blocks->Size(); i++) {
+  GenerateFrameEntry();
+  for (size_t i = 0; i < blocks->Size(); i++) {
     CompileBlock(blocks->Get(i));
   }
   size_t code_size = GetAssembler()->CodeSize();
@@ -43,30 +43,11 @@
   GetAssembler()->FinalizeInstructions(code);
 }
 
-void CodeGenerator::CompileEntryBlock() {
-  HGraphVisitor* location_builder = GetLocationBuilder();
-  HGraphVisitor* instruction_visitor = GetInstructionVisitor();
-  if (kIsDebugBuild) {
-    for (HInstructionIterator it(GetGraph()->GetEntryBlock()); !it.Done(); it.Advance()) {
-      HInstruction* current = it.Current();
-      // Instructions in the entry block should not generate code.
-      current->Accept(location_builder);
-      DCHECK(current->GetLocations() == nullptr);
-      current->Accept(instruction_visitor);
-    }
-  }
-  GenerateFrameEntry();
-}
-
 void CodeGenerator::CompileBlock(HBasicBlock* block) {
   Bind(GetLabelOf(block));
   HGraphVisitor* location_builder = GetLocationBuilder();
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   for (HInstructionIterator it(block); !it.Done(); it.Advance()) {
-    // For each instruction, we emulate a stack-based machine, where the inputs are popped from
-    // the runtime stack, and the result is pushed on the stack. We currently can do this because
-    // we do not perform any code motion, and the Dex format does not reference individual
-    // instructions but uses registers instead (our equivalent of HLocal).
     HInstruction* current = it.Current();
     current->Accept(location_builder);
     InitLocations(current);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index e144733..6648598 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -180,7 +180,6 @@
  private:
   void InitLocations(HInstruction* instruction);
   void CompileBlock(HBasicBlock* block);
-  void CompileEntryBlock();
 
   HGraph* const graph_;
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2364bc8..4e88765 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -149,7 +149,6 @@
 
 void InstructionCodeGeneratorARM::VisitLocal(HLocal* local) {
   DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-  codegen_->SetFrameSize(codegen_->GetFrameSize() + kArmWordSize);
 }
 
 void LocationsBuilderARM::VisitLoadLocal(HLoadLocal* load) {
@@ -384,5 +383,17 @@
   }
 }
 
+void LocationsBuilderARM::VisitNot(HNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location(R0));
+  locations->SetOut(Location(R0));
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  __ eor(locations->Out().reg<Register>(), locations->InAt(0).reg<Register>(), ShifterOperand(1));
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 540a72a..88198dc 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -379,5 +379,18 @@
   }
 }
 
+void LocationsBuilderX86::VisitNot(HNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location(EAX));
+  locations->SetOut(Location(EAX));
+  instruction->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK_EQ(locations->InAt(0).reg<Register>(), locations->Out().reg<Register>());
+  __ xorl(locations->Out().reg<Register>(), Immediate(1));
+}
+
 }  // namespace x86
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d1f672f..adea0ba 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -227,6 +227,7 @@
   M(LoadLocal)                                             \
   M(Local)                                                 \
   M(NewInstance)                                           \
+  M(Not)                                                   \
   M(ParameterValue)                                        \
   M(PushArgument)                                          \
   M(Return)                                                \
@@ -740,6 +741,18 @@
   DISALLOW_COPY_AND_ASSIGN(HParameterValue);
 };
 
+class HNot : public HTemplateInstruction<1> {
+ public:
+  explicit HNot(HInstruction* input) {
+    SetRawInputAt(0, input);
+  }
+
+  DECLARE_INSTRUCTION(Not);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HNot);
+};
+
 class HGraphVisitor : public ValueObject {
  public:
   explicit HGraphVisitor(HGraph* graph) : graph_(graph) { }
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 0220724..8acd1f9 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -197,6 +197,8 @@
     CHECK(code < kNumberOfCoreRegisters) << code;
     if (code == SP) {
       return vixl::sp;
+    } else if (code == XZR) {
+      return vixl::xzr;
     }
     return vixl::Register::XRegFromCode(code);
   }
@@ -243,6 +245,9 @@
 
   // List of exception blocks to generate at the end of the code cache.
   std::vector<Arm64Exception*> exception_blocks_;
+
+  // Used for testing.
+  friend class Arm64ManagedRegister_VixlRegisters_Test;
 };
 
 class Arm64Exception {
diff --git a/compiler/utils/arm64/managed_register_arm64.cc b/compiler/utils/arm64/managed_register_arm64.cc
index de5cb8c..8977313 100644
--- a/compiler/utils/arm64/managed_register_arm64.cc
+++ b/compiler/utils/arm64/managed_register_arm64.cc
@@ -53,7 +53,7 @@
   CHECK(!IsNoRegister());
   int no;
   if (IsCoreRegister()) {
-    if (IsStackPointer()) {
+    if (IsZeroRegister()) {
       no = static_cast<int>(X31);
     } else {
       no = static_cast<int>(AsCoreRegister());
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index 80f17f5..a0f520f 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -24,7 +24,7 @@
 namespace art {
 namespace arm64 {
 
-const int kNumberOfCoreRegIds = 32;
+const int kNumberOfCoreRegIds = kNumberOfCoreRegisters;
 const int kNumberOfWRegIds = kNumberOfWRegisters;
 const int kNumberOfDRegIds = kNumberOfDRegisters;
 const int kNumberOfSRegIds = kNumberOfSRegisters;
@@ -78,7 +78,7 @@
 
   WRegister AsOverlappingCoreRegisterLow() const {
     CHECK(IsValidManagedRegister());
-    if (IsStackPointer()) return W31;
+    if (IsZeroRegister()) return W31;
     return static_cast<WRegister>(AsCoreRegister());
   }
 
@@ -189,6 +189,10 @@
     return IsCoreRegister() && (id_ == SP);
   }
 
+  bool IsZeroRegister() const {
+    return IsCoreRegister() && (id_ == XZR);
+  }
+
   int RegId() const {
     CHECK(!IsNoRegister());
     return id_;
diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc
index 88c01ee..f149f1b 100644
--- a/compiler/utils/arm64/managed_register_arm64_test.cc
+++ b/compiler/utils/arm64/managed_register_arm64_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "globals.h"
+#include "assembler_arm64.h"
 #include "managed_register_arm64.h"
 #include "gtest/gtest.h"
 
@@ -295,9 +296,8 @@
 
   Arm64ManagedRegister reg_X31 = Arm64ManagedRegister::FromCoreRegister(X31);
   EXPECT_TRUE(!reg_X31.Equals(Arm64ManagedRegister::NoRegister()));
-  // TODO: Fix the infrastructure, then re-enable.
-  // EXPECT_TRUE(!reg_X31.Equals(Arm64ManagedRegister::FromCoreRegister(SP)));
-  // EXPECT_TRUE(reg_X31.Equals(Arm64ManagedRegister::FromCoreRegister(XZR)));
+  EXPECT_TRUE(reg_X31.Equals(Arm64ManagedRegister::FromCoreRegister(SP)));
+  EXPECT_TRUE(!reg_X31.Equals(Arm64ManagedRegister::FromCoreRegister(XZR)));
   EXPECT_TRUE(!reg_X31.Equals(Arm64ManagedRegister::FromWRegister(W31)));
   EXPECT_TRUE(!reg_X31.Equals(Arm64ManagedRegister::FromWRegister(WZR)));
   EXPECT_TRUE(!reg_X31.Equals(Arm64ManagedRegister::FromSRegister(S0)));
@@ -305,8 +305,7 @@
 
   Arm64ManagedRegister reg_SP = Arm64ManagedRegister::FromCoreRegister(SP);
   EXPECT_TRUE(!reg_SP.Equals(Arm64ManagedRegister::NoRegister()));
-  // TODO: We expect these to pass - SP has a different semantic than X31/XZR.
-  // EXPECT_TRUE(!reg_SP.Equals(Arm64ManagedRegister::FromCoreRegister(X31)));
+  EXPECT_TRUE(reg_SP.Equals(Arm64ManagedRegister::FromCoreRegister(X31)));
   EXPECT_TRUE(!reg_SP.Equals(Arm64ManagedRegister::FromCoreRegister(XZR)));
   EXPECT_TRUE(!reg_SP.Equals(Arm64ManagedRegister::FromWRegister(W31)));
   EXPECT_TRUE(!reg_SP.Equals(Arm64ManagedRegister::FromSRegister(S0)));
@@ -453,17 +452,17 @@
 
   reg = Arm64ManagedRegister::FromCoreRegister(XZR);
   reg_o = Arm64ManagedRegister::FromWRegister(WZR);
-  // TODO: Overlap not implemented, yet
-  // EXPECT_TRUE(reg.Overlaps(Arm64ManagedRegister::FromCoreRegister(X31)));
+  EXPECT_TRUE(reg.Overlaps(Arm64ManagedRegister::FromCoreRegister(X31)));
   EXPECT_TRUE(!reg.Overlaps(Arm64ManagedRegister::FromCoreRegister(X1)));
-  // EXPECT_TRUE(reg.Overlaps(Arm64ManagedRegister::FromCoreRegister(SP)));
-  // EXPECT_TRUE(reg.Overlaps(Arm64ManagedRegister::FromWRegister(W31)));
+  EXPECT_TRUE(reg.Overlaps(Arm64ManagedRegister::FromCoreRegister(SP)));
+  EXPECT_TRUE(reg.Overlaps(Arm64ManagedRegister::FromWRegister(W31)));
   EXPECT_TRUE(!reg.Overlaps(Arm64ManagedRegister::FromWRegister(W1)));
   EXPECT_TRUE(!reg.Overlaps(Arm64ManagedRegister::FromWRegister(W12)));
   EXPECT_TRUE(!reg.Overlaps(Arm64ManagedRegister::FromWRegister(W19)));
   EXPECT_EQ(X31, reg_o.AsOverlappingWRegisterCore());
-  // TODO: XZR is not a core register right now.
-  // EXPECT_EQ(W31, reg.AsOverlappingCoreRegisterLow());
+  EXPECT_EQ(SP, reg_o.AsOverlappingWRegisterCore());
+  EXPECT_NE(XZR, reg_o.AsOverlappingWRegisterCore());
+  EXPECT_EQ(W31, reg.AsOverlappingCoreRegisterLow());
   EXPECT_TRUE(!reg.Overlaps(Arm64ManagedRegister::FromSRegister(S0)));
   EXPECT_TRUE(!reg.Overlaps(Arm64ManagedRegister::FromSRegister(S1)));
   EXPECT_TRUE(!reg.Overlaps(Arm64ManagedRegister::FromSRegister(S2)));
@@ -610,5 +609,154 @@
   EXPECT_TRUE(!reg.Overlaps(Arm64ManagedRegister::FromDRegister(D20)));
 }
 
+TEST(Arm64ManagedRegister, VixlRegisters) {
+  // X Registers.
+  EXPECT_TRUE(vixl::x0.Is(Arm64Assembler::reg_x(X0)));
+  EXPECT_TRUE(vixl::x1.Is(Arm64Assembler::reg_x(X1)));
+  EXPECT_TRUE(vixl::x2.Is(Arm64Assembler::reg_x(X2)));
+  EXPECT_TRUE(vixl::x3.Is(Arm64Assembler::reg_x(X3)));
+  EXPECT_TRUE(vixl::x4.Is(Arm64Assembler::reg_x(X4)));
+  EXPECT_TRUE(vixl::x5.Is(Arm64Assembler::reg_x(X5)));
+  EXPECT_TRUE(vixl::x6.Is(Arm64Assembler::reg_x(X6)));
+  EXPECT_TRUE(vixl::x7.Is(Arm64Assembler::reg_x(X7)));
+  EXPECT_TRUE(vixl::x8.Is(Arm64Assembler::reg_x(X8)));
+  EXPECT_TRUE(vixl::x9.Is(Arm64Assembler::reg_x(X9)));
+  EXPECT_TRUE(vixl::x10.Is(Arm64Assembler::reg_x(X10)));
+  EXPECT_TRUE(vixl::x11.Is(Arm64Assembler::reg_x(X11)));
+  EXPECT_TRUE(vixl::x12.Is(Arm64Assembler::reg_x(X12)));
+  EXPECT_TRUE(vixl::x13.Is(Arm64Assembler::reg_x(X13)));
+  EXPECT_TRUE(vixl::x14.Is(Arm64Assembler::reg_x(X14)));
+  EXPECT_TRUE(vixl::x15.Is(Arm64Assembler::reg_x(X15)));
+  EXPECT_TRUE(vixl::x16.Is(Arm64Assembler::reg_x(X16)));
+  EXPECT_TRUE(vixl::x17.Is(Arm64Assembler::reg_x(X17)));
+  EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(X18)));
+  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(X19)));
+  EXPECT_TRUE(vixl::x20.Is(Arm64Assembler::reg_x(X20)));
+  EXPECT_TRUE(vixl::x21.Is(Arm64Assembler::reg_x(X21)));
+  EXPECT_TRUE(vixl::x22.Is(Arm64Assembler::reg_x(X22)));
+  EXPECT_TRUE(vixl::x23.Is(Arm64Assembler::reg_x(X23)));
+  EXPECT_TRUE(vixl::x24.Is(Arm64Assembler::reg_x(X24)));
+  EXPECT_TRUE(vixl::x25.Is(Arm64Assembler::reg_x(X25)));
+  EXPECT_TRUE(vixl::x26.Is(Arm64Assembler::reg_x(X26)));
+  EXPECT_TRUE(vixl::x27.Is(Arm64Assembler::reg_x(X27)));
+  EXPECT_TRUE(vixl::x28.Is(Arm64Assembler::reg_x(X28)));
+  EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(X29)));
+  EXPECT_TRUE(vixl::x30.Is(Arm64Assembler::reg_x(X30)));
+  // FIXME: Take a look here.
+  EXPECT_TRUE(vixl::sp.Is(Arm64Assembler::reg_x(X31)));
+  EXPECT_TRUE(!vixl::x31.Is(Arm64Assembler::reg_x(X31)));
+
+  EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(TR)));
+  EXPECT_TRUE(vixl::ip0.Is(Arm64Assembler::reg_x(IP0)));
+  EXPECT_TRUE(vixl::ip1.Is(Arm64Assembler::reg_x(IP1)));
+  EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(FP)));
+  EXPECT_TRUE(vixl::lr.Is(Arm64Assembler::reg_x(LR)));
+  EXPECT_TRUE(vixl::sp.Is(Arm64Assembler::reg_x(SP)));
+  EXPECT_TRUE(vixl::xzr.Is(Arm64Assembler::reg_x(XZR)));
+
+  // W Registers.
+  EXPECT_TRUE(vixl::w0.Is(Arm64Assembler::reg_w(W0)));
+  EXPECT_TRUE(vixl::w1.Is(Arm64Assembler::reg_w(W1)));
+  EXPECT_TRUE(vixl::w2.Is(Arm64Assembler::reg_w(W2)));
+  EXPECT_TRUE(vixl::w3.Is(Arm64Assembler::reg_w(W3)));
+  EXPECT_TRUE(vixl::w4.Is(Arm64Assembler::reg_w(W4)));
+  EXPECT_TRUE(vixl::w5.Is(Arm64Assembler::reg_w(W5)));
+  EXPECT_TRUE(vixl::w6.Is(Arm64Assembler::reg_w(W6)));
+  EXPECT_TRUE(vixl::w7.Is(Arm64Assembler::reg_w(W7)));
+  EXPECT_TRUE(vixl::w8.Is(Arm64Assembler::reg_w(W8)));
+  EXPECT_TRUE(vixl::w9.Is(Arm64Assembler::reg_w(W9)));
+  EXPECT_TRUE(vixl::w10.Is(Arm64Assembler::reg_w(W10)));
+  EXPECT_TRUE(vixl::w11.Is(Arm64Assembler::reg_w(W11)));
+  EXPECT_TRUE(vixl::w12.Is(Arm64Assembler::reg_w(W12)));
+  EXPECT_TRUE(vixl::w13.Is(Arm64Assembler::reg_w(W13)));
+  EXPECT_TRUE(vixl::w14.Is(Arm64Assembler::reg_w(W14)));
+  EXPECT_TRUE(vixl::w15.Is(Arm64Assembler::reg_w(W15)));
+  EXPECT_TRUE(vixl::w16.Is(Arm64Assembler::reg_w(W16)));
+  EXPECT_TRUE(vixl::w17.Is(Arm64Assembler::reg_w(W17)));
+  EXPECT_TRUE(vixl::w18.Is(Arm64Assembler::reg_w(W18)));
+  EXPECT_TRUE(vixl::w19.Is(Arm64Assembler::reg_w(W19)));
+  EXPECT_TRUE(vixl::w20.Is(Arm64Assembler::reg_w(W20)));
+  EXPECT_TRUE(vixl::w21.Is(Arm64Assembler::reg_w(W21)));
+  EXPECT_TRUE(vixl::w22.Is(Arm64Assembler::reg_w(W22)));
+  EXPECT_TRUE(vixl::w23.Is(Arm64Assembler::reg_w(W23)));
+  EXPECT_TRUE(vixl::w24.Is(Arm64Assembler::reg_w(W24)));
+  EXPECT_TRUE(vixl::w25.Is(Arm64Assembler::reg_w(W25)));
+  EXPECT_TRUE(vixl::w26.Is(Arm64Assembler::reg_w(W26)));
+  EXPECT_TRUE(vixl::w27.Is(Arm64Assembler::reg_w(W27)));
+  EXPECT_TRUE(vixl::w28.Is(Arm64Assembler::reg_w(W28)));
+  EXPECT_TRUE(vixl::w29.Is(Arm64Assembler::reg_w(W29)));
+  EXPECT_TRUE(vixl::w30.Is(Arm64Assembler::reg_w(W30)));
+  EXPECT_TRUE(vixl::w31.Is(Arm64Assembler::reg_w(W31)));
+  EXPECT_TRUE(vixl::wzr.Is(Arm64Assembler::reg_w(WZR)));
+
+  // D Registers.
+  EXPECT_TRUE(vixl::d0.Is(Arm64Assembler::reg_d(D0)));
+  EXPECT_TRUE(vixl::d1.Is(Arm64Assembler::reg_d(D1)));
+  EXPECT_TRUE(vixl::d2.Is(Arm64Assembler::reg_d(D2)));
+  EXPECT_TRUE(vixl::d3.Is(Arm64Assembler::reg_d(D3)));
+  EXPECT_TRUE(vixl::d4.Is(Arm64Assembler::reg_d(D4)));
+  EXPECT_TRUE(vixl::d5.Is(Arm64Assembler::reg_d(D5)));
+  EXPECT_TRUE(vixl::d6.Is(Arm64Assembler::reg_d(D6)));
+  EXPECT_TRUE(vixl::d7.Is(Arm64Assembler::reg_d(D7)));
+  EXPECT_TRUE(vixl::d8.Is(Arm64Assembler::reg_d(D8)));
+  EXPECT_TRUE(vixl::d9.Is(Arm64Assembler::reg_d(D9)));
+  EXPECT_TRUE(vixl::d10.Is(Arm64Assembler::reg_d(D10)));
+  EXPECT_TRUE(vixl::d11.Is(Arm64Assembler::reg_d(D11)));
+  EXPECT_TRUE(vixl::d12.Is(Arm64Assembler::reg_d(D12)));
+  EXPECT_TRUE(vixl::d13.Is(Arm64Assembler::reg_d(D13)));
+  EXPECT_TRUE(vixl::d14.Is(Arm64Assembler::reg_d(D14)));
+  EXPECT_TRUE(vixl::d15.Is(Arm64Assembler::reg_d(D15)));
+  EXPECT_TRUE(vixl::d16.Is(Arm64Assembler::reg_d(D16)));
+  EXPECT_TRUE(vixl::d17.Is(Arm64Assembler::reg_d(D17)));
+  EXPECT_TRUE(vixl::d18.Is(Arm64Assembler::reg_d(D18)));
+  EXPECT_TRUE(vixl::d19.Is(Arm64Assembler::reg_d(D19)));
+  EXPECT_TRUE(vixl::d20.Is(Arm64Assembler::reg_d(D20)));
+  EXPECT_TRUE(vixl::d21.Is(Arm64Assembler::reg_d(D21)));
+  EXPECT_TRUE(vixl::d22.Is(Arm64Assembler::reg_d(D22)));
+  EXPECT_TRUE(vixl::d23.Is(Arm64Assembler::reg_d(D23)));
+  EXPECT_TRUE(vixl::d24.Is(Arm64Assembler::reg_d(D24)));
+  EXPECT_TRUE(vixl::d25.Is(Arm64Assembler::reg_d(D25)));
+  EXPECT_TRUE(vixl::d26.Is(Arm64Assembler::reg_d(D26)));
+  EXPECT_TRUE(vixl::d27.Is(Arm64Assembler::reg_d(D27)));
+  EXPECT_TRUE(vixl::d28.Is(Arm64Assembler::reg_d(D28)));
+  EXPECT_TRUE(vixl::d29.Is(Arm64Assembler::reg_d(D29)));
+  EXPECT_TRUE(vixl::d30.Is(Arm64Assembler::reg_d(D30)));
+  EXPECT_TRUE(vixl::d31.Is(Arm64Assembler::reg_d(D31)));
+
+  // S Registers.
+  EXPECT_TRUE(vixl::s0.Is(Arm64Assembler::reg_s(S0)));
+  EXPECT_TRUE(vixl::s1.Is(Arm64Assembler::reg_s(S1)));
+  EXPECT_TRUE(vixl::s2.Is(Arm64Assembler::reg_s(S2)));
+  EXPECT_TRUE(vixl::s3.Is(Arm64Assembler::reg_s(S3)));
+  EXPECT_TRUE(vixl::s4.Is(Arm64Assembler::reg_s(S4)));
+  EXPECT_TRUE(vixl::s5.Is(Arm64Assembler::reg_s(S5)));
+  EXPECT_TRUE(vixl::s6.Is(Arm64Assembler::reg_s(S6)));
+  EXPECT_TRUE(vixl::s7.Is(Arm64Assembler::reg_s(S7)));
+  EXPECT_TRUE(vixl::s8.Is(Arm64Assembler::reg_s(S8)));
+  EXPECT_TRUE(vixl::s9.Is(Arm64Assembler::reg_s(S9)));
+  EXPECT_TRUE(vixl::s10.Is(Arm64Assembler::reg_s(S10)));
+  EXPECT_TRUE(vixl::s11.Is(Arm64Assembler::reg_s(S11)));
+  EXPECT_TRUE(vixl::s12.Is(Arm64Assembler::reg_s(S12)));
+  EXPECT_TRUE(vixl::s13.Is(Arm64Assembler::reg_s(S13)));
+  EXPECT_TRUE(vixl::s14.Is(Arm64Assembler::reg_s(S14)));
+  EXPECT_TRUE(vixl::s15.Is(Arm64Assembler::reg_s(S15)));
+  EXPECT_TRUE(vixl::s16.Is(Arm64Assembler::reg_s(S16)));
+  EXPECT_TRUE(vixl::s17.Is(Arm64Assembler::reg_s(S17)));
+  EXPECT_TRUE(vixl::s18.Is(Arm64Assembler::reg_s(S18)));
+  EXPECT_TRUE(vixl::s19.Is(Arm64Assembler::reg_s(S19)));
+  EXPECT_TRUE(vixl::s20.Is(Arm64Assembler::reg_s(S20)));
+  EXPECT_TRUE(vixl::s21.Is(Arm64Assembler::reg_s(S21)));
+  EXPECT_TRUE(vixl::s22.Is(Arm64Assembler::reg_s(S22)));
+  EXPECT_TRUE(vixl::s23.Is(Arm64Assembler::reg_s(S23)));
+  EXPECT_TRUE(vixl::s24.Is(Arm64Assembler::reg_s(S24)));
+  EXPECT_TRUE(vixl::s25.Is(Arm64Assembler::reg_s(S25)));
+  EXPECT_TRUE(vixl::s26.Is(Arm64Assembler::reg_s(S26)));
+  EXPECT_TRUE(vixl::s27.Is(Arm64Assembler::reg_s(S27)));
+  EXPECT_TRUE(vixl::s28.Is(Arm64Assembler::reg_s(S28)));
+  EXPECT_TRUE(vixl::s29.Is(Arm64Assembler::reg_s(S29)));
+  EXPECT_TRUE(vixl::s30.Is(Arm64Assembler::reg_s(S30)));
+  EXPECT_TRUE(vixl::s31.Is(Arm64Assembler::reg_s(S31)));
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 6043c17..6a3efc5 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -863,6 +863,10 @@
   EmitOperand(dst, Operand(src));
 }
 
+void X86Assembler::xorl(Register dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitComplex(6, Operand(dst), imm);
+}
 
 void X86Assembler::addl(Register reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index f8fc4c0..057c80a 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -354,6 +354,7 @@
   void orl(Register dst, Register src);
 
   void xorl(Register dst, Register src);
+  void xorl(Register dst, const Immediate& imm);
 
   void addl(Register dst, Register src);
   void addl(Register reg, const Immediate& imm);
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index ca904bc..43c0ad6 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -63,8 +63,8 @@
   LR  = 30,
   SP  = 31,     // SP is X31 and overlaps with XRZ but we encode it as a
                 // special register, due to the different instruction semantics.
-  XZR = 32,     // FIXME This needs to be reconciled with the JNI assembler.
-  kNumberOfCoreRegisters = 32,
+  XZR = 32,
+  kNumberOfCoreRegisters = 33,
   kNoRegister = -1,
 };
 std::ostream& operator<<(std::ostream& os, const Register& rhs);
@@ -103,7 +103,6 @@
   W29 = 29,
   W30 = 30,
   W31 = 31,
-  WSP = 31,
   WZR = 31,
   kNumberOfWRegisters = 32,
   kNoWRegister = -1,
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 4b881f6..b50c098 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -76,6 +76,7 @@
   kClassLinkerClassesLock,
   kBreakpointLock,
   kMonitorLock,
+  kMonitorListLock,
   kThreadListLock,
   kBreakpointInvokeLock,
   kDeoptimizationLock,
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 920741f..cbefa6a 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -2005,6 +2005,61 @@
   }
 }
 
+size_t RosAlloc::ReleasePages() {
+  VLOG(heap) << "RosAlloc::ReleasePages()";
+  DCHECK(!DoesReleaseAllPages());
+  Thread* self = Thread::Current();
+  size_t reclaimed_bytes = 0;
+  size_t i = 0;
+  while (true) {
+    MutexLock mu(self, lock_);
+    // Check the page map size which might have changed due to grow/shrink.
+    size_t pm_end = page_map_size_;
+    if (i >= pm_end) {
+      // Reached the end.
+      break;
+    }
+    byte pm = page_map_[i];
+    switch (pm) {
+      case kPageMapEmpty: {
+        // The start of a free page run. Release pages.
+        FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+        DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+        size_t fpr_size = fpr->ByteSize(this);
+        DCHECK(IsAligned<kPageSize>(fpr_size));
+        byte* start = reinterpret_cast<byte*>(fpr);
+        if (kIsDebugBuild) {
+          // In the debug build, the first page of a free page run
+          // contains a magic number for debugging. Exclude it.
+          start = reinterpret_cast<byte*>(fpr) + kPageSize;
+        }
+        byte* end = reinterpret_cast<byte*>(fpr) + fpr_size;
+        CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
+        reclaimed_bytes += fpr_size;
+        size_t num_pages = fpr_size / kPageSize;
+        if (kIsDebugBuild) {
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            DCHECK_EQ(page_map_[j], kPageMapEmpty);
+          }
+        }
+        i += num_pages;
+        DCHECK_LE(i, pm_end);
+        break;
+      }
+      case kPageMapLargeObject:      // Fall through.
+      case kPageMapLargeObjectPart:  // Fall through.
+      case kPageMapRun:              // Fall through.
+      case kPageMapRunPart:          // Fall through.
+        ++i;
+        break;  // Skip.
+      default:
+        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        break;
+    }
+  }
+  return reclaimed_bytes;
+}
+
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 0b4b189..5d9d75c 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -539,6 +539,8 @@
   void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                   void* arg)
       LOCKS_EXCLUDED(lock_);
+  // Release empty pages.
+  size_t ReleasePages() LOCKS_EXCLUDED(lock_);
   // Returns the current footprint.
   size_t Footprint() LOCKS_EXCLUDED(lock_);
   // Returns the current capacity, maximum footprint.
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 07951e0..82340f5 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -201,7 +201,7 @@
 
 uint64_t GarbageCollector::GetEstimatedLastIterationThroughput() const {
   // Add 1ms to prevent possible division by 0.
-  return (freed_bytes_ * 1000) / (NsToMs(GetDurationNs()) + 1);
+  return (static_cast<uint64_t>(freed_bytes_) * 1000) / (NsToMs(GetDurationNs()) + 1);
 }
 
 }  // namespace collector
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 915e54f..e3fa834 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -914,8 +914,16 @@
   // Transition the collector if the desired collector type is not the same as the current
   // collector type.
   TransitionCollector(desired_collector_type);
-  // Do a heap trim if it is needed.
-  Trim();
+  if (!CareAboutPauseTimes()) {
+    // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care
+    // about pauses.
+    Runtime* runtime = Runtime::Current();
+    runtime->GetThreadList()->SuspendAll();
+    runtime->GetMonitorList()->DeflateMonitors();
+    runtime->GetThreadList()->ResumeAll();
+    // Do a heap trim if it is needed.
+    Trim();
+  }
 }
 
 void Heap::Trim() {
@@ -2663,6 +2671,10 @@
 }
 
 void Heap::RequestHeapTrim() {
+  // Request a heap trim only if we do not currently care about pause times.
+  if (CareAboutPauseTimes()) {
+    return;
+  }
   // GC completed and now we must decide whether to request a heap trim (advising pages back to the
   // kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans
   // a space it will hold its lock and can become a cause of jank.
@@ -2684,21 +2696,17 @@
     // as we don't hold the lock while requesting the trim).
     return;
   }
-
-  // Request a heap trim only if we do not currently care about pause times.
-  if (!CareAboutPauseTimes()) {
-    {
-      MutexLock mu(self, *heap_trim_request_lock_);
-      if (last_trim_time_ + kHeapTrimWait >= NanoTime()) {
-        // We have done a heap trim in the last kHeapTrimWait nanosecs, don't request another one
-        // just yet.
-        return;
-      }
-      heap_trim_request_pending_ = true;
+  {
+    MutexLock mu(self, *heap_trim_request_lock_);
+    if (last_trim_time_ + kHeapTrimWait >= NanoTime()) {
+      // We have done a heap trim in the last kHeapTrimWait nanosecs, don't request another one
+      // just yet.
+      return;
     }
-    // Notify the daemon thread which will actually do the heap trim.
-    SignalHeapTrimDaemon(self);
+    heap_trim_request_pending_ = true;
   }
+  // Notify the daemon thread which will actually do the heap trim.
+  SignalHeapTrimDaemon(self);
 }
 
 void Heap::SignalHeapTrimDaemon(Thread* self) {
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 012267b..5c5e7f8 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -222,6 +222,7 @@
 }
 
 size_t RosAllocSpace::Trim() {
+  VLOG(heap) << "RosAllocSpace::Trim() ";
   {
     MutexLock mu(Thread::Current(), lock_);
     // Trim to release memory at the end of the space.
@@ -229,10 +230,7 @@
   }
   // Attempt to release pages if it does not release all empty pages.
   if (!rosalloc_->DoesReleaseAllPages()) {
-    VLOG(heap) << "RosAllocSpace::Trim() ";
-    size_t reclaimed = 0;
-    InspectAllRosAlloc(DlmallocMadviseCallback, &reclaimed, false);
-    return reclaimed;
+    return rosalloc_->ReleasePages();
   }
   return 0;
 }
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index bcaf8ec..bbc7dd0 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -205,7 +205,7 @@
 void Monitor::Lock(Thread* self) {
   MutexLock mu(self, monitor_lock_);
   while (true) {
-    if (owner_ == NULL) {  // Unowned.
+    if (owner_ == nullptr) {  // Unowned.
       owner_ = self;
       CHECK_EQ(lock_count_, 0);
       // When debugging, save the current monitor holder for future
@@ -223,15 +223,15 @@
     uint64_t wait_start_ms = log_contention ? 0 : MilliTime();
     mirror::ArtMethod* owners_method = locking_method_;
     uint32_t owners_dex_pc = locking_dex_pc_;
+    // Do this before releasing the lock so that we don't get deflated.
+    ++num_waiters_;
     monitor_lock_.Unlock(self);  // Let go of locks in order.
     {
       ScopedThreadStateChange tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
       self->SetMonitorEnterObject(obj_);
       MutexLock mu2(self, monitor_lock_);  // Reacquire monitor_lock_ without mutator_lock_ for Wait.
       if (owner_ != NULL) {  // Did the owner_ give the lock up?
-        ++num_waiters_;
         monitor_contenders_.Wait(self);  // Still contended so wait.
-        --num_waiters_;
         // Woken from contention.
         if (log_contention) {
           uint64_t wait_ms = MilliTime() - wait_start_ms;
@@ -252,6 +252,7 @@
       self->SetMonitorEnterObject(nullptr);
     }
     monitor_lock_.Lock(self);  // Reacquire locks in order.
+    --num_waiters_;
   }
 }
 
@@ -431,6 +432,7 @@
    * not order sensitive as we hold the pthread mutex.
    */
   AppendToWaitSet(self);
+  ++num_waiters_;
   int prev_lock_count = lock_count_;
   lock_count_ = 0;
   owner_ = NULL;
@@ -507,6 +509,7 @@
   lock_count_ = prev_lock_count;
   locking_method_ = saved_method;
   locking_dex_pc_ = saved_dex_pc;
+  --num_waiters_;
   RemoveFromWaitSet(self);
 
   if (was_interrupted) {
@@ -575,8 +578,12 @@
   // If the lock isn't an inflated monitor, then we don't need to deflate anything.
   if (lw.GetState() == LockWord::kFatLocked) {
     Monitor* monitor = lw.FatLockMonitor();
-    CHECK(monitor != nullptr);
+    DCHECK(monitor != nullptr);
     MutexLock mu(self, monitor->monitor_lock_);
+    // Can't deflate if we have anybody waiting on the CV.
+    if (monitor->num_waiters_ > 0) {
+      return false;
+    }
     Thread* owner = monitor->owner_;
     if (owner != nullptr) {
       // Can't deflate if we are locked and have a hash code.
@@ -587,17 +594,16 @@
       if (monitor->lock_count_ > LockWord::kThinLockMaxCount) {
         return false;
       }
-      // Can't deflate if we have anybody waiting on the CV.
-      if (monitor->num_waiters_ > 0) {
-        return false;
-      }
       // Deflate to a thin lock.
-      obj->SetLockWord(LockWord::FromThinLockId(owner->GetTid(), monitor->lock_count_));
+      obj->SetLockWord(LockWord::FromThinLockId(owner->GetThreadId(), monitor->lock_count_));
+      VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / " << monitor->lock_count_;
     } else if (monitor->HasHashCode()) {
       obj->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode()));
+      VLOG(monitor) << "Deflated " << obj << " to hash monitor " << monitor->GetHashCode();
     } else {
       // No lock and no hash, just put an empty lock word inside the object.
       obj->SetLockWord(LockWord());
+      VLOG(monitor) << "Deflated" << obj << " to empty lock word";
     }
     // The monitor is deflated, mark the object as nullptr so that we know to delete it during the
     // next GC.
@@ -1054,7 +1060,7 @@
 }
 
 MonitorList::MonitorList()
-    : allow_new_monitors_(true), monitor_list_lock_("MonitorList lock"),
+    : allow_new_monitors_(true), monitor_list_lock_("MonitorList lock", kMonitorListLock),
       monitor_add_condition_("MonitorList disallow condition", monitor_list_lock_) {
 }
 
@@ -1103,6 +1109,22 @@
   }
 }
 
+static mirror::Object* MonitorDeflateCallback(mirror::Object* object, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (Monitor::Deflate(reinterpret_cast<Thread*>(arg), object)) {
+    DCHECK_NE(object->GetLockWord().GetState(), LockWord::kFatLocked);
+    // If we deflated, return nullptr so that the monitor gets removed from the array.
+    return nullptr;
+  }
+  return object;  // Monitor was not deflated.
+}
+
+void MonitorList::DeflateMonitors() {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  SweepMonitorList(MonitorDeflateCallback, reinterpret_cast<Thread*>(self));
+}
+
 MonitorInfo::MonitorInfo(mirror::Object* obj) : owner_(NULL), entry_count_(0) {
   DCHECK(obj != NULL);
 
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 55504b5..c459278 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -224,9 +224,11 @@
   void Add(Monitor* m);
 
   void SweepMonitorList(IsMarkedCallback* callback, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void DisallowNewMonitors();
-  void AllowNewMonitors();
+      LOCKS_EXCLUDED(monitor_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DisallowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
+  void AllowNewMonitors() LOCKS_EXCLUDED(monitor_list_lock_);
+  void DeflateMonitors() LOCKS_EXCLUDED(monitor_list_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
   bool allow_new_monitors_ GUARDED_BY(monitor_list_lock_);
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index bab0604..15a5779 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#include <unistd.h>
+#include <algorithm>
 #include <fcntl.h>
+#include <set>
+#include <unistd.h>
 
 #include "base/logging.h"
 #include "class_linker.h"
@@ -30,6 +32,7 @@
 #include "mirror/string.h"
 #include "oat.h"
 #include "os.h"
+#include "profiler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -230,13 +233,31 @@
   close(fd2);
 }
 
+static double GetDoubleProperty(const char* property, double minValue, double maxValue, double defaultValue) {
+#ifndef HAVE_ANDROID_OS
+  return defaultValue;
+#else
+  char buf[PROP_VALUE_MAX];
+  char* endptr;
+
+  property_get(property, buf, "");
+  double value = strtod(buf, &endptr);
+
+  if (value == 0 && endptr == buf) {
+    value = defaultValue;
+  } else if (value < minValue || value > maxValue) {
+    value = defaultValue;
+  }
+  return value;
+#endif
+}
+
 static jboolean DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
     jstring javaPkgname, jboolean defer) {
   const bool kVerboseLogging = false;  // Spammy logging.
   const bool kDebugLogging = true;  // Logging useful for debugging.
 
   ScopedUtfChars filename(env, javaFilename);
-
   if ((filename.c_str() == nullptr) || !OS::FileExists(filename.c_str())) {
     LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename.c_str() << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
@@ -282,7 +303,6 @@
     struct stat profstat, prevstat;
     int e1 = stat(profile_file.c_str(), &profstat);
     int e2 = stat(prev_profile_file.c_str(), &prevstat);
-
     if (e1 < 0) {
       // No profile file, need to run dex2oat
       if (kDebugLogging) {
@@ -290,48 +310,47 @@
       }
       return JNI_TRUE;
     }
+
     if (e2 == 0) {
       // There is a previous profile file.  Check if the profile has changed significantly.
-      // Let's use the file size as a proxy for significance.  If the new profile is 10%
-      // different in size than the the old profile then we run dex2oat.
-      double newsize = profstat.st_size;
-      double oldsize = prevstat.st_size;
-      bool need_profile = false;
+      // A change in profile is considered significant if X% (change_thr property) of the top K%
+      // (compile_thr property) samples has changed.
 
-      double ratio = 0;     // If the old file was empty and the new one not
-      if (oldsize > 0 && newsize > 0) {
-        ratio = newsize / oldsize;
-      } else if (oldsize == 0 && newsize > 0) {
-        need_profile = true;
-      } else if (oldsize > 0 && newsize == 0) {
-        // Unlikely to happen, but cover all the bases.
-        need_profile = true;
+      double topKThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.compile_thr", 10.0, 90.0, 90.0);
+      double changeThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.change_thr", 1.0, 90.0, 10.0);
+      double changePercent = 0.0;
+      std::set<std::string> newTopK, oldTopK;
+      bool newOk = ProfileHelper::LoadTopKSamples(newTopK, profile_file, topKThreshold);
+      bool oldOk = ProfileHelper::LoadTopKSamples(oldTopK, prev_profile_file, topKThreshold);
+      if (!newOk || !oldOk) {
+        if (kDebugLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded Ignoring invalid profiles: "
+                    << (newOk ?  "" : profile_file) << " " << (oldOk ? "" : prev_profile_file);
+        }
+      } else if (newTopK.empty()) {
+        if (kDebugLogging && kVerboseLogging) {
+          LOG(INFO) << "DexFile_isDexOptNeeded empty profile: " << profile_file;
+        }
+        // If the new topK is empty we shouldn't optimize so we leave the changePercent at 0.0.
+      } else {
+        std::set<std::string> diff;
+        std::set_difference(newTopK.begin(), newTopK.end(), oldTopK.begin(), oldTopK.end(),
+          std::inserter(diff, diff.end()));
+        // TODO: consider using the usedPercentage instead of the plain diff count.
+        changePercent = 100.0 * static_cast<double>(diff.size()) / static_cast<double>(newTopK.size());
+        if (kDebugLogging && kVerboseLogging) {
+          std::set<std::string>::iterator end = diff.end();
+          for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) {
+            LOG(INFO) << "DexFile_isDexOptNeeded new in topK: " << *it;
+          }
+        }
       }
 
-      double significant_difference = 10.0;
-#ifdef HAVE_ANDROID_OS
-      // Switch off profiler if the dalvik.vm.profiler property has value 0.
-      char buf[PROP_VALUE_MAX];
-      property_get("dalvik.vm.profiler.dex2oat.threshold", buf, "10.0");
-      significant_difference = strtod(buf, nullptr);
-
-      // Something reasonable?
-      if (significant_difference < 1.0 || significant_difference > 90.0) {
-        significant_difference = 10.0;
-      }
-#endif      // The percentage difference that we consider as being significant.
-      double diff_hwm = 1.0 + significant_difference/10.0;
-      double diff_lwm = 1.0 - significant_difference/10.0;
-
-      if (ratio > diff_hwm || ratio < diff_lwm) {
-        need_profile = true;
-      }
-
-      if (need_profile) {
+      if (changePercent > changeThreshold) {
         if (kDebugLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded size of new profile file " << profile_file <<
-          " is significantly different from old profile file " << prev_profile_file << " (new: " <<
-          newsize << ", old: " << oldsize << ", ratio: " << ratio << ")";
+          " is significantly different from old profile file " << prev_profile_file << " (top "
+          << topKThreshold << "% samples changed in proportion of " << changePercent << "%)";
         }
         if (!defer) {
           CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 08a674f..bc8f51f 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -194,6 +194,7 @@
   profile_duration_s_ = 20;          // Seconds.
   profile_interval_us_ = 500;       // Microseconds.
   profile_backoff_coefficient_ = 2.0;
+  profile_start_immediately_ = true;
   profile_clock_source_ = kDefaultProfilerClockSource;
 
   verify_ = true;
@@ -509,6 +510,8 @@
       if (!ParseDouble(option, ':', 1.0, 10.0, &profile_backoff_coefficient_)) {
         return false;
       }
+    } else if (option == "-Xprofile-start-lazy") {
+      profile_start_immediately_ = false;
     } else if (StartsWith(option, "-implicit-checks:")) {
       std::string checks;
       if (!ParseStringAfterChar(option, ':', &checks)) {
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 416bc78..126096a 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -79,6 +79,7 @@
   uint32_t profile_duration_s_;
   uint32_t profile_interval_us_;
   double profile_backoff_coefficient_;
+  bool profile_start_immediately_;
   ProfilerClockSource profile_clock_source_;
   bool verify_;
 
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 4770a54..223fe87 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -16,6 +16,7 @@
 
 #include "profiler.h"
 
+#include <fstream>
 #include <sys/uio.h>
 #include <sys/file.h>
 
@@ -579,5 +580,101 @@
     previous_[methodname] = PreviousValue(count, size);
   }
 }
-}  // namespace art
 
+bool ProfileHelper::LoadProfileMap(ProfileMap& profileMap, const std::string& fileName) {
+  LOG(VERBOSE) << "reading profile file " << fileName;
+  struct stat st;
+  int err = stat(fileName.c_str(), &st);
+  if (err == -1) {
+    LOG(VERBOSE) << "not found";
+    return false;
+  }
+  if (st.st_size == 0) {
+    return true;  // empty profiles are ok.
+  }
+  std::ifstream in(fileName.c_str());
+  if (!in) {
+    LOG(VERBOSE) << "profile file " << fileName << " exists but can't be opened";
+    LOG(VERBOSE) << "file owner: " << st.st_uid << ":" << st.st_gid;
+    LOG(VERBOSE) << "me: " << getuid() << ":" << getgid();
+    LOG(VERBOSE) << "file permissions: " << std::oct << st.st_mode;
+    LOG(VERBOSE) << "errno: " << errno;
+    return false;
+  }
+  // The first line contains summary information.
+  std::string line;
+  std::getline(in, line);
+  if (in.eof()) {
+    return false;
+  }
+  std::vector<std::string> summary_info;
+  Split(line, '/', summary_info);
+  if (summary_info.size() != 3) {
+    // Bad summary info.  It should be count/total/bootpath.
+    return false;
+  }
+  // This is the number of hits in all methods.
+  uint32_t total_count = 0;
+  for (int i = 0 ; i < 3; ++i) {
+    total_count += atoi(summary_info[i].c_str());
+  }
+
+  // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
+  // Store the info in descending order given by the most used methods.
+  typedef std::set<std::pair<int, std::vector<std::string>>> ProfileSet;
+  ProfileSet countSet;
+  while (!in.eof()) {
+    std::getline(in, line);
+    if (in.eof()) {
+      break;
+    }
+    std::vector<std::string> info;
+    Split(line, '/', info);
+    if (info.size() != 3) {
+      // Malformed.
+      break;
+    }
+    int count = atoi(info[1].c_str());
+    countSet.insert(std::make_pair(-count, info));
+  }
+
+  uint32_t curTotalCount = 0;
+  ProfileSet::iterator end = countSet.end();
+  const ProfileData* prevData = nullptr;
+  for (ProfileSet::iterator it = countSet.begin(); it != end ; it++) {
+    const std::string& methodname = it->second[0];
+    uint32_t count = -it->first;
+    uint32_t size = atoi(it->second[2].c_str());
+    double usedPercent = (count * 100.0) / total_count;
+
+    curTotalCount += count;
+    // Methods with the same count should be part of the same top K percentage bucket.
+    double topKPercentage = (prevData != nullptr) && (prevData->GetCount() == count)
+      ? prevData->GetTopKUsedPercentage()
+      : 100 * static_cast<double>(curTotalCount) / static_cast<double>(total_count);
+
+    // Add it to the profile map.
+    ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
+    profileMap[methodname] = curData;
+    prevData = &curData;
+  }
+  return true;
+}
+
+bool ProfileHelper::LoadTopKSamples(std::set<std::string>& topKSamples, const std::string& fileName,
+                                    double topKPercentage) {
+  ProfileMap profileMap;
+  bool loadOk = LoadProfileMap(profileMap, fileName);
+  if (!loadOk) {
+    return false;
+  }
+  ProfileMap::iterator end = profileMap.end();
+  for (ProfileMap::iterator it = profileMap.begin(); it != end; it++) {
+    if (it->second.GetTopKUsedPercentage() < topKPercentage) {
+      topKSamples.insert(it->first);
+    }
+  }
+  return true;
+}
+
+}  // namespace art
diff --git a/runtime/profiler.h b/runtime/profiler.h
index b03b170..31fdc79 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -39,7 +39,6 @@
 }  // namespace mirror
 class Thread;
 
-
 //
 // This class holds all the results for all runs of the profiler.  It also
 // counts the number of null methods (where we can't determine the method) and
@@ -63,7 +62,7 @@
  private:
   uint32_t Hash(mirror::ArtMethod* method);
   static constexpr int kHashSize = 17;
-  Mutex& lock_;         // Reference to the main profiler lock - we don't need two of them.
+  Mutex& lock_;                   // Reference to the main profiler lock - we don't need two of them.
   uint32_t num_samples_;          // Total number of samples taken.
   uint32_t num_null_methods_;     // Number of samples where can don't know the method.
   uint32_t num_boot_methods_;     // Number of samples in the boot path.
@@ -189,6 +188,54 @@
   DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler);
 };
 
+// TODO: incorporate in ProfileSampleResults
+
+// Profile data.  This is generated from previous runs of the program and stored
+// in a file.  It is used to determine whether to compile a particular method or not.
+class ProfileData {
+ public:
+  ProfileData() : count_(0), method_size_(0), usedPercent_(0) {}
+  ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
+    double usedPercent, double topKUsedPercentage) :
+    method_name_(method_name), count_(count), method_size_(method_size),
+    usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) {
+    // TODO: currently method_size_ and count_ are unused.
+    UNUSED(method_size_);
+    UNUSED(count_);
+  }
+
+  bool IsAbove(double v) const { return usedPercent_ >= v; }
+  double GetUsedPercent() const { return usedPercent_; }
+  uint32_t GetCount() const { return count_; }
+  double GetTopKUsedPercentage() const { return topKUsedPercentage_; }
+
+ private:
+  std::string method_name_;    // Method name.
+  uint32_t count_;             // Number of times it has been called.
+  uint32_t method_size_;       // Size of the method on dex instructions.
+  double usedPercent_;         // Percentage of how many times this method was called.
+  double topKUsedPercentage_;  // The percentage of the group that comprise K% of the total used
+                               // methods this methods belongs to.
+};
+
+// Profile data is stored in a map, indexed by the full method name.
+typedef std::map<std::string, ProfileData> ProfileMap;
+
+class ProfileHelper {
+ private:
+  ProfileHelper();
+
+ public:
+  // Read the profile data from the given file.  Calculates the percentage for each method.
+  // Returns false if there was no profile file or it was malformed.
+  static bool LoadProfileMap(ProfileMap& profileMap, const std::string& fileName);
+
+  // Read the profile data from the given file and computes the group that comprise
+  // topKPercentage of the total used methods.
+  static bool LoadTopKSamples(std::set<std::string>& topKMethods, const std::string& fileName,
+                              double topKPercentage);
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_PROFILER_H_
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index a9072d8..8bd8dba 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -135,6 +135,12 @@
   }
 }
 
+bool InlineMethodAnalyser::IsSyntheticAccessor(MethodReference ref) {
+  const DexFile::MethodId& method_id = ref.dex_file->GetMethodId(ref.dex_method_index);
+  const char* method_name = ref.dex_file->GetMethodName(method_id);
+  return strncmp(method_name, "access$", strlen("access$")) == 0;
+}
+
 bool InlineMethodAnalyser::AnalyseReturnMethod(const DexFile::CodeItem* code_item,
                                                InlineMethod* result) {
   const Instruction* return_instruction = Instruction::At(code_item->insns_);
@@ -218,13 +224,24 @@
   uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
   DCHECK_GE(object_reg, arg_start);
   DCHECK_LT(object_reg, code_item->registers_size_);
+  uint32_t object_arg = object_reg - arg_start;
+
   DCHECK_LT(opcode == Instruction::IGET_WIDE ? dst_reg + 1 : dst_reg, code_item->registers_size_);
   if (dst_reg != return_reg) {
     return false;  // Not returning the value retrieved by IGET?
   }
 
-  if ((verifier->GetAccessFlags() & kAccStatic) != 0 || object_reg != arg_start) {
-    // TODO: Support inlining IGET on other register than "this".
+  if ((verifier->GetAccessFlags() & kAccStatic) != 0u || object_arg != 0u) {
+    // TODO: Implement inlining of IGET on non-"this" registers (needs correct stack trace for NPE).
+    // Allow synthetic accessors. We don't care about losing their stack frame in NPE.
+    if (!IsSyntheticAccessor(verifier->GetMethodReference())) {
+      return false;
+    }
+  }
+
+  // InlineIGetIPutData::object_arg is only 4 bits wide.
+  static constexpr uint16_t kMaxObjectArg = 15u;
+  if (object_arg > kMaxObjectArg) {
     return false;
   }
 
@@ -236,10 +253,10 @@
     result->opcode = kInlineOpIGet;
     result->flags = kInlineSpecial;
     data->op_variant = IGetVariant(opcode);
-    data->object_arg = object_reg - arg_start;  // Allow IGET on any register, not just "this".
-    data->src_arg = 0;
-    data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0;
-    data->reserved = 0;
+    data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0u ? 1u : 0u;
+    data->object_arg = object_arg;  // Allow IGET on any register, not just "this".
+    data->src_arg = 0u;
+    data->return_arg_plus1 = 0u;
   }
   return true;
 }
@@ -253,26 +270,45 @@
 
   const Instruction* return_instruction = instruction->Next();
   Instruction::Code return_opcode = return_instruction->Opcode();
+  uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
+  uint16_t return_arg_plus1 = 0u;
   if (return_opcode != Instruction::RETURN_VOID) {
-    // TODO: Support returning an argument.
-    // This is needed by builder classes and generated accessor setters.
-    //    builder.setX(value): iput value, this, fieldX; return-object this;
-    //    object.access$nnn(value): iput value, this, fieldX; return value;
-    // Use InlineIGetIPutData::reserved to hold the information.
-    return false;
+    if (return_opcode != Instruction::RETURN &&
+        return_opcode != Instruction::RETURN_OBJECT &&
+        return_opcode != Instruction::RETURN_WIDE) {
+      return false;
+    }
+    // Returning an argument.
+    uint32_t return_reg = return_instruction->VRegA_11x();
+    DCHECK_GE(return_reg, arg_start);
+    DCHECK_LT(return_opcode == Instruction::RETURN_WIDE ? return_reg + 1u : return_reg,
+              code_item->registers_size_);
+    return_arg_plus1 = return_reg - arg_start + 1u;
   }
 
   uint32_t src_reg = instruction->VRegA_22c();
   uint32_t object_reg = instruction->VRegB_22c();
   uint32_t field_idx = instruction->VRegC_22c();
-  uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
   DCHECK_GE(object_reg, arg_start);
   DCHECK_LT(object_reg, code_item->registers_size_);
   DCHECK_GE(src_reg, arg_start);
   DCHECK_LT(opcode == Instruction::IPUT_WIDE ? src_reg + 1 : src_reg, code_item->registers_size_);
+  uint32_t object_arg = object_reg - arg_start;
+  uint32_t src_arg = src_reg - arg_start;
 
-  if ((verifier->GetAccessFlags() & kAccStatic) != 0 || object_reg != arg_start) {
-    // TODO: Support inlining IPUT on other register than "this".
+  if ((verifier->GetAccessFlags() & kAccStatic) != 0u || object_arg != 0u) {
+    // TODO: Implement inlining of IPUT on non-"this" registers (needs correct stack trace for NPE).
+    // Allow synthetic accessors. We don't care about losing their stack frame in NPE.
+    if (!IsSyntheticAccessor(verifier->GetMethodReference())) {
+      return false;
+    }
+  }
+
+  // InlineIGetIPutData::object_arg/src_arg/return_arg_plus1 are each only 4 bits wide.
+  static constexpr uint16_t kMaxObjectArg = 15u;
+  static constexpr uint16_t kMaxSrcArg = 15u;
+  static constexpr uint16_t kMaxReturnArgPlus1 = 15u;
+  if (object_arg > kMaxObjectArg || src_arg > kMaxSrcArg || return_arg_plus1 > kMaxReturnArgPlus1) {
     return false;
   }
 
@@ -284,10 +320,10 @@
     result->opcode = kInlineOpIPut;
     result->flags = kInlineSpecial;
     data->op_variant = IPutVariant(opcode);
-    data->object_arg = object_reg - arg_start;  // Allow IPUT on any register, not just "this".
-    data->src_arg = src_reg - arg_start;
-    data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0;
-    data->reserved = 0;
+    data->method_is_static = (verifier->GetAccessFlags() & kAccStatic) != 0u ? 1u : 0u;
+    data->object_arg = object_arg;  // Allow IPUT on any register, not just "this".
+    data->src_arg = src_arg;
+    data->return_arg_plus1 = return_arg_plus1;
   }
   return true;
 }
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 8e1a408..ddee89b 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -21,6 +21,7 @@
 #include "base/mutex.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
+#include "method_reference.h"
 
 /*
  * NOTE: This code is part of the quick compiler. It lives in the runtime
@@ -98,10 +99,10 @@
   // opcode-Instruction::IPUT for IPUTs. This is because the runtime
   // doesn't know the OpSize enumeration.
   uint16_t op_variant : 3;
+  uint16_t method_is_static : 1;
   uint16_t object_arg : 4;
   uint16_t src_arg : 4;  // iput only
-  uint16_t method_is_static : 1;
-  uint16_t reserved : 4;
+  uint16_t return_arg_plus1 : 4;  // iput only, method argument to return + 1, 0 = return void.
   uint16_t field_idx;
   uint32_t is_volatile : 1;
   uint32_t field_offset : 31;
@@ -156,6 +157,9 @@
     return opcode - Instruction::IPUT;
   }
 
+  // Determines whether the method is a synthetic accessor (method name starts with "access$").
+  static bool IsSyntheticAccessor(MethodReference ref);
+
  private:
   static bool AnalyseReturnMethod(const DexFile::CodeItem* code_item, InlineMethod* result);
   static bool AnalyseConstMethod(const DexFile::CodeItem* code_item, InlineMethod* result);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index edc3b33..a19fa53 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -121,6 +121,7 @@
       profile_duration_s_(0),
       profile_interval_us_(0),
       profile_backoff_coefficient_(0),
+      profile_start_immediately_(true),
       method_trace_(false),
       method_trace_file_size_(0),
       instrumentation_(),
@@ -391,7 +392,7 @@
     if (fd >= 0) {
       close(fd);
     }
-    StartProfiler(profile_output_filename_.c_str(), "", true);
+    StartProfiler(profile_output_filename_.c_str(), "");
   }
 
   return true;
@@ -616,6 +617,7 @@
   profile_duration_s_ = options->profile_duration_s_;
   profile_interval_us_ = options->profile_interval_us_;
   profile_backoff_coefficient_ = options->profile_backoff_coefficient_;
+  profile_start_immediately_ = options->profile_start_immediately_;
   profile_ = options->profile_;
   profile_output_filename_ = options->profile_output_filename_;
   // TODO: move this to just be an Trace::Start argument
@@ -1143,10 +1145,9 @@
   method_verifiers_.erase(it);
 }
 
-void Runtime::StartProfiler(const char* appDir, const char* procName, bool startImmediately) {
+void Runtime::StartProfiler(const char* appDir, const char* procName) {
   BackgroundMethodSamplingProfiler::Start(profile_period_s_, profile_duration_s_, appDir,
-      procName, profile_interval_us_,
-      profile_backoff_coefficient_, startImmediately);
+      procName, profile_interval_us_, profile_backoff_coefficient_, profile_start_immediately_);
 }
 
 // Transaction support.
diff --git a/runtime/runtime.h b/runtime/runtime.h
index e94072c..462711e 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -374,7 +374,7 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
-  void StartProfiler(const char* appDir, const char* procName, bool startImmediately = false);
+  void StartProfiler(const char* appDir, const char* procName);
   void UpdateProfilerState(int state);
 
   // Transaction support.
@@ -542,6 +542,8 @@
   uint32_t profile_duration_s_;         // Run profile for n seconds.
   uint32_t profile_interval_us_;        // Microseconds between samples.
   double profile_backoff_coefficient_;  // Coefficient to exponential backoff.
+  bool profile_start_immediately_;      // Whether the profile should start upon app
+                                        // startup or be delayed by some random offset.
 
   bool method_trace_;
   std::string method_trace_file_;
diff --git a/test/083-compiler-regressions/expected.txt b/test/083-compiler-regressions/expected.txt
index c1e30bc..db50300 100644
--- a/test/083-compiler-regressions/expected.txt
+++ b/test/083-compiler-regressions/expected.txt
@@ -21,6 +21,10 @@
 wideGetterSetterTest passes
 wideIdentityTest passes
 returnConstantTest passes
+setterTestWithReturnArgIgnoreReturn passes
+setterTestWithReturnArgUseReturn passes
+wideSetterTestWithReturnArgIgnoreReturn passes
+wideSetterTestWithReturnArgUseReturn passes
 LVNTests.testNPE1 passes
 LVNTests.testNPE2 passes
 longDivTest passes
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index 586ff87..d32c037 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -43,6 +43,10 @@
         wideGetterSetterTest();
         wideIdentityTest();
         returnConstantTest();
+        setterTestWithReturnArgIgnoreReturn();
+        setterTestWithReturnArgUseReturn();
+        wideSetterTestWithReturnArgIgnoreReturn();
+        wideSetterTestWithReturnArgUseReturn();
         LVNTests.testNPE1();
         LVNTests.testNPE2();
         ZeroTests.longDivTest();
@@ -179,6 +183,576 @@
         }
     }
 
+    static void setterTestWithReturnArgIgnoreReturn() {
+        Foo foo = new Foo();
+        int sum = foo.getBar0();
+        sum += foo.getBar0();
+        foo.setBar1ReturnThis(sum);
+        sum += foo.getBar0();
+        foo.setBar2ReturnThis(1,sum);
+        sum += foo.getBar0();
+        foo.setBar3ReturnThis(1,2,sum);
+        sum += foo.getBar0();
+        foo.setBar4ReturnThis(1,2,3,sum);
+        sum += foo.getBar0();
+        foo.setBar5ReturnThis(1,2,3,4,sum);
+        sum += foo.getBar0();
+        foo.setBar1ReturnBarArg(sum);
+        sum += foo.getBar0();
+        foo.setBar2ReturnBarArg(1,sum);
+        sum += foo.getBar0();
+        foo.setBar3ReturnBarArg(1,2,sum);
+        sum += foo.getBar0();
+        foo.setBar4ReturnBarArg(1,2,3,sum);
+        sum += foo.getBar0();
+        foo.setBar5ReturnBarArg(1,2,3,4,sum);
+        sum += foo.getBar0();
+        foo.setBar2ReturnDummyArg1(1,sum);
+        sum += foo.getBar0();
+        foo.setBar3ReturnDummyArg2(1,2,sum);
+        sum += foo.getBar0();
+        foo.setBar4ReturnDummyArg3(1,2,3,sum);
+        sum += foo.getBar0();
+        foo.setBar5ReturnDummyArg4(1,2,3,4,sum);
+        sum += foo.getBar0();
+        Foo nullFoo = Foo.getNullFoo();
+        try {
+            nullFoo.setBar1ReturnThis(sum);
+        } catch(NullPointerException npe) {
+            sum += 404;
+        }
+        try {
+            nullFoo.setBar2ReturnThis(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 2 * 404;
+        }
+        try {
+            nullFoo.setBar3ReturnThis(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 3 * 404;
+        }
+        try {
+            nullFoo.setBar4ReturnThis(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 4 * 404;
+        }
+        try {
+            nullFoo.setBar5ReturnThis(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 5 * 404;
+        }
+        try {
+            nullFoo.setBar1ReturnBarArg(sum);
+        } catch(NullPointerException npe) {
+            sum += 6 * 404;
+        }
+        try {
+            nullFoo.setBar2ReturnBarArg(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 7 * 404;
+        }
+        try {
+            nullFoo.setBar3ReturnBarArg(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 8 * 404;
+        }
+        try {
+            nullFoo.setBar4ReturnBarArg(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 9 * 404;
+        }
+        try {
+            nullFoo.setBar5ReturnBarArg(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 10 * 404;
+        }
+        try {
+            nullFoo.setBar2ReturnDummyArg1(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 11 * 404;
+        }
+        try {
+            nullFoo.setBar3ReturnDummyArg2(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 12 * 404;
+        }
+        try {
+            nullFoo.setBar4ReturnDummyArg3(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 13 * 404;
+        }
+        try {
+            nullFoo.setBar5ReturnDummyArg4(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 14 * 404;
+        }
+        int expected = (1234 << 15) + 404 * (15 * 14 / 2);
+        if (sum == expected) {
+            System.out.println("setterTestWithReturnArgIgnoreReturn passes");
+        }
+        else {
+            System.out.println("setterTestWithReturnArgIgnoreReturn fails: " + sum +
+                               " (expecting " + expected + ")");
+        }
+    }
+
+    static void setterTestWithReturnArgUseReturn() {
+        Foo foo = new Foo();
+        int sum = foo.getBar0();
+        int sumDummy = 0;
+        sum += foo.getBar0();
+        Foo foo2 = foo.setBar1ReturnThis(sum);
+        sum += foo2.getBar0();
+        foo = foo2.setBar2ReturnThis(1,sum);
+        sum += foo.getBar0();
+        foo2 = foo.setBar3ReturnThis(1,2,sum);
+        sum += foo2.getBar0();
+        foo = foo2.setBar4ReturnThis(1,2,3,sum);
+        sum += foo.getBar0();
+        foo = foo.setBar5ReturnThis(1,2,3,4,sum);
+        sum += foo.getBar0();
+        sum += foo.setBar1ReturnBarArg(sum);
+        sum += foo.getBar0();
+        sum += foo.setBar2ReturnBarArg(1,sum);
+        sum += foo.getBar0();
+        sum += foo.setBar3ReturnBarArg(1,2,sum);
+        sum += foo.getBar0();
+        sum += foo.setBar4ReturnBarArg(1,2,3,sum);
+        sum += foo.getBar0();
+        sum += foo.setBar5ReturnBarArg(1,2,3,4,sum);
+        sum += foo.getBar0();
+        sumDummy += foo.setBar2ReturnDummyArg1(1,sum);
+        sum += foo.getBar0();
+        sumDummy += foo.setBar3ReturnDummyArg2(1,2,sum);
+        sum += foo.getBar0();
+        sumDummy += foo.setBar4ReturnDummyArg3(1,2,3,sum);
+        sum += foo.getBar0();
+        sumDummy += foo.setBar5ReturnDummyArg4(1,2,3,4,sum);
+        sum += foo.getBar0();
+        Foo nullFoo = Foo.getNullFoo();
+        try {
+            foo = nullFoo.setBar1ReturnThis(sum);
+        } catch(NullPointerException npe) {
+            sum += 404;
+        }
+        try {
+            foo = nullFoo.setBar2ReturnThis(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 2 * 404;
+        }
+        try {
+            foo = nullFoo.setBar3ReturnThis(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 3 * 404;
+        }
+        try {
+            foo = nullFoo.setBar4ReturnThis(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 4 * 404;
+        }
+        try {
+            foo = nullFoo.setBar5ReturnThis(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 5 * 404;
+        }
+        try {
+            sum += nullFoo.setBar1ReturnBarArg(sum);
+        } catch(NullPointerException npe) {
+            sum += 6 * 404;
+        }
+        try {
+            sum += nullFoo.setBar2ReturnBarArg(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 7 * 404;
+        }
+        try {
+            sum += nullFoo.setBar3ReturnBarArg(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 8 * 404;
+        }
+        try {
+            sum += nullFoo.setBar4ReturnBarArg(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 9 * 404;
+        }
+        try {
+            sum += nullFoo.setBar5ReturnBarArg(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 10 * 404;
+        }
+        try {
+            sumDummy += nullFoo.setBar2ReturnDummyArg1(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 11 * 404;
+        }
+        try {
+            sumDummy += nullFoo.setBar3ReturnDummyArg2(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 12 * 404;
+        }
+        try {
+            sumDummy += nullFoo.setBar4ReturnDummyArg3(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 13 * 404;
+        }
+        try {
+            sumDummy += nullFoo.setBar5ReturnDummyArg4(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 14 * 404;
+        }
+        int expected = (1234 << 10) * 3 * 3 * 3 * 3 * 3 + 404 * (15 * 14 / 2);
+        int expectedDummy = 5 * 4 / 2;
+        if (sum == expected && sumDummy == expectedDummy) {
+            System.out.println("setterTestWithReturnArgUseReturn passes");
+        }
+        else {
+            System.out.println("setterTestWithReturnArgUseReturn fails: " + sum +
+                               " (expecting " + expected + "), sumDummy = " + sumDummy +
+                               "(expecting " + expectedDummy + ")");
+        }
+    }
+
+    static void wideSetterTestWithReturnArgIgnoreReturn() {
+        Foo foo = new Foo();
+        long sum = foo.wideGetBar0();
+        sum += foo.wideGetBar0();
+        foo.wideSetBar1ReturnThis(sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar2ReturnThis(1,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar3ReturnThis(1,2,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar4ReturnThis(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar5ReturnThis(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar1ReturnBarArg(sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar2ReturnBarArg(1,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar3ReturnBarArg(1,2,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar4ReturnBarArg(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar5ReturnBarArg(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar2iReturnBarArg(1,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar3iReturnBarArg(1,2,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar4iReturnBarArg(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar5iReturnBarArg(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar2ReturnDummyArg1(1,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar3ReturnDummyArg2(1,2,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar4ReturnDummyArg3(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar5ReturnDummyArg4(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar2iReturnDummyArg1(1,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar3iReturnDummyArg2(1,2,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar4iReturnDummyArg3(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar5iReturnDummyArg4(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        Foo nullFoo = Foo.getNullFoo();
+        try {
+            nullFoo.wideSetBar1ReturnThis(sum);
+        } catch(NullPointerException npe) {
+            sum += 404;
+        }
+        try {
+            nullFoo.wideSetBar2ReturnThis(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 2 * 404;
+        }
+        try {
+            nullFoo.wideSetBar3ReturnThis(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 3 * 404;
+        }
+        try {
+            nullFoo.wideSetBar4ReturnThis(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 4 * 404;
+        }
+        try {
+            nullFoo.wideSetBar5ReturnThis(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 5 * 404;
+        }
+        try {
+            nullFoo.wideSetBar1ReturnBarArg(sum);
+        } catch(NullPointerException npe) {
+            sum += 6 * 404;
+        }
+        try {
+            nullFoo.wideSetBar2ReturnBarArg(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 7 * 404;
+        }
+        try {
+            nullFoo.wideSetBar3ReturnBarArg(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 8 * 404;
+        }
+        try {
+            nullFoo.wideSetBar4ReturnBarArg(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 9 * 404;
+        }
+        try {
+            nullFoo.wideSetBar5ReturnBarArg(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 10 * 404;
+        }
+        try {
+            nullFoo.wideSetBar2iReturnBarArg(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 11 * 404;
+        }
+        try {
+            nullFoo.wideSetBar3iReturnBarArg(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 12 * 404;
+        }
+        try {
+            nullFoo.wideSetBar4iReturnBarArg(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 13 * 404;
+        }
+        try {
+            nullFoo.wideSetBar5iReturnBarArg(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 14 * 404;
+        }
+        try {
+            nullFoo.wideSetBar2ReturnDummyArg1(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 15 * 404;
+        }
+        try {
+            nullFoo.wideSetBar3ReturnDummyArg2(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 16 * 404;
+        }
+        try {
+            nullFoo.wideSetBar4ReturnDummyArg3(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 17 * 404;
+        }
+        try {
+            nullFoo.wideSetBar5ReturnDummyArg4(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 18 * 404;
+        }
+        try {
+            nullFoo.wideSetBar2iReturnDummyArg1(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 19 * 404;
+        }
+        try {
+            nullFoo.wideSetBar3iReturnDummyArg2(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 20 * 404;
+        }
+        try {
+            nullFoo.wideSetBar4iReturnDummyArg3(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 21 * 404;
+        }
+        try {
+            nullFoo.wideSetBar5iReturnDummyArg4(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 22 * 404;
+        }
+        long expected = (1234L << 23) + 404 * (23 * 22 / 2);
+        if (sum == expected) {
+            System.out.println("wideSetterTestWithReturnArgIgnoreReturn passes");
+        }
+        else {
+            System.out.println("wideSetterTestWithReturnArgIgnoreReturn fails: " + sum +
+                               " (expecting " + expected + ")");
+        }
+    }
+
+    static void wideSetterTestWithReturnArgUseReturn() {
+        Foo foo = new Foo();
+        long sum = foo.wideGetBar0();
+        long sumDummy = 0;
+        sum += foo.wideGetBar0();
+        Foo foo2 = foo.wideSetBar1ReturnThis(sum);
+        sum += foo2.wideGetBar0();
+        foo = foo2.wideSetBar2ReturnThis(1,sum);
+        sum += foo.wideGetBar0();
+        foo2 = foo.wideSetBar3ReturnThis(1,2,sum);
+        sum += foo2.wideGetBar0();
+        foo = foo2.wideSetBar4ReturnThis(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        foo = foo.wideSetBar5ReturnThis(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar1ReturnBarArg(sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar2ReturnBarArg(1,sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar3ReturnBarArg(1,2,sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar4ReturnBarArg(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar5ReturnBarArg(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar2iReturnBarArg(1,sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar3iReturnBarArg(1,2,sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar4iReturnBarArg(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        sum += foo.wideSetBar5iReturnBarArg(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        sumDummy += foo.wideSetBar2ReturnDummyArg1(1,sum);
+        sum += foo.wideGetBar0();
+        sumDummy += foo.wideSetBar3ReturnDummyArg2(1,2,sum);
+        sum += foo.wideGetBar0();
+        sumDummy += foo.wideSetBar4ReturnDummyArg3(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        sumDummy += foo.wideSetBar5ReturnDummyArg4(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        sumDummy += foo.wideSetBar2iReturnDummyArg1(1,sum);
+        sum += foo.wideGetBar0();
+        sumDummy += foo.wideSetBar3iReturnDummyArg2(1,2,sum);
+        sum += foo.wideGetBar0();
+        sumDummy += foo.wideSetBar4iReturnDummyArg3(1,2,3,sum);
+        sum += foo.wideGetBar0();
+        sumDummy += foo.wideSetBar5iReturnDummyArg4(1,2,3,4,sum);
+        sum += foo.wideGetBar0();
+        Foo nullFoo = Foo.getNullFoo();
+        try {
+            foo = nullFoo.wideSetBar1ReturnThis(sum);
+        } catch(NullPointerException npe) {
+            sum += 404;
+        }
+        try {
+            foo = nullFoo.wideSetBar2ReturnThis(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 2 * 404;
+        }
+        try {
+            foo = nullFoo.wideSetBar3ReturnThis(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 3 * 404;
+        }
+        try {
+            foo = nullFoo.wideSetBar4ReturnThis(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 4 * 404;
+        }
+        try {
+            foo = nullFoo.wideSetBar5ReturnThis(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 5 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar1ReturnBarArg(sum);
+        } catch(NullPointerException npe) {
+            sum += 6 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar2ReturnBarArg(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 7 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar3ReturnBarArg(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 8 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar4ReturnBarArg(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 9 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar5ReturnBarArg(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 10 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar2iReturnBarArg(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 11 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar3iReturnBarArg(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 12 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar4iReturnBarArg(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 13 * 404;
+        }
+        try {
+            sum += nullFoo.wideSetBar5iReturnBarArg(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 14 * 404;
+        }
+        try {
+            sumDummy += nullFoo.wideSetBar2ReturnDummyArg1(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 15 * 404;
+        }
+        try {
+            sumDummy += nullFoo.wideSetBar3ReturnDummyArg2(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 16 * 404;
+        }
+        try {
+            sumDummy += nullFoo.wideSetBar4ReturnDummyArg3(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 17 * 404;
+        }
+        try {
+            sumDummy += nullFoo.wideSetBar5ReturnDummyArg4(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 18 * 404;
+        }
+        try {
+            sumDummy += nullFoo.wideSetBar2iReturnDummyArg1(1, sum);
+        } catch(NullPointerException npe) {
+            sum += 19 * 404;
+        }
+        try {
+            sumDummy += nullFoo.wideSetBar3iReturnDummyArg2(1, 2, sum);
+        } catch(NullPointerException npe) {
+            sum += 20 * 404;
+        }
+        try {
+            sumDummy += nullFoo.wideSetBar4iReturnDummyArg3(1, 2, 3, sum);
+        } catch(NullPointerException npe) {
+            sum += 21 * 404;
+        }
+        try {
+            sumDummy += nullFoo.wideSetBar5iReturnDummyArg4(1, 2, 3, 4, sum);
+        } catch(NullPointerException npe) {
+            sum += 22 * 404;
+        }
+        long expected = (1234L << 14) * 3 * 3 * 3 * 3 * 3 * 3 * 3 * 3 * 3  + 404 * (23 * 22 / 2);
+        long expectedDummy = 2 * (5 * 4 / 2);
+        if (sum == expected && sumDummy == expectedDummy) {
+            System.out.println("wideSetterTestWithReturnArgUseReturn passes");
+        }
+        else {
+            System.out.println("wideSetterTestWithReturnArgUseReturn fails: " + sum +
+                               " (expecting " + expected + "), sumDummy = " + sumDummy +
+                               "(expecting " + expectedDummy + ")");
+        }
+    }
+
     static void mulBy1Test() {
         long res;
         long j = 1;
@@ -8645,6 +9219,12 @@
     private int bar = 1234;
     private long lbar = 1234;
 
+    public static Foo getNullFoo() {
+      // Make this a bit complicated so that it's not inlined.
+      Foo foo = new Foo();
+      return (barBar(foo) != 0) ? null : foo;
+    }
+
     // Looks similar to a direct method, make sure we're null checking
     static int barBar(Foo foo) {
         return foo.bar;
@@ -8786,6 +9366,166 @@
     public long wideIdent5(int a6, int a5, int a4, int a3, int a2, long a1) {
         return a1;
     }
+    public Foo setBar1ReturnThis(int a1) {
+        bar = a1;
+        return this;
+    }
+    public Foo setBar2ReturnThis(int a1, int a2) {
+        bar = a2;
+        return this;
+    }
+    public Foo setBar3ReturnThis(int a1, int a2, int a3) {
+        bar = a3;
+        return this;
+    }
+    public Foo setBar4ReturnThis(int a1, int a2, int a3, int a4) {
+        bar = a4;
+        return this;
+    }
+    public Foo setBar5ReturnThis(int a1, int a2, int a3, int a4, int a5) {
+        bar = a5;
+        return this;
+    }
+    public Foo wideSetBar1ReturnThis(long a1) {
+        lbar = a1;
+        return this;
+    }
+    public Foo wideSetBar2ReturnThis(long a1, long a2) {
+        lbar = a2;
+        return this;
+    }
+    public Foo wideSetBar3ReturnThis(long a1, long a2, long a3) {
+        lbar = a3;
+        return this;
+    }
+    public Foo wideSetBar4ReturnThis(long a1, long a2, long a3, long a4) {
+        lbar = a4;
+        return this;
+    }
+    public Foo wideSetBar5ReturnThis(long a1, long a2, long a3, long a4, long a5) {
+        lbar = a5;
+        return this;
+    }
+    public Foo wideSetBar2iReturnThis(int a1, long a2) {
+        lbar = a2;
+        return this;
+    }
+    public Foo wideSetBar3iReturnThis(int a1, int a2, long a3) {
+        lbar = a3;
+        return this;
+    }
+    public Foo wideSetBar4iReturnThis(int a1, int a2, int a3, long a4) {
+        lbar = a4;
+        return this;
+    }
+    public Foo wideSetBar5iReturnThis(int a1, int a2, int a3, int a4, long a5) {
+        lbar = a5;
+        return this;
+    }
+    public int setBar1ReturnBarArg(int a1) {
+        bar = a1;
+        return a1;
+    }
+    public int setBar2ReturnBarArg(int a1, int a2) {
+        bar = a2;
+        return a2;
+    }
+    public int setBar3ReturnBarArg(int a1, int a2, int a3) {
+        bar = a3;
+        return a3;
+    }
+    public int setBar4ReturnBarArg(int a1, int a2, int a3, int a4) {
+        bar = a4;
+        return a4;
+    }
+    public int setBar5ReturnBarArg(int a1, int a2, int a3, int a4, int a5) {
+        bar = a5;
+        return a5;
+    }
+    public long wideSetBar1ReturnBarArg(long a1) {
+        lbar = a1;
+        return a1;
+    }
+    public long wideSetBar2ReturnBarArg(long a1, long a2) {
+        lbar = a2;
+        return a2;
+    }
+    public long wideSetBar3ReturnBarArg(long a1, long a2, long a3) {
+        lbar = a3;
+        return a3;
+    }
+    public long wideSetBar4ReturnBarArg(long a1, long a2, long a3, long a4) {
+        lbar = a4;
+        return a4;
+    }
+    public long wideSetBar5ReturnBarArg(long a1, long a2, long a3, long a4, long a5) {
+        lbar = a5;
+        return a5;
+    }
+    public long wideSetBar2iReturnBarArg(int a1, long a2) {
+        lbar = a2;
+        return a2;
+    }
+    public long wideSetBar3iReturnBarArg(int a1, int a2, long a3) {
+        lbar = a3;
+        return a3;
+    }
+    public long wideSetBar4iReturnBarArg(int a1, int a2, int a3, long a4) {
+        lbar = a4;
+        return a4;
+    }
+    public long wideSetBar5iReturnBarArg(int a1, int a2, int a3, int a4, long a5) {
+        lbar = a5;
+        return a5;
+    }
+    public int setBar2ReturnDummyArg1(int a1, int a2) {
+        bar = a2;
+        return a1;
+    }
+    public int setBar3ReturnDummyArg2(int a1, int a2, int a3) {
+        bar = a3;
+        return a2;
+    }
+    public int setBar4ReturnDummyArg3(int a1, int a2, int a3, int a4) {
+        bar = a4;
+        return a3;
+    }
+    public int setBar5ReturnDummyArg4(int a1, int a2, int a3, int a4, int a5) {
+        bar = a5;
+        return a4;
+    }
+    public long wideSetBar2ReturnDummyArg1(long a1, long a2) {
+        lbar = a2;
+        return a1;
+    }
+    public long wideSetBar3ReturnDummyArg2(long a1, long a2, long a3) {
+        lbar = a3;
+        return a2;
+    }
+    public long wideSetBar4ReturnDummyArg3(long a1, long a2, long a3, long a4) {
+        lbar = a4;
+        return a3;
+    }
+    public long wideSetBar5ReturnDummyArg4(long a1, long a2, long a3, long a4, long a5) {
+        lbar = a5;
+        return a4;
+    }
+    public int wideSetBar2iReturnDummyArg1(int a1, long a2) {
+        lbar = a2;
+        return a1;
+    }
+    public int wideSetBar3iReturnDummyArg2(int a1, int a2, long a3) {
+        lbar = a3;
+        return a2;
+    }
+    public int wideSetBar4iReturnDummyArg3(int a1, int a2, int a3, long a4) {
+        lbar = a4;
+        return a3;
+    }
+    public int wideSetBar5iReturnDummyArg4(int a1, int a2, int a3, int a4, long a5) {
+        lbar = a5;
+        return a4;
+    }
 }
 
 class LVNTests {
diff --git a/test/402-optimizing-control-flow/expected.txt b/test/402-optimizing-control-flow/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/402-optimizing-control-flow/expected.txt
diff --git a/test/402-optimizing-control-flow/info.txt b/test/402-optimizing-control-flow/info.txt
new file mode 100644
index 0000000..37d9458
--- /dev/null
+++ b/test/402-optimizing-control-flow/info.txt
@@ -0,0 +1 @@
+A set of tests for testing control flow instructions on the optimizing compiler.
diff --git a/test/402-optimizing-control-flow/src/Main.java b/test/402-optimizing-control-flow/src/Main.java
new file mode 100644
index 0000000..3339ef4
--- /dev/null
+++ b/test/402-optimizing-control-flow/src/Main.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$ is a marker for the optimizing compiler to ensure
+// it does compile the method.
+
+public class Main {
+
+  public static void expectEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected: " + expected + ", found: " + value);
+    }
+  }
+
+  public static void main(String[] args) {
+    int result = $opt$testIfEq1(42);
+    expectEquals(42, result);
+
+    result = $opt$testIfEq2(42);
+    expectEquals(7, result);
+
+    result = $opt$testWhileLoop(42);
+    expectEquals(45, result);
+
+    result = $opt$testDoWhileLoop(42);
+    expectEquals(45, result);
+
+    result = $opt$testForLoop(42);
+    expectEquals(44, result);
+  }
+
+  static int $opt$testIfEq1(int a) {
+    if (a + 1 == 43) {
+      return 42;
+    } else {
+      return 7;
+    }
+  }
+
+  static int $opt$testIfEq2(int a) {
+    if (a + 1 == 41) {
+      return 42;
+    } else {
+      return 7;
+    }
+  }
+
+  static int $opt$testWhileLoop(int a) {
+    while (a++ != 44) {}
+    return a;
+  }
+
+  static int $opt$testDoWhileLoop(int a) {
+    do {
+    } while (a++ != 44);
+    return a;
+  }
+
+  static int $opt$testForLoop(int a) {
+    for (; a != 44; a++) {}
+    return a;
+  }
+}