Merge "run-test: enable checker in dev mode"
diff --git a/compiler/Android.mk b/compiler/Android.mk
index f2f4550..1d4f5a3 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -60,7 +60,6 @@
 	dex/dex_to_dex_compiler.cc \
 	dex/bb_optimizations.cc \
 	dex/compiler_ir.cc \
-	dex/frontend.cc \
 	dex/mir_analysis.cc \
 	dex/mir_dataflow.cc \
 	dex/mir_field_info.cc \
@@ -107,6 +106,7 @@
 	optimizing/parallel_move_resolver.cc \
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/register_allocator.cc \
+	optimizing/side_effects_analysis.cc \
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
 	optimizing/ssa_phi_elimination.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index aac2644..7685200 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -18,8 +18,10 @@
 #define ART_COMPILER_DEX_BB_OPTIMIZATIONS_H_
 
 #include "base/casts.h"
-#include "compiler_internals.h"
+#include "compiler_ir.h"
+#include "dex_flags.h"
 #include "pass_me.h"
+#include "mir_graph.h"
 
 namespace art {
 
diff --git a/compiler/dex/compiler_ir.cc b/compiler/dex/compiler_ir.cc
index a2b3fe4..0cfa966 100644
--- a/compiler/dex/compiler_ir.cc
+++ b/compiler/dex/compiler_ir.cc
@@ -16,16 +16,19 @@
 
 #include "compiler_ir.h"
 
+#include "arch/instruction_set_features.h"
 #include "base/dumpable.h"
 #include "backend.h"
-#include "frontend.h"
+#include "dex_flags.h"
+#include "driver/compiler_driver.h"
 #include "mir_graph.h"
 
 namespace art {
 
-CompilationUnit::CompilationUnit(ArenaPool* pool)
-  : compiler_driver(nullptr),
-    class_linker(nullptr),
+CompilationUnit::CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDriver* driver,
+                                 ClassLinker* linker)
+  : compiler_driver(driver),
+    class_linker(linker),
     dex_file(nullptr),
     class_loader(nullptr),
     class_def_idx(0),
@@ -36,10 +39,8 @@
     disable_opt(0),
     enable_debug(0),
     verbose(false),
-    compiler(nullptr),
-    instruction_set(kNone),
-    target64(false),
-    compiler_flip_match(false),
+    instruction_set(isa),
+    target64(Is64BitInstructionSet(isa)),
     arena(pool),
     arena_stack(pool),
     mir_graph(nullptr),
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 34585c1..e7182a9 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -17,31 +17,25 @@
 #ifndef ART_COMPILER_DEX_COMPILER_IR_H_
 #define ART_COMPILER_DEX_COMPILER_IR_H_
 
+#include "jni.h"
 #include <string>
 #include <vector>
 
-#include "compiler_enums.h"
-#include "driver/compiler_driver.h"
-#include "utils/scoped_arena_allocator.h"
 #include "base/timing_logger.h"
+#include "invoke_type.h"
+#include "safe_map.h"
 #include "utils/arena_allocator.h"
+#include "utils/scoped_arena_allocator.h"
 
 namespace art {
 
 class Backend;
 class ClassLinker;
+class CompilerDriver;
 class MIRGraph;
 
-/*
- * TODO: refactoring pass to move these (and other) typedefs towards usage style of runtime to
- * add type safety (see runtime/offsets.h).
- */
-typedef uint32_t DexOffset;          // Dex offset in code units.
-typedef uint16_t NarrowDexOffset;    // For use in structs, Dex offsets range from 0 .. 0xffff.
-typedef uint32_t CodeOffset;         // Native code offset in bytes.
-
 struct CompilationUnit {
-  explicit CompilationUnit(ArenaPool* pool);
+  CompilationUnit(ArenaPool* pool, InstructionSet isa, CompilerDriver* driver, ClassLinker* linker);
   ~CompilationUnit();
 
   void StartTimingSplit(const char* label);
@@ -52,30 +46,20 @@
    * Fields needed/generated by common frontend and generally used throughout
    * the compiler.
   */
-  CompilerDriver* compiler_driver;
-  ClassLinker* class_linker;           // Linker to resolve fields and methods.
-  const DexFile* dex_file;             // DexFile containing the method being compiled.
-  jobject class_loader;                // compiling method's class loader.
-  uint16_t class_def_idx;              // compiling method's defining class definition index.
-  uint32_t method_idx;                 // compiling method's index into method_ids of DexFile.
-  uint32_t access_flags;               // compiling method's access flags.
-  InvokeType invoke_type;              // compiling method's invocation type.
-  const char* shorty;                  // compiling method's shorty.
-  uint32_t disable_opt;                // opt_control_vector flags.
-  uint32_t enable_debug;               // debugControlVector flags.
+  CompilerDriver* const compiler_driver;
+  ClassLinker* const class_linker;        // Linker to resolve fields and methods.
+  const DexFile* dex_file;                // DexFile containing the method being compiled.
+  jobject class_loader;                   // compiling method's class loader.
+  uint16_t class_def_idx;                 // compiling method's defining class definition index.
+  uint32_t method_idx;                    // compiling method's index into method_ids of DexFile.
+  uint32_t access_flags;                  // compiling method's access flags.
+  InvokeType invoke_type;                 // compiling method's invocation type.
+  const char* shorty;                     // compiling method's shorty.
+  uint32_t disable_opt;                   // OptControlVector flags.
+  uint32_t enable_debug;                  // DebugControlVector flags.
   bool verbose;
-  const Compiler* compiler;
-  InstructionSet instruction_set;
-  bool target64;
-
-  const InstructionSetFeatures* GetInstructionSetFeatures() {
-    return compiler_driver->GetInstructionSetFeatures();
-  }
-
-  // If non-empty, apply optimizer/debug flags only to matching methods.
-  std::string compiler_method_match;
-  // Flips sense of compiler_method_match - apply flags if doesn't match.
-  bool compiler_flip_match;
+  const InstructionSet instruction_set;
+  const bool target64;
 
   // TODO: move memory management to mir_graph, or just switch to using standard containers.
   ArenaAllocator arena;
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 9f17a3e..2a06cec 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_
 #define ART_COMPILER_DEX_DATAFLOW_ITERATOR_H_
 
-#include "compiler_ir.h"
+#include "base/logging.h"
 #include "mir_graph.h"
 
 namespace art {
diff --git a/compiler/dex/dex_flags.h b/compiler/dex/dex_flags.h
new file mode 100644
index 0000000..eaf272b
--- /dev/null
+++ b/compiler/dex/dex_flags.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_DEX_FLAGS_H_
+#define ART_COMPILER_DEX_DEX_FLAGS_H_
+
+namespace art {
+
+// Bit positions (1 << kFoo) in disable_opt: suppress optimization if corresponding bit set.
+enum OptControlVector {
+  kLoadStoreElimination = 0,
+  kLoadHoisting,
+  kSuppressLoads,
+  kNullCheckElimination,
+  kClassInitCheckElimination,
+  kGlobalValueNumbering,
+  kLocalValueNumbering,
+  kPromoteRegs,
+  kTrackLiveTemps,
+  kSafeOptimizations,
+  kBBOpt,
+  kSuspendCheckElimination,
+  kMatch,
+  kPromoteCompilerTemps,
+  kBranchFusing,
+  kSuppressExceptionEdges,
+  kSuppressMethodInlining,
+};
+
+// Bit positions (1 << kDebugFoo) in enable_debug: force code generation paths for testing.
+enum DebugControlVector {
+  kDebugVerbose,
+  kDebugDumpCFG,
+  kDebugSlowFieldPath,
+  kDebugSlowInvokePath,
+  kDebugSlowStringPath,
+  kDebugSlowTypePath,
+  kDebugSlowestFieldPath,
+  kDebugSlowestStringPath,
+  kDebugExerciseResolveMethod,
+  kDebugVerifyDataflow,
+  kDebugShowMemoryUsage,
+  kDebugShowNops,
+  kDebugCountOpcodes,
+  kDebugDumpCheckStats,
+  kDebugShowSummaryMemoryUsage,
+  kDebugShowFilterStats,
+  kDebugTimings,
+  kDebugCodegenDump
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_DEX_FLAGS_H_
diff --git a/compiler/dex/compiler_internals.h b/compiler/dex/dex_types.h
similarity index 63%
rename from compiler/dex/compiler_internals.h
rename to compiler/dex/dex_types.h
index 2019f0b..f485c1c 100644
--- a/compiler/dex/compiler_internals.h
+++ b/compiler/dex/dex_types.h
@@ -14,18 +14,16 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_DEX_COMPILER_INTERNALS_H_
-#define ART_COMPILER_DEX_COMPILER_INTERNALS_H_
+#ifndef ART_COMPILER_DEX_DEX_TYPES_H_
+#define ART_COMPILER_DEX_DEX_TYPES_H_
 
-#include <assert.h>
-#include <stdbool.h>
 #include <stdint.h>
-#include <stdio.h>
 
-#include "base/logging.h"
-#include "mir_graph.h"
-#include "compiler_ir.h"
-#include "frontend.h"  // Debug flags.
-#include "utils.h"
+namespace art {
+
+typedef uint32_t DexOffset;          // Dex offset in code units.
+typedef uint16_t NarrowDexOffset;    // For use in structs, Dex offsets range from 0 .. 0xffff.
 
-#endif  // ART_COMPILER_DEX_COMPILER_INTERNALS_H_
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_DEX_TYPES_H_
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
deleted file mode 100644
index dd8b4c8..0000000
--- a/compiler/dex/frontend.cc
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "frontend.h"
-
-#include <cstdint>
-
-#include "backend.h"
-#include "base/dumpable.h"
-#include "compiler.h"
-#include "compiler_internals.h"
-#include "driver/compiler_driver.h"
-#include "driver/compiler_options.h"
-#include "mirror/object.h"
-#include "pass_driver_me_opts.h"
-#include "runtime.h"
-#include "base/logging.h"
-#include "base/timing_logger.h"
-#include "driver/compiler_options.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-
-namespace art {
-
-/* Default optimizer/debug setting for the compiler. */
-static uint32_t kCompilerOptimizerDisableFlags = 0 |  // Disable specific optimizations
-  // (1 << kLoadStoreElimination) |
-  // (1 << kLoadHoisting) |
-  // (1 << kSuppressLoads) |
-  // (1 << kNullCheckElimination) |
-  // (1 << kClassInitCheckElimination) |
-  // (1 << kGlobalValueNumbering) |
-  // (1 << kLocalValueNumbering) |
-  // (1 << kPromoteRegs) |
-  // (1 << kTrackLiveTemps) |
-  // (1 << kSafeOptimizations) |
-  // (1 << kBBOpt) |
-  // (1 << kSuspendCheckElimination) |
-  // (1 << kMatch) |
-  // (1 << kPromoteCompilerTemps) |
-  // (1 << kSuppressExceptionEdges) |
-  // (1 << kSuppressMethodInlining) |
-  0;
-
-static uint32_t kCompilerDebugFlags = 0 |     // Enable debug/testing modes
-  // (1 << kDebugDisplayMissingTargets) |
-  // (1 << kDebugVerbose) |
-  // (1 << kDebugDumpCFG) |
-  // (1 << kDebugSlowFieldPath) |
-  // (1 << kDebugSlowInvokePath) |
-  // (1 << kDebugSlowStringPath) |
-  // (1 << kDebugSlowestFieldPath) |
-  // (1 << kDebugSlowestStringPath) |
-  // (1 << kDebugExerciseResolveMethod) |
-  // (1 << kDebugVerifyDataflow) |
-  // (1 << kDebugShowMemoryUsage) |
-  // (1 << kDebugShowNops) |
-  // (1 << kDebugCountOpcodes) |
-  // (1 << kDebugDumpCheckStats) |
-  // (1 << kDebugShowSummaryMemoryUsage) |
-  // (1 << kDebugShowFilterStats) |
-  // (1 << kDebugTimings) |
-  // (1 << kDebugCodegenDump) |
-  0;
-
-static CompiledMethod* CompileMethod(CompilerDriver& driver,
-                                     const Compiler* compiler,
-                                     const DexFile::CodeItem* code_item,
-                                     uint32_t access_flags, InvokeType invoke_type,
-                                     uint16_t class_def_idx, uint32_t method_idx,
-                                     jobject class_loader, const DexFile& dex_file,
-                                     void* llvm_compilation_unit) {
-  VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
-  if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
-    return nullptr;
-  }
-
-  DCHECK(driver.GetCompilerOptions().IsCompilationEnabled());
-
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  CompilationUnit cu(driver.GetArenaPool());
-
-  cu.compiler_driver = &driver;
-  cu.class_linker = class_linker;
-  cu.instruction_set = driver.GetInstructionSet();
-  if (cu.instruction_set == kArm) {
-    cu.instruction_set = kThumb2;
-  }
-  cu.target64 = Is64BitInstructionSet(cu.instruction_set);
-  cu.compiler = compiler;
-  // TODO: Mips64 is not yet implemented.
-  CHECK((cu.instruction_set == kThumb2) ||
-        (cu.instruction_set == kArm64) ||
-        (cu.instruction_set == kX86) ||
-        (cu.instruction_set == kX86_64) ||
-        (cu.instruction_set == kMips));
-
-  // TODO: set this from command line
-  cu.compiler_flip_match = false;
-  bool use_match = !cu.compiler_method_match.empty();
-  bool match = use_match && (cu.compiler_flip_match ^
-      (PrettyMethod(method_idx, dex_file).find(cu.compiler_method_match) != std::string::npos));
-  if (!use_match || match) {
-    cu.disable_opt = kCompilerOptimizerDisableFlags;
-    cu.enable_debug = kCompilerDebugFlags;
-    cu.verbose = VLOG_IS_ON(compiler) ||
-        (cu.enable_debug & (1 << kDebugVerbose));
-  }
-
-  if (driver.GetCompilerOptions().HasVerboseMethods()) {
-    cu.verbose = driver.GetCompilerOptions().IsVerboseMethod(PrettyMethod(method_idx, dex_file));
-  }
-
-  if (cu.verbose) {
-    cu.enable_debug |= (1 << kDebugCodegenDump);
-  }
-
-  /*
-   * TODO: rework handling of optimization and debug flags.  Should we split out
-   * MIR and backend flags?  Need command-line setting as well.
-   */
-
-  compiler->InitCompilationUnit(cu);
-
-  cu.StartTimingSplit("BuildMIRGraph");
-  cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
-
-  /*
-   * After creation of the MIR graph, also create the code generator.
-   * The reason we do this is that optimizations on the MIR graph may need to get information
-   * that is only available if a CG exists.
-   */
-  cu.cg.reset(compiler->GetCodeGenerator(&cu, llvm_compilation_unit));
-
-  /* Gathering opcode stats? */
-  if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
-    cu.mir_graph->EnableOpcodeCounting();
-  }
-
-  /* Build the raw MIR graph */
-  cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
-                              class_loader, dex_file);
-
-  if (!compiler->CanCompileMethod(method_idx, dex_file, &cu)) {
-    VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : "
-        << PrettyMethod(method_idx, dex_file);
-    return nullptr;
-  }
-
-  cu.NewTimingSplit("MIROpt:CheckFilters");
-  std::string skip_message;
-  if (cu.mir_graph->SkipCompilation(&skip_message)) {
-    VLOG(compiler) << cu.instruction_set << ": Skipping method : "
-                   << PrettyMethod(method_idx, dex_file) << "  Reason = " << skip_message;
-    return nullptr;
-  }
-
-  /* Create the pass driver and launch it */
-  PassDriverMEOpts pass_driver(&cu);
-  pass_driver.Launch();
-
-  /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */
-  if (cu.compiler_driver->ProfilePresent()
-      && !cu.mir_graph->MethodIsLeaf()
-      && cu.mir_graph->SkipCompilationByName(PrettyMethod(method_idx, dex_file))) {
-    return nullptr;
-  }
-
-  if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
-    cu.mir_graph->DumpCheckStats();
-  }
-
-  if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
-    cu.mir_graph->ShowOpcodeStats();
-  }
-
-  /* Reassociate sreg names with original Dalvik vreg names. */
-  cu.mir_graph->RemapRegLocations();
-
-  /* Free Arenas from the cu.arena_stack for reuse by the cu.arena in the codegen. */
-  if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
-    if (cu.arena_stack.PeakBytesAllocated() > 1 * 1024 * 1024) {
-      MemStats stack_stats(cu.arena_stack.GetPeakStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(stack_stats);
-    }
-  }
-  cu.arena_stack.Reset();
-
-  CompiledMethod* result = NULL;
-
-  if (cu.mir_graph->PuntToInterpreter()) {
-    VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: "
-        << PrettyMethod(method_idx, dex_file);
-    return nullptr;
-  }
-
-  cu.cg->Materialize();
-
-  cu.NewTimingSplit("Dedupe");  /* deduping takes up the vast majority of time in GetCompiledMethod(). */
-  result = cu.cg->GetCompiledMethod();
-  cu.NewTimingSplit("Cleanup");
-
-  if (result) {
-    VLOG(compiler) << cu.instruction_set << ": Compiled " << PrettyMethod(method_idx, dex_file);
-  } else {
-    VLOG(compiler) << cu.instruction_set << ": Deferred " << PrettyMethod(method_idx, dex_file);
-  }
-
-  if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
-    if (cu.arena.BytesAllocated() > (1 * 1024 *1024)) {
-      MemStats mem_stats(cu.arena.GetMemStats());
-      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
-    }
-  }
-
-  if (cu.enable_debug & (1 << kDebugShowSummaryMemoryUsage)) {
-    LOG(INFO) << "MEMINFO " << cu.arena.BytesAllocated() << " " << cu.mir_graph->GetNumBlocks()
-              << " " << PrettyMethod(method_idx, dex_file);
-  }
-
-  cu.EndTiming();
-  driver.GetTimingsLogger()->AddLogger(cu.timings);
-  return result;
-}
-
-CompiledMethod* CompileOneMethod(CompilerDriver* driver,
-                                 const Compiler* compiler,
-                                 const DexFile::CodeItem* code_item,
-                                 uint32_t access_flags,
-                                 InvokeType invoke_type,
-                                 uint16_t class_def_idx,
-                                 uint32_t method_idx,
-                                 jobject class_loader,
-                                 const DexFile& dex_file,
-                                 void* compilation_unit) {
-  return CompileMethod(*driver, compiler, code_item, access_flags, invoke_type, class_def_idx,
-                       method_idx, class_loader, dex_file, compilation_unit);
-}
-
-}  // namespace art
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
deleted file mode 100644
index 4266535..0000000
--- a/compiler/dex/frontend.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_FRONTEND_H_
-#define ART_COMPILER_DEX_FRONTEND_H_
-
-#include "dex_file.h"
-#include "invoke_type.h"
-
-namespace art {
-
-class CompiledMethod;
-class Compiler;
-class CompilerDriver;
-
-/*
- * Assembly is an iterative process, and usually terminates within
- * two or three passes.  This should be high enough to handle bizarre
- * cases, but detect an infinite loop bug.
- */
-#define MAX_ASSEMBLER_RETRIES 50
-
-// Suppress optimization if corresponding bit set.
-enum opt_control_vector {
-  kLoadStoreElimination = 0,
-  kLoadHoisting,
-  kSuppressLoads,
-  kNullCheckElimination,
-  kClassInitCheckElimination,
-  kGlobalValueNumbering,
-  kLocalValueNumbering,
-  kPromoteRegs,
-  kTrackLiveTemps,
-  kSafeOptimizations,
-  kBBOpt,
-  kSuspendCheckElimination,
-  kMatch,
-  kPromoteCompilerTemps,
-  kBranchFusing,
-  kSuppressExceptionEdges,
-  kSuppressMethodInlining,
-};
-
-// Force code generation paths for testing.
-enum debugControlVector {
-  kDebugVerbose,
-  kDebugDumpCFG,
-  kDebugSlowFieldPath,
-  kDebugSlowInvokePath,
-  kDebugSlowStringPath,
-  kDebugSlowTypePath,
-  kDebugSlowestFieldPath,
-  kDebugSlowestStringPath,
-  kDebugExerciseResolveMethod,
-  kDebugVerifyDataflow,
-  kDebugShowMemoryUsage,
-  kDebugShowNops,
-  kDebugCountOpcodes,
-  kDebugDumpCheckStats,
-  kDebugShowSummaryMemoryUsage,
-  kDebugShowFilterStats,
-  kDebugTimings,
-  kDebugCodegenDump
-};
-
-CompiledMethod* CompileOneMethod(CompilerDriver* driver,
-                                 const Compiler* compiler,
-                                 const DexFile::CodeItem* code_item,
-                                 uint32_t access_flags,
-                                 InvokeType invoke_type,
-                                 uint16_t class_def_idx,
-                                 uint32_t method_idx,
-                                 jobject class_loader,
-                                 const DexFile& dex_file,
-                                 void* compilation_unit);
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_FRONTEND_H_
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index 578952b..a8fd812 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -15,6 +15,8 @@
  */
 
 #include "global_value_numbering.h"
+
+#include "base/stl_util.h"
 #include "local_value_numbering.h"
 
 namespace art {
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
index d72144a..cdafc68 100644
--- a/compiler/dex/global_value_numbering.h
+++ b/compiler/dex/global_value_numbering.h
@@ -17,8 +17,11 @@
 #ifndef ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
 #define ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
 
+#include "base/logging.h"
 #include "base/macros.h"
-#include "compiler_internals.h"
+#include "mir_graph.h"
+#include "compiler_ir.h"
+#include "dex_flags.h"
 #include "utils/arena_object.h"
 
 namespace art {
@@ -252,7 +255,7 @@
 };
 std::ostream& operator<<(std::ostream& os, const GlobalValueNumbering::Mode& rhs);
 
-inline  void GlobalValueNumbering::StartPostProcessing() {
+inline void GlobalValueNumbering::StartPostProcessing() {
   DCHECK(Good());
   DCHECK_EQ(mode_, kModeGvn);
   mode_ = kModeGvnPostProcessing;
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index 18e3469..f71b7ae 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "compiler_internals.h"
+#include "base/logging.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
 #include "dex/mir_field_info.h"
@@ -337,7 +337,7 @@
 
   GlobalValueNumberingTest()
       : pool_(),
-        cu_(&pool_),
+        cu_(&pool_, kRuntimeISA, nullptr, nullptr),
         mir_count_(0u),
         mirs_(nullptr),
         ssa_reps_(),
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index 9b89c95..aef8c6d 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -19,7 +19,7 @@
 
 #include <memory>
 
-#include "compiler_internals.h"
+#include "base/logging.h"
 #include "global_value_numbering.h"
 #include "utils/arena_object.h"
 #include "utils/dex_instruction_utils.h"
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index 0fcb584..c894892 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "compiler_internals.h"
 #include "dex/mir_field_info.h"
 #include "global_value_numbering.h"
 #include "local_value_numbering.h"
@@ -202,7 +201,7 @@
 
   LocalValueNumberingTest()
       : pool_(),
-        cu_(&pool_),
+        cu_(&pool_, kRuntimeISA, nullptr, nullptr),
         mir_count_(0u),
         mirs_(nullptr),
         ssa_reps_(),
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 0f0846c..473196b 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -17,15 +17,18 @@
 #include <algorithm>
 #include <memory>
 
-#include "compiler_internals.h"
+#include "base/logging.h"
 #include "dataflow_iterator-inl.h"
-#include "dex_instruction.h"
+#include "compiler_ir.h"
+#include "dex_flags.h"
 #include "dex_instruction-inl.h"
 #include "dex/mir_field_info.h"
 #include "dex/verified_method.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "driver/dex_compilation_unit.h"
 #include "utils/scoped_arena_containers.h"
 
 namespace art {
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 6704112..f09d1ae 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "compiler_internals.h"
 #include "local_value_numbering.h"
 #include "dataflow_iterator-inl.h"
 
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index abd3482..db4141c 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -21,13 +21,15 @@
 #include <unistd.h>
 
 #include "base/bit_vector-inl.h"
+#include "base/logging.h"
 #include "base/stl_util.h"
-#include "compiler_internals.h"
+#include "base/stringprintf.h"
+#include "compiler_ir.h"
 #include "dex_file-inl.h"
+#include "dex_flags.h"
 #include "dex_instruction-inl.h"
-#include "dex/global_value_numbering.h"
-#include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/quick/dex_file_method_inliner.h"
+#include "driver/compiler_driver.h"
+#include "driver/dex_compilation_unit.h"
 #include "leb128.h"
 #include "pass_driver_me_post_opt.h"
 #include "stack.h"
@@ -2521,4 +2523,9 @@
       return 0;
   }
 }
+
+const uint16_t* MIRGraph::GetInsns(int m_unit_index) const {
+  return m_units_[m_unit_index]->GetCodeItem()->insns_;
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index af97f51..5def191 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -19,10 +19,9 @@
 
 #include <stdint.h>
 
-#include "compiler_ir.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
-#include "driver/dex_compilation_unit.h"
+#include "dex_types.h"
 #include "invoke_type.h"
 #include "mir_field_info.h"
 #include "mir_method_info.h"
@@ -34,9 +33,14 @@
 
 namespace art {
 
+struct CompilationUnit;
+class DexCompilationUnit;
 class DexFileMethodInliner;
 class GlobalValueNumbering;
 
+// Forward declaration.
+class MIRGraph;
+
 enum DataFlowAttributePos {
   kUA = 0,
   kUB,
@@ -139,7 +143,6 @@
 // Minimum field size to contain Dalvik v_reg number.
 #define VREG_NUM_WIDTH 16
 
-#define INVALID_SREG (-1)
 #define INVALID_VREG (0xFFFFU)
 #define INVALID_OFFSET (0xDEADF00FU)
 
@@ -553,9 +556,7 @@
    * This is guaranteed to contain index 0 which is the base method being compiled.
    * @return Returns the raw instruction pointer.
    */
-  const uint16_t* GetInsns(int m_unit_index) const {
-    return m_units_[m_unit_index]->GetCodeItem()->insns_;
-  }
+  const uint16_t* GetInsns(int m_unit_index) const;
 
   /**
    * @brief Used to obtain the raw data table.
diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc
index 8a7e71f..b3ad040 100644
--- a/compiler/dex/mir_graph_test.cc
+++ b/compiler/dex/mir_graph_test.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "compiler_ir.h"
 #include "mir_graph.h"
 #include "gtest/gtest.h"
 
@@ -148,7 +149,7 @@
 
   TopologicalSortOrderTest()
       : pool_(),
-        cu_(&pool_) {
+        cu_(&pool_, kRuntimeISA, nullptr, nullptr) {
     cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
   }
 
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index ebbd28f..05414b3 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -15,8 +15,11 @@
  */
 
 #include "base/bit_vector-inl.h"
-#include "compiler_internals.h"
+#include "base/logging.h"
 #include "dataflow_iterator-inl.h"
+#include "dex_flags.h"
+#include "driver/compiler_driver.h"
+#include "driver/dex_compilation_unit.h"
 #include "global_value_numbering.h"
 #include "local_value_numbering.h"
 #include "mir_field_info.h"
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 362c7fd..199bc27 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -16,9 +16,10 @@
 
 #include <vector>
 
-#include "compiler_internals.h"
+#include "base/logging.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
+#include "dex/compiler_ir.h"
 #include "dex/mir_field_info.h"
 #include "gtest/gtest.h"
 
@@ -326,7 +327,7 @@
 
   MirOptimizationTest()
       : pool_(),
-        cu_(&pool_),
+        cu_(&pool_, kRuntimeISA, nullptr, nullptr),
         mir_count_(0u),
         mirs_(nullptr),
         code_item_(nullptr) {
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index d3e54a0..0def056 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -19,14 +19,12 @@
 
 #include <string>
 
-#include "compiler_ir.h"
 #include "base/logging.h"
 
 namespace art {
 
 // Forward declarations.
 class BasicBlock;
-struct CompilationUnit;
 class Pass;
 
 // Empty Pass Data Class, can be extended by any pass extending the base Pass class.
@@ -89,21 +87,6 @@
     UNREACHABLE();
   }
 
-  static void BasePrintMessage(CompilationUnit* c_unit, const char* pass_name, const char* message, ...) {
-    // Check if we want to log something or not.
-    if (c_unit->print_pass) {
-      // Stringify the message.
-      va_list args;
-      va_start(args, message);
-      std::string stringified_message;
-      StringAppendV(&stringified_message, message, args);
-      va_end(args);
-
-      // Log the message and ensure to include pass name.
-      LOG(INFO) << pass_name << ": " << stringified_message;
-    }
-  }
-
  protected:
   /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */
   const char* const pass_name_;
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
index 8a3eae1..632df38 100644
--- a/compiler/dex/pass_driver.h
+++ b/compiler/dex/pass_driver.h
@@ -18,13 +18,13 @@
 #define ART_COMPILER_DEX_PASS_DRIVER_H_
 
 #include <vector>
+
+#include "base/logging.h"
 #include "pass.h"
 #include "safe_map.h"
 
-// Forward Declarations.
-class Pass;
-class PassDriver;
 namespace art {
+
 /**
  * @brief Helper function to create a single instance of a given Pass and can be shared across
  * the threads.
diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h
index 7bfaf82..ff7c4a4 100644
--- a/compiler/dex/pass_driver_me.h
+++ b/compiler/dex/pass_driver_me.h
@@ -22,6 +22,7 @@
 #include "bb_optimizations.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
+#include "dex_flags.h"
 #include "pass_driver.h"
 #include "pass_me.h"
 
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index 6bb94c3..c2b6b91 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -14,12 +14,13 @@
  * limitations under the License.
  */
 
+#include "pass_driver_me_opts.h"
+
+#include "base/logging.h"
 #include "base/macros.h"
 #include "bb_optimizations.h"
-#include "compiler_internals.h"
 #include "dataflow_iterator.h"
 #include "dataflow_iterator-inl.h"
-#include "pass_driver_me_opts.h"
 #include "post_opt_passes.h"
 
 namespace art {
diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc
index 5e2140d..5f0c65c 100644
--- a/compiler/dex/pass_driver_me_post_opt.cc
+++ b/compiler/dex/pass_driver_me_post_opt.cc
@@ -16,7 +16,6 @@
 
 #include "base/macros.h"
 #include "post_opt_passes.h"
-#include "compiler_internals.h"
 #include "pass_driver_me_post_opt.h"
 
 namespace art {
diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h
index d0b450a..73e49ec 100644
--- a/compiler/dex/pass_me.h
+++ b/compiler/dex/pass_me.h
@@ -18,14 +18,16 @@
 #define ART_COMPILER_DEX_PASS_ME_H_
 
 #include <string>
+
+#include "base/logging.h"
 #include "pass.h"
+#include "safe_map.h"
 
 namespace art {
 
 // Forward declarations.
 class BasicBlock;
 struct CompilationUnit;
-class Pass;
 
 /**
  * @brief OptimizationFlag is an enumeration to perform certain tasks for a given pass.
@@ -118,17 +120,6 @@
     }
   }
 
-  /**
-   * @brief Used to obtain the option for a pass.
-   * @details Will return the overridden option if it exists or default one.
-   * @param option_name The name of option whose setting to look for.
-   * @param c_unit The compilation unit currently being handled.
-   * @return Returns the setting for the pass option.
-   */
-  int GetPassOption(const char* option_name, CompilationUnit* c_unit) const {
-    return GetPassOption(option_name, c_unit->overridden_pass_options);
-  }
-
   const char* GetDumpCFGFolder() const {
     return dump_cfg_folder_;
   }
diff --git a/compiler/dex/post_opt_passes.cc b/compiler/dex/post_opt_passes.cc
index 92078b4..9262440 100644
--- a/compiler/dex/post_opt_passes.cc
+++ b/compiler/dex/post_opt_passes.cc
@@ -15,7 +15,7 @@
  */
 
 #include "post_opt_passes.h"
-#include "dataflow_iterator.h"
+
 #include "dataflow_iterator-inl.h"
 
 namespace art {
diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h
index 55ae874..a3dbc5a 100644
--- a/compiler/dex/post_opt_passes.h
+++ b/compiler/dex/post_opt_passes.h
@@ -17,8 +17,11 @@
 #ifndef ART_COMPILER_DEX_POST_OPT_PASSES_H_
 #define ART_COMPILER_DEX_POST_OPT_PASSES_H_
 
-#include "dex/quick/mir_to_lir.h"
-#include "compiler_internals.h"
+#include "base/casts.h"
+#include "base/logging.h"
+#include "compiler_ir.h"
+#include "dex_flags.h"
+#include "mir_graph.h"
 #include "pass_me.h"
 
 namespace art {
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 5d09ae1..9717459 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -17,7 +17,9 @@
 #ifndef ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_
 #define ART_COMPILER_DEX_QUICK_ARM_ARM_LIR_H_
 
-#include "dex/compiler_internals.h"
+#include "dex/compiler_enums.h"
+#include "dex/reg_location.h"
+#include "dex/reg_storage.h"
 
 namespace art {
 
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 65fb3cd..3d64833 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -14,8 +14,11 @@
  * limitations under the License.
  */
 
-#include "arm_lir.h"
 #include "codegen_arm.h"
+
+#include "arm_lir.h"
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
 #include "dex/quick/mir_to_lir-inl.h"
 
 namespace art {
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 0713b7a..f15b727 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -16,9 +16,13 @@
 
 /* This file contains codegen for the Thumb2 ISA. */
 
-#include "arm_lir.h"
 #include "codegen_arm.h"
+
+#include "arm_lir.h"
+#include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 #include "gc/accounting/card_table.h"
 #include "mirror/art_method.h"
 #include "mirror/object_array-inl.h"
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 6ac1849..025e69f 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -18,12 +18,14 @@
 #define ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
 
 #include "arm_lir.h"
-#include "dex/compiler_internals.h"
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir.h"
 #include "utils/arena_containers.h"
 
 namespace art {
 
+struct CompilationUnit;
+
 class ArmMir2Lir FINAL : public Mir2Lir {
  protected:
   // Inherited class for ARM backend.
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 2b2592d..eb1383f 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -14,8 +14,11 @@
  * limitations under the License.
  */
 
-#include "arm_lir.h"
 #include "codegen_arm.h"
+
+#include "arm_lir.h"
+#include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 
 namespace art {
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 7970bd8..3159886 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -16,11 +16,16 @@
 
 /* This file contains codegen for the Thumb2 ISA. */
 
+#include "codegen_arm.h"
+
 #include "arch/instruction_set_features.h"
 #include "arm_lir.h"
-#include "codegen_arm.h"
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
+#include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array-inl.h"
 #include "utils.h"
@@ -1140,7 +1145,7 @@
 }
 
 bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->GetInstructionSetFeatures()->IsSmp()) {
+  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
     return false;
   }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 52a516c..5538d79 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -21,7 +21,8 @@
 #include <string>
 
 #include "backend_arm.h"
-#include "dex/compiler_internals.h"
+#include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 
 namespace art {
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 73b68a5..e4bd2a3 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -18,8 +18,10 @@
 
 #include "arch/arm/instruction_set_features_arm.h"
 #include "arm_lir.h"
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
+#include "driver/compiler_driver.h"
 
 namespace art {
 
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 943c5c1..d15412a 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -17,7 +17,9 @@
 #ifndef ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
 #define ART_COMPILER_DEX_QUICK_ARM64_ARM64_LIR_H_
 
-#include "dex/compiler_internals.h"
+#include "dex/compiler_enums.h"
+#include "dex/reg_location.h"
+#include "dex/reg_storage.h"
 
 namespace art {
 
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index d45ec49..806617b 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -18,7 +18,10 @@
 
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "arm64_lir.h"
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 
 namespace art {
 
@@ -1016,7 +1019,7 @@
           // Avoid emitting code that could trigger Cortex A53's erratum 835769.
           // This fixup should be carried out for all multiply-accumulate instructions: madd, msub,
           // smaddl, smsubl, umaddl and umsubl.
-          if (cu_->GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()
+          if (cu_->compiler_driver->GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()
               ->NeedFixCortexA53_835769()) {
             // Check that this is a 64-bit multiply-accumulate.
             if (IS_WIDE(lir->opcode)) {
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 089e4b6..6492442 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -16,9 +16,13 @@
 
 /* This file contains codegen for the Thumb2 ISA. */
 
-#include "arm64_lir.h"
 #include "codegen_arm64.h"
+
+#include "arm64_lir.h"
+#include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 #include "gc/accounting/card_table.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/art_method.h"
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index c68b1d0..49ca625 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -18,7 +18,7 @@
 #define ART_COMPILER_DEX_QUICK_ARM64_CODEGEN_ARM64_H_
 
 #include "arm64_lir.h"
-#include "dex/compiler_internals.h"
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir.h"
 
 #include <map>
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index ff692b7..a8ec6c0 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -14,8 +14,11 @@
  * limitations under the License.
  */
 
-#include "arm64_lir.h"
 #include "codegen_arm64.h"
+
+#include "arm64_lir.h"
+#include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "utils.h"
 
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 88ab6f8..92675f3 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -16,11 +16,16 @@
 
 /* This file contains codegen for the Thumb2 ISA. */
 
+#include "codegen_arm64.h"
+
 #include "arch/instruction_set_features.h"
 #include "arm64_lir.h"
-#include "codegen_arm64.h"
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
+#include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array-inl.h"
 #include "utils.h"
@@ -1003,7 +1008,7 @@
 }
 
 bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->GetInstructionSetFeatures()->IsSmp()) {
+  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
     return false;
   }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index ee7e818..34662f2 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -21,7 +21,8 @@
 #include <string>
 
 #include "backend_arm64.h"
-#include "dex/compiler_internals.h"
+#include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index a331f41..f48290d 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#include "arm64_lir.h"
 #include "codegen_arm64.h"
+
+#include "arm64_lir.h"
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index ae9b0f4..52b2e15 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -14,13 +14,16 @@
  * limitations under the License.
  */
 
-#include "dex/compiler_internals.h"
+#include "mir_to_lir-inl.h"
+
+#include "dex/mir_graph.h"
+#include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "driver/dex_compilation_unit.h"
 #include "dex_file-inl.h"
 #include "gc_map.h"
 #include "gc_map_builder.h"
 #include "mapping_table.h"
-#include "mir_to_lir-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/verification_results.h"
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 84c0d93..7245853 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -18,18 +18,15 @@
 
 #include <algorithm>
 
+#include "base/logging.h"
 #include "base/macros.h"
-#include "base/mutex.h"
 #include "base/mutex-inl.h"
-#include "dex/frontend.h"
-#include "thread.h"
+#include "dex/compiler_ir.h"
 #include "thread-inl.h"
 #include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir.h"
-#include "dex_instruction.h"
 #include "dex_instruction-inl.h"
 #include "driver/dex_compilation_unit.h"
-#include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 
 namespace art {
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index aa47cee..9f53b89 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -14,13 +14,16 @@
  * limitations under the License.
  */
 
+#include "mir_to_lir-inl.h"
+
 #include <functional>
 
 #include "arch/arm/instruction_set_features_arm.h"
+#include "base/macros.h"
 #include "dex/compiler_ir.h"
-#include "dex/compiler_internals.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/arm/arm_lir.h"
-#include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
 #include "mirror/object_array-inl.h"
@@ -41,6 +44,18 @@
  * and "op" calls may be used here.
  */
 
+ALWAYS_INLINE static inline bool ForceSlowFieldPath(CompilationUnit* cu) {
+  return (cu->enable_debug & (1 << kDebugSlowFieldPath)) != 0;
+}
+
+ALWAYS_INLINE static inline bool ForceSlowStringPath(CompilationUnit* cu) {
+  return (cu->enable_debug & (1 << kDebugSlowStringPath)) != 0;
+}
+
+ALWAYS_INLINE static inline bool ForceSlowTypePath(CompilationUnit* cu) {
+  return (cu->enable_debug & (1 << kDebugSlowTypePath)) != 0;
+}
+
 /*
  * Generate a kPseudoBarrier marker to indicate the boundary of special
  * blocks.
@@ -594,7 +609,7 @@
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
   DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedStaticField(field_info.FastPut(), field_info.IsReferrersClass());
-  if (!SLOW_FIELD_PATH && field_info.FastPut()) {
+  if (!ForceSlowFieldPath(cu_) && field_info.FastPut()) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     RegStorage r_base;
     if (field_info.IsReferrersClass()) {
@@ -714,7 +729,7 @@
   DCHECK_EQ(SGetMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedStaticField(field_info.FastGet(), field_info.IsReferrersClass());
 
-  if (!SLOW_FIELD_PATH && field_info.FastGet()) {
+  if (!ForceSlowFieldPath(cu_) && field_info.FastGet()) {
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
     RegStorage r_base;
     if (field_info.IsReferrersClass()) {
@@ -852,7 +867,7 @@
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
   DCHECK_EQ(IGetMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastGet());
-  if (!SLOW_FIELD_PATH && field_info.FastGet()) {
+  if (!ForceSlowFieldPath(cu_) && field_info.FastGet()) {
     RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
     // A load of the class will lead to an iget with offset 0.
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
@@ -926,7 +941,7 @@
   const MirIFieldLoweringInfo& field_info = mir_graph_->GetIFieldLoweringInfo(mir);
   DCHECK_EQ(IPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
   cu_->compiler_driver->ProcessedInstanceField(field_info.FastPut());
-  if (!SLOW_FIELD_PATH && field_info.FastPut()) {
+  if (!ForceSlowFieldPath(cu_) && field_info.FastPut()) {
     RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
     // Dex code never writes to the class field.
     DCHECK_GE(static_cast<uint32_t>(field_info.FieldOffset().Int32Value()),
@@ -1016,7 +1031,7 @@
     int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
     LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file,
-        type_idx) || SLOW_TYPE_PATH) {
+        type_idx) || ForceSlowTypePath(cu_)) {
       // Slow path, at runtime test if type is null and if so initialize
       FlushAllRegs();
       LIR* branch = OpCmpImmBranch(kCondEq, rl_result.reg, 0, NULL);
@@ -1061,7 +1076,7 @@
   int32_t offset_of_string = mirror::ObjectArray<mirror::String>::OffsetOfElement(string_idx).
                                                                                       Int32Value();
   if (!cu_->compiler_driver->CanAssumeStringIsPresentInDexCache(
-      *cu_->dex_file, string_idx) || SLOW_STRING_PATH) {
+      *cu_->dex_file, string_idx) || ForceSlowStringPath(cu_)) {
     // slow path, resolve string if not in dex cache
     FlushAllRegs();
     LockCallTemps();  // Using explicit registers
@@ -1679,7 +1694,7 @@
       rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, op == kOpDiv);
       done = true;
     } else if (cu_->instruction_set == kThumb2) {
-      if (cu_->GetInstructionSetFeatures()->AsArmInstructionSetFeatures()->
+      if (cu_->compiler_driver->GetInstructionSetFeatures()->AsArmInstructionSetFeatures()->
               HasDivideInstruction()) {
         // Use ARM SDIV instruction for division.  For remainder we also need to
         // calculate using a MUL and subtract.
@@ -1973,7 +1988,7 @@
         rl_result = GenDivRemLit(rl_dest, rl_src, lit, is_div);
         done = true;
       } else if (cu_->instruction_set == kThumb2) {
-        if (cu_->GetInstructionSetFeatures()->AsArmInstructionSetFeatures()->
+        if (cu_->compiler_driver->GetInstructionSetFeatures()->AsArmInstructionSetFeatures()->
                 HasDivideInstruction()) {
           // Use ARM SDIV instruction for division.  For remainder we also need to
           // calculate using a MUL and subtract.
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index d5889f5..bb5b0cd 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -14,12 +14,16 @@
  * limitations under the License.
  */
 
+#include "mir_to_lir-inl.h"
+
 #include "arm/codegen_arm.h"
 #include "dex/compiler_ir.h"
-#include "dex/frontend.h"
+#include "dex/dex_flags.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex_file-inl.h"
+#include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "invoke_type.h"
 #include "mirror/array.h"
@@ -27,7 +31,6 @@
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string.h"
-#include "mir_to_lir-inl.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index d314601..9f36e35 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
+#include "mir_to_lir-inl.h"
+
 #include "dex/compiler_ir.h"
-#include "dex/compiler_internals.h"
-#include "dex/quick/mir_to_lir-inl.h"
+#include "dex/mir_graph.h"
 #include "invoke_type.h"
 
 namespace art {
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index e0f4691..e573899 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
-#include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
 
+#include "base/logging.h"
+
 namespace art {
 
 #define DEBUG_OPT(X)
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index 4265ae1..5c98b10 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -16,6 +16,8 @@
 
 #include "codegen_mips.h"
 
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "mips_lir.h"
 
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index ed92e82..ccfdaf6 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -18,6 +18,8 @@
 
 #include "codegen_mips.h"
 
+#include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index ac14704..a37fe40 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -17,12 +17,13 @@
 #ifndef ART_COMPILER_DEX_QUICK_MIPS_CODEGEN_MIPS_H_
 #define ART_COMPILER_DEX_QUICK_MIPS_CODEGEN_MIPS_H_
 
-#include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir.h"
 #include "mips_lir.h"
 
 namespace art {
 
+struct CompilationUnit;
+
 class MipsMir2Lir FINAL : public Mir2Lir {
  protected:
   class InToRegStorageMipsMapper : public InToRegStorageMapper {
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 495d85e..d7ed7ac 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -16,6 +16,7 @@
 
 #include "codegen_mips.h"
 
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mips_lir.h"
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index aabef60..17ac629 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -18,6 +18,8 @@
 
 #include "codegen_mips.h"
 
+#include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 #include "entrypoints/quick/quick_entrypoints.h"
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index 3df8f2e..66e3894 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -17,7 +17,8 @@
 #ifndef ART_COMPILER_DEX_QUICK_MIPS_MIPS_LIR_H_
 #define ART_COMPILER_DEX_QUICK_MIPS_MIPS_LIR_H_
 
-#include "dex/compiler_internals.h"
+#include "dex/reg_location.h"
+#include "dex/reg_storage.h"
 
 namespace art {
 
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index c819903..8574ffd 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -22,8 +22,10 @@
 
 #include "arch/mips/instruction_set_features_mips.h"
 #include "backend_mips.h"
-#include "dex/compiler_internals.h"
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 #include "mips_lir.h"
 
 namespace art {
@@ -143,7 +145,8 @@
  */
 ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
   if (reg.IsDouble()) {
-    if (cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint()) {
+    if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
+        ->Is32BitFloatingPoint()) {
       return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0);
     } else {
       return ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0);
@@ -398,7 +401,8 @@
   Clobber(rs_rF13);
   Clobber(rs_rF14);
   Clobber(rs_rF15);
-  if (cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint()) {
+  if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
+      ->Is32BitFloatingPoint()) {
     Clobber(rs_rD0_fr0);
     Clobber(rs_rD1_fr0);
     Clobber(rs_rD2_fr0);
@@ -449,7 +453,7 @@
 }
 
 bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind ATTRIBUTE_UNUSED) {
-  if (cu_->GetInstructionSetFeatures()->IsSmp()) {
+  if (cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
     NewLIR1(kMipsSync, 0 /* Only stype currently supported */);
     return true;
   } else {
@@ -459,7 +463,8 @@
 
 void MipsMir2Lir::CompilerInitializeRegAlloc() {
   const bool fpu_is_32bit =
-      cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint();
+      cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
+      ->Is32BitFloatingPoint();
   reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */,
                                             sp_regs,
                                             fpu_is_32bit ? dp_fr0_regs : dp_fr1_regs,
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 15fc69d..6f6bf68 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -17,8 +17,10 @@
 #include "codegen_mips.h"
 
 #include "arch/mips/instruction_set_features_mips.h"
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
+#include "driver/compiler_driver.h"
 #include "mips_lir.h"
 
 namespace art {
@@ -306,7 +308,7 @@
     case kOpXor:
       return OpRegRegReg(op, r_dest_src1, r_dest_src1, r_src2);
     case kOp2Byte:
-      if (cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
+      if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
           ->IsMipsIsaRevGreaterThanEqual2()) {
         res = NewLIR2(kMipsSeb, r_dest_src1.GetReg(), r_src2.GetReg());
       } else {
@@ -315,7 +317,7 @@
       }
       return res;
     case kOp2Short:
-      if (cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
+      if (cu_->compiler_driver->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
           ->IsMipsIsaRevGreaterThanEqual2()) {
         res = NewLIR2(kMipsSeh, r_dest_src1.GetReg(), r_src2.GetReg());
       } else {
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 144790e..280dbbe 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -19,7 +19,8 @@
 
 #include "mir_to_lir.h"
 
-#include "dex/compiler_internals.h"
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
 
 namespace art {
 
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 1ff64c9..274e078 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -14,10 +14,11 @@
  * limitations under the License.
  */
 
-#include "dex/compiler_internals.h"
+#include "mir_to_lir-inl.h"
+
 #include "dex/dataflow_iterator-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
-#include "mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 #include "primitive.h"
 #include "thread-inl.h"
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index fabf941..64ecf94 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -17,15 +17,14 @@
 #ifndef ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
 #define ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
 
-#include "arch/instruction_set.h"
 #include "compiled_method.h"
 #include "dex/compiler_enums.h"
-#include "dex/compiler_ir.h"
+#include "dex/dex_flags.h"
+#include "dex/dex_types.h"
 #include "dex/reg_location.h"
 #include "dex/reg_storage.h"
 #include "dex/backend.h"
 #include "dex/quick/resource_mask.h"
-#include "driver/compiler_driver.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "invoke_type.h"
 #include "leb128.h"
@@ -125,10 +124,12 @@
 #define REG_USE23            (REG_USE2 | REG_USE3)
 #define REG_USE123           (REG_USE1 | REG_USE2 | REG_USE3)
 
-// TODO: #includes need a cleanup
-#ifndef INVALID_SREG
-#define INVALID_SREG (-1)
-#endif
+/*
+ * Assembly is an iterative process, and usually terminates within
+ * two or three passes.  This should be high enough to handle bizarre
+ * cases, but detect an infinite loop bug.
+ */
+#define MAX_ASSEMBLER_RETRIES 50
 
 class BasicBlock;
 struct CallInfo;
@@ -140,7 +141,6 @@
 class DexFileMethodInliner;
 class MIRGraph;
 class MirMethodLoweringInfo;
-class Mir2Lir;
 
 typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int,
                             const MethodReference& target_method,
@@ -148,6 +148,7 @@
                             uintptr_t direct_method, InvokeType type);
 
 typedef std::vector<uint8_t> CodeBuffer;
+typedef uint32_t CodeOffset;           // Native code offset in bytes.
 
 struct UseDefMasks {
   const ResourceMask* use_mask;        // Resource mask for use.
@@ -200,13 +201,6 @@
 // Mask to denote sreg as the start of a 64-bit item.  Must not interfere with low 16 bits.
 #define STARTING_WIDE_SREG 0x10000
 
-// TODO: replace these macros
-#define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath))
-#define SLOW_INVOKE_PATH (cu_->enable_debug & (1 << kDebugSlowInvokePath))
-#define SLOW_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowStringPath))
-#define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
-#define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
-
 class Mir2Lir : public Backend {
   public:
     static constexpr bool kFailOnSizeError = true && kIsDebugBuild;
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 102ce17..11808ad 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -18,16 +18,26 @@
 
 #include <cstdint>
 
+#include "base/dumpable.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/timing_logger.h"
 #include "compiler.h"
 #include "dex_file-inl.h"
-#include "dex/frontend.h"
+#include "dex_file_to_method_inliner_map.h"
+#include "dex/backend.h"
+#include "dex/compiler_ir.h"
+#include "dex/dex_flags.h"
 #include "dex/mir_graph.h"
+#include "dex/pass_driver_me_opts.h"
 #include "dex/quick/mir_to_lir.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
 #include "elf_writer_quick.h"
 #include "jni/quick/jni_compiler.h"
 #include "mirror/art_method-inl.h"
-#include "base/logging.h"
+#include "mirror/object.h"
+#include "runtime.h"
 
 // Specific compiler backends.
 #include "dex/quick/arm/backend_arm.h"
@@ -37,7 +47,7 @@
 
 namespace art {
 
-class QuickCompiler : public Compiler {
+class QuickCompiler FINAL : public Compiler {
  public:
   explicit QuickCompiler(CompilerDriver* driver) : Compiler(driver, 100) {}
 
@@ -582,6 +592,47 @@
   CHECK(GetCompilerDriver()->GetCompilerContext() == nullptr);
 }
 
+/* Default optimizer/debug settings for the compiler. */
+static uint32_t kCompilerOptimizerDisableFlags = 0 |  // Disable specific optimizations
+  // (1 << kLoadStoreElimination) |
+  // (1 << kLoadHoisting) |
+  // (1 << kSuppressLoads) |
+  // (1 << kNullCheckElimination) |
+  // (1 << kClassInitCheckElimination) |
+  // (1 << kGlobalValueNumbering) |
+  // (1 << kLocalValueNumbering) |
+  // (1 << kPromoteRegs) |
+  // (1 << kTrackLiveTemps) |
+  // (1 << kSafeOptimizations) |
+  // (1 << kBBOpt) |
+  // (1 << kSuspendCheckElimination) |
+  // (1 << kMatch) |
+  // (1 << kPromoteCompilerTemps) |
+  // (1 << kSuppressExceptionEdges) |
+  // (1 << kSuppressMethodInlining) |
+  0;
+
+static uint32_t kCompilerDebugFlags = 0 |     // Enable debug/testing modes
+  // (1 << kDebugDisplayMissingTargets) |
+  // (1 << kDebugVerbose) |
+  // (1 << kDebugDumpCFG) |
+  // (1 << kDebugSlowFieldPath) |
+  // (1 << kDebugSlowInvokePath) |
+  // (1 << kDebugSlowStringPath) |
+  // (1 << kDebugSlowestFieldPath) |
+  // (1 << kDebugSlowestStringPath) |
+  // (1 << kDebugExerciseResolveMethod) |
+  // (1 << kDebugVerifyDataflow) |
+  // (1 << kDebugShowMemoryUsage) |
+  // (1 << kDebugShowNops) |
+  // (1 << kDebugCountOpcodes) |
+  // (1 << kDebugDumpCheckStats) |
+  // (1 << kDebugShowSummaryMemoryUsage) |
+  // (1 << kDebugShowFilterStats) |
+  // (1 << kDebugTimings) |
+  // (1 << kDebugCodegenDump) |
+  0;
+
 CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item,
                                        uint32_t access_flags,
                                        InvokeType invoke_type,
@@ -592,8 +643,160 @@
   // TODO: check method fingerprint here to determine appropriate backend type.  Until then, use
   // build default.
   CompilerDriver* driver = GetCompilerDriver();
-  return CompileOneMethod(driver, this, code_item, access_flags, invoke_type, class_def_idx,
-                          method_idx, class_loader, dex_file, nullptr /* use thread llvm_info */);
+
+  VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
+  if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
+    return nullptr;
+  }
+
+  DCHECK(driver->GetCompilerOptions().IsCompilationEnabled());
+
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  InstructionSet instruction_set = driver->GetInstructionSet();
+  if (instruction_set == kArm) {
+    instruction_set = kThumb2;
+  }
+  CompilationUnit cu(driver->GetArenaPool(), instruction_set, driver, class_linker);
+
+  // TODO: Mips64 is not yet implemented.
+  CHECK((cu.instruction_set == kThumb2) ||
+        (cu.instruction_set == kArm64) ||
+        (cu.instruction_set == kX86) ||
+        (cu.instruction_set == kX86_64) ||
+        (cu.instruction_set == kMips));
+
+  // TODO: set this from command line
+  constexpr bool compiler_flip_match = false;
+  const std::string compiler_method_match = "";
+
+  bool use_match = !compiler_method_match.empty();
+  bool match = use_match && (compiler_flip_match ^
+      (PrettyMethod(method_idx, dex_file).find(compiler_method_match) != std::string::npos));
+  if (!use_match || match) {
+    cu.disable_opt = kCompilerOptimizerDisableFlags;
+    cu.enable_debug = kCompilerDebugFlags;
+    cu.verbose = VLOG_IS_ON(compiler) ||
+        (cu.enable_debug & (1 << kDebugVerbose));
+  }
+
+  if (driver->GetCompilerOptions().HasVerboseMethods()) {
+    cu.verbose = driver->GetCompilerOptions().IsVerboseMethod(PrettyMethod(method_idx, dex_file));
+  }
+
+  if (cu.verbose) {
+    cu.enable_debug |= (1 << kDebugCodegenDump);
+  }
+
+  /*
+   * TODO: rework handling of optimization and debug flags.  Should we split out
+   * MIR and backend flags?  Need command-line setting as well.
+   */
+
+  InitCompilationUnit(cu);
+
+  cu.StartTimingSplit("BuildMIRGraph");
+  cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
+
+  /*
+   * After creation of the MIR graph, also create the code generator.
+   * The reason we do this is that optimizations on the MIR graph may need to get information
+   * that is only available if a CG exists.
+   */
+  cu.cg.reset(GetCodeGenerator(&cu, nullptr));
+
+  /* Gathering opcode stats? */
+  if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
+    cu.mir_graph->EnableOpcodeCounting();
+  }
+
+  /* Build the raw MIR graph */
+  cu.mir_graph->InlineMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx,
+                             class_loader, dex_file);
+
+  if (!CanCompileMethod(method_idx, dex_file, &cu)) {
+    VLOG(compiler)  << cu.instruction_set << ": Cannot compile method : "
+        << PrettyMethod(method_idx, dex_file);
+    cu.EndTiming();
+    return nullptr;
+  }
+
+  cu.NewTimingSplit("MIROpt:CheckFilters");
+  std::string skip_message;
+  if (cu.mir_graph->SkipCompilation(&skip_message)) {
+    VLOG(compiler) << cu.instruction_set << ": Skipping method : "
+        << PrettyMethod(method_idx, dex_file) << "  Reason = " << skip_message;
+    cu.EndTiming();
+    return nullptr;
+  }
+
+  /* Create the pass driver and launch it */
+  PassDriverMEOpts pass_driver(&cu);
+  pass_driver.Launch();
+
+  /* For non-leaf methods check if we should skip compilation when the profiler is enabled. */
+  if (cu.compiler_driver->ProfilePresent()
+      && !cu.mir_graph->MethodIsLeaf()
+      && cu.mir_graph->SkipCompilationByName(PrettyMethod(method_idx, dex_file))) {
+    cu.EndTiming();
+    return nullptr;
+  }
+
+  if (cu.enable_debug & (1 << kDebugDumpCheckStats)) {
+    cu.mir_graph->DumpCheckStats();
+  }
+
+  if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
+    cu.mir_graph->ShowOpcodeStats();
+  }
+
+  /* Reassociate sreg names with original Dalvik vreg names. */
+  cu.mir_graph->RemapRegLocations();
+
+  /* Free Arenas from the cu.arena_stack for reuse by the cu.arena in the codegen. */
+  if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
+    if (cu.arena_stack.PeakBytesAllocated() > 1 * 1024 * 1024) {
+      MemStats stack_stats(cu.arena_stack.GetPeakStats());
+      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(stack_stats);
+    }
+  }
+  cu.arena_stack.Reset();
+
+  CompiledMethod* result = nullptr;
+
+  if (cu.mir_graph->PuntToInterpreter()) {
+    VLOG(compiler) << cu.instruction_set << ": Punted method to interpreter: "
+        << PrettyMethod(method_idx, dex_file);
+    cu.EndTiming();
+    return nullptr;
+  }
+
+  cu.cg->Materialize();
+
+  cu.NewTimingSplit("Dedupe");  /* deduping takes up the vast majority of time in GetCompiledMethod(). */
+  result = cu.cg->GetCompiledMethod();
+  cu.NewTimingSplit("Cleanup");
+
+  if (result) {
+    VLOG(compiler) << cu.instruction_set << ": Compiled " << PrettyMethod(method_idx, dex_file);
+  } else {
+    VLOG(compiler) << cu.instruction_set << ": Deferred " << PrettyMethod(method_idx, dex_file);
+  }
+
+  if (cu.enable_debug & (1 << kDebugShowMemoryUsage)) {
+    if (cu.arena.BytesAllocated() > (1 * 1024 *1024)) {
+      MemStats mem_stats(cu.arena.GetMemStats());
+      LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+    }
+  }
+
+  if (cu.enable_debug & (1 << kDebugShowSummaryMemoryUsage)) {
+    LOG(INFO) << "MEMINFO " << cu.arena.BytesAllocated() << " " << cu.mir_graph->GetNumBlocks()
+                    << " " << PrettyMethod(method_idx, dex_file);
+  }
+
+  cu.EndTiming();
+  driver->GetTimingsLogger()->AddLogger(cu.timings);
+  return result;
 }
 
 CompiledMethod* QuickCompiler::JniCompile(uint32_t access_flags,
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 0a98c80..8efafb2 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -16,10 +16,13 @@
 
 /* This file contains register alloction support. */
 
-#include "dex/compiler_ir.h"
-#include "dex/compiler_internals.h"
 #include "mir_to_lir-inl.h"
 
+#include "dex/compiler_ir.h"
+#include "dex/mir_graph.h"
+#include "driver/compiler_driver.h"
+#include "driver/dex_compilation_unit.h"
+
 namespace art {
 
 /*
diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc
index ca68f95..8a27ecb 100644
--- a/compiler/dex/quick/resource_mask.cc
+++ b/compiler/dex/quick/resource_mask.cc
@@ -18,6 +18,7 @@
 
 #include "resource_mask.h"
 
+#include "base/logging.h"
 #include "utils/arena_allocator.h"
 #include "utils.h"
 
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index ad2ed01..6f26b78 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -15,7 +15,10 @@
  */
 
 #include "codegen_x86.h"
-#include "dex/quick/mir_to_lir-inl.h"
+
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
+#include "dex/quick/mir_to_lir.h"
 #include "oat.h"
 #include "x86_lir.h"
 
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 544ac3b..aa0972f 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -17,7 +17,10 @@
 /* This file contains codegen for the X86 ISA */
 
 #include "codegen_x86.h"
+
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "driver/compiler_driver.h"
 #include "gc/accounting/card_table.h"
 #include "mirror/art_method.h"
 #include "mirror/object_array-inl.h"
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index c7d83dd..3815722 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -17,7 +17,9 @@
 #ifndef ART_COMPILER_DEX_QUICK_X86_CODEGEN_X86_H_
 #define ART_COMPILER_DEX_QUICK_X86_CODEGEN_X86_H_
 
-#include "dex/compiler_internals.h"
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir.h"
 #include "x86_lir.h"
 
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 89c5648..d8616a7 100755
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -15,6 +15,8 @@
  */
 
 #include "codegen_x86.h"
+
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 #include "x86_lir.h"
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 03156dc..4fe7a43 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -17,6 +17,8 @@
 /* This file contains codegen for the X86 ISA */
 
 #include "codegen_x86.h"
+
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 #include "mirror/art_method.h"
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 142acbc..bc64aad 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -14,16 +14,19 @@
  * limitations under the License.
  */
 
+#include "codegen_x86.h"
+
 #include <cstdarg>
 #include <inttypes.h>
 #include <string>
 
 #include "arch/instruction_set_features.h"
 #include "backend_x86.h"
-#include "codegen_x86.h"
-#include "dex/compiler_internals.h"
+#include "base/logging.h"
+#include "dex/compiler_ir.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
+#include "driver/compiler_driver.h"
 #include "mirror/array-inl.h"
 #include "mirror/art_method.h"
 #include "mirror/string.h"
@@ -596,7 +599,7 @@
 }
 
 bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->GetInstructionSetFeatures()->IsSmp()) {
+  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
     return false;
   }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
@@ -2501,7 +2504,7 @@
     if (rl_idx.is_const) {
       LIR* comparison;
       range_check_branch = OpCmpMemImmBranch(
-          kCondUlt, RegStorage::InvalidReg(), rl_obj.reg, count_offset,
+          kCondLs, RegStorage::InvalidReg(), rl_obj.reg, count_offset,
           mir_graph_->ConstantValue(rl_idx.orig_sreg), nullptr, &comparison);
       MarkPossibleNullPointerExceptionAfter(0, comparison);
     } else {
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 3b58698..893b98a 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -15,12 +15,15 @@
  */
 
 #include "codegen_x86.h"
+
+#include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/dataflow_iterator-inl.h"
-#include "x86_lir.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/reg_storage_eq.h"
+#include "driver/compiler_driver.h"
+#include "x86_lir.h"
 
 namespace art {
 
@@ -509,7 +512,7 @@
     }
   }
   if (r_dest != r_src) {
-    if (false && op == kOpLsl && value >= 0 && value <= 3) {  // lea shift special case
+    if ((false) && op == kOpLsl && value >= 0 && value <= 3) {  // lea shift special case
       // TODO: fix bug in LEA encoding when disp == 0
       return NewLIR5(kX86Lea32RA, r_dest.GetReg(),  r5sib_no_base /* base */,
                      r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 3e0a852..bc4cb5a 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -17,7 +17,8 @@
 #ifndef ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
 #define ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
 
-#include "dex/compiler_internals.h"
+#include "dex/reg_location.h"
+#include "dex/reg_storage.h"
 
 namespace art {
 
diff --git a/compiler/dex/reg_location.h b/compiler/dex/reg_location.h
index 38f59da..aa8ed46 100644
--- a/compiler/dex/reg_location.h
+++ b/compiler/dex/reg_location.h
@@ -21,6 +21,7 @@
 
 namespace art {
 
+static constexpr int16_t INVALID_SREG = -1;
 
 /*
  * Whereas a SSA name describes a definition of a Dalvik vreg, the RegLocation describes
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 3905649..6bd49de 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -15,7 +15,8 @@
  */
 
 #include "base/bit_vector-inl.h"
-#include "compiler_internals.h"
+#include "base/logging.h"
+#include "compiler_ir.h"
 #include "dataflow_iterator-inl.h"
 #include "utils/scoped_arena_containers.h"
 
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 4daed67..4ff173d 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -16,15 +16,14 @@
 
 #include "verification_results.h"
 
+#include "base/logging.h"
 #include "base/stl_util.h"
-#include "base/mutex.h"
 #include "base/mutex-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 #include "thread.h"
 #include "thread-inl.h"
 #include "verified_method.h"
-#include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 
 namespace art {
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 93e9a51..21e965d 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -23,20 +23,13 @@
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "dex_file.h"
-#include "dex_instruction.h"
 #include "dex_instruction-inl.h"
-#include "base/mutex.h"
-#include "base/mutex-inl.h"
-#include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
-#include "mirror/class.h"
 #include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
-#include "mirror/object.h"
 #include "mirror/object-inl.h"
+#include "utils.h"
 #include "verifier/dex_gc_map.h"
-#include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 #include "verifier/reg_type-inl.h"
 #include "verifier/register_line-inl.h"
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index 62c4089..f70850a 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -14,8 +14,11 @@
  * limitations under the License.
  */
 
-#include "compiler_internals.h"
+#include "base/logging.h"
+#include "base/stringprintf.h"
+#include "compiler_ir.h"
 #include "dex/dataflow_iterator-inl.h"
+#include "dex_flags.h"
 
 namespace art {
 
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 3a91b08..9948c82 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -19,7 +19,6 @@
 
 #include "compiler_driver.h"
 
-#include "dex/compiler_ir.h"
 #include "dex_compilation_unit.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index a86043c..2fca2e5 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -28,6 +28,7 @@
 #include "compiled_method.h"
 #include "compiler.h"
 #include "dex_file.h"
+#include "dex/verified_method.h"
 #include "driver/compiler_options.h"
 #include "invoke_type.h"
 #include "method_reference.h"
@@ -41,7 +42,6 @@
 #include "utils/dedupe_set.h"
 #include "utils/swap_space.h"
 #include "utils.h"
-#include "dex/verified_method.h"
 
 namespace art {
 
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index 986fc71..e6c8c18 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -18,7 +18,6 @@
 
 #include "base/stringprintf.h"
 #include "dex/compiler_ir.h"
-#include "dex/mir_graph.h"
 #include "utils.h"
 
 namespace art {
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index f650ff2..b6aaab5 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -19,12 +19,19 @@
 #include "gvn.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
+#include "side_effects_analysis.h"
 #include "utils/arena_allocator.h"
 
 #include "gtest/gtest.h"
 
 namespace art {
 
+static void RunGvn(HGraph* graph) {
+  SideEffectsAnalysis side_effects(graph);
+  side_effects.Run();
+  GVNOptimization(graph, side_effects).Run();
+}
+
 // if (i < 0) { array[i] = 1; // Can't eliminate. }
 // else if (i >= array.length) { array[i] = 1; // Can't eliminate. }
 // else { array[i] = 1; // Can eliminate. }
@@ -120,7 +127,7 @@
   block3->AddSuccessor(block4);  // False successor
 
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination(graph);
   bounds_check_elimination.Run();
   ASSERT_FALSE(IsRemoved(bounds_check2));
@@ -195,7 +202,7 @@
   block3->AddSuccessor(exit);
 
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination(graph);
   bounds_check_elimination.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -270,7 +277,7 @@
   block3->AddSuccessor(exit);
 
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination(graph);
   bounds_check_elimination.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -344,7 +351,7 @@
   exit->AddInstruction(new (&allocator) HExit());
 
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination(graph);
   bounds_check_elimination.Run();
   ASSERT_FALSE(IsRemoved(bounds_check5));
@@ -443,7 +450,7 @@
   // HArrayLength which uses the null check as its input.
   graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
   bounds_check_elimination_after_gvn.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -451,7 +458,7 @@
   // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. }
   graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
   bounds_check_elimination_with_initial_1.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -459,7 +466,7 @@
   // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. }
   graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
   bounds_check_elimination_with_initial_minus_1.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -467,7 +474,7 @@
   // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. }
   graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
   bounds_check_elimination_with_greater_than.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -476,7 +483,7 @@
   //   array[i] = 10; // Can't eliminate due to overflow concern. }
   graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_increment_2(graph);
   bounds_check_elimination_with_increment_2.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -484,7 +491,7 @@
   // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. }
   graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph);
   bounds_check_elimination_with_increment_2_from_1.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -584,7 +591,7 @@
   // HArrayLength which uses the null check as its input.
   graph = BuildSSAGraph2(&allocator, &bounds_check, 0);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
   bounds_check_elimination_after_gvn.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -592,7 +599,7 @@
   // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. }
   graph = BuildSSAGraph2(&allocator, &bounds_check, 1);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
   bounds_check_elimination_with_initial_1.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -600,7 +607,7 @@
   // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. }
   graph = BuildSSAGraph2(&allocator, &bounds_check, -1);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
   bounds_check_elimination_with_initial_minus_1.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -608,7 +615,7 @@
   // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. }
   graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_less_than(graph);
   bounds_check_elimination_with_less_than.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -616,7 +623,7 @@
   // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. }
   graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph);
   bounds_check_elimination_increment_minus_2.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -703,7 +710,7 @@
   HInstruction* bounds_check = nullptr;
   HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
   bounds_check_elimination_after_gvn.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -712,7 +719,7 @@
   // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. }
   graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
   bounds_check_elimination_with_initial_1.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -721,7 +728,7 @@
   // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. }
   graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
   bounds_check_elimination_with_greater_than.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -730,7 +737,7 @@
   // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. }
   graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_increment_8(graph);
   bounds_check_elimination_increment_8.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -831,7 +838,7 @@
   // HArrayLength which uses the null check as its input.
   graph = BuildSSAGraph4(&allocator, &bounds_check, 0);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
   bounds_check_elimination_after_gvn.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -839,7 +846,7 @@
   // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. }
   graph = BuildSSAGraph4(&allocator, &bounds_check, 1);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
   bounds_check_elimination_with_initial_1.Run();
   ASSERT_TRUE(IsRemoved(bounds_check));
@@ -847,7 +854,7 @@
   // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. }
   graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT);
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
   bounds_check_elimination_with_greater_than.Run();
   ASSERT_FALSE(IsRemoved(bounds_check));
@@ -1023,7 +1030,7 @@
   outer_body_add->AddSuccessor(outer_header);
 
   graph->BuildDominatorTree();
-  GlobalValueNumberer(&allocator, graph).Run();
+  RunGvn(graph);
   // gvn should remove the same bounds check.
   ASSERT_FALSE(IsRemoved(bounds_check1));
   ASSERT_FALSE(IsRemoved(bounds_check2));
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 824663a..0fe28e8 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -455,7 +455,7 @@
   return Location();
 }
 
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs_[R1_R2] = true;
 
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index aa4fc8f..dfa4748 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -39,6 +39,31 @@
 
 namespace art {
 
+// Provide our own code generator that ensures the C calling conventions
+// are preserved. Currently, ART and C do not match, as R4 is caller-save
+// in ART and callee-save in C. Alternatively, we could use or write
+// a stub that saves and restores all registers, but it is easier
+// to just override the code generator.
+class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
+ public:
+  TestCodeGeneratorARM(HGraph* graph,
+                       const ArmInstructionSetFeatures& isa_features,
+                       const CompilerOptions& compiler_options)
+      : arm::CodeGeneratorARM(graph, isa_features, compiler_options) {
+    AddAllocatedRegister(Location::RegisterLocation(6));
+    AddAllocatedRegister(Location::RegisterLocation(7));
+  }
+
+  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
+    arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline);
+    blocked_core_registers_[4] = true;
+    blocked_core_registers_[6] = false;
+    blocked_core_registers_[7] = false;
+    // Makes pair R6-R7 available.
+    blocked_register_pairs_[6 >> 1] = false;
+  }
+};
+
 class InternalCodeAllocator : public CodeAllocator {
  public:
   InternalCodeAllocator() : size_(0) { }
@@ -92,7 +117,7 @@
 
   std::unique_ptr<const ArmInstructionSetFeatures> features(
       ArmInstructionSetFeatures::FromCppDefines());
-  arm::CodeGeneratorARM codegenARM(graph, *features.get(), compiler_options);
+  TestCodeGeneratorARM codegenARM(graph, *features.get(), compiler_options);
   codegenARM.CompileBaseline(&allocator, true);
   if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
     Run(allocator, codegenARM, has_result, expected);
@@ -136,9 +161,9 @@
                              Expected expected) {
   CompilerOptions compiler_options;
   if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
-    arm::CodeGeneratorARM codegenARM(graph,
-                                     *ArmInstructionSetFeatures::FromCppDefines(),
-                                     compiler_options);
+    TestCodeGeneratorARM codegenARM(graph,
+                                    *ArmInstructionSetFeatures::FromCppDefines(),
+                                    compiler_options);
     RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kArm64) {
     arm64::CodeGeneratorARM64 codegenARM64(graph, compiler_options);
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 4d74c4e..35c5269 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -146,7 +146,7 @@
   }
 
   // Ensure the uses of `instruction` are defined in a block of the graph.
-  for (HUseIterator<HInstruction> use_it(instruction->GetUses());
+  for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
        !use_it.Done(); use_it.Advance()) {
     HInstruction* use = use_it.Current()->GetUser();
     const HInstructionList& list = use->IsPhi()
@@ -254,7 +254,7 @@
   super_type::VisitInstruction(instruction);
 
   // Ensure an instruction dominates all its uses.
-  for (HUseIterator<HInstruction> use_it(instruction->GetUses());
+  for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
        !use_it.Done(); use_it.Advance()) {
     HInstruction* use = use_it.Current()->GetUser();
     if (!use->IsPhi() && !instruction->StrictlyDominates(use)) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index d7dcb4c..c606bd7 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -225,7 +225,7 @@
       HInstruction* instruction = it.Current();
       AddIndent();
       int bci = 0;
-      output_ << bci << " " << instruction->NumberOfUses()
+      output_ << bci << " " << instruction->ExpensiveComputeNumberOfUses()
               << " " << GetTypeId(instruction->GetType()) << instruction->GetId() << " ";
       PrintInstruction(instruction);
       output_ << kEndInstructionMarker << std::endl;
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 6e5f1bd..89bba2d 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -15,82 +15,251 @@
  */
 
 #include "gvn.h"
+#include "side_effects_analysis.h"
 
 namespace art {
 
-void GlobalValueNumberer::Run() {
-  ComputeSideEffects();
+/**
+ * A node in the collision list of a ValueSet. Encodes the instruction,
+ * the hash code, and the next node in the collision list.
+ */
+class ValueSetNode : public ArenaObject<kArenaAllocMisc> {
+ public:
+  ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next)
+      : instruction_(instruction), hash_code_(hash_code), next_(next) {}
 
+  size_t GetHashCode() const { return hash_code_; }
+  HInstruction* GetInstruction() const { return instruction_; }
+  ValueSetNode* GetNext() const { return next_; }
+  void SetNext(ValueSetNode* node) { next_ = node; }
+
+ private:
+  HInstruction* const instruction_;
+  const size_t hash_code_;
+  ValueSetNode* next_;
+
+  DISALLOW_COPY_AND_ASSIGN(ValueSetNode);
+};
+
+/**
+ * A ValueSet holds instructions that can replace other instructions. It is updated
+ * through the `Add` method, and the `Kill` method. The `Kill` method removes
+ * instructions that are affected by the given side effect.
+ *
+ * The `Lookup` method returns an equivalent instruction to the given instruction
+ * if there is one in the set. In GVN, we would say those instructions have the
+ * same "number".
+ */
+class ValueSet : public ArenaObject<kArenaAllocMisc> {
+ public:
+  explicit ValueSet(ArenaAllocator* allocator)
+      : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) {
+    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+      table_[i] = nullptr;
+    }
+  }
+
+  // Adds an instruction in the set.
+  void Add(HInstruction* instruction) {
+    DCHECK(Lookup(instruction) == nullptr);
+    size_t hash_code = instruction->ComputeHashCode();
+    size_t index = hash_code % kDefaultNumberOfEntries;
+    if (table_[index] == nullptr) {
+      table_[index] = instruction;
+    } else {
+      collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_);
+    }
+    ++number_of_entries_;
+  }
+
+  // If in the set, returns an equivalent instruction to the given instruction. Returns
+  // null otherwise.
+  HInstruction* Lookup(HInstruction* instruction) const {
+    size_t hash_code = instruction->ComputeHashCode();
+    size_t index = hash_code % kDefaultNumberOfEntries;
+    HInstruction* existing = table_[index];
+    if (existing != nullptr && existing->Equals(instruction)) {
+      return existing;
+    }
+
+    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
+      if (node->GetHashCode() == hash_code) {
+        existing = node->GetInstruction();
+        if (existing->Equals(instruction)) {
+          return existing;
+        }
+      }
+    }
+    return nullptr;
+  }
+
+  // Returns `instruction` itself if that exact object is in the set, null otherwise.
+  HInstruction* IdentityLookup(HInstruction* instruction) const {
+    size_t hash_code = instruction->ComputeHashCode();
+    size_t index = hash_code % kDefaultNumberOfEntries;
+    HInstruction* existing = table_[index];
+    if (existing != nullptr && existing == instruction) {
+      return existing;
+    }
+
+    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
+      if (node->GetHashCode() == hash_code) {
+        existing = node->GetInstruction();
+        if (existing == instruction) {
+          return existing;
+        }
+      }
+    }
+    return nullptr;
+  }
+
+  // Removes all instructions in the set that are affected by the given side effects.
+  void Kill(SideEffects side_effects) {
+    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+      HInstruction* instruction = table_[i];
+      if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) {
+        table_[i] = nullptr;
+        --number_of_entries_;
+      }
+    }
+
+    for (ValueSetNode* current = collisions_, *previous = nullptr;
+         current != nullptr;
+         current = current->GetNext()) {
+      HInstruction* instruction = current->GetInstruction();
+      if (instruction->GetSideEffects().DependsOn(side_effects)) {
+        if (previous == nullptr) {
+          collisions_ = current->GetNext();
+        } else {
+          previous->SetNext(current->GetNext());
+        }
+        --number_of_entries_;
+      } else {
+        previous = current;
+      }
+    }
+  }
+
+  // Returns a copy of this set.
+  ValueSet* Copy() const {
+    ValueSet* copy = new (allocator_) ValueSet(allocator_);
+
+    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+      copy->table_[i] = table_[i];
+    }
+
+    // Note that the order will be inverted in the copy. This is fine, as the order is not
+    // relevant for a ValueSet.
+    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
+      copy->collisions_ = new (allocator_) ValueSetNode(
+          node->GetInstruction(), node->GetHashCode(), copy->collisions_);
+    }
+
+    copy->number_of_entries_ = number_of_entries_;
+    return copy;
+  }
+
+  void Clear() {
+    number_of_entries_ = 0;
+    collisions_ = nullptr;
+    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+      table_[i] = nullptr;
+    }
+  }
+
+  // Update this `ValueSet` by intersecting with instructions in `other`.
+  void IntersectionWith(ValueSet* other) {
+    if (IsEmpty()) {
+      return;
+    } else if (other->IsEmpty()) {
+      Clear();
+    } else {
+      for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+        if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) {
+          --number_of_entries_;
+          table_[i] = nullptr;
+        }
+      }
+      for (ValueSetNode* current = collisions_, *previous = nullptr;
+           current != nullptr;
+           current = current->GetNext()) {
+        if (other->IdentityLookup(current->GetInstruction()) == nullptr) {
+          if (previous == nullptr) {
+            collisions_ = current->GetNext();
+          } else {
+            previous->SetNext(current->GetNext());
+          }
+          --number_of_entries_;
+        } else {
+          previous = current;
+        }
+      }
+    }
+  }
+
+  bool IsEmpty() const { return number_of_entries_ == 0; }
+  size_t GetNumberOfEntries() const { return number_of_entries_; }
+
+ private:
+  static constexpr size_t kDefaultNumberOfEntries = 8;
+
+  ArenaAllocator* const allocator_;
+
+  // The number of entries in the set.
+  size_t number_of_entries_;
+
+  // The internal implementation of the set. It uses a combination of a hash code based
+  // fixed-size list, and a linked list to handle hash code collisions.
+  // TODO: Tune the fixed size list original size, and support growing it.
+  ValueSetNode* collisions_;
+  HInstruction* table_[kDefaultNumberOfEntries];
+
+  DISALLOW_COPY_AND_ASSIGN(ValueSet);
+};
+
+/**
+ * Optimization phase that removes redundant instructions.
+ */
+class GlobalValueNumberer : public ValueObject {
+ public:
+  GlobalValueNumberer(ArenaAllocator* allocator,
+                      HGraph* graph,
+                      const SideEffectsAnalysis& side_effects)
+      : graph_(graph),
+        allocator_(allocator),
+        side_effects_(side_effects),
+        sets_(allocator, graph->GetBlocks().Size(), nullptr) {}
+
+  void Run();
+
+ private:
+  // Per-block GVN. Will also update the ValueSet of the dominated and
+  // successor blocks.
+  void VisitBasicBlock(HBasicBlock* block);
+
+  HGraph* graph_;
+  ArenaAllocator* const allocator_;
+  const SideEffectsAnalysis& side_effects_;
+
+  // ValueSet for blocks. Initially null, but for an individual block they
+  // are allocated and populated by the dominator, and updated by all blocks
+  // in the path from the dominator to the block.
+  GrowableArray<ValueSet*> sets_;
+
+  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
+};
+
+void GlobalValueNumberer::Run() {
+  DCHECK(side_effects_.HasRun());
   sets_.Put(graph_->GetEntryBlock()->GetBlockId(), new (allocator_) ValueSet(allocator_));
 
-  // Do reverse post order to ensure the non back-edge predecessors of a block are
+  // Use the reverse post order to ensure the non back-edge predecessors of a block are
   // visited before the block itself.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     VisitBasicBlock(it.Current());
   }
 }
 
-void GlobalValueNumberer::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) {
-  int id = info->GetHeader()->GetBlockId();
-  loop_effects_.Put(id, loop_effects_.Get(id).Union(effects));
-}
-
-void GlobalValueNumberer::ComputeSideEffects() {
-  if (kIsDebugBuild) {
-    for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-      HBasicBlock* block = it.Current();
-      SideEffects effects = GetBlockEffects(block);
-      DCHECK(!effects.HasSideEffects() && !effects.HasDependencies());
-      if (block->IsLoopHeader()) {
-        effects = GetLoopEffects(block);
-        DCHECK(!effects.HasSideEffects() && !effects.HasDependencies());
-      }
-    }
-  }
-
-  // Do a post order visit to ensure we visit a loop header after its loop body.
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-
-    SideEffects effects = SideEffects::None();
-    // Update `effects` with the side effects of all instructions in this block.
-    for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
-         inst_it.Advance()) {
-      HInstruction* instruction = inst_it.Current();
-      effects = effects.Union(instruction->GetSideEffects());
-      if (effects.HasAllSideEffects()) {
-        break;
-      }
-    }
-
-    block_effects_.Put(block->GetBlockId(), effects);
-
-    if (block->IsLoopHeader()) {
-      // The side effects of the loop header are part of the loop.
-      UpdateLoopEffects(block->GetLoopInformation(), effects);
-      HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
-      if (pre_header->IsInLoop()) {
-        // Update the side effects of the outer loop with the side effects of the inner loop.
-        // Note that this works because we know all the blocks of the inner loop are visited
-        // before the loop header of the outer loop.
-        UpdateLoopEffects(pre_header->GetLoopInformation(), GetLoopEffects(block));
-      }
-    } else if (block->IsInLoop()) {
-      // Update the side effects of the loop with the side effects of this block.
-      UpdateLoopEffects(block->GetLoopInformation(), effects);
-    }
-  }
-}
-
-SideEffects GlobalValueNumberer::GetLoopEffects(HBasicBlock* block) const {
-  DCHECK(block->IsLoopHeader());
-  return loop_effects_.Get(block->GetBlockId());
-}
-
-SideEffects GlobalValueNumberer::GetBlockEffects(HBasicBlock* block) const {
-  return block_effects_.Get(block->GetBlockId());
-}
-
 void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
   ValueSet* set = nullptr;
   const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors();
@@ -110,7 +279,7 @@
     if (!set->IsEmpty()) {
       if (block->IsLoopHeader()) {
         DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
-        set->Kill(GetLoopEffects(block));
+        set->Kill(side_effects_.GetLoopEffects(block));
       } else if (predecessors.Size() > 1) {
         for (size_t i = 0, e = predecessors.Size(); i < e; ++i) {
           set->IntersectionWith(sets_.Get(predecessors.Get(i)->GetBlockId()));
@@ -142,4 +311,9 @@
   }
 }
 
+void GVNOptimization::Run() {
+  GlobalValueNumberer gvn(graph_->GetArena(), graph_, side_effects_);
+  gvn.Run();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h
index 81f2c3f..57e0487 100644
--- a/compiler/optimizing/gvn.h
+++ b/compiler/optimizing/gvn.h
@@ -22,272 +22,18 @@
 
 namespace art {
 
-/**
- * A node in the collision list of a ValueSet. Encodes the instruction,
- * the hash code, and the next node in the collision list.
- */
-class ValueSetNode : public ArenaObject<kArenaAllocMisc> {
- public:
-  ValueSetNode(HInstruction* instruction, size_t hash_code, ValueSetNode* next)
-      : instruction_(instruction), hash_code_(hash_code), next_(next) {}
-
-  size_t GetHashCode() const { return hash_code_; }
-  HInstruction* GetInstruction() const { return instruction_; }
-  ValueSetNode* GetNext() const { return next_; }
-  void SetNext(ValueSetNode* node) { next_ = node; }
-
- private:
-  HInstruction* const instruction_;
-  const size_t hash_code_;
-  ValueSetNode* next_;
-
-  DISALLOW_COPY_AND_ASSIGN(ValueSetNode);
-};
-
-/**
- * A ValueSet holds instructions that can replace other instructions. It is updated
- * through the `Add` method, and the `Kill` method. The `Kill` method removes
- * instructions that are affected by the given side effect.
- *
- * The `Lookup` method returns an equivalent instruction to the given instruction
- * if there is one in the set. In GVN, we would say those instructions have the
- * same "number".
- */
-class ValueSet : public ArenaObject<kArenaAllocMisc> {
- public:
-  explicit ValueSet(ArenaAllocator* allocator)
-      : allocator_(allocator), number_of_entries_(0), collisions_(nullptr) {
-    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
-      table_[i] = nullptr;
-    }
-  }
-
-  // Adds an instruction in the set.
-  void Add(HInstruction* instruction) {
-    DCHECK(Lookup(instruction) == nullptr);
-    size_t hash_code = instruction->ComputeHashCode();
-    size_t index = hash_code % kDefaultNumberOfEntries;
-    if (table_[index] == nullptr) {
-      table_[index] = instruction;
-    } else {
-      collisions_ = new (allocator_) ValueSetNode(instruction, hash_code, collisions_);
-    }
-    ++number_of_entries_;
-  }
-
-  // If in the set, returns an equivalent instruction to the given instruction. Returns
-  // null otherwise.
-  HInstruction* Lookup(HInstruction* instruction) const {
-    size_t hash_code = instruction->ComputeHashCode();
-    size_t index = hash_code % kDefaultNumberOfEntries;
-    HInstruction* existing = table_[index];
-    if (existing != nullptr && existing->Equals(instruction)) {
-      return existing;
-    }
-
-    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
-      if (node->GetHashCode() == hash_code) {
-        existing = node->GetInstruction();
-        if (existing->Equals(instruction)) {
-          return existing;
-        }
-      }
-    }
-    return nullptr;
-  }
-
-  // Returns whether `instruction` is in the set.
-  HInstruction* IdentityLookup(HInstruction* instruction) const {
-    size_t hash_code = instruction->ComputeHashCode();
-    size_t index = hash_code % kDefaultNumberOfEntries;
-    HInstruction* existing = table_[index];
-    if (existing != nullptr && existing == instruction) {
-      return existing;
-    }
-
-    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
-      if (node->GetHashCode() == hash_code) {
-        existing = node->GetInstruction();
-        if (existing == instruction) {
-          return existing;
-        }
-      }
-    }
-    return nullptr;
-  }
-
-  // Removes all instructions in the set that are affected by the given side effects.
-  void Kill(SideEffects side_effects) {
-    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
-      HInstruction* instruction = table_[i];
-      if (instruction != nullptr && instruction->GetSideEffects().DependsOn(side_effects)) {
-        table_[i] = nullptr;
-        --number_of_entries_;
-      }
-    }
-
-    for (ValueSetNode* current = collisions_, *previous = nullptr;
-         current != nullptr;
-         current = current->GetNext()) {
-      HInstruction* instruction = current->GetInstruction();
-      if (instruction->GetSideEffects().DependsOn(side_effects)) {
-        if (previous == nullptr) {
-          collisions_ = current->GetNext();
-        } else {
-          previous->SetNext(current->GetNext());
-        }
-        --number_of_entries_;
-      } else {
-        previous = current;
-      }
-    }
-  }
-
-  // Returns a copy of this set.
-  ValueSet* Copy() const {
-    ValueSet* copy = new (allocator_) ValueSet(allocator_);
-
-    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
-      copy->table_[i] = table_[i];
-    }
-
-    // Note that the order will be inverted in the copy. This is fine, as the order is not
-    // relevant for a ValueSet.
-    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
-      copy->collisions_ = new (allocator_) ValueSetNode(
-          node->GetInstruction(), node->GetHashCode(), copy->collisions_);
-    }
-
-    copy->number_of_entries_ = number_of_entries_;
-    return copy;
-  }
-
-  void Clear() {
-    number_of_entries_ = 0;
-    collisions_ = nullptr;
-    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
-      table_[i] = nullptr;
-    }
-  }
-
-  // Update this `ValueSet` by intersecting with instructions in `other`.
-  void IntersectionWith(ValueSet* other) {
-    if (IsEmpty()) {
-      return;
-    } else if (other->IsEmpty()) {
-      Clear();
-    } else {
-      for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
-        if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) {
-          --number_of_entries_;
-          table_[i] = nullptr;
-        }
-      }
-      for (ValueSetNode* current = collisions_, *previous = nullptr;
-           current != nullptr;
-           current = current->GetNext()) {
-        if (other->IdentityLookup(current->GetInstruction()) == nullptr) {
-          if (previous == nullptr) {
-            collisions_ = current->GetNext();
-          } else {
-            previous->SetNext(current->GetNext());
-          }
-          --number_of_entries_;
-        } else {
-          previous = current;
-        }
-      }
-    }
-  }
-
-  bool IsEmpty() const { return number_of_entries_ == 0; }
-  size_t GetNumberOfEntries() const { return number_of_entries_; }
-
- private:
-  static constexpr size_t kDefaultNumberOfEntries = 8;
-
-  ArenaAllocator* const allocator_;
-
-  // The number of entries in the set.
-  size_t number_of_entries_;
-
-  // The internal implementation of the set. It uses a combination of a hash code based
-  // fixed-size list, and a linked list to handle hash code collisions.
-  // TODO: Tune the fixed size list original size, and support growing it.
-  ValueSetNode* collisions_;
-  HInstruction* table_[kDefaultNumberOfEntries];
-
-  DISALLOW_COPY_AND_ASSIGN(ValueSet);
-};
-
-/**
- * Optimization phase that removes redundant instruction.
- */
-class GlobalValueNumberer : public ValueObject {
- public:
-  GlobalValueNumberer(ArenaAllocator* allocator, HGraph* graph)
-      : graph_(graph),
-        allocator_(allocator),
-        block_effects_(allocator, graph->GetBlocks().Size()),
-        loop_effects_(allocator, graph->GetBlocks().Size()),
-        sets_(allocator, graph->GetBlocks().Size()) {
-    size_t number_of_blocks = graph->GetBlocks().Size();
-    block_effects_.SetSize(number_of_blocks);
-    loop_effects_.SetSize(number_of_blocks);
-    sets_.SetSize(number_of_blocks);
-
-    for (size_t i = 0; i < number_of_blocks; ++i) {
-      block_effects_.Put(i, SideEffects::None());
-      loop_effects_.Put(i, SideEffects::None());
-    }
-  }
-
-  void Run();
-
- private:
-  // Per-block GVN. Will also update the ValueSet of the dominated and
-  // successor blocks.
-  void VisitBasicBlock(HBasicBlock* block);
-
-  // Compute side effects of individual blocks and loops. The GVN algorithm
-  // will use these side effects to update the ValueSet of individual blocks.
-  void ComputeSideEffects();
-
-  void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
-  SideEffects GetLoopEffects(HBasicBlock* block) const;
-  SideEffects GetBlockEffects(HBasicBlock* block) const;
-
-  HGraph* graph_;
-
-  ArenaAllocator* const allocator_;
-
-  // Side effects of individual blocks, that is the union of the side effects
-  // of the instructions in the block.
-  GrowableArray<SideEffects> block_effects_;
-
-  // Side effects of loops, that is the union of the side effects of the
-  // blocks contained in that loop.
-  GrowableArray<SideEffects> loop_effects_;
-
-  // ValueSet for blocks. Initially null, but for an individual block they
-  // are allocated and populated by the dominator, and updated by all blocks
-  // in the path from the dominator to the block.
-  GrowableArray<ValueSet*> sets_;
-
-  ART_FRIEND_TEST(GVNTest, LoopSideEffects);
-  DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
-};
+class SideEffectsAnalysis;
 
 class GVNOptimization : public HOptimization {
  public:
-  explicit GVNOptimization(HGraph* graph) : HOptimization(graph, true, "GVN") {}
+  GVNOptimization(HGraph* graph, const SideEffectsAnalysis& side_effects)
+      : HOptimization(graph, true, "GVN"), side_effects_(side_effects) {}
 
-  void Run() OVERRIDE {
-    GlobalValueNumberer gvn(graph_->GetArena(), graph_);
-    gvn.Run();
-  }
+  void Run() OVERRIDE;
 
  private:
+  const SideEffectsAnalysis& side_effects_;
+
   DISALLOW_COPY_AND_ASSIGN(GVNOptimization);
 };
 
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 48f1ea9..4a48fee 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -18,6 +18,7 @@
 #include "gvn.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
+#include "side_effects_analysis.h"
 #include "utils/arena_allocator.h"
 
 #include "gtest/gtest.h"
@@ -64,7 +65,9 @@
   ASSERT_EQ(use_after_kill->GetBlock(), block);
 
   graph->TryBuildingSsa();
-  GlobalValueNumberer(&allocator, graph).Run();
+  SideEffectsAnalysis side_effects(graph);
+  side_effects.Run();
+  GVNOptimization(graph, side_effects).Run();
 
   ASSERT_TRUE(to_remove->GetBlock() == nullptr);
   ASSERT_EQ(different_offset->GetBlock(), block);
@@ -116,7 +119,9 @@
   join->AddInstruction(new (&allocator) HExit());
 
   graph->TryBuildingSsa();
-  GlobalValueNumberer(&allocator, graph).Run();
+  SideEffectsAnalysis side_effects(graph);
+  side_effects.Run();
+  GVNOptimization(graph, side_effects).Run();
 
   // Check that all field get instructions have been GVN'ed.
   ASSERT_TRUE(then->GetFirstInstruction()->IsGoto());
@@ -184,7 +189,11 @@
   ASSERT_EQ(field_get_in_exit->GetBlock(), exit);
 
   graph->TryBuildingSsa();
-  GlobalValueNumberer(&allocator, graph).Run();
+  {
+    SideEffectsAnalysis side_effects(graph);
+    side_effects.Run();
+    GVNOptimization(graph, side_effects).Run();
+  }
 
   // Check that all field get instructions are still there.
   ASSERT_EQ(field_get_in_loop_header->GetBlock(), loop_header);
@@ -195,7 +204,11 @@
 
   // Now remove the field set, and check that all field get instructions have been GVN'ed.
   loop_body->RemoveInstruction(field_set);
-  GlobalValueNumberer(&allocator, graph).Run();
+  {
+    SideEffectsAnalysis side_effects(graph);
+    side_effects.Run();
+    GVNOptimization(graph, side_effects).Run();
+  }
 
   ASSERT_TRUE(field_get_in_loop_header->GetBlock() == nullptr);
   ASSERT_TRUE(field_get_in_loop_body->GetBlock() == nullptr);
@@ -256,12 +269,12 @@
     entry->AddInstruction(new (&allocator) HInstanceFieldSet(
         parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false));
 
-    GlobalValueNumberer gvn(&allocator, graph);
-    gvn.Run();
+    SideEffectsAnalysis side_effects(graph);
+    side_effects.Run();
 
-    ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects());
-    ASSERT_FALSE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects());
-    ASSERT_FALSE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects());
+    ASSERT_FALSE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects());
+    ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects());
   }
 
   // Check that the side effects of the outer loop does not affect the inner loop.
@@ -271,13 +284,13 @@
             parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false),
         outer_loop_body->GetLastInstruction());
 
-    GlobalValueNumberer gvn(&allocator, graph);
-    gvn.Run();
+    SideEffectsAnalysis side_effects(graph);
+    side_effects.Run();
 
-    ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects());
-    ASSERT_TRUE(gvn.GetBlockEffects(outer_loop_body).HasSideEffects());
-    ASSERT_TRUE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects());
-    ASSERT_FALSE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects());
+    ASSERT_FALSE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects());
   }
 
   // Check that the side effects of the inner loop affects the outer loop.
@@ -288,13 +301,13 @@
             parameter, parameter, Primitive::kPrimNot, MemberOffset(42), false),
         inner_loop_body->GetLastInstruction());
 
-    GlobalValueNumberer gvn(&allocator, graph);
-    gvn.Run();
+    SideEffectsAnalysis side_effects(graph);
+    side_effects.Run();
 
-    ASSERT_TRUE(gvn.GetBlockEffects(entry).HasSideEffects());
-    ASSERT_FALSE(gvn.GetBlockEffects(outer_loop_body).HasSideEffects());
-    ASSERT_TRUE(gvn.GetLoopEffects(outer_loop_header).HasSideEffects());
-    ASSERT_TRUE(gvn.GetLoopEffects(inner_loop_header).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetBlockEffects(entry).HasSideEffects());
+    ASSERT_FALSE(side_effects.GetBlockEffects(outer_loop_body).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetLoopEffects(outer_loop_header).HasSideEffects());
+    ASSERT_TRUE(side_effects.GetLoopEffects(inner_loop_header).HasSideEffects());
   }
 }
 }  // namespace art
diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc
index 3e4b83b..ac8759c 100644
--- a/compiler/optimizing/live_interval_test.cc
+++ b/compiler/optimizing/live_interval_test.cc
@@ -278,4 +278,55 @@
   }
 }
 
+TEST(LiveInterval, AddLoopRange) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  {
+    // Test when only used in a loop.
+    static constexpr size_t ranges[][2] = {{0, 4}};
+    LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator);
+    interval->AddLoopRange(0, 8);
+    LiveRange* range = interval->GetFirstRange();
+    ASSERT_TRUE(range->GetNext() == nullptr);
+    ASSERT_EQ(range->GetStart(), 0u);
+    ASSERT_EQ(range->GetEnd(), 8u);
+  }
+
+  {
+    // Test when only used in a loop, with the live range starting after the loop start.
+    static constexpr size_t ranges[][2] = {{2, 4}};
+    LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator);
+    interval->AddLoopRange(0, 8);
+    LiveRange* range = interval->GetFirstRange();
+    ASSERT_TRUE(range->GetNext() == nullptr);
+    ASSERT_EQ(range->GetStart(), 0u);
+    ASSERT_EQ(range->GetEnd(), 8u);
+  }
+
+  {
+    // Test when used just after the loop.
+    static constexpr size_t ranges[][2] = {{2, 4}, {8, 10}};
+    LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator);
+    interval->AddLoopRange(0, 8);
+    LiveRange* range = interval->GetFirstRange();
+    ASSERT_TRUE(range->GetNext() == nullptr);
+    ASSERT_EQ(range->GetStart(), 0u);
+    ASSERT_EQ(range->GetEnd(), 10u);
+  }
+
+  {
+    // Test when use after the loop is after a lifetime hole.
+    static constexpr size_t ranges[][2] = {{2, 4}, {10, 12}};
+    LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator);
+    interval->AddLoopRange(0, 8);
+    LiveRange* range = interval->GetFirstRange();
+    ASSERT_EQ(range->GetStart(), 0u);
+    ASSERT_EQ(range->GetEnd(), 8u);
+    range = range->GetNext();
+    ASSERT_EQ(range->GetStart(), 10u);
+    ASSERT_EQ(range->GetEnd(), 12u);
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index ff23eda..2097ea6 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -432,7 +432,7 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   HPhi* phi = liveness.GetInstructionFromSsaIndex(4)->AsPhi();
-  ASSERT_EQ(phi->NumberOfUses(), 1u);
+  ASSERT_EQ(phi->ExpensiveComputeNumberOfUses(), 1u);
   interval = phi->GetLiveInterval();
   range = interval->GetFirstRange();
   ASSERT_EQ(26u, range->GetStart());
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ec53366..fe9ce74 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -39,9 +39,11 @@
   HEnvironment* environment = instruction->GetEnvironment();
   if (environment != nullptr) {
     for (size_t i = 0, e = environment->Size(); i < e; ++i) {
-      HInstruction* vreg = environment->GetInstructionAt(i);
-      if (vreg != nullptr) {
-        vreg->RemoveEnvironmentUser(environment, i);
+      HUseListNode<HEnvironment*>* vreg_env_use = environment->GetInstructionEnvUseAt(i);
+      if (vreg_env_use != nullptr) {
+        HInstruction* vreg = environment->GetInstructionAt(i);
+        DCHECK(vreg != nullptr);
+        vreg->RemoveEnvironmentUser(vreg_env_use);
       }
     }
   }
@@ -425,8 +427,8 @@
                    HBasicBlock* block,
                    HInstruction* instruction) {
   DCHECK_EQ(block, instruction->GetBlock());
-  DCHECK(instruction->GetUses() == nullptr);
-  DCHECK(instruction->GetEnvUses() == nullptr);
+  DCHECK(instruction->GetUses().IsEmpty());
+  DCHECK(instruction->GetEnvUses().IsEmpty());
   instruction->SetBlock(nullptr);
   instruction_list->RemoveInstruction(instruction);
 
@@ -441,22 +443,24 @@
   Remove(&phis_, this, phi);
 }
 
-template <typename T>
-static void RemoveFromUseList(T* user,
-                              size_t input_index,
-                              HUseListNode<T>** list) {
-  HUseListNode<T>* previous = nullptr;
-  HUseListNode<T>* current = *list;
-  while (current != nullptr) {
-    if (current->GetUser() == user && current->GetIndex() == input_index) {
-      if (previous == nullptr) {
-        *list = current->GetTail();
-      } else {
-        previous->SetTail(current->GetTail());
-      }
+void HEnvironment::CopyFrom(HEnvironment* env) {
+  for (size_t i = 0; i < env->Size(); i++) {
+    HInstruction* instruction = env->GetInstructionAt(i);
+    SetRawEnvAt(i, instruction);
+    if (instruction != nullptr) {
+      instruction->AddEnvUseAt(this, i);
     }
-    previous = current;
-    current = current->GetTail();
+  }
+}
+
+// Unlinks from `list` every node whose user is `user` and whose index is `input_index`.
+template <typename T>
+static void RemoveFromUseList(T user, size_t input_index, HUseList<T>* list) {
+  for (HUseIterator<T> use_it(*list); !use_it.Done(); use_it.Advance()) {
+    HUseListNode<T>* current = use_it.Current();
+    if (current->GetUser() == user && current->GetIndex() == input_index) {
+      list->Remove(current);
+    }
   }
 }
 
@@ -480,8 +484,8 @@
   RemoveFromUseList(user, input_index, &uses_);
 }
 
-void HInstruction::RemoveEnvironmentUser(HEnvironment* user, size_t input_index) {
-  RemoveFromUseList(user, input_index, &env_uses_);
+void HInstruction::RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use) {
+  env_uses_.Remove(use);
 }
 
 void HInstructionList::AddInstruction(HInstruction* instruction) {
@@ -573,24 +577,24 @@
 
 void HInstruction::ReplaceWith(HInstruction* other) {
   DCHECK(other != nullptr);
-  for (HUseIterator<HInstruction> it(GetUses()); !it.Done(); it.Advance()) {
-    HUseListNode<HInstruction>* current = it.Current();
+  for (HUseIterator<HInstruction*> it(GetUses()); !it.Done(); it.Advance()) {
+    HUseListNode<HInstruction*>* current = it.Current();
     HInstruction* user = current->GetUser();
     size_t input_index = current->GetIndex();
     user->SetRawInputAt(input_index, other);
     other->AddUseAt(user, input_index);
   }
 
-  for (HUseIterator<HEnvironment> it(GetEnvUses()); !it.Done(); it.Advance()) {
-    HUseListNode<HEnvironment>* current = it.Current();
+  for (HUseIterator<HEnvironment*> it(GetEnvUses()); !it.Done(); it.Advance()) {
+    HUseListNode<HEnvironment*>* current = it.Current();
     HEnvironment* user = current->GetUser();
     size_t input_index = current->GetIndex();
     user->SetRawEnvAt(input_index, other);
     other->AddEnvUseAt(user, input_index);
   }
 
-  uses_ = nullptr;
-  env_uses_ = nullptr;
+  uses_.Clear();
+  env_uses_.Clear();
 }
 
 void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e19bfce..cac78f6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -587,26 +587,104 @@
   }                                                                     \
   virtual void Accept(HGraphVisitor* visitor)
 
+template <typename T> class HUseList;
+
 template <typename T>
 class HUseListNode : public ArenaObject<kArenaAllocMisc> {
  public:
-  HUseListNode(T* user, size_t index, HUseListNode* tail)
-      : user_(user), index_(index), tail_(tail) {}
-
-  HUseListNode* GetTail() const { return tail_; }
-  T* GetUser() const { return user_; }
+  HUseListNode* GetPrevious() const { return prev_; }
+  HUseListNode* GetNext() const { return next_; }
+  T GetUser() const { return user_; }
   size_t GetIndex() const { return index_; }
 
-  void SetTail(HUseListNode<T>* node) { tail_ = node; }
-
  private:
-  T* const user_;
+  HUseListNode(T user, size_t index)
+      : user_(user), index_(index), prev_(nullptr), next_(nullptr) {}
+
+  T const user_;
   const size_t index_;
-  HUseListNode<T>* tail_;
+  HUseListNode<T>* prev_;
+  HUseListNode<T>* next_;
+
+  friend class HUseList<T>;
 
   DISALLOW_COPY_AND_ASSIGN(HUseListNode);
 };
 
+template <typename T>
+class HUseList : public ValueObject {
+ public:
+  HUseList() : first_(nullptr) {}
+
+  void Clear() {
+    first_ = nullptr;
+  }
+
+  // Adds a new entry at the beginning of the use list and returns
+  // the newly created node.
+  HUseListNode<T>* AddUse(T user, size_t index, ArenaAllocator* arena) {
+    HUseListNode<T>* new_node = new(arena) HUseListNode<T>(user, index);
+    if (IsEmpty()) {
+      first_ = new_node;
+    } else {
+      first_->prev_ = new_node;
+      new_node->next_ = first_;
+      first_ = new_node;
+    }
+    return new_node;
+  }
+
+  HUseListNode<T>* GetFirst() const {
+    return first_;
+  }
+
+  void Remove(HUseListNode<T>* node) {
+    if (node->prev_ != nullptr) {
+      node->prev_->next_ = node->next_;
+    }
+    if (node->next_ != nullptr) {
+      node->next_->prev_ = node->prev_;
+    }
+    if (node == first_) {
+      first_ = node->next_;
+    }
+  }
+
+  bool IsEmpty() const {
+    return first_ == nullptr;
+  }
+
+  bool HasOnlyOneUse() const {
+    return first_ != nullptr && first_->next_ == nullptr;
+  }
+
+ private:
+  HUseListNode<T>* first_;
+};
+
+template<typename T>
+class HUseIterator : public ValueObject {
+ public:
+  explicit HUseIterator(const HUseList<T>& uses) : current_(uses.GetFirst()) {}
+
+  bool Done() const { return current_ == nullptr; }
+
+  void Advance() {
+    DCHECK(!Done());
+    current_ = current_->GetNext();
+  }
+
+  HUseListNode<T>* Current() const {
+    DCHECK(!Done());
+    return current_;
+  }
+
+ private:
+  HUseListNode<T>* current_;
+
+  friend class HValue;
+};
+
 // Represents the side effects an instruction may have.
 class SideEffects : public ValueObject {
  public:
@@ -670,6 +748,57 @@
   size_t flags_;
 };
 
+// A HEnvironment object contains the values of virtual registers at a given location.
+class HEnvironment : public ArenaObject<kArenaAllocMisc> {
+ public:
+  HEnvironment(ArenaAllocator* arena, size_t number_of_vregs)
+     : vregs_(arena, number_of_vregs) {
+    vregs_.SetSize(number_of_vregs);
+    for (size_t i = 0; i < number_of_vregs; i++) {
+      vregs_.Put(i, VRegInfo(nullptr, nullptr));
+    }
+  }
+
+  void CopyFrom(HEnvironment* env);
+
+  void SetRawEnvAt(size_t index, HInstruction* instruction) {
+    vregs_.Put(index, VRegInfo(instruction, nullptr));
+  }
+
+  // Record instructions' use entries of this environment for constant-time removal.
+  void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) {
+    DCHECK(env_use->GetUser() == this);
+    size_t index = env_use->GetIndex();
+    VRegInfo info = vregs_.Get(index);
+    DCHECK(info.vreg_ != nullptr);
+    DCHECK(info.node_ == nullptr);
+    vregs_.Put(index, VRegInfo(info.vreg_, env_use));
+  }
+
+  HInstruction* GetInstructionAt(size_t index) const {
+    return vregs_.Get(index).vreg_;
+  }
+
+  HUseListNode<HEnvironment*>* GetInstructionEnvUseAt(size_t index) const {
+    return vregs_.Get(index).node_;
+  }
+
+  size_t Size() const { return vregs_.Size(); }
+
+ private:
+  struct VRegInfo {
+    HInstruction* vreg_;
+    HUseListNode<HEnvironment*>* node_;
+
+    VRegInfo(HInstruction* instruction, HUseListNode<HEnvironment*>* env_use)
+        : vreg_(instruction), node_(env_use) {}
+  };
+
+  GrowableArray<VRegInfo> vregs_;
+
+  DISALLOW_COPY_AND_ASSIGN(HEnvironment);
+};
+
 class HInstruction : public ArenaObject<kArenaAllocMisc> {
  public:
   explicit HInstruction(SideEffects side_effects)
@@ -678,8 +807,6 @@
         block_(nullptr),
         id_(-1),
         ssa_index_(-1),
-        uses_(nullptr),
-        env_uses_(nullptr),
         environment_(nullptr),
         locations_(nullptr),
         live_interval_(nullptr),
@@ -723,30 +850,29 @@
   virtual bool CanDoImplicitNullCheck() const { return false; }
 
   void AddUseAt(HInstruction* user, size_t index) {
-    uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HInstruction>(user, index, uses_);
+    uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
   }
 
   void AddEnvUseAt(HEnvironment* user, size_t index) {
     DCHECK(user != nullptr);
-    env_uses_ = new (block_->GetGraph()->GetArena()) HUseListNode<HEnvironment>(
-        user, index, env_uses_);
+    HUseListNode<HEnvironment*>* env_use =
+        env_uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
+    user->RecordEnvUse(env_use);
   }
 
   void RemoveUser(HInstruction* user, size_t index);
-  void RemoveEnvironmentUser(HEnvironment* user, size_t index);
+  void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use);
 
-  HUseListNode<HInstruction>* GetUses() const { return uses_; }
-  HUseListNode<HEnvironment>* GetEnvUses() const { return env_uses_; }
+  HUseList<HInstruction*>& GetUses() { return uses_; }
+  HUseList<HEnvironment*>& GetEnvUses() { return env_uses_; }
 
-  bool HasUses() const { return uses_ != nullptr || env_uses_ != nullptr; }
-  bool HasEnvironmentUses() const { return env_uses_ != nullptr; }
+  bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
+  bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
 
-  size_t NumberOfUses() const {
+  size_t ExpensiveComputeNumberOfUses() const {
     // TODO: Optimize this method if it is used outside of the HGraphVisualizer.
     size_t result = 0;
-    HUseListNode<HInstruction>* current = uses_;
-    while (current != nullptr) {
-      current = current->GetTail();
+    for (HUseIterator<HInstruction*> it(uses_); !it.Done(); it.Advance()) {
       ++result;
     }
     return result;
@@ -781,10 +907,6 @@
   // Insert `this` instruction in `cursor`'s graph, just before `cursor`.
   void InsertBefore(HInstruction* cursor);
 
-  bool HasOnlyOneUse() const {
-    return uses_ != nullptr && uses_->GetTail() == nullptr;
-  }
-
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
@@ -847,10 +969,10 @@
   int ssa_index_;
 
   // List of instructions that have this instruction as input.
-  HUseListNode<HInstruction>* uses_;
+  HUseList<HInstruction*> uses_;
 
   // List of environments that contain this instruction.
-  HUseListNode<HEnvironment>* env_uses_;
+  HUseList<HEnvironment*> env_uses_;
 
   // The environment associated with this instruction. Not null if the instruction
   // might jump out of the method.
@@ -876,69 +998,6 @@
 };
 std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs);
 
-template<typename T>
-class HUseIterator : public ValueObject {
- public:
-  explicit HUseIterator(HUseListNode<T>* uses) : current_(uses) {}
-
-  bool Done() const { return current_ == nullptr; }
-
-  void Advance() {
-    DCHECK(!Done());
-    current_ = current_->GetTail();
-  }
-
-  HUseListNode<T>* Current() const {
-    DCHECK(!Done());
-    return current_;
-  }
-
- private:
-  HUseListNode<T>* current_;
-
-  friend class HValue;
-};
-
-// A HEnvironment object contains the values of virtual registers at a given location.
-class HEnvironment : public ArenaObject<kArenaAllocMisc> {
- public:
-  HEnvironment(ArenaAllocator* arena, size_t number_of_vregs) : vregs_(arena, number_of_vregs) {
-    vregs_.SetSize(number_of_vregs);
-    for (size_t i = 0; i < number_of_vregs; i++) {
-      vregs_.Put(i, nullptr);
-    }
-  }
-
-  void Populate(const GrowableArray<HInstruction*>& env) {
-    for (size_t i = 0; i < env.Size(); i++) {
-      HInstruction* instruction = env.Get(i);
-      vregs_.Put(i, instruction);
-      if (instruction != nullptr) {
-        instruction->AddEnvUseAt(this, i);
-      }
-    }
-  }
-
-  void SetRawEnvAt(size_t index, HInstruction* instruction) {
-    vregs_.Put(index, instruction);
-  }
-
-  HInstruction* GetInstructionAt(size_t index) const {
-    return vregs_.Get(index);
-  }
-
-  GrowableArray<HInstruction*>* GetVRegs() {
-    return &vregs_;
-  }
-
-  size_t Size() const { return vregs_.Size(); }
-
- private:
-  GrowableArray<HInstruction*> vregs_;
-
-  DISALLOW_COPY_AND_ASSIGN(HEnvironment);
-};
-
 class HInputIterator : public ValueObject {
  public:
   explicit HInputIterator(HInstruction* instruction) : instruction_(instruction), index_(0) {}
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 70dd8d7..cf90bf7 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -81,13 +81,13 @@
   entry->AddInstruction(new (&allocator) HExit());
 
   ASSERT_FALSE(parameter1->HasUses());
-  ASSERT_EQ(parameter1->NumberOfUses(), 0u);
+  ASSERT_EQ(parameter1->ExpensiveComputeNumberOfUses(), 0u);
 
   HInstruction* to_insert = new (&allocator) HNullCheck(parameter1, 0);
   entry->InsertInstructionBefore(to_insert, parameter2);
 
   ASSERT_TRUE(parameter1->HasUses());
-  ASSERT_EQ(parameter1->NumberOfUses(), 1u);
+  ASSERT_EQ(parameter1->ExpensiveComputeNumberOfUses(), 1u);
 }
 
 /**
@@ -105,13 +105,13 @@
   entry->AddInstruction(parameter);
 
   ASSERT_FALSE(parameter->HasUses());
-  ASSERT_EQ(parameter->NumberOfUses(), 0u);
+  ASSERT_EQ(parameter->ExpensiveComputeNumberOfUses(), 0u);
 
   HInstruction* to_add = new (&allocator) HNullCheck(parameter, 0);
   entry->AddInstruction(to_add);
 
   ASSERT_TRUE(parameter->HasUses());
-  ASSERT_EQ(parameter->NumberOfUses(), 1u);
+  ASSERT_EQ(parameter->ExpensiveComputeNumberOfUses(), 1u);
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 5bca730..a590c43 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -39,6 +39,7 @@
 #include "nodes.h"
 #include "prepare_for_register_allocation.h"
 #include "register_allocator.h"
+#include "side_effects_analysis.h"
 #include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
 #include "ssa_liveness_analysis.h"
@@ -214,7 +215,8 @@
   HInliner inliner(graph, dex_compilation_unit, driver, stats);
 
   HConstantFolding fold2(graph);
-  GVNOptimization gvn(graph);
+  SideEffectsAnalysis side_effects(graph);
+  GVNOptimization gvn(graph, side_effects);
   BoundsCheckElimination bce(graph);
   InstructionSimplifier simplify2(graph);
 
@@ -229,6 +231,7 @@
     &simplify1,
     &inliner,
     &fold2,
+    &side_effects,
     &gvn,
     &bce,
     &simplify2
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 7186dbe..12acd08 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -55,11 +55,10 @@
 
 void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
   bool needs_materialization = false;
-  if (!condition->HasOnlyOneUse()) {
+  if (!condition->GetUses().HasOnlyOneUse()) {
     needs_materialization = true;
   } else {
-    HUseListNode<HInstruction>* uses = condition->GetUses();
-    HInstruction* user = uses->GetUser();
+    HInstruction* user = condition->GetUses().GetFirst()->GetUser();
     if (!user->IsIf()) {
       needs_materialization = true;
     } else {
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 2c8166e..d2a21c8 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -55,7 +55,7 @@
     if (instruction->HasUses()) {
       PrintString(" [");
       bool first = true;
-      for (HUseIterator<HInstruction> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+      for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
         if (first) {
           first = false;
         } else {
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
new file mode 100644
index 0000000..96e1c8f
--- /dev/null
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "side_effects_analysis.h"
+
+namespace art {
+
+void SideEffectsAnalysis::Run() {
+  if (kIsDebugBuild) {
+    for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      SideEffects effects = GetBlockEffects(block);
+      DCHECK(!effects.HasSideEffects() && !effects.HasDependencies());
+      if (block->IsLoopHeader()) {
+        effects = GetLoopEffects(block);
+        DCHECK(!effects.HasSideEffects() && !effects.HasDependencies());
+      }
+    }
+  }
+
+  // Do a post order visit to ensure we visit a loop header after its loop body.
+  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+
+    SideEffects effects = SideEffects::None();
+    // Update `effects` with the side effects of all instructions in this block.
+    for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
+         inst_it.Advance()) {
+      HInstruction* instruction = inst_it.Current();
+      effects = effects.Union(instruction->GetSideEffects());
+      if (effects.HasAllSideEffects()) {
+        break;
+      }
+    }
+
+    block_effects_.Put(block->GetBlockId(), effects);
+
+    if (block->IsLoopHeader()) {
+      // The side effects of the loop header are part of the loop.
+      UpdateLoopEffects(block->GetLoopInformation(), effects);
+      HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+      if (pre_header->IsInLoop()) {
+        // Update the side effects of the outer loop with the side effects of the inner loop.
+        // Note that this works because we know all the blocks of the inner loop are visited
+        // before the loop header of the outer loop.
+        UpdateLoopEffects(pre_header->GetLoopInformation(), GetLoopEffects(block));
+      }
+    } else if (block->IsInLoop()) {
+      // Update the side effects of the loop with the side effects of this block.
+      UpdateLoopEffects(block->GetLoopInformation(), effects);
+    }
+  }
+  has_run_ = true;
+}
+
+SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const {
+  DCHECK(block->IsLoopHeader());
+  return loop_effects_.Get(block->GetBlockId());
+}
+
+SideEffects SideEffectsAnalysis::GetBlockEffects(HBasicBlock* block) const {
+  return block_effects_.Get(block->GetBlockId());
+}
+
+void SideEffectsAnalysis::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) {
+  int id = info->GetHeader()->GetBlockId();
+  loop_effects_.Put(id, loop_effects_.Get(id).Union(effects));
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
new file mode 100644
index 0000000..f1c98ac
--- /dev/null
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class SideEffectsAnalysis : public HOptimization {
+ public:
+  explicit SideEffectsAnalysis(HGraph* graph)
+      : HOptimization(graph, true, "SideEffects"),
+        graph_(graph),
+        block_effects_(graph->GetArena(), graph->GetBlocks().Size(), SideEffects::None()),
+        loop_effects_(graph->GetArena(), graph->GetBlocks().Size(), SideEffects::None()) {}
+
+  SideEffects GetLoopEffects(HBasicBlock* block) const;
+  SideEffects GetBlockEffects(HBasicBlock* block) const;
+
+  // Compute side effects of individual blocks and loops.
+  void Run();
+
+  bool HasRun() const { return has_run_; }
+
+ private:
+  void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
+
+  HGraph* graph_;
+
+  // Checked in debug build, to ensure the pass has been run prior to
+  // running a pass that depends on it.
+  bool has_run_ = false;
+
+  // Side effects of individual blocks, that is the union of the side effects
+  // of the instructions in the block.
+  GrowableArray<SideEffects> block_effects_;
+
+  // Side effects of loops, that is the union of the side effects of the
+  // blocks contained in that loop.
+  GrowableArray<SideEffects> loop_effects_;
+
+  ART_FRIEND_TEST(GVNTest, LoopSideEffects);
+  DISALLOW_COPY_AND_ASSIGN(SideEffectsAnalysis);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index edfafcd..4f9c3b8 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -68,7 +68,7 @@
 }
 
 HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
-  return GetLocalsFor(block)->Get(local);
+  return GetLocalsFor(block)->GetInstructionAt(local);
 }
 
 void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
@@ -85,7 +85,7 @@
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
         block->AddPhi(phi);
-        current_locals_->Put(local, phi);
+        current_locals_->SetRawEnvAt(local, phi);
       }
     }
     // Save the loop header so that the last phase of the analysis knows which
@@ -125,7 +125,7 @@
         block->AddPhi(phi);
         value = phi;
       }
-      current_locals_->Put(local, value);
+      current_locals_->SetRawEnvAt(local, value);
     }
   }
 
@@ -235,7 +235,7 @@
 }
 
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber());
+  HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
   if (load->GetType() != value->GetType()
       && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) {
     // If the operation requests a specific type, we make sure its input is of that type.
@@ -246,7 +246,7 @@
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1));
+  current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1));
   store->GetBlock()->RemoveInstruction(store);
 }
 
@@ -256,7 +256,7 @@
   }
   HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
       GetGraph()->GetArena(), current_locals_->Size());
-  environment->Populate(*current_locals_);
+  environment->CopyFrom(current_locals_);
   instruction->SetEnvironment(environment);
 }
 
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 2cbd51a..2eec87b 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -36,14 +36,14 @@
 
   void BuildSsa();
 
-  GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) {
+  HEnvironment* GetLocalsFor(HBasicBlock* block) {
     HEnvironment* env = locals_for_.Get(block->GetBlockId());
     if (env == nullptr) {
       env = new (GetGraph()->GetArena()) HEnvironment(
           GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs());
       locals_for_.Put(block->GetBlockId(), env);
     }
-    return env->GetVRegs();
+    return env;
   }
 
   HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
@@ -60,7 +60,7 @@
 
  private:
   // Locals for the current block being visited.
-  GrowableArray<HInstruction*>* current_locals_;
+  HEnvironment* current_locals_;
 
   // Keep track of loop headers found. The last phase of the analysis iterates
   // over these blocks to set the inputs of their phis.
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index d41157b..1b06315 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -232,9 +232,9 @@
 
       if (current->HasEnvironment()) {
         // All instructions in the environment must be live.
-        GrowableArray<HInstruction*>* environment = current->GetEnvironment()->GetVRegs();
+        HEnvironment* environment = current->GetEnvironment();
         for (size_t i = 0, e = environment->Size(); i < e; ++i) {
-          HInstruction* instruction = environment->Get(i);
+          HInstruction* instruction = environment->GetInstructionAt(i);
           if (instruction != nullptr) {
             DCHECK(instruction->HasSsaIndex());
             live_in->SetBit(instruction->GetSsaIndex());
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index a123313..b0d3853 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -254,16 +254,28 @@
 
   void AddLoopRange(size_t start, size_t end) {
     DCHECK(first_range_ != nullptr);
-    while (first_range_ != nullptr && first_range_->GetEnd() < end) {
-      DCHECK_LE(start, first_range_->GetStart());
-      first_range_ = first_range_->GetNext();
+    DCHECK_LE(start, first_range_->GetStart());
+    // Find the range that covers the positions after the loop.
+    LiveRange* after_loop = first_range_;
+    LiveRange* last_in_loop = nullptr;
+    while (after_loop != nullptr && after_loop->GetEnd() < end) {
+      DCHECK_LE(start, after_loop->GetStart());
+      last_in_loop = after_loop;
+      after_loop = after_loop->GetNext();
     }
-    if (first_range_ == nullptr) {
+    if (after_loop == nullptr) {
       // Uses are only in the loop.
       first_range_ = last_range_ = new (allocator_) LiveRange(start, end, nullptr);
-    } else {
+    } else if (after_loop->GetStart() <= end) {
+      first_range_ = after_loop;
       // There are uses after the loop.
       first_range_->start_ = start;
+    } else {
+      // The use after the loop is after a lifetime hole.
+      DCHECK(last_in_loop != nullptr);
+      first_range_ = last_in_loop;
+      first_range_->start_ = start;
+      first_range_->end_ = end;
     }
   }
 
@@ -479,10 +491,11 @@
   void Dump(std::ostream& stream) const {
     stream << "ranges: { ";
     LiveRange* current = first_range_;
-    do {
+    while (current != nullptr) {
       current->Dump(stream);
       stream << " ";
-    } while ((current = current->GetNext()) != nullptr);
+      current = current->GetNext();
+    }
     stream << "}, uses: { ";
     UsePosition* use = first_use_;
     if (use != nullptr) {
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 58cea77..fd30c1b 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -26,8 +26,8 @@
       HPhi* phi = inst_it.Current()->AsPhi();
       // Set dead ahead of running through uses. The phi may have no use.
       phi->SetDead();
-      for (HUseIterator<HInstruction> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        HUseListNode<HInstruction>* current = use_it.Current();
+      for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
+        HUseListNode<HInstruction*>* current = use_it.Current();
         HInstruction* user = current->GetUser();
         if (!user->IsPhi()) {
           worklist_.Add(phi);
@@ -61,9 +61,9 @@
       next = current->GetNext();
       if (current->AsPhi()->IsDead()) {
         if (current->HasUses()) {
-          for (HUseIterator<HInstruction> use_it(current->GetUses()); !use_it.Done();
+          for (HUseIterator<HInstruction*> use_it(current->GetUses()); !use_it.Done();
                use_it.Advance()) {
-            HUseListNode<HInstruction>* user_node = use_it.Current();
+            HUseListNode<HInstruction*>* user_node = use_it.Current();
             HInstruction* user = user_node->GetUser();
             DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
             DCHECK(user->AsPhi()->IsDead()) << user->GetId();
@@ -73,12 +73,12 @@
           }
         }
         if (current->HasEnvironmentUses()) {
-          for (HUseIterator<HEnvironment> use_it(current->GetEnvUses()); !use_it.Done();
+          for (HUseIterator<HEnvironment*> use_it(current->GetEnvUses()); !use_it.Done();
                use_it.Advance()) {
-            HUseListNode<HEnvironment>* user_node = use_it.Current();
+            HUseListNode<HEnvironment*>* user_node = use_it.Current();
             HEnvironment* user = user_node->GetUser();
             user->SetRawEnvAt(user_node->GetIndex(), nullptr);
-            current->RemoveEnvironmentUser(user, user_node->GetIndex());
+            current->RemoveEnvironmentUser(user_node);
           }
         }
         block->RemovePhi(current->AsPhi());
@@ -132,8 +132,8 @@
       // Because we're updating the users of this phi, we may have new
       // phis candidate for elimination if this phi is in a loop. Add phis that
       // used this phi to the worklist.
-      for (HUseIterator<HInstruction> it(phi->GetUses()); !it.Done(); it.Advance()) {
-        HUseListNode<HInstruction>* current = it.Current();
+      for (HUseIterator<HInstruction*> it(phi->GetUses()); !it.Done(); it.Advance()) {
+        HUseListNode<HInstruction*>* current = it.Current();
         HInstruction* user = current->GetUser();
         if (user->IsPhi()) {
           worklist_.Add(user->AsPhi());
diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc
index cb5ce20..947427b 100644
--- a/compiler/optimizing/ssa_type_propagation.cc
+++ b/compiler/optimizing/ssa_type_propagation.cc
@@ -114,7 +114,7 @@
 }
 
 void SsaTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
-  for (HUseIterator<HInstruction> it(instruction->GetUses()); !it.Done(); it.Advance()) {
+  for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->GetUser()->AsPhi();
     if (phi != nullptr) {
       AddToWorklist(phi);
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
index fde65e7..6af4853 100644
--- a/compiler/utils/growable_array.h
+++ b/compiler/utils/growable_array.h
@@ -37,6 +37,17 @@
                                                  kArenaAllocGrowableArray));
     }
 
+    GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data)
+      : arena_(arena),
+        num_allocated_(init_length),
+        num_used_(init_length) {
+      elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length,
+                                                 kArenaAllocGrowableArray));
+      for (size_t i = 0; i < init_length; ++i) {
+        elem_list_[i] = initial_data;
+      }
+    }
+
 
     // Expand the list size to at least new length.
     void Resize(size_t new_length) {
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 9e8d282..c181e43 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -67,26 +67,18 @@
   }
 }
 
-bool ArmContext::SetGPR(uint32_t reg, uintptr_t value) {
+void ArmContext::SetGPR(uint32_t reg, uintptr_t value) {
   DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
+  DCHECK(IsAccessibleGPR(reg));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  if (gprs_[reg] != nullptr) {
-    *gprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *gprs_[reg] = value;
 }
 
-bool ArmContext::SetFPR(uint32_t reg, uintptr_t value) {
+void ArmContext::SetFPR(uint32_t reg, uintptr_t value) {
   DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters));
+  DCHECK(IsAccessibleFPR(reg));
   DCHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  if (fprs_[reg] != nullptr) {
-    *fprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *fprs_[reg] = value;
 }
 
 void ArmContext::SmashCallerSaves() {
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index e894f16..1ca973e 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -37,13 +37,16 @@
   void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
-    bool success = SetGPR(SP, new_sp);
-    CHECK(success) << "Failed to set SP register";
+    SetGPR(SP, new_sp);
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
-    bool success = SetGPR(PC, new_pc);
-    CHECK(success) << "Failed to set PC register";
+    SetGPR(PC, new_pc);
+  }
+
+  bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
+    return gprs_[reg] != nullptr;
   }
 
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
@@ -51,31 +54,26 @@
     return gprs_[reg];
   }
 
-  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  uintptr_t GetGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
-    if (gprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *gprs_[reg];
-      return true;
-    }
+    DCHECK(IsAccessibleGPR(reg));
+    return *gprs_[reg];
   }
 
-  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  void SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  bool IsAccessibleFPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters));
-    if (fprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *fprs_[reg];
-      return true;
-    }
+    return fprs_[reg] != nullptr;
   }
 
-  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  uintptr_t GetFPR(uint32_t reg) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfSRegisters));
+    DCHECK(IsAccessibleFPR(reg));
+    return *fprs_[reg];
+  }
+
+  void SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
   void SmashCallerSaves() OVERRIDE;
   void DoLongJump() OVERRIDE;
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 0a31480..7fc0555 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -70,27 +70,19 @@
   }
 }
 
-bool Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
+void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) {
   DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
   DCHECK_NE(reg, static_cast<uint32_t>(XZR));
+  DCHECK(IsAccessibleGPR(reg));
   DCHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  if (gprs_[reg] != nullptr) {
-    *gprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *gprs_[reg] = value;
 }
 
-bool Arm64Context::SetFPR(uint32_t reg, uintptr_t value) {
+void Arm64Context::SetFPR(uint32_t reg, uintptr_t value) {
   DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters));
+  DCHECK(IsAccessibleFPR(reg));
   DCHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  if (fprs_[reg] != nullptr) {
-    *fprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *fprs_[reg] = value;
 }
 
 void Arm64Context::SmashCallerSaves() {
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index d9a433b..6a4485b 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -37,13 +37,16 @@
   void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
-    bool success = SetGPR(SP, new_sp);
-    CHECK(success) << "Failed to set SP register";
+    SetGPR(SP, new_sp);
   }
 
   void SetPC(uintptr_t new_lr) OVERRIDE {
-    bool success = SetGPR(LR, new_lr);
-    CHECK(success) << "Failed to set LR register";
+    SetGPR(LR, new_lr);
+  }
+
+  bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
+    return gprs_[reg] != nullptr;
   }
 
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
@@ -51,31 +54,26 @@
     return gprs_[reg];
   }
 
-  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  uintptr_t GetGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters));
-    if (gprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *gprs_[reg];
-      return true;
-    }
+    DCHECK(IsAccessibleGPR(reg));
+    return *gprs_[reg];
   }
 
-  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  void SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  bool IsAccessibleFPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters));
-    if (fprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *fprs_[reg];
-      return true;
-    }
+    return fprs_[reg] != nullptr;
   }
 
-  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  uintptr_t GetFPR(uint32_t reg) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfDRegisters));
+    DCHECK(IsAccessibleFPR(reg));
+    return *fprs_[reg];
+  }
+
+  void SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
   void SmashCallerSaves() OVERRIDE;
   void DoLongJump() OVERRIDE;
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 20a84dd..ed8cab0 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -49,24 +49,30 @@
   // Sets the program counter value.
   virtual void SetPC(uintptr_t new_pc) = 0;
 
+  // Returns whether the given GPR is accessible (read or write).
+  virtual bool IsAccessibleGPR(uint32_t reg) = 0;
+
   // Gets the given GPRs address.
   virtual uintptr_t* GetGPRAddress(uint32_t reg) = 0;
 
-  // Reads the given GPR. Returns true if we successfully read the register and
-  // set its value into 'val', returns false otherwise.
-  virtual bool GetGPR(uint32_t reg, uintptr_t* val) = 0;
+  // Reads the given GPR. The caller is responsible for checking the register
+  // is accessible with IsAccessibleGPR.
+  virtual uintptr_t GetGPR(uint32_t reg) = 0;
 
-  // Sets the given GPR. Returns true if we successfully write the given value
-  // into the register, returns false otherwise.
-  virtual bool SetGPR(uint32_t reg, uintptr_t value) = 0;
+  // Sets the given GPR. The caller is responsible for checking the register
+  // is accessible with IsAccessibleGPR.
+  virtual void SetGPR(uint32_t reg, uintptr_t value) = 0;
 
-  // Reads the given FPR. Returns true if we successfully read the register and
-  // set its value into 'val', returns false otherwise.
-  virtual bool GetFPR(uint32_t reg, uintptr_t* val) = 0;
+  // Returns whether the given FPR is accessible (read or write).
+  virtual bool IsAccessibleFPR(uint32_t reg) = 0;
 
-  // Sets the given FPR. Returns true if we successfully write the given value
-  // into the register, returns false otherwise.
-  virtual bool SetFPR(uint32_t reg, uintptr_t value) = 0;
+  // Reads the given FPR. The caller is responsible for checking the register
+  // is accessible with IsAccessibleFPR.
+  virtual uintptr_t GetFPR(uint32_t reg) = 0;
+
+  // Sets the given FPR. The caller is responsible for checking the register
+  // is accessible with IsAccessibleFPR.
+  virtual void SetFPR(uint32_t reg, uintptr_t value) = 0;
 
   // Smashes the caller save registers. If we're throwing, we don't want to return bogus values.
   virtual void SmashCallerSaves() = 0;
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index e1f6c06..6c0ab98 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -67,26 +67,18 @@
   }
 }
 
-bool MipsContext::SetGPR(uint32_t reg, uintptr_t value) {
+void MipsContext::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
+  DCHECK(IsAccessibleGPR(reg));
   CHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  if (gprs_[reg] != nullptr) {
-    *gprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *gprs_[reg] = value;
 }
 
-bool MipsContext::SetFPR(uint32_t reg, uintptr_t value) {
+void MipsContext::SetFPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters));
+  DCHECK(IsAccessibleFPR(reg));
   CHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  if (fprs_[reg] != nullptr) {
-    *fprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *fprs_[reg] = value;
 }
 
 void MipsContext::SmashCallerSaves() {
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index f2ee335..d8a0b67 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -36,13 +36,16 @@
   void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
-    bool success = SetGPR(SP, new_sp);
-    CHECK(success) << "Failed to set SP register";
+    SetGPR(SP, new_sp);
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
-    bool success = SetGPR(RA, new_pc);
-    CHECK(success) << "Failed to set RA register";
+    SetGPR(RA, new_pc);
+  }
+
+  bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
+    CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
+    return gprs_[reg] != nullptr;
   }
 
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
@@ -50,31 +53,26 @@
     return gprs_[reg];
   }
 
-  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  uintptr_t GetGPR(uint32_t reg) OVERRIDE {
     CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters));
-    if (gprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *gprs_[reg];
-      return true;
-    }
+    DCHECK(IsAccessibleGPR(reg));
+    return *gprs_[reg];
   }
 
-  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  void SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  bool IsAccessibleFPR(uint32_t reg) OVERRIDE {
     CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters));
-    if (fprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *fprs_[reg];
-      return true;
-    }
+    return fprs_[reg] != nullptr;
   }
 
-  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  uintptr_t GetFPR(uint32_t reg) OVERRIDE {
+    CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFRegisters));
+    DCHECK(IsAccessibleFPR(reg));
+    return *fprs_[reg];
+  }
+
+  void SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
   void SmashCallerSaves() OVERRIDE;
   void DoLongJump() OVERRIDE;
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index 7523ade..1c96bd4 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -67,26 +67,18 @@
   }
 }
 
-bool Mips64Context::SetGPR(uint32_t reg, uintptr_t value) {
+void Mips64Context::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfGpuRegisters));
+  DCHECK(IsAccessibleGPR(reg));
   CHECK_NE(gprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  if (gprs_[reg] != nullptr) {
-    *gprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *gprs_[reg] = value;
 }
 
-bool Mips64Context::SetFPR(uint32_t reg, uintptr_t value) {
+void Mips64Context::SetFPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFpuRegisters));
+  DCHECK(IsAccessibleFPR(reg));
   CHECK_NE(fprs_[reg], &gZero);  // Can't overwrite this static value since they are never reset.
-  if (fprs_[reg] != nullptr) {
-    *fprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *fprs_[reg] = value;
 }
 
 void Mips64Context::SmashCallerSaves() {
diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h
index 4ba5f13..1046723 100644
--- a/runtime/arch/mips64/context_mips64.h
+++ b/runtime/arch/mips64/context_mips64.h
@@ -36,13 +36,16 @@
   void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
-    bool success = SetGPR(SP, new_sp);
-    CHECK(success) << "Failed to set SP register";
+    SetGPR(SP, new_sp);
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
-    bool success = SetGPR(RA, new_pc);
-    CHECK(success) << "Failed to set RA register";
+    SetGPR(RA, new_pc);
+  }
+
+  bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfGpuRegisters));
+    return gprs_[reg] != nullptr;
   }
 
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
@@ -50,31 +53,26 @@
     return gprs_[reg];
   }
 
-  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  uintptr_t GetGPR(uint32_t reg) OVERRIDE {
     CHECK_LT(reg, static_cast<uint32_t>(kNumberOfGpuRegisters));
-    if (gprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *gprs_[reg];
-      return true;
-    }
+    DCHECK(IsAccessibleGPR(reg));
+    return *gprs_[reg];
   }
 
-  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  void SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  bool IsAccessibleFPR(uint32_t reg) OVERRIDE {
     CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFpuRegisters));
-    if (fprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *fprs_[reg];
-      return true;
-    }
+    return fprs_[reg] != nullptr;
   }
 
-  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  uintptr_t GetFPR(uint32_t reg) OVERRIDE {
+    CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFpuRegisters));
+    DCHECK(IsAccessibleFPR(reg));
+    return *fprs_[reg];
+  }
+
+  void SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
   void SmashCallerSaves() OVERRIDE;
   void DoLongJump() OVERRIDE;
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 49aa326..2a6ff14 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -61,23 +61,19 @@
   gprs_[EBX] = nullptr;
 }
 
-bool X86Context::SetGPR(uint32_t reg, uintptr_t value) {
+void X86Context::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
+  DCHECK(IsAccessibleGPR(reg));
   CHECK_NE(gprs_[reg], &gZero);
-  if (gprs_[reg] != nullptr) {
-    *gprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *gprs_[reg] = value;
 }
 
-bool X86Context::GetFPR(uint32_t reg ATTRIBUTE_UNUSED, uintptr_t* val ATTRIBUTE_UNUSED) {
+uintptr_t X86Context::GetFPR(uint32_t reg ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Floating-point registers are all caller save in X86";
   UNREACHABLE();
 }
 
-bool X86Context::SetFPR(uint32_t reg ATTRIBUTE_UNUSED, uintptr_t value ATTRIBUTE_UNUSED) {
+void X86Context::SetFPR(uint32_t reg ATTRIBUTE_UNUSED, uintptr_t value ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Floating-point registers are all caller save in X86";
   UNREACHABLE();
 }
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index 01c8b82..8b7804d 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -36,35 +36,38 @@
   void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
-    bool success = SetGPR(ESP, new_sp);
-    CHECK(success) << "Failed to set ESP register";
+    SetGPR(ESP, new_sp);
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
     eip_ = new_pc;
   }
 
+  bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
+    return gprs_[reg] != nullptr;
+  }
+
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg];
   }
 
-  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  uintptr_t GetGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
-    if (gprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *gprs_[reg];
-      return true;
-    }
+    DCHECK(IsAccessibleGPR(reg));
+    return *gprs_[reg];
   }
 
-  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  void SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE;
+  bool IsAccessibleFPR(uint32_t reg ATTRIBUTE_UNUSED) OVERRIDE {
+    return false;  // Floating-point registers are all caller save on x86 (GetFPR/SetFPR abort).
+  }
 
-  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  uintptr_t GetFPR(uint32_t reg) OVERRIDE;
+
+  void SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
   void SmashCallerSaves() OVERRIDE;
   void DoLongJump() OVERRIDE;
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 6e9b99c..cdc2ec7 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -91,26 +91,18 @@
   fprs_[XMM11] = nullptr;
 }
 
-bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
+void X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
+  DCHECK(IsAccessibleGPR(reg));
   CHECK_NE(gprs_[reg], &gZero);
-  if (gprs_[reg] != nullptr) {
-    *gprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *gprs_[reg] = value;
 }
 
-bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) {
+void X86_64Context::SetFPR(uint32_t reg, uintptr_t value) {
   CHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
+  DCHECK(IsAccessibleFPR(reg));
   CHECK_NE(fprs_[reg], reinterpret_cast<const uint64_t*>(&gZero));
-  if (fprs_[reg] != nullptr) {
-    *fprs_[reg] = value;
-    return true;
-  } else {
-    return false;
-  }
+  *fprs_[reg] = value;
 }
 
 extern "C" void art_quick_do_long_jump(uintptr_t*, uintptr_t*);
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index 902c3b9..0dda06e 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -36,44 +36,43 @@
   void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
-    bool success = SetGPR(RSP, new_sp);
-    CHECK(success) << "Failed to set RSP register";
+    SetGPR(RSP, new_sp);
   }
 
   void SetPC(uintptr_t new_pc) OVERRIDE {
     rip_ = new_pc;
   }
 
+  bool IsAccessibleGPR(uint32_t reg) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
+    return gprs_[reg] != nullptr;
+  }
+
   uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
     return gprs_[reg];
   }
 
-  bool GetGPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  uintptr_t GetGPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
-    if (gprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *gprs_[reg];
-      return true;
-    }
+    DCHECK(IsAccessibleGPR(reg));
+    return *gprs_[reg];
   }
 
-  bool SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  void SetGPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
-  bool GetFPR(uint32_t reg, uintptr_t* val) OVERRIDE {
+  bool IsAccessibleFPR(uint32_t reg) OVERRIDE {
     DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
-    if (fprs_[reg] == nullptr) {
-      return false;
-    } else {
-      DCHECK(val != nullptr);
-      *val = *fprs_[reg];
-      return true;
-    }
+    return fprs_[reg] != nullptr;
   }
 
-  bool SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
+  uintptr_t GetFPR(uint32_t reg) OVERRIDE {
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfFloatRegisters));
+    DCHECK(IsAccessibleFPR(reg));
+    return *fprs_[reg];
+  }
+
+  void SetFPR(uint32_t reg, uintptr_t value) OVERRIDE;
 
   void SmashCallerSaves() OVERRIDE;
   void DoLongJump() OVERRIDE;
diff --git a/runtime/base/dumpable.h b/runtime/base/dumpable.h
index 3c316cc..9bc4089 100644
--- a/runtime/base/dumpable.h
+++ b/runtime/base/dumpable.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_BASE_DUMPABLE_H_
 #define ART_RUNTIME_BASE_DUMPABLE_H_
 
+#include <ostream>
+
 #include "base/macros.h"
 
 namespace art {
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 1cbaf39..d89ad5e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -365,19 +365,17 @@
 }
 
 void SingleStepControl::VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info) {
-  if (method != nullptr) {
-    callback(reinterpret_cast<mirror::Object**>(&method), arg, root_info);
+  if (method_ != nullptr) {
+    callback(reinterpret_cast<mirror::Object**>(&method_), arg, root_info);
   }
 }
 
-bool SingleStepControl::ContainsDexPc(uint32_t dex_pc) const {
-  return dex_pcs.find(dex_pc) == dex_pcs.end();
+void SingleStepControl::AddDexPc(uint32_t dex_pc) {
+  dex_pcs_.insert(dex_pc);
 }
 
-void SingleStepControl::Clear() {
-  is_active = false;
-  method = nullptr;
-  dex_pcs.clear();
+bool SingleStepControl::ContainsDexPc(uint32_t dex_pc) const {
+  return dex_pcs_.find(dex_pc) == dex_pcs_.end();  // NOTE: true when dex_pc is NOT in dex_pcs_.
 }
 
 static bool IsBreakpoint(const mirror::ArtMethod* m, uint32_t dex_pc)
@@ -2918,24 +2916,23 @@
   // If the debugger is single-stepping one of our threads, check to
   // see if we're that thread and we've reached a step point.
   const SingleStepControl* single_step_control = thread->GetSingleStepControl();
-  DCHECK(single_step_control != nullptr);
-  if (single_step_control->is_active) {
+  if (single_step_control != nullptr) {
     CHECK(!m->IsNative());
-    if (single_step_control->step_depth == JDWP::SD_INTO) {
+    if (single_step_control->GetStepDepth() == JDWP::SD_INTO) {
       // Step into method calls.  We break when the line number
       // or method pointer changes.  If we're in SS_MIN mode, we
       // always stop.
-      if (single_step_control->method != m) {
+      if (single_step_control->GetMethod() != m) {
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS new method";
-      } else if (single_step_control->step_size == JDWP::SS_MIN) {
+      } else if (single_step_control->GetStepSize() == JDWP::SS_MIN) {
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS new instruction";
       } else if (single_step_control->ContainsDexPc(dex_pc)) {
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS new line";
       }
-    } else if (single_step_control->step_depth == JDWP::SD_OVER) {
+    } else if (single_step_control->GetStepDepth() == JDWP::SD_OVER) {
       // Step over method calls.  We break when the line number is
       // different and the frame depth is <= the original frame
       // depth.  (We can't just compare on the method, because we
@@ -2944,13 +2941,13 @@
 
       int stack_depth = GetStackDepth(thread);
 
-      if (stack_depth < single_step_control->stack_depth) {
+      if (stack_depth < single_step_control->GetStackDepth()) {
         // Popped up one or more frames, always trigger.
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS method pop";
-      } else if (stack_depth == single_step_control->stack_depth) {
+      } else if (stack_depth == single_step_control->GetStackDepth()) {
         // Same depth, see if we moved.
-        if (single_step_control->step_size == JDWP::SS_MIN) {
+        if (single_step_control->GetStepSize() == JDWP::SS_MIN) {
           event_flags |= kSingleStep;
           VLOG(jdwp) << "SS new instruction";
         } else if (single_step_control->ContainsDexPc(dex_pc)) {
@@ -2959,7 +2956,7 @@
         }
       }
     } else {
-      CHECK_EQ(single_step_control->step_depth, JDWP::SD_OUT);
+      CHECK_EQ(single_step_control->GetStepDepth(), JDWP::SD_OUT);
       // Return from the current method.  We break when the frame
       // depth pops up.
 
@@ -2968,7 +2965,7 @@
       // function, rather than the end of the returning function.
 
       int stack_depth = GetStackDepth(thread);
-      if (stack_depth < single_step_control->stack_depth) {
+      if (stack_depth < single_step_control->GetStackDepth()) {
         event_flags |= kSingleStep;
         VLOG(jdwp) << "SS method pop";
       }
@@ -3446,20 +3443,11 @@
     return sts.GetError();
   }
 
-  //
-  // Work out what Method* we're in, the current line number, and how deep the stack currently
+  // Work out what ArtMethod* we're in, the current line number, and how deep the stack currently
   // is for step-out.
-  //
-
   struct SingleStepStackVisitor : public StackVisitor {
-    explicit SingleStepStackVisitor(Thread* thread, SingleStepControl* single_step_control,
-                                    int32_t* line_number)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        : StackVisitor(thread, nullptr), single_step_control_(single_step_control),
-          line_number_(line_number) {
-      DCHECK_EQ(single_step_control_, thread->GetSingleStepControl());
-      single_step_control_->method = nullptr;
-      single_step_control_->stack_depth = 0;
+    explicit SingleStepStackVisitor(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        : StackVisitor(thread, nullptr), stack_depth(0), method(nullptr), line_number(-1) {
     }
 
     // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
@@ -3467,38 +3455,32 @@
     bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
       mirror::ArtMethod* m = GetMethod();
       if (!m->IsRuntimeMethod()) {
-        ++single_step_control_->stack_depth;
-        if (single_step_control_->method == nullptr) {
+        ++stack_depth;
+        if (method == nullptr) {
           mirror::DexCache* dex_cache = m->GetDeclaringClass()->GetDexCache();
-          single_step_control_->method = m;
-          *line_number_ = -1;
+          method = m;
           if (dex_cache != nullptr) {
             const DexFile& dex_file = *dex_cache->GetDexFile();
-            *line_number_ = dex_file.GetLineNumFromPC(m, GetDexPc());
+            line_number = dex_file.GetLineNumFromPC(m, GetDexPc());
           }
         }
       }
       return true;
     }
 
-    SingleStepControl* const single_step_control_;
-    int32_t* const line_number_;
+    int stack_depth;
+    mirror::ArtMethod* method;
+    int32_t line_number;
   };
 
   Thread* const thread = sts.GetThread();
-  SingleStepControl* const single_step_control = thread->GetSingleStepControl();
-  DCHECK(single_step_control != nullptr);
-  int32_t line_number = -1;
-  SingleStepStackVisitor visitor(thread, single_step_control, &line_number);
+  SingleStepStackVisitor visitor(thread);
   visitor.WalkStack();
 
-  //
   // Find the dex_pc values that correspond to the current line, for line-based single-stepping.
-  //
-
   struct DebugCallbackContext {
-    explicit DebugCallbackContext(SingleStepControl* single_step_control_cb, int32_t line_number_cb,
-                                  const DexFile::CodeItem* code_item)
+    explicit DebugCallbackContext(SingleStepControl* single_step_control_cb,
+                                  int32_t line_number_cb, const DexFile::CodeItem* code_item)
       : single_step_control_(single_step_control_cb), line_number_(line_number_cb),
         code_item_(code_item), last_pc_valid(false), last_pc(0) {
     }
@@ -3516,7 +3498,7 @@
       } else if (context->last_pc_valid) {  // and the line number is new
         // Add everything from the last entry up until here to the set
         for (uint32_t dex_pc = context->last_pc; dex_pc < address; ++dex_pc) {
-          context->single_step_control_->dex_pcs.insert(dex_pc);
+          context->single_step_control_->AddDexPc(dex_pc);
         }
         context->last_pc_valid = false;
       }
@@ -3528,7 +3510,7 @@
       if (last_pc_valid) {
         size_t end = code_item_->insns_size_in_code_units_;
         for (uint32_t dex_pc = last_pc; dex_pc < end; ++dex_pc) {
-          single_step_control_->dex_pcs.insert(dex_pc);
+          single_step_control_->AddDexPc(dex_pc);
         }
       }
     }
@@ -3539,8 +3521,14 @@
     bool last_pc_valid;
     uint32_t last_pc;
   };
-  single_step_control->dex_pcs.clear();
-  mirror::ArtMethod* m = single_step_control->method;
+
+  // Allocate the SingleStepControl describing this step (size, depth, stack depth, method).
+  SingleStepControl* single_step_control = new SingleStepControl(step_size, step_depth,
+                                                                 visitor.stack_depth,
+                                                                 visitor.method);
+  CHECK(single_step_control != nullptr) << "Failed to allocate SingleStepControl";
+  mirror::ArtMethod* m = single_step_control->GetMethod();
+  const int32_t line_number = visitor.line_number;
   if (!m->IsNative()) {
     const DexFile::CodeItem* const code_item = m->GetCodeItem();
     DebugCallbackContext context(single_step_control, line_number, code_item);
@@ -3548,23 +3536,18 @@
                                      DebugCallbackContext::Callback, nullptr, &context);
   }
 
-  //
-  // Everything else...
-  //
-
-  single_step_control->step_size = step_size;
-  single_step_control->step_depth = step_depth;
-  single_step_control->is_active = true;
+  // Activate single-step in the thread.
+  thread->ActivateSingleStepControl(single_step_control);
 
   if (VLOG_IS_ON(jdwp)) {
     VLOG(jdwp) << "Single-step thread: " << *thread;
-    VLOG(jdwp) << "Single-step step size: " << single_step_control->step_size;
-    VLOG(jdwp) << "Single-step step depth: " << single_step_control->step_depth;
-    VLOG(jdwp) << "Single-step current method: " << PrettyMethod(single_step_control->method);
+    VLOG(jdwp) << "Single-step step size: " << single_step_control->GetStepSize();
+    VLOG(jdwp) << "Single-step step depth: " << single_step_control->GetStepDepth();
+    VLOG(jdwp) << "Single-step current method: " << PrettyMethod(single_step_control->GetMethod());
     VLOG(jdwp) << "Single-step current line: " << line_number;
-    VLOG(jdwp) << "Single-step current stack depth: " << single_step_control->stack_depth;
+    VLOG(jdwp) << "Single-step current stack depth: " << single_step_control->GetStackDepth();
     VLOG(jdwp) << "Single-step dex_pc values:";
-    for (uint32_t dex_pc : single_step_control->dex_pcs) {
+    for (uint32_t dex_pc : single_step_control->GetDexPcs()) {
       VLOG(jdwp) << StringPrintf(" %#x", dex_pc);
     }
   }
@@ -3578,9 +3561,7 @@
   JDWP::JdwpError error;
   Thread* thread = DecodeThread(soa, thread_id, &error);
   if (error == JDWP::ERR_NONE) {
-    SingleStepControl* single_step_control = thread->GetSingleStepControl();
-    DCHECK(single_step_control != nullptr);
-    single_step_control->Clear();
+    thread->DeactivateSingleStepControl();
   }
 }
 
diff --git a/runtime/debugger.h b/runtime/debugger.h
index e79e8e4..901d3e7 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -98,39 +98,58 @@
 };
 
 // Thread local data-structure that holds fields for controlling single-stepping.
-struct SingleStepControl {
-  SingleStepControl()
-      : is_active(false), step_size(JDWP::SS_MIN), step_depth(JDWP::SD_INTO),
-        method(nullptr), stack_depth(0) {
+class SingleStepControl {
+ public:
+  SingleStepControl(JDWP::JdwpStepSize step_size, JDWP::JdwpStepDepth step_depth,
+                    int stack_depth, mirror::ArtMethod* method)
+      : step_size_(step_size), step_depth_(step_depth),
+        stack_depth_(stack_depth), method_(method) {
   }
 
-  // Are we single-stepping right now?
-  bool is_active;
+  JDWP::JdwpStepSize GetStepSize() const {
+    return step_size_;
+  }
 
+  JDWP::JdwpStepDepth GetStepDepth() const {
+    return step_depth_;
+  }
+
+  int GetStackDepth() const {
+    return stack_depth_;
+  }
+
+  mirror::ArtMethod* GetMethod() const {
+    return method_;
+  }
+
+  const std::set<uint32_t>& GetDexPcs() const {
+    return dex_pcs_;
+  }
+
+  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void AddDexPc(uint32_t dex_pc);
+
+  bool ContainsDexPc(uint32_t dex_pc) const;
+
+ private:
   // See JdwpStepSize and JdwpStepDepth for details.
-  JDWP::JdwpStepSize step_size;
-  JDWP::JdwpStepDepth step_depth;
+  const JDWP::JdwpStepSize step_size_;
+  const JDWP::JdwpStepDepth step_depth_;
+
+  // The stack depth when this single-step was initiated. This is used to support SD_OVER and SD_OUT
+  // single-step depth.
+  const int stack_depth_;
 
   // The location this single-step was initiated from.
   // A single-step is initiated in a suspended thread. We save here the current method and the
   // set of DEX pcs associated to the source line number where the suspension occurred.
   // This is used to support SD_INTO and SD_OVER single-step depths so we detect when a single-step
   // causes the execution of an instruction in a different method or at a different line number.
-  mirror::ArtMethod* method;
-  std::set<uint32_t> dex_pcs;
+  mirror::ArtMethod* method_;
+  std::set<uint32_t> dex_pcs_;
 
-  // The stack depth when this single-step was initiated. This is used to support SD_OVER and SD_OUT
-  // single-step depth.
-  int stack_depth;
-
-  void VisitRoots(RootCallback* callback, void* arg, const RootInfo& root_info)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  bool ContainsDexPc(uint32_t dex_pc) const;
-
-  void Clear();
-
- private:
   DISALLOW_COPY_AND_ASSIGN(SingleStepControl);
 };
 
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 929a1d2..72734e9 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -25,15 +25,34 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "mem_map.h"
+#include "stack.h"
 #include "utils.h"
 
 namespace art {
 namespace gc {
 namespace accounting {
 
+// Internal representation is StackReference<T>, so this only works with mirror::Object or its
+// subclasses.
 template <typename T>
 class AtomicStack {
  public:
+  class ObjectComparator {
+   public:
+    // These two comparators are for std::binary_search.
+    bool operator()(const T* a, const StackReference<T>& b) const NO_THREAD_SAFETY_ANALYSIS {
+      return a < b.AsMirrorPtr();
+    }
+    bool operator()(const StackReference<T>& a, const T* b) const NO_THREAD_SAFETY_ANALYSIS {
+      return a.AsMirrorPtr() < b;
+    }
+    // This comparator is for std::sort.
+    bool operator()(const StackReference<T>& a, const StackReference<T>& b) const
+        NO_THREAD_SAFETY_ANALYSIS {
+      return a.AsMirrorPtr() < b.AsMirrorPtr();
+    }
+  };
+
   // Capacity is how many elements we can store in the stack.
   static AtomicStack* Create(const std::string& name, size_t growth_limit, size_t capacity) {
     std::unique_ptr<AtomicStack> mark_stack(new AtomicStack(name, growth_limit, capacity));
@@ -45,7 +64,7 @@
 
   void Reset() {
     DCHECK(mem_map_.get() != nullptr);
-    DCHECK(begin_ != NULL);
+    DCHECK(begin_ != nullptr);
     front_index_.StoreRelaxed(0);
     back_index_.StoreRelaxed(0);
     debug_is_sorted_ = true;
@@ -55,18 +74,20 @@
   // Beware: Mixing atomic pushes and atomic pops will cause ABA problem.
 
   // Returns false if we overflowed the stack.
-  bool AtomicPushBackIgnoreGrowthLimit(const T& value) {
+  bool AtomicPushBackIgnoreGrowthLimit(T* value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AtomicPushBackInternal(value, capacity_);
   }
 
   // Returns false if we overflowed the stack.
-  bool AtomicPushBack(const T& value) {
+  bool AtomicPushBack(T* value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AtomicPushBackInternal(value, growth_limit_);
   }
 
   // Atomically bump the back index by the given number of
   // slots. Returns false if we overflowed the stack.
-  bool AtomicBumpBack(size_t num_slots, T** start_address, T** end_address) {
+  bool AtomicBumpBack(size_t num_slots, StackReference<T>** start_address,
+                      StackReference<T>** end_address)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (kIsDebugBuild) {
       debug_is_sorted_ = false;
     }
@@ -80,41 +101,41 @@
         return false;
       }
     } while (!back_index_.CompareExchangeWeakRelaxed(index, new_index));
-    *start_address = &begin_[index];
-    *end_address = &begin_[new_index];
+    *start_address = begin_ + index;
+    *end_address = begin_ + new_index;
     if (kIsDebugBuild) {
       // Sanity check that the memory is zero.
       for (int32_t i = index; i < new_index; ++i) {
-        DCHECK_EQ(begin_[i], static_cast<T>(0))
+        DCHECK_EQ(begin_[i].AsMirrorPtr(), static_cast<T*>(nullptr))
             << "i=" << i << " index=" << index << " new_index=" << new_index;
       }
     }
     return true;
   }
 
-  void AssertAllZero() {
+  void AssertAllZero() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (kIsDebugBuild) {
       for (size_t i = 0; i < capacity_; ++i) {
-        DCHECK_EQ(begin_[i], static_cast<T>(0)) << "i=" << i;
+        DCHECK_EQ(begin_[i].AsMirrorPtr(), static_cast<T*>(nullptr)) << "i=" << i;
       }
     }
   }
 
-  void PushBack(const T& value) {
+  void PushBack(T* value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (kIsDebugBuild) {
       debug_is_sorted_ = false;
     }
-    int32_t index = back_index_.LoadRelaxed();
+    const int32_t index = back_index_.LoadRelaxed();
     DCHECK_LT(static_cast<size_t>(index), growth_limit_);
     back_index_.StoreRelaxed(index + 1);
-    begin_[index] = value;
+    begin_[index].Assign(value);
   }
 
-  T PopBack() {
+  T* PopBack() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_GT(back_index_.LoadRelaxed(), front_index_.LoadRelaxed());
     // Decrement the back index non atomically.
     back_index_.StoreRelaxed(back_index_.LoadRelaxed() - 1);
-    return begin_[back_index_.LoadRelaxed()];
+    return begin_[back_index_.LoadRelaxed()].AsMirrorPtr();
   }
 
   // Take an item from the front of the stack.
@@ -140,12 +161,11 @@
     return back_index_.LoadRelaxed() - front_index_.LoadRelaxed();
   }
 
-  T* Begin() const {
-    return const_cast<T*>(begin_ + front_index_.LoadRelaxed());
+  StackReference<T>* Begin() const {
+    return begin_ + front_index_.LoadRelaxed();
   }
-
-  T* End() const {
-    return const_cast<T*>(begin_ + back_index_.LoadRelaxed());
+  StackReference<T>* End() const {
+    return begin_ + back_index_.LoadRelaxed();
   }
 
   size_t Capacity() const {
@@ -162,7 +182,7 @@
   void Sort() {
     int32_t start_back_index = back_index_.LoadRelaxed();
     int32_t start_front_index = front_index_.LoadRelaxed();
-    std::sort(Begin(), End());
+    std::sort(Begin(), End(), ObjectComparator());
     CHECK_EQ(start_back_index, back_index_.LoadRelaxed());
     CHECK_EQ(start_front_index, front_index_.LoadRelaxed());
     if (kIsDebugBuild) {
@@ -170,13 +190,18 @@
     }
   }
 
-  bool ContainsSorted(const T& value) const {
+  bool ContainsSorted(const T* value) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(debug_is_sorted_);
-    return std::binary_search(Begin(), End(), value);
+    return std::binary_search(Begin(), End(), value, ObjectComparator());
   }
 
-  bool Contains(const T& value) const {
-    return std::find(Begin(), End(), value) != End();
+  bool Contains(const T* value) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    for (auto cur = Begin(), end = End(); cur != end; ++cur) {
+      if (cur->AsMirrorPtr() == value) {
+        return true;
+      }
+    }
+    return false;
   }
 
  private:
@@ -191,7 +216,8 @@
   }
 
   // Returns false if we overflowed the stack.
-  bool AtomicPushBackInternal(const T& value, size_t limit) ALWAYS_INLINE {
+  bool AtomicPushBackInternal(T* value, size_t limit) ALWAYS_INLINE
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (kIsDebugBuild) {
       debug_is_sorted_ = false;
     }
@@ -203,20 +229,20 @@
         return false;
       }
     } while (!back_index_.CompareExchangeWeakRelaxed(index, index + 1));
-    begin_[index] = value;
+    begin_[index].Assign(value);
     return true;
   }
 
   // Size in number of elements.
   void Init() {
     std::string error_msg;
-    mem_map_.reset(MemMap::MapAnonymous(name_.c_str(), NULL, capacity_ * sizeof(T),
+    mem_map_.reset(MemMap::MapAnonymous(name_.c_str(), NULL, capacity_ * sizeof(begin_[0]),
                                         PROT_READ | PROT_WRITE, false, &error_msg));
     CHECK(mem_map_.get() != NULL) << "couldn't allocate mark stack.\n" << error_msg;
     uint8_t* addr = mem_map_->Begin();
     CHECK(addr != NULL);
     debug_is_sorted_ = true;
-    begin_ = reinterpret_cast<T*>(addr);
+    begin_ = reinterpret_cast<StackReference<T>*>(addr);
     Reset();
   }
 
@@ -229,7 +255,7 @@
   // Front index, used for implementing PopFront.
   AtomicInteger front_index_;
   // Base of the atomic stack.
-  T* begin_;
+  StackReference<T>* begin_;
   // Current maximum which we can push back to, must be <= capacity_.
   size_t growth_limit_;
   // Maximum number of elements.
@@ -240,7 +266,7 @@
   DISALLOW_COPY_AND_ASSIGN(AtomicStack);
 };
 
-typedef AtomicStack<mirror::Object*> ObjectStack;
+typedef AtomicStack<mirror::Object> ObjectStack;
 
 }  // namespace accounting
 }  // namespace gc
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 5fa3c8b..754e217 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -607,9 +607,9 @@
   // The alloc stack.
   {
     ConcurrentCopyingVerifyNoFromSpaceRefsVisitor ref_visitor(this);
-    for (mirror::Object** it = heap_->allocation_stack_->Begin(),
-             **end = heap_->allocation_stack_->End(); it < end; ++it) {
-      mirror::Object* obj = *it;
+    for (auto* it = heap_->allocation_stack_->Begin(), *end = heap_->allocation_stack_->End();
+        it < end; ++it) {
+      mirror::Object* const obj = it->AsMirrorPtr();
       if (obj != nullptr && obj->GetClass() != nullptr) {
         // TODO: need to call this only if obj is alive?
         ref_visitor(obj);
@@ -845,14 +845,14 @@
   // Objects on the allocation stack?
   if (ReadBarrier::kEnableReadBarrierInvariantChecks || kIsDebugBuild) {
     size_t count = GetAllocationStack()->Size();
-    mirror::Object** it = GetAllocationStack()->Begin();
-    mirror::Object** end = GetAllocationStack()->End();
+    auto* it = GetAllocationStack()->Begin();
+    auto* end = GetAllocationStack()->End();
     for (size_t i = 0; i < count; ++i, ++it) {
-      CHECK(it < end);
-      mirror::Object* obj = *it;
+      CHECK_LT(it, end);
+      mirror::Object* obj = it->AsMirrorPtr();
       if (obj != nullptr) {
         // Must have been cleared above.
-        CHECK(obj->GetReadBarrierPointer() == ReadBarrier::WhitePtr()) << obj;
+        CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr()) << obj;
       }
     }
   }
@@ -1446,10 +1446,7 @@
 bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) {
   QuasiAtomic::ThreadFenceAcquire();
   accounting::ObjectStack* alloc_stack = GetAllocationStack();
-  mirror::Object** begin = alloc_stack->Begin();
-  // Important to read end once as it could be concurrently updated and screw up std::find().
-  mirror::Object** end = alloc_stack->End();
-  return std::find(begin, end, ref) != end;
+  return alloc_stack->Contains(ref);
 }
 
 mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) {
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 43f520a..d0e0446 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -233,7 +233,7 @@
   void SetFwdPtr(mirror::Object* from_ref, mirror::Object* to_ref)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FlipThreadRoots() LOCKS_EXCLUDED(Locks::mutator_lock_);;
-  void SwapStacks(Thread* self);
+  void SwapStacks(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void RecordLiveStackFreezeSize(Thread* self);
   void ComputeUnevacFromSpaceLiveRatio();
 
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 18af005..ff3c893 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -274,11 +274,11 @@
 }
 
 void MarkCompact::ResizeMarkStack(size_t new_size) {
-  std::vector<Object*> temp(mark_stack_->Begin(), mark_stack_->End());
+  std::vector<StackReference<Object>> temp(mark_stack_->Begin(), mark_stack_->End());
   CHECK_LE(mark_stack_->Size(), new_size);
   mark_stack_->Resize(new_size);
-  for (const auto& obj : temp) {
-    mark_stack_->PushBack(obj);
+  for (auto& obj : temp) {
+    mark_stack_->PushBack(obj.AsMirrorPtr());
   }
 }
 
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index f6d473d..06304bf 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -46,7 +46,7 @@
 
 namespace accounting {
   template <typename T> class AtomicStack;
-  typedef AtomicStack<mirror::Object*> ObjectStack;
+  typedef AtomicStack<mirror::Object> ObjectStack;
 }  // namespace accounting
 
 namespace space {
@@ -156,13 +156,13 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Expand mark stack to 2x its current size.
-  void ResizeMarkStack(size_t new_size);
+  void ResizeMarkStack(size_t new_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if we should sweep the space.
   bool ShouldSweepSpace(space::ContinuousSpace* space) const;
 
   // Push an object onto the mark stack.
-  void MarkStackPush(mirror::Object* obj);
+  void MarkStackPush(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void UpdateAndMarkModUnion()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 04fb694..1959c09 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -330,11 +330,11 @@
     // Someone else acquired the lock and expanded the mark stack before us.
     return;
   }
-  std::vector<Object*> temp(mark_stack_->Begin(), mark_stack_->End());
+  std::vector<StackReference<Object>> temp(mark_stack_->Begin(), mark_stack_->End());
   CHECK_LE(mark_stack_->Size(), new_size);
   mark_stack_->Resize(new_size);
-  for (const auto& obj : temp) {
-    mark_stack_->PushBack(obj);
+  for (auto& obj : temp) {
+    mark_stack_->PushBack(obj.AsMirrorPtr());
   }
 }
 
@@ -554,7 +554,7 @@
 class MarkStackTask : public Task {
  public:
   MarkStackTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, size_t mark_stack_size,
-                Object** mark_stack)
+                StackReference<Object>* mark_stack)
       : mark_sweep_(mark_sweep),
         thread_pool_(thread_pool),
         mark_stack_pos_(mark_stack_size) {
@@ -627,11 +627,11 @@
   MarkSweep* const mark_sweep_;
   ThreadPool* const thread_pool_;
   // Thread local mark stack for this task.
-  Object* mark_stack_[kMaxSize];
+  StackReference<Object> mark_stack_[kMaxSize];
   // Mark stack position.
   size_t mark_stack_pos_;
 
-  void MarkStackPush(Object* obj) ALWAYS_INLINE {
+  ALWAYS_INLINE void MarkStackPush(Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(mark_stack_pos_ == kMaxSize)) {
       // Mark stack overflow, give 1/2 the stack to the thread pool as a new work task.
       mark_stack_pos_ /= 2;
@@ -641,7 +641,7 @@
     }
     DCHECK(obj != nullptr);
     DCHECK_LT(mark_stack_pos_, kMaxSize);
-    mark_stack_[mark_stack_pos_++] = obj;
+    mark_stack_[mark_stack_pos_++].Assign(obj);
   }
 
   virtual void Finalize() {
@@ -660,7 +660,7 @@
       Object* obj = nullptr;
       if (kUseMarkStackPrefetch) {
         while (mark_stack_pos_ != 0 && prefetch_fifo.size() < kFifoSize) {
-          Object* mark_stack_obj = mark_stack_[--mark_stack_pos_];
+          Object* const mark_stack_obj = mark_stack_[--mark_stack_pos_].AsMirrorPtr();
           DCHECK(mark_stack_obj != nullptr);
           __builtin_prefetch(mark_stack_obj);
           prefetch_fifo.push_back(mark_stack_obj);
@@ -674,7 +674,7 @@
         if (UNLIKELY(mark_stack_pos_ == 0)) {
           break;
         }
-        obj = mark_stack_[--mark_stack_pos_];
+        obj = mark_stack_[--mark_stack_pos_].AsMirrorPtr();
       }
       DCHECK(obj != nullptr);
       visitor(obj);
@@ -687,7 +687,7 @@
   CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep,
                accounting::ContinuousSpaceBitmap* bitmap,
                uint8_t* begin, uint8_t* end, uint8_t minimum_age, size_t mark_stack_size,
-               Object** mark_stack_obj, bool clear_card)
+               StackReference<Object>* mark_stack_obj, bool clear_card)
       : MarkStackTask<false>(thread_pool, mark_sweep, mark_stack_size, mark_stack_obj),
         bitmap_(bitmap),
         begin_(begin),
@@ -742,8 +742,8 @@
     TimingLogger::ScopedTiming t(paused ? "(Paused)ScanGrayObjects" : __FUNCTION__,
         GetTimings());
     // Try to take some of the mark stack since we can pass this off to the worker tasks.
-    Object** mark_stack_begin = mark_stack_->Begin();
-    Object** mark_stack_end = mark_stack_->End();
+    StackReference<Object>* mark_stack_begin = mark_stack_->Begin();
+    StackReference<Object>* mark_stack_end = mark_stack_->End();
     const size_t mark_stack_size = mark_stack_end - mark_stack_begin;
     // Estimated number of work tasks we will create.
     const size_t mark_stack_tasks = GetHeap()->GetContinuousSpaces().size() * thread_count;
@@ -954,9 +954,9 @@
 
 void MarkSweep::VerifyIsLive(const Object* obj) {
   if (!heap_->GetLiveBitmap()->Test(obj)) {
-    accounting::ObjectStack* allocation_stack = heap_->allocation_stack_.get();
-    CHECK(std::find(allocation_stack->Begin(), allocation_stack->End(), obj) !=
-        allocation_stack->End()) << "Found dead object " << obj << "\n" << heap_->DumpSpaces();
+    // Not in the live bitmap, so obj must be on the allocation stack. TODO: Consider live stack?
+    CHECK(heap_->allocation_stack_->Contains(obj))
+        << "Found dead object " << obj << "\n" << heap_->DumpSpaces();
   }
 }
 
@@ -1025,7 +1025,7 @@
   ObjectBytePair freed;
   ObjectBytePair freed_los;
   // How many objects are left in the array, modified after each space is swept.
-  Object** objects = allocations->Begin();
+  StackReference<Object>* objects = allocations->Begin();
   size_t count = allocations->Size();
   // Change the order to ensure that the non-moving space last swept as an optimization.
   std::vector<space::ContinuousSpace*> sweep_spaces;
@@ -1053,9 +1053,9 @@
     if (swap_bitmaps) {
       std::swap(live_bitmap, mark_bitmap);
     }
-    Object** out = objects;
+    StackReference<Object>* out = objects;
     for (size_t i = 0; i < count; ++i) {
-      Object* obj = objects[i];
+      Object* const obj = objects[i].AsMirrorPtr();
       if (kUseThreadLocalAllocationStack && obj == nullptr) {
         continue;
       }
@@ -1072,7 +1072,7 @@
           chunk_free_buffer[chunk_free_pos++] = obj;
         }
       } else {
-        *(out++) = obj;
+        (out++)->Assign(obj);
       }
     }
     if (chunk_free_pos > 0) {
@@ -1094,7 +1094,7 @@
       std::swap(large_live_objects, large_mark_objects);
     }
     for (size_t i = 0; i < count; ++i) {
-      Object* obj = objects[i];
+      Object* const obj = objects[i].AsMirrorPtr();
       // Handle large objects.
       if (kUseThreadLocalAllocationStack && obj == nullptr) {
         continue;
@@ -1195,7 +1195,7 @@
                                      static_cast<size_t>(MarkStackTask<false>::kMaxSize));
   CHECK_GT(chunk_size, 0U);
   // Split the current mark stack up into work tasks.
-  for (mirror::Object **it = mark_stack_->Begin(), **end = mark_stack_->End(); it < end; ) {
+  for (auto* it = mark_stack_->Begin(), *end = mark_stack_->End(); it < end; ) {
     const size_t delta = std::min(static_cast<size_t>(end - it), chunk_size);
     thread_pool->AddTask(self, new MarkStackTask<false>(thread_pool, this, delta, it));
     it += delta;
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index b787327..3f99e21 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -47,7 +47,7 @@
 
 namespace accounting {
   template<typename T> class AtomicStack;
-  typedef AtomicStack<mirror::Object*> ObjectStack;
+  typedef AtomicStack<mirror::Object> ObjectStack;
 }  // namespace accounting
 
 namespace collector {
@@ -136,7 +136,8 @@
 
   // Sweeps unmarked objects to complete the garbage collection. Virtual as by default it sweeps
   // all allocation spaces. Partial and sticky GCs want to just sweep a subset of the heap.
-  virtual void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  virtual void Sweep(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Sweeps unmarked objects to complete the garbage collection.
   void SweepLargeObjects(bool swap_bitmaps) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
@@ -162,13 +163,14 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
 
   static mirror::Object* VerifySystemWeakIsLiveCallback(mirror::Object* obj, void* arg)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void VerifySystemWeaks()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   // Verify that an object is live, either in a live bitmap or in the allocation stack.
   void VerifyIsLive(const mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   static mirror::Object* MarkObjectCallback(mirror::Object* obj, void* arg)
@@ -223,11 +225,12 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   void MarkObjectNonNull(mirror::Object* obj)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Marks an object atomically, safe to use from multiple threads.
-  void MarkObjectNonNullParallel(mirror::Object* obj);
+  void MarkObjectNonNullParallel(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if we need to add obj to a mark stack.
   bool MarkObjectParallel(const mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
@@ -238,8 +241,10 @@
       NO_THREAD_SAFETY_ANALYSIS;
 
   // Expand mark stack to 2x its current size.
-  void ExpandMarkStack() EXCLUSIVE_LOCKS_REQUIRED(mark_stack_lock_);
-  void ResizeMarkStack(size_t new_size) EXCLUSIVE_LOCKS_REQUIRED(mark_stack_lock_);
+  void ExpandMarkStack() EXCLUSIVE_LOCKS_REQUIRED(mark_stack_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ResizeMarkStack(size_t new_size) EXCLUSIVE_LOCKS_REQUIRED(mark_stack_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns how many threads we should use for the current GC phase based on if we are paused,
   // whether or not we care about pauses.
@@ -250,7 +255,7 @@
   void VerifyRoot(const mirror::Object* root, const RootInfo& root_info) NO_THREAD_SAFETY_ANALYSIS;
 
   // Push a single reference on a mark stack.
-  void PushOnMarkStack(mirror::Object* obj);
+  void PushOnMarkStack(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Blackens objects grayed during a garbage collection.
   void ScanGrayObjects(bool paused, uint8_t minimum_age)
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index fcc601f..8660eff 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -421,11 +421,11 @@
 }
 
 void SemiSpace::ResizeMarkStack(size_t new_size) {
-  std::vector<Object*> temp(mark_stack_->Begin(), mark_stack_->End());
+  std::vector<StackReference<Object>> temp(mark_stack_->Begin(), mark_stack_->End());
   CHECK_LE(mark_stack_->Size(), new_size);
   mark_stack_->Resize(new_size);
-  for (const auto& obj : temp) {
-    mark_stack_->PushBack(obj);
+  for (auto& obj : temp) {
+    mark_stack_->PushBack(obj.AsMirrorPtr());
   }
 }
 
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index f8fced8..192fb14 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -45,7 +45,7 @@
 
 namespace accounting {
   template <typename T> class AtomicStack;
-  typedef AtomicStack<mirror::Object*> ObjectStack;
+  typedef AtomicStack<mirror::Object> ObjectStack;
 }  // namespace accounting
 
 namespace space {
@@ -178,13 +178,13 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Expand mark stack to 2x its current size.
-  void ResizeMarkStack(size_t new_size);
+  void ResizeMarkStack(size_t new_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if we should sweep the space.
   virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const;
 
   // Push an object onto the mark stack.
-  void MarkStackPush(mirror::Object* obj);
+  void MarkStackPush(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void UpdateAndMarkModUnion()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ba06e05..9225a09 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -787,9 +787,8 @@
     bump_pointer_space_->Walk(callback, arg);
   }
   // TODO: Switch to standard begin and end to use ranged a based loop.
-  for (mirror::Object** it = allocation_stack_->Begin(), **end = allocation_stack_->End();
-      it < end; ++it) {
-    mirror::Object* obj = *it;
+  for (auto* it = allocation_stack_->Begin(), *end = allocation_stack_->End(); it < end; ++it) {
+    mirror::Object* const obj = it->AsMirrorPtr();
     if (obj != nullptr && obj->GetClass() != nullptr) {
       // Avoid the race condition caused by the object not yet being written into the allocation
       // stack or the class not yet being written in the object. Or, if
@@ -2139,9 +2138,9 @@
                           accounting::ObjectStack* stack) {
   DCHECK(bitmap1 != nullptr);
   DCHECK(bitmap2 != nullptr);
-  mirror::Object** limit = stack->End();
-  for (mirror::Object** it = stack->Begin(); it != limit; ++it) {
-    const mirror::Object* obj = *it;
+  const auto* limit = stack->End();
+  for (auto* it = stack->Begin(); it != limit; ++it) {
+    const mirror::Object* obj = it->AsMirrorPtr();
     if (!kUseThreadLocalAllocationStack || obj != nullptr) {
       if (bitmap1->HasAddress(obj)) {
         bitmap1->Set(obj);
@@ -2538,8 +2537,8 @@
 void Heap::PushOnThreadLocalAllocationStackWithInternalGC(Thread* self, mirror::Object** obj) {
   // Slow path, the allocation stack push back must have already failed.
   DCHECK(!self->PushOnThreadLocalAllocationStack(*obj));
-  mirror::Object** start_address;
-  mirror::Object** end_address;
+  StackReference<mirror::Object>* start_address;
+  StackReference<mirror::Object>* end_address;
   while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize, &start_address,
                                             &end_address)) {
     // TODO: Add handle VerifyObject.
@@ -2698,9 +2697,9 @@
   VerifyLiveStackReferences visitor(this);
   GetLiveBitmap()->Visit(visitor);
   // We can verify objects in the live stack since none of these should reference dead objects.
-  for (mirror::Object** it = live_stack_->Begin(); it != live_stack_->End(); ++it) {
-    if (!kUseThreadLocalAllocationStack || *it != nullptr) {
-      visitor(*it);
+  for (auto* it = live_stack_->Begin(); it != live_stack_->End(); ++it) {
+    if (!kUseThreadLocalAllocationStack || it->AsMirrorPtr() != nullptr) {
+      visitor(it->AsMirrorPtr());
     }
   }
   return !visitor.Failed();
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index b0b53b0..2a0b466 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -515,6 +515,7 @@
 
   // Mark and empty stack.
   void FlushAllocStack()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Revoke all the thread-local allocation stacks.
@@ -528,10 +529,12 @@
                       accounting::SpaceBitmap<kObjectAlignment>* bitmap2,
                       accounting::SpaceBitmap<kLargeObjectAlignment>* large_objects,
                       accounting::ObjectStack* stack)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Mark the specified allocation stack as live.
   void MarkAllocStackAsLive(accounting::ObjectStack* stack)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Unbind any bound bitmaps.
@@ -818,7 +821,7 @@
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Swap the allocation stack with the live stack.
-  void SwapStacks(Thread* self);
+  void SwapStacks(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Clear cards and update the mod union table.
   void ProcessCards(TimingLogger* timings, bool use_rem_sets);
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 3165898..b771aa7 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -172,11 +172,10 @@
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
-      uintptr_t ptr_val;
-      bool success = is_float ? GetFPR(reg, &ptr_val) : GetGPR(reg, &ptr_val);
-      if (!success) {
+      if (!IsAccessibleRegister(reg, is_float)) {
         return false;
       }
+      uintptr_t ptr_val = GetRegister(reg, is_float);
       bool target64 = Is64BitInstructionSet(kRuntimeISA);
       if (target64) {
         bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
@@ -194,11 +193,14 @@
       const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be NULL or how would we compile
                                                         // its instructions?
-      *val = *GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-                          frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
+      uint32_t* addr = GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
+                                   frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
+      DCHECK(addr != nullptr);
+      *val = *addr;
       return true;
     }
   } else {
+    DCHECK(cur_shadow_frame_ != nullptr);
     *val = cur_shadow_frame_->GetVReg(vreg);
     return true;
   }
@@ -228,12 +230,11 @@
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
       uint32_t reg_hi = vmap_table.ComputeRegister(spill_mask, vmap_offset_hi, kind_hi);
-      uintptr_t ptr_val_lo, ptr_val_hi;
-      bool success = is_float ? GetFPR(reg_lo, &ptr_val_lo) : GetGPR(reg_lo, &ptr_val_lo);
-      success &= is_float ? GetFPR(reg_hi, &ptr_val_hi) : GetGPR(reg_hi, &ptr_val_hi);
-      if (!success) {
+      if (!IsAccessibleRegister(reg_lo, is_float) || !IsAccessibleRegister(reg_hi, is_float)) {
         return false;
       }
+      uintptr_t ptr_val_lo = GetRegister(reg_lo, is_float);
+      uintptr_t ptr_val_hi = GetRegister(reg_hi, is_float);
       bool target64 = Is64BitInstructionSet(kRuntimeISA);
       if (target64) {
         int64_t value_long_lo = static_cast<int64_t>(ptr_val_lo);
@@ -249,10 +250,12 @@
                                                         // its instructions?
       uint32_t* addr = GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
                                    frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
+      DCHECK(addr != nullptr);
       *val = *reinterpret_cast<uint64_t*>(addr);
       return true;
     }
   } else {
+    DCHECK(cur_shadow_frame_ != nullptr);
     *val = cur_shadow_frame_->GetVRegLong(vreg);
     return true;
   }
@@ -273,17 +276,16 @@
       bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       const uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
+      if (!IsAccessibleRegister(reg, is_float)) {
+        return false;
+      }
       bool target64 = Is64BitInstructionSet(kRuntimeISA);
       // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
       if (target64) {
         bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
         bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
         if (wide_lo || wide_hi) {
-          uintptr_t old_reg_val;
-          bool success = is_float ? GetFPR(reg, &old_reg_val) : GetGPR(reg, &old_reg_val);
-          if (!success) {
-            return false;
-          }
+          uintptr_t old_reg_val = GetRegister(reg, is_float);
           uint64_t new_vreg_portion = static_cast<uint64_t>(new_value);
           uint64_t old_reg_val_as_wide = static_cast<uint64_t>(old_reg_val);
           uint64_t mask = 0xffffffff;
@@ -295,21 +297,20 @@
           new_value = static_cast<uintptr_t>((old_reg_val_as_wide & mask) | new_vreg_portion);
         }
       }
-      if (is_float) {
-        return SetFPR(reg, new_value);
-      } else {
-        return SetGPR(reg, new_value);
-      }
+      SetRegister(reg, new_value, is_float);
+      return true;
     } else {
       const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be NULL or how would we compile
                                                         // its instructions?
       uint32_t* addr = GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
                                    frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
+      DCHECK(addr != nullptr);
       *addr = new_value;
       return true;
     }
   } else {
+    DCHECK(cur_shadow_frame_ != nullptr);
     cur_shadow_frame_->SetVReg(vreg, new_value);
     return true;
   }
@@ -339,17 +340,16 @@
       uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
       uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
       uint32_t reg_hi = vmap_table.ComputeRegister(spill_mask, vmap_offset_hi, kind_hi);
+      if (!IsAccessibleRegister(reg_lo, is_float) || !IsAccessibleRegister(reg_hi, is_float)) {
+        return false;
+      }
       uintptr_t new_value_lo = static_cast<uintptr_t>(new_value & 0xFFFFFFFF);
       uintptr_t new_value_hi = static_cast<uintptr_t>(new_value >> 32);
       bool target64 = Is64BitInstructionSet(kRuntimeISA);
       // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
       if (target64) {
-        uintptr_t old_reg_val_lo, old_reg_val_hi;
-        bool success = is_float ? GetFPR(reg_lo, &old_reg_val_lo) : GetGPR(reg_lo, &old_reg_val_lo);
-        success &= is_float ? GetFPR(reg_hi, &old_reg_val_hi) : GetGPR(reg_hi, &old_reg_val_hi);
-        if (!success) {
-          return false;
-        }
+        uintptr_t old_reg_val_lo = GetRegister(reg_lo, is_float);
+        uintptr_t old_reg_val_hi = GetRegister(reg_hi, is_float);
         uint64_t new_vreg_portion_lo = static_cast<uint64_t>(new_value_lo);
         uint64_t new_vreg_portion_hi = static_cast<uint64_t>(new_value_hi) << 32;
         uint64_t old_reg_val_lo_as_wide = static_cast<uint64_t>(old_reg_val_lo);
@@ -359,47 +359,64 @@
         new_value_lo = static_cast<uintptr_t>((old_reg_val_lo_as_wide & mask_lo) | new_vreg_portion_lo);
         new_value_hi = static_cast<uintptr_t>((old_reg_val_hi_as_wide & mask_hi) | new_vreg_portion_hi);
       }
-      bool success = is_float ? SetFPR(reg_lo, new_value_lo) : SetGPR(reg_lo, new_value_lo);
-      success &= is_float ? SetFPR(reg_hi, new_value_hi) : SetGPR(reg_hi, new_value_hi);
-      return success;
+      SetRegister(reg_lo, new_value_lo, is_float);
+      SetRegister(reg_hi, new_value_hi, is_float);
+      return true;
     } else {
       const DexFile::CodeItem* code_item = m->GetCodeItem();
       DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be NULL or how would we compile
                                                         // its instructions?
       uint32_t* addr = GetVRegAddr(cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
                                    frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
+      DCHECK(addr != nullptr);
       *reinterpret_cast<uint64_t*>(addr) = new_value;
       return true;
     }
   } else {
+    DCHECK(cur_shadow_frame_ != nullptr);
     cur_shadow_frame_->SetVRegLong(vreg, new_value);
     return true;
   }
 }
 
+bool StackVisitor::IsAccessibleGPR(uint32_t reg) const {
+  DCHECK(context_ != nullptr);
+  return context_->IsAccessibleGPR(reg);
+}
+
 uintptr_t* StackVisitor::GetGPRAddress(uint32_t reg) const {
-  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
+  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
+  DCHECK(context_ != nullptr);
   return context_->GetGPRAddress(reg);
 }
 
-bool StackVisitor::GetGPR(uint32_t reg, uintptr_t* val) const {
-  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
-  return context_->GetGPR(reg, val);
+uintptr_t StackVisitor::GetGPR(uint32_t reg) const {
+  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
+  DCHECK(context_ != nullptr);
+  return context_->GetGPR(reg);
 }
 
-bool StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
-  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
-  return context_->SetGPR(reg, value);
+void StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
+  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
+  DCHECK(context_ != nullptr);
+  context_->SetGPR(reg, value);
 }
 
-bool StackVisitor::GetFPR(uint32_t reg, uintptr_t* val) const {
-  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
-  return context_->GetFPR(reg, val);
+bool StackVisitor::IsAccessibleFPR(uint32_t reg) const {
+  DCHECK(context_ != nullptr);
+  return context_->IsAccessibleFPR(reg);
 }
 
-bool StackVisitor::SetFPR(uint32_t reg, uintptr_t value) {
-  DCHECK(cur_quick_frame_ != NULL) << "This is a quick frame routine";
-  return context_->SetFPR(reg, value);
+uintptr_t StackVisitor::GetFPR(uint32_t reg) const {
+  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
+  DCHECK(context_ != nullptr);
+  return context_->GetFPR(reg);
+}
+
+void StackVisitor::SetFPR(uint32_t reg, uintptr_t value) {
+  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
+  DCHECK(context_ != nullptr);
+  context_->SetFPR(reg, value);
 }
 
 uintptr_t StackVisitor::GetReturnPc() const {
diff --git a/runtime/stack.h b/runtime/stack.h
index b2b2072..5a86ca1 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -649,10 +649,29 @@
   StackVisitor(Thread* thread, Context* context, size_t num_frames)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool GetGPR(uint32_t reg, uintptr_t* val) const;
-  bool SetGPR(uint32_t reg, uintptr_t value);
-  bool GetFPR(uint32_t reg, uintptr_t* val) const;
-  bool SetFPR(uint32_t reg, uintptr_t value);
+  bool IsAccessibleRegister(uint32_t reg, bool is_float) const {
+    return is_float ? IsAccessibleFPR(reg) : IsAccessibleGPR(reg);
+  }
+  uintptr_t GetRegister(uint32_t reg, bool is_float) const {
+    DCHECK(IsAccessibleRegister(reg, is_float));
+    return is_float ? GetFPR(reg) : GetGPR(reg);
+  }
+  void SetRegister(uint32_t reg, uintptr_t value, bool is_float) {
+    DCHECK(IsAccessibleRegister(reg, is_float));
+    if (is_float) {
+      SetFPR(reg, value);
+    } else {
+      SetGPR(reg, value);
+    }
+  }
+
+  bool IsAccessibleGPR(uint32_t reg) const;
+  uintptr_t GetGPR(uint32_t reg) const;
+  void SetGPR(uint32_t reg, uintptr_t value);
+
+  bool IsAccessibleFPR(uint32_t reg) const;
+  uintptr_t GetFPR(uint32_t reg) const;
+  void SetFPR(uint32_t reg, uintptr_t value);
 
   void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index a85d608..16add79 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -213,22 +213,23 @@
   if (tlsPtr_.thread_local_alloc_stack_top < tlsPtr_.thread_local_alloc_stack_end) {
     // There's room.
     DCHECK_LE(reinterpret_cast<uint8_t*>(tlsPtr_.thread_local_alloc_stack_top) +
-                  sizeof(mirror::Object*),
+              sizeof(StackReference<mirror::Object>),
               reinterpret_cast<uint8_t*>(tlsPtr_.thread_local_alloc_stack_end));
-    DCHECK(*tlsPtr_.thread_local_alloc_stack_top == nullptr);
-    *tlsPtr_.thread_local_alloc_stack_top = obj;
+    DCHECK(tlsPtr_.thread_local_alloc_stack_top->AsMirrorPtr() == nullptr);
+    tlsPtr_.thread_local_alloc_stack_top->Assign(obj);
     ++tlsPtr_.thread_local_alloc_stack_top;
     return true;
   }
   return false;
 }
 
-inline void Thread::SetThreadLocalAllocationStack(mirror::Object** start, mirror::Object** end) {
+inline void Thread::SetThreadLocalAllocationStack(StackReference<mirror::Object>* start,
+                                                  StackReference<mirror::Object>* end) {
   DCHECK(Thread::Current() == this) << "Should be called by self";
   DCHECK(start != nullptr);
   DCHECK(end != nullptr);
-  DCHECK_ALIGNED(start, sizeof(mirror::Object*));
-  DCHECK_ALIGNED(end, sizeof(mirror::Object*));
+  DCHECK_ALIGNED(start, sizeof(StackReference<mirror::Object>));
+  DCHECK_ALIGNED(end, sizeof(StackReference<mirror::Object>));
   DCHECK_LT(start, end);
   tlsPtr_.thread_local_alloc_stack_end = end;
   tlsPtr_.thread_local_alloc_stack_top = start;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 17dfd8c..16edab3 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1149,7 +1149,7 @@
   wait_mutex_ = new Mutex("a thread wait mutex");
   wait_cond_ = new ConditionVariable("a thread wait condition variable", *wait_mutex_);
   tlsPtr_.debug_invoke_req = new DebugInvokeReq;
-  tlsPtr_.single_step_control = new SingleStepControl;
+  tlsPtr_.single_step_control = nullptr;
   tlsPtr_.instrumentation_stack = new std::deque<instrumentation::InstrumentationStackFrame>;
   tlsPtr_.name = new std::string(kThreadNameDuringStartup);
   tlsPtr_.nested_signal_state = static_cast<jmp_buf*>(malloc(sizeof(jmp_buf)));
@@ -1302,7 +1302,9 @@
   }
 
   delete tlsPtr_.debug_invoke_req;
-  delete tlsPtr_.single_step_control;
+  if (tlsPtr_.single_step_control != nullptr) {
+    delete tlsPtr_.single_step_control;
+  }
   delete tlsPtr_.instrumentation_stack;
   delete tlsPtr_.name;
   delete tlsPtr_.stack_trace_sample;
@@ -2420,5 +2422,19 @@
   return mprotect(pregion, kStackOverflowProtectedSize, PROT_READ|PROT_WRITE) == 0;
 }
 
+void Thread::ActivateSingleStepControl(SingleStepControl* ssc) {
+  CHECK(Dbg::IsDebuggerActive());
+  CHECK(GetSingleStepControl() == nullptr) << "Single step already active in thread " << *this;
+  CHECK(ssc != nullptr);
+  tlsPtr_.single_step_control = ssc;
+}
+
+void Thread::DeactivateSingleStepControl() {
+  CHECK(Dbg::IsDebuggerActive());
+  CHECK(GetSingleStepControl() != nullptr) << "Single step not active in thread " << *this;
+  SingleStepControl* ssc = GetSingleStepControl();
+  tlsPtr_.single_step_control = nullptr;
+  delete ssc;
+}
 
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 7db9ba5..26b7b6f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -75,7 +75,7 @@
 class Runtime;
 class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
-struct SingleStepControl;
+class SingleStepControl;
 class Thread;
 class ThreadList;
 
@@ -709,6 +709,15 @@
     return tlsPtr_.single_step_control;
   }
 
+  // Activates single step control for debugging. The thread takes
+  // ownership of the given SingleStepControl*, which is deleted by a call
+  // to DeactivateSingleStepControl or upon thread destruction.
+  void ActivateSingleStepControl(SingleStepControl* ssc);
+
+  // Deactivates single step control for debugging.
+  void DeactivateSingleStepControl();
+
+
   // Returns the fake exception used to activate deoptimization.
   static mirror::Throwable* GetDeoptimizationException() {
     return reinterpret_cast<mirror::Throwable*>(-1);
@@ -807,10 +816,12 @@
 
 
   // Push an object onto the allocation stack.
-  bool PushOnThreadLocalAllocationStack(mirror::Object* obj);
+  bool PushOnThreadLocalAllocationStack(mirror::Object* obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Set the thread local allocation pointers to the given pointers.
-  void SetThreadLocalAllocationStack(mirror::Object** start, mirror::Object** end);
+  void SetThreadLocalAllocationStack(StackReference<mirror::Object>* start,
+                                     StackReference<mirror::Object>* end);
 
   // Resets the thread local allocation pointers.
   void RevokeThreadLocalAllocationStack();
@@ -1149,8 +1160,8 @@
     void* rosalloc_runs[kNumRosAllocThreadLocalSizeBrackets];
 
     // Thread-local allocation stack data/routines.
-    mirror::Object** thread_local_alloc_stack_top;
-    mirror::Object** thread_local_alloc_stack_end;
+    StackReference<mirror::Object>* thread_local_alloc_stack_top;
+    StackReference<mirror::Object>* thread_local_alloc_stack_end;
 
     // Support for Mutex lock hierarchy bug detection.
     BaseMutex* held_mutexes[kLockLevelCount];
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 908cfbd2..8347b7e 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1228,8 +1228,9 @@
     if (!it->map) {
       os << StringPrintf("%08" PRIxPTR "  ???", it->pc);
     } else {
-      os << StringPrintf("%08" PRIxPTR "  ", it->pc - it->map->start)
-         << it->map->name << " (";
+      os << StringPrintf("%08" PRIxPTR "  ", it->pc - it->map->start);
+      os << it->map->name;
+      os << " (";
       if (!it->func_name.empty()) {
         os << it->func_name;
         if (it->func_offset != 0) {
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index a737ccd..5561a09 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -130,7 +130,11 @@
     Assert.assertEquals('N', testStr.charAt(0));
     Assert.assertEquals('o', testStr.charAt(1));
     Assert.assertEquals(' ', testStr.charAt(10));
-    Assert.assertEquals('e', testStr.charAt(testStr.length()-1));
+    Assert.assertEquals('e', testStr.charAt(14));  // 14 = testStr.length()-1 as a constant.
+    Assert.assertEquals('N', test_String_charAt_inner(testStr, 0));
+    Assert.assertEquals('o', test_String_charAt_inner(testStr, 1));
+    Assert.assertEquals(' ', test_String_charAt_inner(testStr, 10));
+    Assert.assertEquals('e', test_String_charAt_inner(testStr, testStr.length()-1));
 
     test_String_charAtExc();
     test_String_charAtExc2();
@@ -148,6 +152,33 @@
       Assert.fail();
     } catch (StringIndexOutOfBoundsException expected) {
     }
+    try {
+      testStr.charAt(15);  // 15 = "Now is the time".length()
+      Assert.fail();
+    } catch (StringIndexOutOfBoundsException expected) {
+    }
+    try {
+      test_String_charAt_inner(testStr, -1);
+      Assert.fail();
+    } catch (StringIndexOutOfBoundsException expected) {
+    }
+    try {
+      test_String_charAt_inner(testStr, 80);
+      Assert.fail();
+    } catch (StringIndexOutOfBoundsException expected) {
+    }
+    try {
+      test_String_charAt_inner(testStr, 15);  // 15 = "Now is the time".length()
+      Assert.fail();
+    } catch (StringIndexOutOfBoundsException expected) {
+    }
+
+    String strEmpty = "";
+    try {
+      strEmpty.charAt(0);
+      Assert.fail();
+    } catch (StringIndexOutOfBoundsException expected) {
+    }
 
     String strNull = null;
     try {
@@ -157,6 +188,11 @@
     }
   }
 
+  private static char test_String_charAt_inner(String s, int index) {
+    // Use a non-constant index here (this assumes the method is not inlined).
+    return s.charAt(index);
+  }
+
   private static void test_String_charAtExc2() {
     try {
       test_String_charAtExc3();
diff --git a/test/114-ParallelGC/src/Main.java b/test/114-ParallelGC/src/Main.java
index df2243c..46029cf 100644
--- a/test/114-ParallelGC/src/Main.java
+++ b/test/114-ParallelGC/src/Main.java
@@ -53,13 +53,17 @@
         }
 
         // Allocate objects to definitely run GC before quitting.
+        ArrayList<Object> l = new ArrayList<Object>();
         try {
-            ArrayList<Object> l = new ArrayList<Object>();
             for (int i = 0; i < 100000; i++) {
                 l.add(new ArrayList<Object>(i));
             }
         } catch (OutOfMemoryError oom) {
         }
+        // Make the (outer) ArrayList unreachable. Note that it may still
+        // be reachable under an interpreter or a compiler without
+        // liveness analysis.
+        l = null;
         new ArrayList<Object>(50);
     }