Merge "Fix some bugs in graph construction/simplification methods."
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index eb48cc3..f0b4787 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -48,10 +48,16 @@
   kArg1,
   kArg2,
   kArg3,
+  kArg4,
+  kArg5,
   kFArg0,
   kFArg1,
   kFArg2,
   kFArg3,
+  kFArg4,
+  kFArg5,
+  kFArg6,
+  kFArg7,
   kRet0,
   kRet1,
   kInvokeTgt,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 58d2ed2..547c0f6 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -707,7 +707,7 @@
 //         which has problems with long, float, double
 constexpr char arm64_supported_types[] = "ZBSCILVJFD";
 // (x86_64) We still have trouble compiling longs/doubles/floats
-constexpr char x86_64_supported_types[] = "ZBSCILV";
+constexpr char x86_64_supported_types[] = "ZBSCILVJFD";
 
 // TODO: Remove this when we are able to compile everything.
 static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
@@ -718,7 +718,7 @@
   // 1 is for the return type. Currently, we only accept 2 parameters at the most.
   // (x86_64): For now we have the same limitation. But we might want to split this
   //           check in future into two separate cases for arm64 and x86_64.
-  if (shorty_size > (1 + 2)) {
+  if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) {
     return false;
   }
 
@@ -907,7 +907,7 @@
     cu.mir_graph->EnableOpcodeCounting();
   }
 
-  // Check early if we should skip this compilation if using the profiled filter.
+  // Check early whether we should skip this compilation when the profiler is enabled.
   if (cu.compiler_driver->ProfilePresent()) {
     std::string methodname = PrettyMethod(method_idx, dex_file);
     if (cu.mir_graph->SkipCompilation(methodname)) {
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 7129f8a..2ec17de 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -1015,7 +1015,7 @@
     return true;
   }
 
-  if (!compiler_options.IsCompilationEnabled() || compiler_filter == CompilerOptions::kProfiled) {
+  if (!compiler_options.IsCompilationEnabled()) {
     return true;
   }
 
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 69c394f..29c353a 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -188,7 +188,9 @@
   }
 
   void PerformClassInitCheckElimination() {
+    cu_.mir_graph->SSATransformationStart();
     cu_.mir_graph->ComputeDFSOrders();
+    cu_.mir_graph->SSATransformationEnd();
     bool gate_result = cu_.mir_graph->EliminateClassInitChecksGate();
     ASSERT_TRUE(gate_result);
     RepeatingPreOrderDfsIterator iterator(cu_.mir_graph.get());
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 9f9e618..db0731f 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -19,6 +19,7 @@
 #include "arm_lir.h"
 #include "codegen_arm.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "gc/accounting/card_table.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 
 namespace art {
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index bd9c8b4..3b30cde 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -113,6 +113,7 @@
     case kHiddenArg: res_reg = rs_r12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index d0f8e74..b80938a 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -19,6 +19,7 @@
 #include "arm64_lir.h"
 #include "codegen_arm64.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "gc/accounting/card_table.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 
 namespace art {
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index b287399..ce95286 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -127,6 +127,7 @@
     case kHiddenArg: res_reg = rs_x12; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = RegStorage::InvalidReg(); break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 62c81d0..69ca715 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1959,7 +1959,7 @@
 
   switch (opcode) {
     case Instruction::NOT_LONG:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
         mir_to_lir->GenNotLong(rl_dest, rl_src2);
         return;
       }
@@ -2009,7 +2009,7 @@
       break;
     case Instruction::DIV_LONG:
     case Instruction::DIV_LONG_2ADDR:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
         mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
         return;
       }
@@ -2020,7 +2020,7 @@
       break;
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
-      if (cu->instruction_set == kArm64) {
+      if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) {
         mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
         return;
       }
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 842533b..ee68fe2 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -290,26 +290,51 @@
         }
         LoadValueDirectWideFixed(arg1, r_tmp);
       } else {
-        RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+        RegStorage r_tmp;
+        if (cu_->instruction_set == kX86_64) {
+          r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+        } else {
+          r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+        }
         LoadValueDirectWideFixed(arg1, r_tmp);
       }
     }
   } else {
     RegStorage r_tmp;
     if (arg0.fp) {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+      if (cu_->instruction_set == kX86_64) {
+        r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg());
+      } else {
+        r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+      }
     } else {
-      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+      if (cu_->instruction_set == kX86_64) {
+        r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+      } else {
+        r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+      }
     }
     LoadValueDirectWideFixed(arg0, r_tmp);
     if (arg1.wide == 0) {
-      LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+      if (cu_->instruction_set == kX86_64) {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
+      } else {
+        LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+      }
     } else {
       RegStorage r_tmp;
       if (arg1.fp) {
-        r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+        if (cu_->instruction_set == kX86_64) {
+          r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg());
+        } else {
+          r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+        }
       } else {
-        r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+        if (cu_->instruction_set == kX86_64) {
+          r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+        } else {
+          r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+        }
       }
       LoadValueDirectWideFixed(arg1, r_tmp);
     }
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 2c8b9b9..6ef7934 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -391,24 +391,34 @@
   return loc;
 }
 
-// FIXME: will need an update for 64-bit core regs.
 RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
   DCHECK(loc.wide);
   DCHECK(loc.location == kLocPhysReg);
   DCHECK(!loc.reg.IsFloat());
-  if (IsTemp(loc.reg.GetLow())) {
-    Clobber(loc.reg.GetLow());
+
+  if (!loc.reg.IsPair()) {
+    if (IsTemp(loc.reg)) {
+      Clobber(loc.reg);
+    } else {
+      RegStorage temp = AllocTempWide();
+      OpRegCopy(temp, loc.reg);
+      loc.reg = temp;
+    }
   } else {
-    RegStorage temp_low = AllocTemp();
-    OpRegCopy(temp_low, loc.reg.GetLow());
-    loc.reg.SetLowReg(temp_low.GetReg());
-  }
-  if (IsTemp(loc.reg.GetHigh())) {
-    Clobber(loc.reg.GetHigh());
-  } else {
-    RegStorage temp_high = AllocTemp();
-    OpRegCopy(temp_high, loc.reg.GetHigh());
-    loc.reg.SetHighReg(temp_high.GetReg());
+    if (IsTemp(loc.reg.GetLow())) {
+      Clobber(loc.reg.GetLow());
+    } else {
+      RegStorage temp_low = AllocTemp();
+      OpRegCopy(temp_low, loc.reg.GetLow());
+      loc.reg.SetLowReg(temp_low.GetReg());
+    }
+    if (IsTemp(loc.reg.GetHigh())) {
+      Clobber(loc.reg.GetHigh());
+    } else {
+      RegStorage temp_high = AllocTemp();
+      OpRegCopy(temp_high, loc.reg.GetHigh());
+      loc.reg.SetHighReg(temp_high.GetReg());
+    }
   }
 
   // Ensure that this doesn't represent the original SR any more.
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index e1bdb2e..c734202 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -19,6 +19,7 @@
 #include "codegen_mips.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/accounting/card_table.h"
 #include "mips_lir.h"
 
 namespace art {
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index c1a7c99..381c7ce 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -98,6 +98,7 @@
     case kHiddenArg: res_reg = rs_rT0; break;
     case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
     case kCount: res_reg = rs_rMIPS_COUNT; break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 1f12b6f..a85be5e 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -68,20 +68,51 @@
 
 // TODO: needs revisit for 64-bit.
 RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
-  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
-  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
-      RegStorage::InvalidReg();
-
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+
+  if (cu_->instruction_set == kX86) {
     /*
      * When doing a call for x86, it moves the stack pointer in order to push return.
      * Thus, we add another 4 bytes to figure out the out of caller (in of callee).
-     * TODO: This needs revisited for 64-bit.
      */
     offset += sizeof(uint32_t);
   }
 
+  if (cu_->instruction_set == kX86_64) {
+    /*
+     * When doing a call for x86-64, the stack pointer moves to push the return address.
+     * Thus, we add another 8 bytes to figure out the out of caller (in of callee).
+     */
+    offset += sizeof(uint64_t);
+  }
+
+  if (cu_->instruction_set == kX86_64) {
+    RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+    if (!reg_arg.Valid()) {
+      RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class)
+                                : AllocTypedTemp(false, reg_class);
+      LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32);
+      return new_reg;
+    } else {
+      // Check if we need to copy the arg to a different reg_class.
+      if (!RegClassMatches(reg_class, reg_arg)) {
+        if (wide) {
+          RegStorage new_reg = AllocTypedTempWide(false, reg_class);
+          OpRegCopyWide(new_reg, reg_arg);
+          reg_arg = new_reg;
+        } else {
+          RegStorage new_reg = AllocTypedTemp(false, reg_class);
+          OpRegCopy(new_reg, reg_arg);
+          reg_arg = new_reg;
+        }
+      }
+    }
+    return reg_arg;
+  }
+
+  RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
+  RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
+      RegStorage::InvalidReg();
+
   // If the VR is wide and there is no register for high part, we need to load it.
   if (wide && !reg_arg_high.Valid()) {
     // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
@@ -129,15 +160,22 @@
 
 void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
   int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
-  if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+  if (cu_->instruction_set == kX86) {
     /*
      * When doing a call for x86, it moves the stack pointer in order to push return.
      * Thus, we add another 4 bytes to figure out the out of caller (in of callee).
-     * TODO: This needs revisited for 64-bit.
      */
     offset += sizeof(uint32_t);
   }
 
+  if (cu_->instruction_set == kX86_64) {
+    /*
+     * When doing a call for x86-64, the stack pointer moves to push the return address.
+     * Thus, we add another 8 bytes to figure out the out of caller (in of callee).
+     */
+    offset += sizeof(uint64_t);
+  }
+
   if (!rl_dest.wide) {
     RegStorage reg = GetArgMappingToPhysicalReg(in_position);
     if (reg.Valid()) {
@@ -146,6 +184,16 @@
       Load32Disp(TargetReg(kSp), offset, rl_dest.reg);
     }
   } else {
+    if (cu_->instruction_set == kX86_64) {
+      RegStorage reg = GetArgMappingToPhysicalReg(in_position);
+      if (reg.Valid()) {
+        OpRegCopy(rl_dest.reg, reg);
+      } else {
+        LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
+      }
+      return;
+    }
+
     RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
     RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
 
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ed94a8d..9718acd 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -910,13 +910,13 @@
     void GenInvoke(CallInfo* info);
     void GenInvokeNoInline(CallInfo* info);
     virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
-    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+    virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
                              NextCallInsn next_call_insn,
                              const MethodReference& target_method,
                              uint32_t vtable_idx,
                              uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
                              bool skip_this);
-    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+    virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
                            NextCallInsn next_call_insn,
                            const MethodReference& target_method,
                            uint32_t vtable_idx,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 39a0365..c383296 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -317,6 +317,7 @@
 #undef UNARY_ENCODING_MAP
 
   { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" },
+  { kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { REX_W, 0, 0x99, 0, 0, 0, 0, 0 }, "Cqo", "" },
   { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" },
   { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0,    0, 0, 0, 0 }, "Push32R",  "!0r" },
   { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0, 0, 0x58, 0,    0, 0, 0, 0 }, "Pop32R",   "!0r" },
@@ -326,6 +327,11 @@
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
 { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
 
+#define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \
+{ kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+
 #define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \
 { kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1,  { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
@@ -341,8 +347,12 @@
 
   EXT_0F_ENCODING_MAP(Cvtsi2sd,  0xF2, 0x2A, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtsi2ss,  0xF3, 0x2A, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2sd,  0xF2, 0x2A, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2ss,  0xF3, 0x2A, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvttsd2si, 0xF2, 0x2C, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Cvttsd2sqi, 0xF2, 0x2C, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Cvttss2sqi, 0xF3, 0x2C, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtsd2si,  0xF2, 0x2D, REG_DEF0),
   EXT_0F_ENCODING_MAP(Cvtss2si,  0xF3, 0x2D, REG_DEF0),
   EXT_0F_ENCODING_MAP(Ucomisd,   0x66, 0x2E, SETS_CCODES|REG_USE0),
@@ -428,10 +438,19 @@
   { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
   EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
+  EXT_0F_REX_W_ENCODING_MAP(Movqxr, 0x66, 0x6E, REG_DEF0),
+  { kX86MovqrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxRR", "!0r,!1r" },
+  { kX86MovqrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxMR", "[!0r+!1d],!2r" },
+  { kX86MovqrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
   { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE1,   { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
   { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
   { kX86MovdrxAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" },
 
+  { kX86MovsxdRR, kRegReg,      IS_BINARY_OP | REG_DEF0 | REG_USE1,              { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRR", "!0r,!1r" },
+  { kX86MovsxdRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1,  { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRM", "!0r,[!1r+!2d]" },
+  { kX86MovsxdRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE12, { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+
   { kX86Set8R, kRegCond,              IS_BINARY_OP   | REG_DEF0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" },
   { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP | REG_USE0  | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" },
   { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP     | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
@@ -442,6 +461,7 @@
 
   EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
+  EXT_0F_ENCODING_MAP(Imul64,  REX_W, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
 
   { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" },
   { kX86CmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" },
@@ -507,7 +527,7 @@
   }
   if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) {
     // BP requires an explicit displacement, even when it's 0.
-    if (entry->opcode != kX86Lea32RA) {
+    if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) {
       DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name;
     }
     size += IS_SIMM8(displacement) ? 1 : 4;
@@ -676,7 +696,7 @@
     case kMacro:  // lir operands - 0: reg
       DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
       return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
-          ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0,
+          ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], 0, 0,
                       lir->operands[0], NO_REG, false) -
           // shorter ax encoding
           (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum()  ? 1 : 0);
@@ -1408,8 +1428,8 @@
   DCHECK_LT(RegStorage::RegNum(reg), 8);
   code_buffer_.push_back(0x58 + RegStorage::RegNum(reg));  // pop reg
 
-  EmitRegImm(&X86Mir2Lir::EncodingMap[kX86Sub32RI], RegStorage::RegNum(reg),
-             offset + 5 /* size of call +0 */);
+  EmitRegImm(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI],
+             RegStorage::RegNum(reg), offset + 5 /* size of call +0 */);
 }
 
 void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) {
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index f363eb3..f5fce34 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -18,6 +18,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "gc/accounting/card_table.h"
 #include "x86_lir.h"
 
 namespace art {
@@ -85,11 +86,19 @@
   if (base_of_code_ != nullptr) {
     // We can use the saved value.
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-    rl_method = LoadValue(rl_method, kCoreReg);
+    if (rl_method.wide) {
+      rl_method = LoadValueWide(rl_method, kCoreReg);
+    } else {
+      rl_method = LoadValue(rl_method, kCoreReg);
+    }
     start_of_method_reg = rl_method.reg;
     store_method_addr_used_ = true;
   } else {
-    start_of_method_reg = AllocTemp();
+    if (Gen64Bit()) {
+      start_of_method_reg = AllocTempWide();
+    } else {
+      start_of_method_reg = AllocTemp();
+    }
     NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
   }
   int low_key = s4FromSwitchData(&table[2]);
@@ -107,9 +116,14 @@
 
   // Load the displacement from the switch table
   RegStorage disp_reg = AllocTemp();
-  NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), 2, WrapPointer(tab_rec));
+  NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
+          2, WrapPointer(tab_rec));
   // Add displacement to start of method
-  OpRegReg(kOpAdd, start_of_method_reg, disp_reg);
+  if (Gen64Bit()) {
+    NewLIR2(kX86Add64RR, start_of_method_reg.GetReg(), disp_reg.GetReg());
+  } else {
+    OpRegReg(kOpAdd, start_of_method_reg, disp_reg);
+  }
   // ..and go!
   LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg());
   tab_rec->anchor = switch_branch;
@@ -149,13 +163,18 @@
   if (base_of_code_ != nullptr) {
     // We can use the saved value.
     RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-    LoadValueDirect(rl_method, rs_rX86_ARG2);
+    if (rl_method.wide) {
+      LoadValueDirectWide(rl_method, rs_rX86_ARG2);
+    } else {
+      LoadValueDirect(rl_method, rs_rX86_ARG2);
+    }
     store_method_addr_used_ = true;
   } else {
+    // TODO(64) force to be 64-bit
     NewLIR1(kX86StartOfMethod, rs_rX86_ARG2.GetReg());
   }
   NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec));
-  NewLIR2(kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
+  NewLIR2(Gen64Bit() ? kX86Add64RR : kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
   if (Is64BitInstructionSet(cu_->instruction_set)) {
     CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0,
                             rs_rX86_ARG1, true);
@@ -263,9 +282,10 @@
       OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>());
     }
     LIR* branch = OpCondBranch(kCondUlt, nullptr);
-    AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch,
-                                                 frame_size_ -
-                                                 GetInstructionSetPointerSize(cu_->instruction_set)));
+    AddSlowPath(
+        new(arena_)StackOverflowSlowPath(this, branch,
+                                         frame_size_ -
+                                         GetInstructionSetPointerSize(cu_->instruction_set)));
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -275,7 +295,7 @@
     setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rs_rX86_ARG0.GetReg());
     int displacement = SRegOffset(base_of_code_->s_reg_low);
     // Native pointer - must be natural word size.
-    setup_method_address_[1] = StoreWordDisp(rs_rX86_SP, displacement, rs_rX86_ARG0);
+    setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0,
+                                             Gen64Bit() ? k64 : k32);
   }
 
   FreeTemp(rs_rX86_ARG0);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 648c148..38d60d2 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -20,9 +20,43 @@
 #include "dex/compiler_internals.h"
 #include "x86_lir.h"
 
+#include <map>
+
 namespace art {
 
 class X86Mir2Lir : public Mir2Lir {
+  protected:
+    class InToRegStorageMapper {
+      public:
+        virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+        virtual ~InToRegStorageMapper() {}
+    };
+
+    class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
+      public:
+        InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
+        virtual ~InToRegStorageX86_64Mapper() {}
+        virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+      private:
+        int cur_core_reg_;
+        int cur_fp_reg_;
+    };
+
+    class InToRegStorageMapping {
+      public:
+        InToRegStorageMapping() : initialized_(false) {}
+        void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
+        int GetMaxMappedIn() { return max_mapped_in_; }
+        bool IsThereStackMapped() { return is_there_stack_mapped_; }
+        RegStorage Get(int in_position);
+        bool IsInitialized() { return initialized_; }
+      private:
+        std::map<int, RegStorage> mapping_;
+        int max_mapped_in_;
+        bool is_there_stack_mapped_;
+        bool initialized_;
+    };
+
   public:
     X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
 
@@ -56,6 +90,7 @@
     // Required for target - register utilities.
     RegStorage TargetReg(SpecialTargetRegister reg);
     RegStorage GetArgMappingToPhysicalReg(int arg_num);
+    RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
@@ -151,22 +186,25 @@
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
 
     /*
      * @brief Generate a two address long operation with a constant value
      * @param rl_dest location of result
      * @param rl_src constant source operand
      * @param op Opcode to be generated
+     * @return success or not
      */
-    void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+    bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
     /*
      * @brief Generate a three address long operation with a constant value
      * @param rl_dest location of result
      * @param rl_src1 source operand
      * @param rl_src2 constant source operand
      * @param op Opcode to be generated
+     * @return success or not
      */
-    void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+    bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                         Instruction::Code op);
 
     /**
@@ -222,6 +260,9 @@
                                     bool can_assume_type_is_in_dex_cache,
                                     uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src);
 
+    void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                        RegLocation rl_src1, RegLocation rl_shift);
+
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
     LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target);
@@ -306,6 +347,22 @@
      */
     void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
 
+    void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+
+    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                             NextCallInsn next_call_insn,
+                             const MethodReference& target_method,
+                             uint32_t vtable_idx,
+                             uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                             bool skip_this);
+
+    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                           NextCallInsn next_call_insn,
+                           const MethodReference& target_method,
+                           uint32_t vtable_idx,
+                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                           bool skip_this);
+
     /*
      * @brief Generate a relative call to the method that will be patched at link time.
      * @param target_method The MethodReference of the method to be invoked.
@@ -794,6 +851,8 @@
      * @param mir A kMirOpConst128b MIR instruction to match.
      */
     LIR *AddVectorLiteral(MIR *mir);
+
+    InToRegStorageMapping in_to_reg_storage_mapping_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 0421a59..c3580f7 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -272,21 +272,67 @@
       return;
     }
     case Instruction::LONG_TO_DOUBLE:
+      if (Gen64Bit()) {
+        rcSrc = kCoreReg;
+        op = kX86Cvtsqi2sdRR;
+        break;
+      }
       GenLongToFP(rl_dest, rl_src, true /* is_double */);
       return;
     case Instruction::LONG_TO_FLOAT:
+      if (Gen64Bit()) {
+        rcSrc = kCoreReg;
+        op = kX86Cvtsqi2ssRR;
+        break;
+      }
       GenLongToFP(rl_dest, rl_src, false /* is_double */);
       return;
     case Instruction::FLOAT_TO_LONG:
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
-        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pF2l), rl_dest, rl_src);
+      if (Gen64Bit()) {
+        rl_src = LoadValue(rl_src, kFPReg);
+        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
+        ClobberSReg(rl_dest.s_reg_low);
+        rl_result = EvalLoc(rl_dest, kCoreReg, true);
+        RegStorage temp_reg = AllocTempSingle();
+
+        // Set 0x7fffffffffffffff to rl_result
+        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
+        NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg());
+        NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg());
+        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
+        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
+        NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
+        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
+        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
+        branch_normal->target = NewLIR0(kPseudoTargetLabel);
+        StoreValueWide(rl_dest, rl_result);
       } else {
         GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
       }
       return;
     case Instruction::DOUBLE_TO_LONG:
-      if (Is64BitInstructionSet(cu_->instruction_set)) {
-        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pD2l), rl_dest, rl_src);
+      if (Gen64Bit()) {
+        rl_src = LoadValueWide(rl_src, kFPReg);
+        // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc()
+        ClobberSReg(rl_dest.s_reg_low);
+        rl_result = EvalLoc(rl_dest, kCoreReg, true);
+        RegStorage temp_reg = AllocTempDouble();
+
+        // Set 0x7fffffffffffffff to rl_result
+        LoadConstantWide(rl_result.reg, 0x7fffffffffffffff);
+        NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg());
+        NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg());
+        LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA);
+        LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP);
+        NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+        LIR* branch_normal = NewLIR1(kX86Jmp8, 0);
+        branch_na_n->target = NewLIR0(kPseudoTargetLabel);
+        NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
+        branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel);
+        branch_normal->target = NewLIR0(kPseudoTargetLabel);
+        StoreValueWide(rl_dest, rl_result);
       } else {
         GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
       }
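
The inline FLOAT_TO_LONG and DOUBLE_TO_LONG sequences added above target the dex narrowing rules rather than raw cvttss2si/cvttsd2si behavior: NaN becomes 0, values at or above Long.MAX_VALUE saturate, and the truncating conversion already yields Long.MIN_VALUE for anything below the representable range. A rough standalone sketch of those rules, as a hypothetical helper rather than ART code:

  #include <cstdint>
  #include <cmath>

  int64_t FloatToLong(float f) {  // the double path is analogous
    if (std::isnan(f)) return 0;                                // kX86CondP branch
    if (f >= static_cast<float>(INT64_MAX)) return INT64_MAX;   // kX86CondA branch
    if (f <= static_cast<float>(INT64_MIN)) return INT64_MIN;   // truncation overflow pattern
    return static_cast<int64_t>(f);                             // kX86Cvttss2sqiRR
  }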
@@ -434,9 +480,14 @@
 void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) {
   RegLocation rl_result;
   rl_src = LoadValueWide(rl_src, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
-  OpRegCopy(rl_result.reg, rl_src.reg);
+  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  if (Gen64Bit()) {
+    LoadConstantWide(rl_result.reg, 0x8000000000000000);
+    OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
+  } else {
+    OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000);
+    OpRegCopy(rl_result.reg, rl_src.reg);
+  }
   StoreValueWide(rl_dest, rl_result);
 }
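
The 64-bit GenNegDouble path above works because adding 0x8000000000000000 to the raw 64-bit bit pattern flips only the sign bit (the carry out of bit 63 is discarded), which is exactly IEEE-754 negation and also handles NaN and signed zero correctly. A minimal standalone illustration, as a hypothetical helper rather than ART code:

  #include <cstdint>
  #include <cstring>

  double NegDouble(double d) {
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof(bits));
    bits += UINT64_C(0x8000000000000000);  // same effect as bits ^= sign bit
    std::memcpy(&d, &bits, sizeof(d));
    return d;
  }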
 
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 1cc16b9..d214b8d 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -31,6 +31,23 @@
  */
 void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
+  if (Gen64Bit()) {
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegReg(kOpXor, rl_result.reg, rl_result.reg);  // result = 0
+    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondNe);  // result = (src1 != src2) ? 1 : result
+    RegStorage temp_reg = AllocTemp();
+    OpRegReg(kOpNeg, temp_reg, rl_result.reg);
+    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
+    // result = (src1 < src2) ? -result : result
+    OpCondRegReg(kOpCmov, kCondLt, rl_result.reg, temp_reg);
+    StoreValue(rl_dest, rl_result);
+    FreeTemp(temp_reg);
+    return;
+  }
+
   FlushAllRegs();
   LockCallTemps();  // Prepare for explicit register usage
   RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
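
The branchless 64-bit sequence added above (xor, cmp, set8, neg, cmov) produces the -1/0/1 result that the dex cmp-long instruction requires. A standalone sketch of that contract, as a hypothetical helper rather than ART code:

  #include <cstdint>

  int CmpLong(int64_t src1, int64_t src2) {
    int result = (src1 != src2) ? 1 : 0;  // xor result, result; cmp; set8 on not-equal
    if (src1 < src2) result = -result;    // neg into a temp, then cmov on less-than
    return result;                        // -1 if less, 0 if equal, 1 if greater
  }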
@@ -108,7 +125,7 @@
   }
   if (r_dest.IsFloat() || r_src.IsFloat())
     return OpFpRegCopy(r_dest, r_src);
-  LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR,
+  LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
                     r_dest.GetReg(), r_src.GetReg());
   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
     res->flags.is_nop = true;
@@ -133,36 +150,51 @@
       } else {
         // TODO: Prevent this from happening in the code. The result is often
         // unused or could have been loaded more easily from memory.
-        NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
-        RegStorage r_tmp = AllocTempDouble();
-        NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
-        NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
-        FreeTemp(r_tmp);
+        if (!r_src.IsPair()) {
+          DCHECK(!r_dest.IsPair());
+          NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
+        } else {
+          NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
+          RegStorage r_tmp = AllocTempDouble();
+          NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
+          NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
+          FreeTemp(r_tmp);
+        }
       }
     } else {
       if (src_fp) {
-        NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
-        RegStorage temp_reg = AllocTempDouble();
-        NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
-        NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
-        NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
-      } else {
-        DCHECK(r_dest.IsPair());
-        DCHECK(r_src.IsPair());
-        // Handle overlap
-        if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) {
-          // Deal with cycles.
-          RegStorage temp_reg = AllocTemp();
-          OpRegCopy(temp_reg, r_dest.GetHigh());
-          OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
-          OpRegCopy(r_dest.GetLow(), temp_reg);
-          FreeTemp(temp_reg);
-        } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+        if (!r_dest.IsPair()) {
+          DCHECK(!r_src.IsPair());
+          NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
         } else {
-          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
+          RegStorage temp_reg = AllocTempDouble();
+          NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
+          NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
+          NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
+        }
+      } else {
+        DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
+        if (!r_src.IsPair()) {
+          // Just copy the register directly.
+          OpRegCopy(r_dest, r_src);
+        } else {
+          // Handle overlap
+          if (r_src.GetHighReg() == r_dest.GetLowReg() &&
+              r_src.GetLowReg() == r_dest.GetHighReg()) {
+            // Deal with cycles.
+            RegStorage temp_reg = AllocTemp();
+            OpRegCopy(temp_reg, r_dest.GetHigh());
+            OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
+            OpRegCopy(r_dest.GetLow(), temp_reg);
+            FreeTemp(temp_reg);
+          } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
+            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+          } else {
+            OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+            OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          }
         }
       }
     }
@@ -832,7 +864,11 @@
 
   // Address the start of the method
   RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-  LoadValueDirectFixed(rl_method, reg);
+  if (rl_method.wide) {
+    LoadValueDirectWideFixed(rl_method, reg);
+  } else {
+    LoadValueDirectFixed(rl_method, reg);
+  }
   store_method_addr_used_ = true;
 
   // Load the proper value from the literal area.
@@ -871,18 +907,23 @@
 }
 
 void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
-  DCHECK(reg.IsPair());  // TODO: allow 64BitSolo.
-  // We are not supposed to clobber the incoming storage, so allocate a temporary.
-  RegStorage t_reg = AllocTemp();
+  if (Gen64Bit()) {
+    DCHECK(reg.Is64Bit());
 
-  // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
-  OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
+    NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
+  } else {
+    DCHECK(reg.IsPair());
+
+    // We are not supposed to clobber the incoming storage, so allocate a temporary.
+    RegStorage t_reg = AllocTemp();
+    // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
+    OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
+    // The temp is no longer needed so free it at this time.
+    FreeTemp(t_reg);
+  }
 
   // In case of zero, throw ArithmeticException.
   GenDivZeroCheck(kCondEq);
-
-  // The temp is no longer needed so free it at this time.
-  FreeTemp(t_reg);
 }
 
 void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
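
As the comment above notes, on 32-bit targets OR-ing the two halves of a register pair is a one-instruction way to set the flags for a 64-bit zero test, while the Gen64Bit() path can simply compare the solo register against zero. A tiny standalone illustration with hypothetical helpers, not ART code:

  #include <cstdint>

  bool IsWideZeroPair(uint32_t lo, uint32_t hi) {
    return (lo | hi) == 0;  // one OR covers the whole 64-bit value
  }

  bool IsWideZeroSolo(uint64_t reg) {
    return reg == 0;        // single cmp reg, 0 on x86-64
  }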
@@ -1221,18 +1262,22 @@
   if (rl_src.location == kLocPhysReg) {
     // Both operands are in registers.
     // But we must ensure that rl_src is in pair
-    rl_src = LoadValueWide(rl_src, kCoreReg);
-    if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
-      // The registers are the same, so we would clobber it before the use.
-      RegStorage temp_reg = AllocTemp();
-      OpRegCopy(temp_reg, rl_dest.reg);
-      rl_src.reg.SetHighReg(temp_reg.GetReg());
-    }
-    NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
+    if (Gen64Bit()) {
+      NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
+    } else {
+      rl_src = LoadValueWide(rl_src, kCoreReg);
+      if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
+        // The registers are the same, so we would clobber it before the use.
+        RegStorage temp_reg = AllocTemp();
+        OpRegCopy(temp_reg, rl_dest.reg);
+        rl_src.reg.SetHighReg(temp_reg.GetReg());
+      }
+      NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
 
-    x86op = GetOpcode(op, rl_dest, rl_src, true);
-    NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
-    FreeTemp(rl_src.reg);
+      x86op = GetOpcode(op, rl_dest, rl_src, true);
+      NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
+      FreeTemp(rl_src.reg);  // ???
+    }
     return;
   }
 
@@ -1242,11 +1287,13 @@
   int r_base = TargetReg(kSp).GetReg();
   int displacement = SRegOffset(rl_src.s_reg_low);
 
-  LIR *lir = NewLIR3(x86op, rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET);
+  LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
+                     r_base, displacement + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
-  x86op = GetOpcode(op, rl_dest, rl_src, true);
-  lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
+  if (!Gen64Bit()) {
+    x86op = GetOpcode(op, rl_dest, rl_src, true);
+    lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
+  }
   AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
 }
@@ -1273,13 +1320,16 @@
   int r_base = TargetReg(kSp).GetReg();
   int displacement = SRegOffset(rl_dest.s_reg_low);
 
-  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, rl_src.reg.GetLowReg());
+  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
+                     Gen64Bit() ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           false /* is_load */, true /* is64bit */);
-  x86op = GetOpcode(op, rl_dest, rl_src, true);
-  lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
+  if (!Gen64Bit()) {
+    x86op = GetOpcode(op, rl_dest, rl_src, true);
+    lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
+  }
   AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
   AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
@@ -1330,23 +1380,44 @@
 
   // Get one of the source operands into temporary register.
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
-  if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
-    GenLongRegOrMemOp(rl_src1, rl_src2, op);
-  } else if (is_commutative) {
-    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-    // We need at least one of them to be a temporary.
-    if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
+  if (Gen64Bit()) {
+    if (IsTemp(rl_src1.reg)) {
+      GenLongRegOrMemOp(rl_src1, rl_src2, op);
+    } else if (is_commutative) {
+      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+      // We need at least one of them to be a temporary.
+      if (!IsTemp(rl_src2.reg)) {
+        rl_src1 = ForceTempWide(rl_src1);
+        GenLongRegOrMemOp(rl_src1, rl_src2, op);
+      } else {
+        GenLongRegOrMemOp(rl_src2, rl_src1, op);
+        StoreFinalValueWide(rl_dest, rl_src2);
+        return;
+      }
+    } else {
+      // Need LHS to be the temp.
       rl_src1 = ForceTempWide(rl_src1);
       GenLongRegOrMemOp(rl_src1, rl_src2, op);
-    } else {
-      GenLongRegOrMemOp(rl_src2, rl_src1, op);
-      StoreFinalValueWide(rl_dest, rl_src2);
-      return;
     }
   } else {
-    // Need LHS to be the temp.
-    rl_src1 = ForceTempWide(rl_src1);
-    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+    if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
+      GenLongRegOrMemOp(rl_src1, rl_src2, op);
+    } else if (is_commutative) {
+      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+      // We need at least one of them to be a temporary.
+      if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
+        rl_src1 = ForceTempWide(rl_src1);
+        GenLongRegOrMemOp(rl_src1, rl_src2, op);
+      } else {
+        GenLongRegOrMemOp(rl_src2, rl_src1, op);
+        StoreFinalValueWide(rl_dest, rl_src2);
+        return;
+      }
+    } else {
+      // Need LHS to be the temp.
+      rl_src1 = ForceTempWide(rl_src1);
+      GenLongRegOrMemOp(rl_src1, rl_src2, op);
+    }
   }
 
   StoreFinalValueWide(rl_dest, rl_src1);
@@ -1378,27 +1449,91 @@
 }
 
 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
-  LOG(FATAL) << "Unexpected use GenNotLong()";
+  if (Gen64Bit()) {
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    RegLocation rl_result;
+    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    OpRegCopy(rl_result.reg, rl_src.reg);
+    OpReg(kOpNot, rl_result.reg);
+    StoreValueWide(rl_dest, rl_result);
+  } else {
+    LOG(FATAL) << "Unexpected use GenNotLong()";
+  }
 }
 
 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2, bool is_div) {
-  LOG(FATAL) << "Unexpected use GenDivRemLong()";
+  if (!Gen64Bit()) {
+    LOG(FATAL) << "Unexpected use GenDivRemLong()";
+    return;
+  }
+
+  // We have to use fixed registers, so flush all the temps.
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Load LHS into RAX.
+  LoadValueDirectWideFixed(rl_src1, rs_r0q);
+
+  // Load RHS into RCX.
+  LoadValueDirectWideFixed(rl_src2, rs_r1q);
+
+  // Copy LHS sign bit into RDX.
+  NewLIR0(kx86Cqo64Da);
+
+  // Handle division by zero case.
+  GenDivZeroCheckWide(rs_r1q);
+
+  // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
+  NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
+  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // RHS is -1.
+  LoadConstantWide(rs_r3q, 0x8000000000000000);
+  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r3q.GetReg());
+  LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // In 0x8000000000000000/-1 case.
+  if (!is_div) {
+    // For DIV, RAX is already right. For REM, we need RDX 0.
+    NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
+  }
+  LIR* done = NewLIR1(kX86Jmp8, 0);
+
+  // Expected case.
+  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
+  minint_branch->target = minus_one_branch->target;
+  NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
+  done->target = NewLIR0(kPseudoTargetLabel);
+
+  // Result is in RAX for div and RDX for rem.
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
+  if (!is_div) {
+    rl_result.reg.SetReg(r2q);
+  }
+
+  StoreValueWide(rl_dest, rl_result);
 }
 
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = ForceTempWide(rl_src);
-  if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
-      ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
-    // The registers are the same, so we would clobber it before the use.
-    RegStorage temp_reg = AllocTemp();
-    OpRegCopy(temp_reg, rl_result.reg);
-    rl_result.reg.SetHighReg(temp_reg.GetReg());
+  RegLocation rl_result;
+  if (Gen64Bit()) {
+    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
+  } else {
+    rl_result = ForceTempWide(rl_src);
+    if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
+        ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
+      // The registers are the same, so we would clobber it before the use.
+      RegStorage temp_reg = AllocTemp();
+      OpRegCopy(temp_reg, rl_result.reg);
+      rl_result.reg.SetHighReg(temp_reg.GetReg());
+    }
+    OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
+    OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
+    OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
   }
-  OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
-  OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
-  OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
   StoreValueWide(rl_dest, rl_result);
 }
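
The 64-bit GenDivRemLong path above has to pre-filter the one operand pair that makes idiv fault: x86 raises a divide error on Long.MIN_VALUE / -1, whereas the dex semantics define that quotient as Long.MIN_VALUE and the remainder as 0 (division by zero is thrown separately via GenDivZeroCheckWide). A standalone sketch of the contract the generated code implements, as a hypothetical helper rather than ART code:

  #include <cstdint>

  int64_t DivRemLong(int64_t lhs, int64_t rhs, bool is_div) {
    // rhs == 0 has already thrown ArithmeticException at this point.
    if (rhs == -1 && lhs == INT64_MIN) {
      return is_div ? INT64_MIN : 0;  // idiv would trap on this pair
    }
    return is_div ? lhs / rhs : lhs % rhs;  // quotient in RAX, remainder in RDX after idiv
  }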
 
@@ -1551,60 +1686,84 @@
 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                           RegLocation rl_src, int shift_amount) {
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
-  switch (opcode) {
-    case Instruction::SHL_LONG:
-    case Instruction::SHL_LONG_2ADDR:
-      DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-        LoadConstant(rl_result.reg.GetLow(), 0);
-      } else if (shift_amount > 31) {
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
-        NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
-        LoadConstant(rl_result.reg.GetLow(), 0);
-      } else {
-        OpRegCopy(rl_result.reg, rl_src.reg);
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), shift_amount);
-        NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
-      }
-      break;
-    case Instruction::SHR_LONG:
-    case Instruction::SHR_LONG_2ADDR:
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
-      } else if (shift_amount > 31) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
-        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
-      } else {
-        OpRegCopy(rl_result.reg, rl_src.reg);
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount);
-        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
-      }
-      break;
-    case Instruction::USHR_LONG:
-    case Instruction::USHR_LONG_2ADDR:
-      if (shift_amount == 32) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        LoadConstant(rl_result.reg.GetHigh(), 0);
-      } else if (shift_amount > 31) {
-        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
-        NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
-        LoadConstant(rl_result.reg.GetHigh(), 0);
-      } else {
-        OpRegCopy(rl_result.reg, rl_src.reg);
-        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
-        NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount);
-        NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
-      }
-      break;
-    default:
-      LOG(FATAL) << "Unexpected case";
+  if (Gen64Bit()) {
+    OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
+    switch (opcode) {
+      case Instruction::SHL_LONG:
+      case Instruction::SHL_LONG_2ADDR:
+        op = kOpLsl;
+        break;
+      case Instruction::SHR_LONG:
+      case Instruction::SHR_LONG_2ADDR:
+        op = kOpAsr;
+        break;
+      case Instruction::USHR_LONG:
+      case Instruction::USHR_LONG_2ADDR:
+        op = kOpLsr;
+        break;
+      default:
+        LOG(FATAL) << "Unexpected case";
+    }
+    OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
+  } else {
+    switch (opcode) {
+      case Instruction::SHL_LONG:
+      case Instruction::SHL_LONG_2ADDR:
+        DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
+        if (shift_amount == 32) {
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
+          LoadConstant(rl_result.reg.GetLow(), 0);
+        } else if (shift_amount > 31) {
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
+          NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
+          LoadConstant(rl_result.reg.GetLow(), 0);
+        } else {
+          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
+                  shift_amount);
+          NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
+        }
+        break;
+      case Instruction::SHR_LONG:
+      case Instruction::SHR_LONG_2ADDR:
+        if (shift_amount == 32) {
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
+        } else if (shift_amount > 31) {
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
+          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
+        } else {
+          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
+                  shift_amount);
+          NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
+        }
+        break;
+      case Instruction::USHR_LONG:
+      case Instruction::USHR_LONG_2ADDR:
+        if (shift_amount == 32) {
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+          LoadConstant(rl_result.reg.GetHigh(), 0);
+        } else if (shift_amount > 31) {
+          OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
+          NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
+          LoadConstant(rl_result.reg.GetHigh(), 0);
+        } else {
+          OpRegCopy(rl_result.reg, rl_src.reg);
+          OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
+          NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
+                  shift_amount);
+          NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
+        }
+        break;
+      default:
+        LOG(FATAL) << "Unexpected case";
+    }
   }
   return rl_result;
 }
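
A standalone sketch (not part of the patch) of the pair-wise decomposition used above for a constant long left shift on 32-bit x86: a shift of exactly 32 moves the low word into the high word, larger shifts add a 32-bit shift on top, and shifts below 32 combine SHLD with a plain shift of the low word. Assuming a constant shift in [1, 63], this agrees with a native 64-bit shift:

#include <cassert>
#include <cstdint>

// Mirrors the SHL_LONG branches above on a (lo, hi) register pair.
static uint64_t ShlLongViaPair(uint32_t lo, uint32_t hi, int shift) {
  uint32_t res_lo, res_hi;
  if (shift == 32) {
    res_hi = lo;                        // OpRegCopy(high, low)
    res_lo = 0;                         // LoadConstant(low, 0)
  } else if (shift > 31) {
    res_hi = lo << (shift - 32);        // copy low to high, then kX86Sal32RI
    res_lo = 0;
  } else {
    res_hi = (hi << shift) | (lo >> (32 - shift));  // kX86Shld32RRI
    res_lo = lo << shift;                           // kX86Sal32RI
  }
  return (static_cast<uint64_t>(res_hi) << 32) | res_lo;
}

int main() {
  uint64_t v = 0x123456789ABCDEF0ULL;
  for (int s = 1; s < 64; ++s) {
    assert(ShlLongViaPair(static_cast<uint32_t>(v),
                          static_cast<uint32_t>(v >> 32), s) == (v << s));
  }
  return 0;
}
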
@@ -1634,24 +1793,26 @@
 
 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+  bool isConstSuccess = false;
   switch (opcode) {
     case Instruction::ADD_LONG:
     case Instruction::AND_LONG:
     case Instruction::OR_LONG:
     case Instruction::XOR_LONG:
       if (rl_src2.is_const) {
-        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
       } else {
         DCHECK(rl_src1.is_const);
-        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
       }
       break;
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (rl_src2.is_const) {
-        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+        isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
       } else {
         GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
+        isConstSuccess = true;
       }
       break;
     case Instruction::ADD_LONG_2ADDR:
@@ -1660,20 +1821,24 @@
     case Instruction::AND_LONG_2ADDR:
       if (rl_src2.is_const) {
         if (GenerateTwoOperandInstructions()) {
-          GenLongImm(rl_dest, rl_src2, opcode);
+          isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
         } else {
-          GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+          isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
         }
       } else {
         DCHECK(rl_src1.is_const);
-        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+        isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
       }
       break;
     default:
-      // Default - bail to non-const handler.
-      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      isConstSuccess = false;
       break;
   }
+
+  if (!isConstSuccess) {
+    // Default - bail to non-const handler.
+    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  }
 }
 
 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
@@ -1695,40 +1860,50 @@
                                 bool is_high_op) {
   bool rhs_in_mem = rhs.location != kLocPhysReg;
   bool dest_in_mem = dest.location != kLocPhysReg;
+  bool is64Bit = Gen64Bit();
   DCHECK(!rhs_in_mem || !dest_in_mem);
   switch (op) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
       if (dest_in_mem) {
-        return is_high_op ? kX86Adc32MR : kX86Add32MR;
+        return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
       } else if (rhs_in_mem) {
-        return is_high_op ? kX86Adc32RM : kX86Add32RM;
+        return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
       }
-      return is_high_op ? kX86Adc32RR : kX86Add32RR;
+      return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (dest_in_mem) {
-        return is_high_op ? kX86Sbb32MR : kX86Sub32MR;
+        return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
       } else if (rhs_in_mem) {
-        return is_high_op ? kX86Sbb32RM : kX86Sub32RM;
+        return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
       }
-      return is_high_op ? kX86Sbb32RR : kX86Sub32RR;
+      return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
       if (dest_in_mem) {
-        return kX86And32MR;
+        return is64Bit ? kX86And64MR : kX86And32MR;
+      }
+      if (is64Bit) {
+        return rhs_in_mem ? kX86And64RM : kX86And64RR;
       }
       return rhs_in_mem ? kX86And32RM : kX86And32RR;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
       if (dest_in_mem) {
-        return kX86Or32MR;
+        return is64Bit ? kX86Or64MR : kX86Or32MR;
+      }
+      if (is64Bit) {
+        return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
       }
       return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
       if (dest_in_mem) {
-        return kX86Xor32MR;
+        return is64Bit ? kX86Xor64MR : kX86Xor32MR;
+      }
+      if (is64Bit) {
+        return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
       }
       return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
     default:
@@ -1740,6 +1915,7 @@
 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
                                 int32_t value) {
   bool in_mem = loc.location != kLocPhysReg;
+  bool is64Bit = Gen64Bit();
   bool byte_imm = IS_SIMM8(value);
   DCHECK(in_mem || !loc.reg.IsFloat());
   switch (op) {
@@ -1747,43 +1923,61 @@
     case Instruction::ADD_LONG_2ADDR:
       if (byte_imm) {
         if (in_mem) {
-          return is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
+          return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
         }
-        return is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
+        return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
       }
       if (in_mem) {
-        return is_high_op ? kX86Adc32MI : kX86Add32MI;
+        return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
       }
-      return is_high_op ? kX86Adc32RI : kX86Add32RI;
+      return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (byte_imm) {
         if (in_mem) {
-          return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
+          return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
         }
-        return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
+        return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
       }
       if (in_mem) {
-        return is_high_op ? kX86Sbb32MI : kX86Sub32MI;
+        return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
       }
-      return is_high_op ? kX86Sbb32RI : kX86Sub32RI;
+      return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
       if (byte_imm) {
+        if (is64Bit) {
+          return in_mem ? kX86And64MI8 : kX86And64RI8;
+        }
         return in_mem ? kX86And32MI8 : kX86And32RI8;
       }
+      if (is64Bit) {
+        return in_mem ? kX86And64MI : kX86And64RI;
+      }
       return in_mem ? kX86And32MI : kX86And32RI;
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
       if (byte_imm) {
+        if (is64Bit) {
+          return in_mem ? kX86Or64MI8 : kX86Or64RI8;
+        }
         return in_mem ? kX86Or32MI8 : kX86Or32RI8;
       }
+      if (is64Bit) {
+        return in_mem ? kX86Or64MI : kX86Or64RI;
+      }
       return in_mem ? kX86Or32MI : kX86Or32RI;
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
       if (byte_imm) {
+        if (is64Bit) {
+          return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
+        }
         return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
       }
+      if (is64Bit) {
+        return in_mem ? kX86Xor64MI : kX86Xor64RI;
+      }
       return in_mem ? kX86Xor32MI : kX86Xor32RI;
     default:
       LOG(FATAL) << "Unexpected opcode: " << op;
@@ -1791,9 +1985,43 @@
   }
 }
 
-void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
   DCHECK(rl_src.is_const);
   int64_t val = mir_graph_->ConstantValueWide(rl_src);
+
+  if (Gen64Bit()) {
+    // We can use the immediate form only if the value fits in 32 bits.
+    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
+      return false;
+    }
+
+    rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
+
+    if ((rl_dest.location == kLocDalvikFrame) ||
+        (rl_dest.location == kLocCompilerTemp)) {
+      int r_base = TargetReg(kSp).GetReg();
+      int displacement = SRegOffset(rl_dest.s_reg_low);
+
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
+      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
+      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                              true /* is_load */, true /* is64bit */);
+      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, true /* is64bit */);
+      return true;
+    }
+
+    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    DCHECK_EQ(rl_result.location, kLocPhysReg);
+    DCHECK(!rl_result.reg.IsFloat());
+
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
+    NewLIR2(x86op, rl_result.reg.GetReg(), val);
+
+    StoreValueWide(rl_dest, rl_result);
+    return true;
+  }
+
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
@@ -1820,7 +2048,7 @@
       AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                                 false /* is_load */, true /* is64bit */);
     }
-    return;
+    return true;
   }
 
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
@@ -1836,12 +2064,38 @@
     NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
   }
   StoreValueWide(rl_dest, rl_result);
+  return true;
 }
 
-void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
                                 RegLocation rl_src2, Instruction::Code op) {
   DCHECK(rl_src2.is_const);
   int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+
+  if (Gen64Bit()) {
+    // We can use the immediate form only if the value fits in 32 bits.
+    if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
+      return false;
+    }
+    if (rl_dest.location == kLocPhysReg &&
+        rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
+      NewLIR2(x86op, rl_dest.reg.GetReg(), val);
+      StoreFinalValueWide(rl_dest, rl_dest);
+      return true;
+    }
+
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    // We need the value to be in a temporary register.
+    RegLocation rl_result = ForceTempWide(rl_src1);
+
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val);
+    NewLIR2(x86op, rl_result.reg.GetReg(), val);
+
+    StoreFinalValueWide(rl_dest, rl_result);
+    return true;
+  }
+
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
@@ -1861,7 +2115,7 @@
     }
 
     StoreFinalValueWide(rl_dest, rl_dest);
-    return;
+    return true;
   }
 
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
@@ -1879,6 +2133,7 @@
   }
 
   StoreFinalValueWide(rl_dest, rl_result);
+  return true;
 }
 
 // For final classes there are no sub-classes to check and so we can answer the instance-of
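
Both 64-bit immediate paths above return false when the constant does not survive a round trip through int32_t, since x86-64 ALU immediates are at most 32 bits and are sign-extended; the caller then falls back to the non-const handler. A minimal sketch of that fit check:

#include <cassert>
#include <cstdint>

// True when 'val' can be encoded as a sign-extended 32-bit immediate,
// mirroring the Gen64Bit() bail-out in GenLongImm/GenLongLongImm.
static bool FitsInSignExtended32(int64_t val) {
  return val == static_cast<int64_t>(static_cast<int32_t>(val));
}

int main() {
  assert(FitsInSignExtended32(0));
  assert(FitsInSignExtended32(-1));             // all-ones encodes as imm32 -1
  assert(FitsInSignExtended32(INT32_MIN));
  assert(!FitsInSignExtended32(0x80000000LL));  // positive value needing bit 31
  assert(!FitsInSignExtended32(1LL << 40));
  return 0;
}
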
@@ -2239,7 +2494,8 @@
             // We should be careful with order here
             // If rl_dest and rl_lhs point to the same VR we should load first
             // If they are different we should find a register for dest first
-            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
+            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
+                mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
               rl_lhs = LoadValue(rl_lhs, kCoreReg);
               rl_result = EvalLoc(rl_dest, kCoreReg, true);
               // No-op if these are the same.
@@ -2289,4 +2545,82 @@
   // Everything will be fine :-).
   return true;
 }
+
+void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
+  if (!Gen64Bit()) {
+    Mir2Lir::GenIntToLong(rl_dest, rl_src);
+    return;
+  }
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (rl_src.location == kLocPhysReg) {
+    NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
+  } else {
+    int displacement = SRegOffset(rl_src.s_reg_low);
+    LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
+                     displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                        RegLocation rl_src1, RegLocation rl_shift) {
+  if (!Gen64Bit()) {
+    Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
+    return;
+  }
+
+  bool is_two_addr = false;
+  OpKind op = kOpBkpt;
+  RegLocation rl_result;
+
+  switch (opcode) {
+    case Instruction::SHL_LONG_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHL_LONG:
+      op = kOpLsl;
+      break;
+    case Instruction::SHR_LONG_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHR_LONG:
+      op = kOpAsr;
+      break;
+    case Instruction::USHR_LONG_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::USHR_LONG:
+      op = kOpLsr;
+      break;
+    default:
+      op = kOpBkpt;
+  }
+
+  // X86 doesn't require masking and must use ECX.
+  RegStorage t_reg = TargetReg(kCount);  // rCX
+  LoadValueDirectFixed(rl_shift, t_reg);
+  if (is_two_addr) {
+    // Can we do this directly into memory?
+    rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
+    if (rl_result.location != kLocPhysReg) {
+      // Okay, we can do this into memory
+      OpMemReg(op, rl_result, t_reg.GetReg());
+    } else if (!rl_result.reg.IsFloat()) {
+      // Can do this directly into the result register
+      OpRegReg(op, rl_result.reg, t_reg);
+      StoreFinalValueWide(rl_dest, rl_result);
+    }
+  } else {
+    // Three address form, or we can't do directly.
+    rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+    OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
+    StoreFinalValueWide(rl_dest, rl_result);
+  }
+
+  FreeTemp(t_reg);
+}
+
 }  // namespace art
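
For the variable-count case, GenShiftOpLong above loads the shift count into rCX because the x86 shift-by-register forms take their count in CL. No explicit masking is emitted: with a 64-bit operand the hardware masks the count to its low 6 bits, which matches the Java semantics for long shifts. A small standard-C++ sketch (not the generated code) of that equivalence:

#include <cassert>
#include <cstdint>

// Java's SHL_LONG uses only the low 6 bits of the count, just like a 64-bit
// x86 shift with the count in CL, so kOpLsl/kOpLsr/kOpAsr need no extra AND.
static int64_t JavaShlLong(int64_t v, int32_t count) {
  return v << (count & 63);
}

int main() {
  int64_t v = 0x0123456789ABCDEFLL;
  assert(JavaShlLong(v, 64) == v);       // count 64 behaves like 0
  assert(JavaShlLong(v, 65) == v << 1);  // count 65 behaves like 1
  assert(JavaShlLong(v, 3) == v << 3);
  return 0;
}
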
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 4d8fd1b..1ac15a2 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -132,10 +132,18 @@
 X86NativeRegisterPool rX86_ARG1;
 X86NativeRegisterPool rX86_ARG2;
 X86NativeRegisterPool rX86_ARG3;
+#ifdef TARGET_REX_SUPPORT
+X86NativeRegisterPool rX86_ARG4;
+X86NativeRegisterPool rX86_ARG5;
+#endif
 X86NativeRegisterPool rX86_FARG0;
 X86NativeRegisterPool rX86_FARG1;
 X86NativeRegisterPool rX86_FARG2;
 X86NativeRegisterPool rX86_FARG3;
+X86NativeRegisterPool rX86_FARG4;
+X86NativeRegisterPool rX86_FARG5;
+X86NativeRegisterPool rX86_FARG6;
+X86NativeRegisterPool rX86_FARG7;
 X86NativeRegisterPool rX86_RET0;
 X86NativeRegisterPool rX86_RET1;
 X86NativeRegisterPool rX86_INVOKE_TGT;
@@ -145,10 +153,16 @@
 RegStorage rs_rX86_ARG1;
 RegStorage rs_rX86_ARG2;
 RegStorage rs_rX86_ARG3;
+RegStorage rs_rX86_ARG4;
+RegStorage rs_rX86_ARG5;
 RegStorage rs_rX86_FARG0;
 RegStorage rs_rX86_FARG1;
 RegStorage rs_rX86_FARG2;
 RegStorage rs_rX86_FARG3;
+RegStorage rs_rX86_FARG4;
+RegStorage rs_rX86_FARG5;
+RegStorage rs_rX86_FARG6;
+RegStorage rs_rX86_FARG7;
 RegStorage rs_rX86_RET0;
 RegStorage rs_rX86_RET1;
 RegStorage rs_rX86_INVOKE_TGT;
@@ -164,7 +178,7 @@
 }
 
 RegLocation X86Mir2Lir::LocCReturnWide() {
-  return x86_loc_c_return_wide;
+  return Gen64Bit() ? x86_64_loc_c_return_wide : x86_loc_c_return_wide;
 }
 
 RegLocation X86Mir2Lir::LocCReturnFloat() {
@@ -188,35 +202,27 @@
     case kArg1: res_reg = rs_rX86_ARG1; break;
     case kArg2: res_reg = rs_rX86_ARG2; break;
     case kArg3: res_reg = rs_rX86_ARG3; break;
+    case kArg4: res_reg = rs_rX86_ARG4; break;
+    case kArg5: res_reg = rs_rX86_ARG5; break;
     case kFArg0: res_reg = rs_rX86_FARG0; break;
     case kFArg1: res_reg = rs_rX86_FARG1; break;
     case kFArg2: res_reg = rs_rX86_FARG2; break;
     case kFArg3: res_reg = rs_rX86_FARG3; break;
+    case kFArg4: res_reg = rs_rX86_FARG4; break;
+    case kFArg5: res_reg = rs_rX86_FARG5; break;
+    case kFArg6: res_reg = rs_rX86_FARG6; break;
+    case kFArg7: res_reg = rs_rX86_FARG7; break;
     case kRet0: res_reg = rs_rX86_RET0; break;
     case kRet1: res_reg = rs_rX86_RET1; break;
     case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
     case kHiddenArg: res_reg = rs_rAX; break;
     case kHiddenFpArg: res_reg = rs_fr0; break;
     case kCount: res_reg = rs_rX86_COUNT; break;
+    default: res_reg = RegStorage::InvalidReg();
   }
   return res_reg;
 }
 
-RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
-  // TODO: This is not 64-bit compliant and depends on new internal ABI.
-  switch (arg_num) {
-    case 0:
-      return rs_rX86_ARG1;
-    case 1:
-      return rs_rX86_ARG2;
-    case 2:
-      return rs_rX86_ARG3;
-    default:
-      return RegStorage::InvalidReg();
-  }
-}
-
 /*
  * Decode the register id.
  */
@@ -482,6 +488,20 @@
   LockTemp(rs_rX86_ARG1);
   LockTemp(rs_rX86_ARG2);
   LockTemp(rs_rX86_ARG3);
+#ifdef TARGET_REX_SUPPORT
+  if (Gen64Bit()) {
+    LockTemp(rs_rX86_ARG4);
+    LockTemp(rs_rX86_ARG5);
+    LockTemp(rs_rX86_FARG0);
+    LockTemp(rs_rX86_FARG1);
+    LockTemp(rs_rX86_FARG2);
+    LockTemp(rs_rX86_FARG3);
+    LockTemp(rs_rX86_FARG4);
+    LockTemp(rs_rX86_FARG5);
+    LockTemp(rs_rX86_FARG6);
+    LockTemp(rs_rX86_FARG7);
+  }
+#endif
 }
 
 /* To be used when explicitly managing register use */
@@ -490,6 +510,20 @@
   FreeTemp(rs_rX86_ARG1);
   FreeTemp(rs_rX86_ARG2);
   FreeTemp(rs_rX86_ARG3);
+#ifdef TARGET_REX_SUPPORT
+  if (Gen64Bit()) {
+    FreeTemp(rs_rX86_ARG4);
+    FreeTemp(rs_rX86_ARG5);
+    FreeTemp(rs_rX86_FARG0);
+    FreeTemp(rs_rX86_FARG1);
+    FreeTemp(rs_rX86_FARG2);
+    FreeTemp(rs_rX86_FARG3);
+    FreeTemp(rs_rX86_FARG4);
+    FreeTemp(rs_rX86_FARG5);
+    FreeTemp(rs_rX86_FARG6);
+    FreeTemp(rs_rX86_FARG7);
+  }
+#endif
 }
 
 bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
@@ -653,6 +687,14 @@
 }
 
 RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
+  // X86_64 can handle any size.
+  if (Gen64Bit()) {
+    if (size == kReference) {
+      return kRefReg;
+    }
+    return kCoreReg;
+  }
+
   if (UNLIKELY(is_volatile)) {
     // On x86, atomic 64-bit load/store requires an fp register.
     // Smaller aligned load/store is atomic for both core and fp registers.
@@ -688,11 +730,37 @@
     rs_rX86_ARG1 = rs_rSI;
     rs_rX86_ARG2 = rs_rDX;
     rs_rX86_ARG3 = rs_rCX;
+#ifdef TARGET_REX_SUPPORT
+    rs_rX86_ARG4 = rs_r8;
+    rs_rX86_ARG5 = rs_r9;
+#else
+    rs_rX86_ARG4 = RegStorage::InvalidReg();
+    rs_rX86_ARG5 = RegStorage::InvalidReg();
+#endif
+    rs_rX86_FARG0 = rs_fr0;
+    rs_rX86_FARG1 = rs_fr1;
+    rs_rX86_FARG2 = rs_fr2;
+    rs_rX86_FARG3 = rs_fr3;
+    rs_rX86_FARG4 = rs_fr4;
+    rs_rX86_FARG5 = rs_fr5;
+    rs_rX86_FARG6 = rs_fr6;
+    rs_rX86_FARG7 = rs_fr7;
     rX86_ARG0 = rDI;
     rX86_ARG1 = rSI;
     rX86_ARG2 = rDX;
     rX86_ARG3 = rCX;
-    // TODO: ARG4(r8), ARG5(r9), floating point args.
+#ifdef TARGET_REX_SUPPORT
+    rX86_ARG4 = r8;
+    rX86_ARG5 = r9;
+#endif
+    rX86_FARG0 = fr0;
+    rX86_FARG1 = fr1;
+    rX86_FARG2 = fr2;
+    rX86_FARG3 = fr3;
+    rX86_FARG4 = fr4;
+    rX86_FARG5 = fr5;
+    rX86_FARG6 = fr6;
+    rX86_FARG7 = fr7;
   } else {
     rs_rX86_SP = rs_rX86_SP_32;
 
@@ -700,23 +768,32 @@
     rs_rX86_ARG1 = rs_rCX;
     rs_rX86_ARG2 = rs_rDX;
     rs_rX86_ARG3 = rs_rBX;
+    rs_rX86_ARG4 = RegStorage::InvalidReg();
+    rs_rX86_ARG5 = RegStorage::InvalidReg();
+    rs_rX86_FARG0 = rs_rAX;
+    rs_rX86_FARG1 = rs_rCX;
+    rs_rX86_FARG2 = rs_rDX;
+    rs_rX86_FARG3 = rs_rBX;
+    rs_rX86_FARG4 = RegStorage::InvalidReg();
+    rs_rX86_FARG5 = RegStorage::InvalidReg();
+    rs_rX86_FARG6 = RegStorage::InvalidReg();
+    rs_rX86_FARG7 = RegStorage::InvalidReg();
     rX86_ARG0 = rAX;
     rX86_ARG1 = rCX;
     rX86_ARG2 = rDX;
     rX86_ARG3 = rBX;
+    rX86_FARG0 = rAX;
+    rX86_FARG1 = rCX;
+    rX86_FARG2 = rDX;
+    rX86_FARG3 = rBX;
+    // TODO(64): Initialize with invalid reg
+//    rX86_ARG4 = RegStorage::InvalidReg();
+//    rX86_ARG5 = RegStorage::InvalidReg();
   }
-  rs_rX86_FARG0 = rs_rAX;
-  rs_rX86_FARG1 = rs_rCX;
-  rs_rX86_FARG2 = rs_rDX;
-  rs_rX86_FARG3 = rs_rBX;
   rs_rX86_RET0 = rs_rAX;
   rs_rX86_RET1 = rs_rDX;
   rs_rX86_INVOKE_TGT = rs_rAX;
   rs_rX86_COUNT = rs_rCX;
-  rX86_FARG0 = rAX;
-  rX86_FARG1 = rCX;
-  rX86_FARG2 = rDX;
-  rX86_FARG3 = rBX;
   rX86_RET0 = rAX;
   rX86_RET1 = rDX;
   rX86_INVOKE_TGT = rAX;
@@ -1356,7 +1433,11 @@
 
   // Address the start of the method.
   RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-  rl_method = LoadValue(rl_method, kCoreReg);
+  if (rl_method.wide) {
+    rl_method = LoadValueWide(rl_method, kCoreReg);
+  } else {
+    rl_method = LoadValue(rl_method, kCoreReg);
+  }
 
   // Load the proper value from the literal area.
   // We don't know the proper offset for the value, so pick one that will force
@@ -1676,4 +1757,458 @@
   return new_value;
 }
 
+// ------------ ABI support: mapping of args to physical registers -------------
+RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) {
+  const RegStorage coreArgMappingToPhysicalReg[] = {rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3, rs_rX86_ARG4, rs_rX86_ARG5};
+  const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
+  const RegStorage fpArgMappingToPhysicalReg[] = {rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3,
+                                                  rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7};
+  const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);
+
+  RegStorage result = RegStorage::InvalidReg();
+  if (is_double_or_float) {
+    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+      result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
+      if (result.Valid()) {
+        result = is_wide ? RegStorage::FloatSolo64(result.GetReg()) : RegStorage::FloatSolo32(result.GetReg());
+      }
+    }
+  } else {
+    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = coreArgMappingToPhysicalReg[cur_core_reg_++];
+      if (result.Valid()) {
+        result = is_wide ? RegStorage::Solo64(result.GetReg()) : RegStorage::Solo32(result.GetReg());
+      }
+    }
+  }
+  return result;
+}
+
+RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+  DCHECK(IsInitialized());
+  auto res = mapping_.find(in_position);
+  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+}
+
+void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) {
+  DCHECK(mapper != nullptr);
+  max_mapped_in_ = -1;
+  is_there_stack_mapped_ = false;
+  for (int in_position = 0; in_position < count; in_position++) {
+     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
+     if (reg.Valid()) {
+       mapping_[in_position] = reg;
+       max_mapped_in_ = std::max(max_mapped_in_, in_position);
+       if (reg.Is64BitSolo()) {
+         // We covered 2 args, so skip the next one
+         in_position++;
+       }
+     } else {
+       is_there_stack_mapped_ = true;
+     }
+  }
+  initialized_ = true;
+}
+
+RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  if (!Gen64Bit()) {
+    return GetCoreArgMappingToPhysicalReg(arg_num);
+  }
+
+  if (!in_to_reg_storage_mapping_.IsInitialized()) {
+    int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
+
+    InToRegStorageX86_64Mapper mapper;
+    in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper);
+  }
+  return in_to_reg_storage_mapping_.Get(arg_num);
+}
+
+RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) {
+  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
+  // Not used for 64-bit. TODO: Move X86_32 to the same framework.
+  switch (core_arg_num) {
+    case 0:
+      return rs_rX86_ARG1;
+    case 1:
+      return rs_rX86_ARG2;
+    case 2:
+      return rs_rX86_ARG3;
+    default:
+      return RegStorage::InvalidReg();
+  }
+}
+
+// ---------End of ABI support: mapping of args to physical registers -------------
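
The mapper above keeps two independent cursors, one over the integer argument registers (ARG1..ARG5) and one over the FP argument registers (FARG0..FARG7); a wide core argument takes a single 64-bit register and Initialize() then skips the second VR word. A simplified sketch of the round-robin assignment, using hypothetical string names in place of RegStorage:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical names for illustration only; the real mapper returns
// rs_rX86_ARG1..5 and rs_rX86_FARG0..7 as RegStorage values.
static const std::vector<std::string> kCoreArgs = {"ARG1", "ARG2", "ARG3", "ARG4", "ARG5"};
static const std::vector<std::string> kFpArgs   = {"XMM0", "XMM1", "XMM2", "XMM3",
                                                   "XMM4", "XMM5", "XMM6", "XMM7"};

struct ArgMapper {
  size_t cur_core = 0;
  size_t cur_fp = 0;
  // Returns the register for the next argument, or "" if it goes on the stack.
  std::string GetNextReg(bool is_fp) {
    if (is_fp) {
      return cur_fp < kFpArgs.size() ? kFpArgs[cur_fp++] : "";
    }
    return cur_core < kCoreArgs.size() ? kCoreArgs[cur_core++] : "";
  }
};

int main() {
  // (int, double, long, int, int, int, int): core and FP args draw from
  // separate pools, as in InToRegStorageX86_64Mapper.
  ArgMapper m;
  const bool kIsFp[] = {false, true, false, false, false, false, false};
  for (bool is_fp : kIsFp) {
    std::string r = m.GetNextReg(is_fp);
    std::cout << (r.empty() ? "<stack>" : r) << "\n";
  }
  return 0;
}
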
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame.  Perform initial
+ * assignment of promoted arguments.
+ *
+ * ArgLocs is an array of location records describing the incoming arguments
+ * with one location record per word of argument.
+ */
+void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
+  if (!Gen64Bit()) return Mir2Lir::FlushIns(ArgLocs, rl_method);
+  /*
+   * Dummy up a RegLocation for the incoming Method*
+   * It will attempt to keep kArg0 live (or copy it to home location
+   * if promoted).
+   */
+
+  RegLocation rl_src = rl_method;
+  rl_src.location = kLocPhysReg;
+  rl_src.reg = TargetReg(kArg0);
+  rl_src.home = false;
+  MarkLive(rl_src);
+  StoreValue(rl_method, rl_src);
+  // If Method* has been promoted, explicitly flush
+  if (rl_method.location == kLocPhysReg) {
+    StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+  }
+
+  if (cu_->num_ins == 0) {
+    return;
+  }
+
+  int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+  /*
+   * Copy incoming arguments to their proper home locations.
+   * NOTE: an older version of dx had an issue in which
+   * it would reuse static method argument registers.
+   * This could result in the same Dalvik virtual register
+   * being promoted to both core and fp regs. To account for this,
+   * we only copy to the corresponding promoted physical register
+   * if it matches the type of the SSA name for the incoming
+   * argument.  It is also possible that long and double arguments
+   * end up half-promoted.  In those cases, we must flush the promoted
+   * half to memory as well.
+   */
+  for (int i = 0; i < cu_->num_ins; i++) {
+    PromotionMap* v_map = &promotion_map_[start_vreg + i];
+    RegStorage reg = RegStorage::InvalidReg();
+    // get reg corresponding to input
+    reg = GetArgMappingToPhysicalReg(i);
+
+    if (reg.Valid()) {
+      // If arriving in register
+      bool need_flush = true;
+      RegLocation* t_loc = &ArgLocs[i];
+      if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+        need_flush = false;
+      } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+        OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+        need_flush = false;
+      } else {
+        need_flush = true;
+      }
+
+      // For wide args, force flush if not fully promoted
+      if (t_loc->wide) {
+        PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
+        // Is only half promoted?
+        need_flush |= (p_map->core_location != v_map->core_location) ||
+            (p_map->fp_location != v_map->fp_location);
+      }
+      if (need_flush) {
+        if (t_loc->wide && t_loc->fp) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64);
+          // Increment i to skip the next one
+          i++;
+        } else if (t_loc->wide && !t_loc->fp) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64);
+          // Increment i to skip the next one
+          i++;
+        } else {
+          Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg);
+        }
+      }
+    } else {
+      // If arriving in frame & promoted
+      if (v_map->core_location == kLocPhysReg) {
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
+      }
+      if (v_map->fp_location == kLocPhysReg) {
+        Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+      }
+    }
+  }
+}
+
+/*
+ * Load up to 5 arguments, the first three of which will be in
+ * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
+ * and as part of the load sequence, it must be replaced with
+ * the target method pointer.  Note, this may also be called
+ * for "range" variants if the number of arguments is 5 or fewer.
+ */
+int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
+                                  int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
+                                  const MethodReference& target_method,
+                                  uint32_t vtable_idx, uintptr_t direct_code,
+                                  uintptr_t direct_method, InvokeType type, bool skip_this) {
+  if (!Gen64Bit()) {
+    return Mir2Lir::GenDalvikArgsNoRange(info,
+                                  call_state, pcrLabel, next_call_insn,
+                                  target_method,
+                                  vtable_idx, direct_code,
+                                  direct_method, type, skip_this);
+  }
+  return GenDalvikArgsRange(info,
+                       call_state, pcrLabel, next_call_insn,
+                       target_method,
+                       vtable_idx, direct_code,
+                       direct_method, type, skip_this);
+}
+
+/*
+ * May have 0+ arguments (also used for jumbo).  Note that
+ * source virtual registers may be in physical registers, so may
+ * need to be flushed to home location before copying.  This
+ * applies to arg3 and above (see below).
+ *
+ * Two general strategies:
+ *    If < 20 arguments
+ *       Pass args 3-18 using vldm/vstm block copy
+ *       Pass arg0, arg1 & arg2 in kArg1-kArg3
+ *    If 20+ arguments
+ *       Pass args arg19+ using memcpy block copy
+ *       Pass arg0, arg1 & arg2 in kArg1-kArg3
+ *
+ */
+int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
+                                LIR** pcrLabel, NextCallInsn next_call_insn,
+                                const MethodReference& target_method,
+                                uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
+                                InvokeType type, bool skip_this) {
+  if (!Gen64Bit()) {
+    return Mir2Lir::GenDalvikArgsRange(info, call_state,
+                                pcrLabel, next_call_insn,
+                                target_method,
+                                vtable_idx, direct_code, direct_method,
+                                type, skip_this);
+  }
+
+  /* If no arguments, just return */
+  if (info->num_arg_words == 0)
+    return call_state;
+
+  const int start_index = skip_this ? 1 : 0;
+
+  InToRegStorageX86_64Mapper mapper;
+  InToRegStorageMapping in_to_reg_storage_mapping;
+  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
+  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
+  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
+          in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
+  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
+
+  // First of all, check whether it makes sense to use bulk copying.
+  // The optimization is applicable only to the range case.
+  // TODO: make a constant instead of 2
+  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
+    // Scan the rest of the args - if in phys_reg flush to memory
+    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
+      RegLocation loc = info->args[next_arg];
+      if (loc.wide) {
+        loc = UpdateLocWide(loc);
+        if (loc.location == kLocPhysReg) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
+        }
+        next_arg += 2;
+      } else {
+        loc = UpdateLoc(loc);
+        if (loc.location == kLocPhysReg) {
+          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32);
+        }
+        next_arg++;
+      }
+    }
+
+    // Logic below assumes that Method pointer is at offset zero from SP.
+    DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+    // The rest can be copied together
+    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set);
+
+    int current_src_offset = start_offset;
+    int current_dest_offset = outs_offset;
+
+    while (regs_left_to_pass_via_stack > 0) {
+      // This is based on the knowledge that the stack itself is 16-byte aligned.
+      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+      size_t bytes_to_move;
+
+      /*
+       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a
+       * 128-bit move because we won't get another chance to try to align. If there are more than
+       * 4 registers left to move, consider doing a 128-bit move only if either src or dest is aligned.
+       * We do this because we could potentially do a smaller move to align.
+       */
+      if (regs_left_to_pass_via_stack == 4 ||
+          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+        // Moving 128-bits via xmm register.
+        bytes_to_move = sizeof(uint32_t) * 4;
+
+        // Allocate a free xmm temp. Since we are working through the calling sequence,
+        // we expect to have an xmm temporary available.  AllocTempDouble will abort if
+        // there are no free registers.
+        RegStorage temp = AllocTempDouble();
+
+        LIR* ld1 = nullptr;
+        LIR* ld2 = nullptr;
+        LIR* st1 = nullptr;
+        LIR* st2 = nullptr;
+
+        /*
+         * The logic is similar for both loads and stores. If we have 16-byte alignment,
+         * do an aligned move. If we have 8-byte alignment, then do the move in two
+         * parts. This approach prevents possible cache line splits. Finally, fall back
+         * to doing an unaligned move. In most cases we likely won't split the cache
+         * line but we cannot prove it and thus take a conservative approach.
+         */
+        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+        if (src_is_16b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+        } else if (src_is_8b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
+          ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1),
+                            kMovHi128FP);
+        } else {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+        }
+
+        if (dest_is_16b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+        } else if (dest_is_8b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
+          st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1),
+                            temp, kMovHi128FP);
+        } else {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+        }
+
+        // TODO: If we could keep track of aliasing information for memory accesses that are wider
+        // than 64-bit, we wouldn't need to set up a barrier.
+        if (ld1 != nullptr) {
+          if (ld2 != nullptr) {
+            // For 64-bit load we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
+          } else {
+            // Set barrier for 128-bit load.
+            SetMemRefType(ld1, true /* is_load */, kDalvikReg);
+            ld1->u.m.def_mask = ENCODE_ALL;
+          }
+        }
+        if (st1 != nullptr) {
+          if (st2 != nullptr) {
+            // For 64-bit store we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
+          } else {
+            // Set barrier for 128-bit store.
+            SetMemRefType(st1, false /* is_load */, kDalvikReg);
+            st1->u.m.def_mask = ENCODE_ALL;
+          }
+        }
+
+        // Free the temporary used for the data movement.
+        FreeTemp(temp);
+      } else {
+        // Moving 32-bits via general purpose register.
+        bytes_to_move = sizeof(uint32_t);
+
+        // Instead of allocating a new temp, simply reuse one of the registers being used
+        // for argument passing.
+        RegStorage temp = TargetReg(kArg3);
+
+        // Now load the argument VR and store to the outs.
+        Load32Disp(TargetReg(kSp), current_src_offset, temp);
+        Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+      }
+
+      current_src_offset += bytes_to_move;
+      current_dest_offset += bytes_to_move;
+      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+    }
+    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
+  }
+
+  // Now handle any remaining arguments that were not mapped to registers.
+  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
+    RegStorage regSingle = TargetReg(kArg2);
+    RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
+    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
+      RegLocation rl_arg = info->args[i];
+      rl_arg = UpdateRawLoc(rl_arg);
+      RegStorage reg = in_to_reg_storage_mapping.Get(i);
+      if (!reg.Valid()) {
+        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+
+        if (rl_arg.wide) {
+          if (rl_arg.location == kLocPhysReg) {
+            StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64);
+          } else {
+            LoadValueDirectWideFixed(rl_arg, regWide);
+            StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64);
+          }
+          i++;
+        } else {
+          if (rl_arg.location == kLocPhysReg) {
+            StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32);
+          } else {
+            LoadValueDirectFixed(rl_arg, regSingle);
+            StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32);
+          }
+        }
+        call_state = next_call_insn(cu_, info, call_state, target_method,
+                                    vtable_idx, direct_code, direct_method, type);
+      }
+    }
+  }
+
+  // Finish with mapped registers
+  for (int i = start_index; i <= last_mapped_in; i++) {
+    RegLocation rl_arg = info->args[i];
+    rl_arg = UpdateRawLoc(rl_arg);
+    RegStorage reg = in_to_reg_storage_mapping.Get(i);
+    if (reg.Valid()) {
+      if (rl_arg.wide) {
+        LoadValueDirectWideFixed(rl_arg, reg);
+        i++;
+      } else {
+        LoadValueDirectFixed(rl_arg, reg);
+      }
+      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                               direct_code, direct_method, type);
+    }
+  }
+
+  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                           direct_code, direct_method, type);
+  if (pcrLabel) {
+    if (Runtime::Current()->ExplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      Load32Disp(TargetReg(kArg1), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
+  }
+  return call_state;
+}
+
 }  // namespace art
+
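
In the range-call path above, the outgoing arguments left in memory are copied in bulk: a 128-bit XMM move is used when exactly four 32-bit slots remain or when either side is already 16-byte aligned (an 8-byte-aligned side then splits the move into two halves), and otherwise a single 32-bit GPR move is used so the offsets can reach alignment. A sketch of just the chunk-size decision:

#include <cassert>
#include <cstddef>

// Mirrors the bytes_to_move selection in GenDalvikArgsRange.
static size_t BytesToMove(int slots_left, size_t src_off, size_t dst_off) {
  bool src_16 = (src_off & 0xF) == 0;
  bool dst_16 = (dst_off & 0xF) == 0;
  if (slots_left == 4 || (slots_left > 4 && (src_16 || dst_16))) {
    return 16;  // 128-bit move via an XMM temporary
  }
  return 4;     // 32-bit move via a GPR temporary
}

int main() {
  assert(BytesToMove(6, 8, 4) == 4);    // misaligned: step by one slot
  assert(BytesToMove(5, 16, 8) == 16);  // one side aligned: use the wide move
  assert(BytesToMove(4, 4, 8) == 16);   // last chance for a wide move
  return 0;
}
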
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 618b3a5..d074d81 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -89,11 +89,8 @@
     res = NewLIR2(kX86Xor32RR, r_dest.GetReg(), r_dest.GetReg());
   } else {
     // Note, there is no byte immediate form of a 32 bit immediate move.
-    if (r_dest.Is64Bit()) {
-      res = NewLIR2(kX86Mov64RI, r_dest.GetReg(), value);
-    } else {
-      res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value);
-    }
+    // A 64-bit immediate is not supported by the LIR structure.
+    res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value);
   }
 
   if (r_dest_save.IsFloat()) {
@@ -120,8 +117,8 @@
 LIR* X86Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
   X86OpCode opcode = kX86Bkpt;
   switch (op) {
-    case kOpNeg: opcode = kX86Neg32R; break;
-    case kOpNot: opcode = kX86Not32R; break;
+    case kOpNeg: opcode = r_dest_src.Is64Bit() ? kX86Neg64R : kX86Neg32R; break;
+    case kOpNot: opcode = r_dest_src.Is64Bit() ? kX86Not64R : kX86Not32R; break;
     case kOpRev: opcode = kX86Bswap32R; break;
     case kOpBlx: opcode = kX86CallR; break;
     default:
@@ -138,6 +135,9 @@
     switch (op) {
       case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break;
       case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break;
+      case kOpLsl: opcode = kX86Sal64RI; break;
+      case kOpLsr: opcode = kX86Shr64RI; break;
+      case kOpAsr: opcode = kX86Sar64RI; break;
       default:
         LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op;
     }
@@ -189,6 +189,7 @@
 }
 
 LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
+    bool is64Bit = r_dest_src1.Is64Bit();
     X86OpCode opcode = kX86Nop;
     bool src2_must_be_cx = false;
     switch (op) {
@@ -207,33 +208,34 @@
         OpReg(kOpRev, r_dest_src1);
         return OpRegImm(kOpAsr, r_dest_src1, 16);
         // X86 binary opcodes
-      case kOpSub: opcode = kX86Sub32RR; break;
-      case kOpSbc: opcode = kX86Sbb32RR; break;
-      case kOpLsl: opcode = kX86Sal32RC; src2_must_be_cx = true; break;
-      case kOpLsr: opcode = kX86Shr32RC; src2_must_be_cx = true; break;
-      case kOpAsr: opcode = kX86Sar32RC; src2_must_be_cx = true; break;
-      case kOpMov: opcode = kX86Mov32RR; break;
-      case kOpCmp: opcode = kX86Cmp32RR; break;
-      case kOpAdd: opcode = kX86Add32RR; break;
-      case kOpAdc: opcode = kX86Adc32RR; break;
-      case kOpAnd: opcode = kX86And32RR; break;
-      case kOpOr:  opcode = kX86Or32RR; break;
-      case kOpXor: opcode = kX86Xor32RR; break;
+      case kOpSub: opcode = is64Bit ? kX86Sub64RR : kX86Sub32RR; break;
+      case kOpSbc: opcode = is64Bit ? kX86Sbb64RR : kX86Sbb32RR; break;
+      case kOpLsl: opcode = is64Bit ? kX86Sal64RC : kX86Sal32RC; src2_must_be_cx = true; break;
+      case kOpLsr: opcode = is64Bit ? kX86Shr64RC : kX86Shr32RC; src2_must_be_cx = true; break;
+      case kOpAsr: opcode = is64Bit ? kX86Sar64RC : kX86Sar32RC; src2_must_be_cx = true; break;
+      case kOpMov: opcode = is64Bit ? kX86Mov64RR : kX86Mov32RR; break;
+      case kOpCmp: opcode = is64Bit ? kX86Cmp64RR : kX86Cmp32RR; break;
+      case kOpAdd: opcode = is64Bit ? kX86Add64RR : kX86Add32RR; break;
+      case kOpAdc: opcode = is64Bit ? kX86Adc64RR : kX86Adc32RR; break;
+      case kOpAnd: opcode = is64Bit ? kX86And64RR : kX86And32RR; break;
+      case kOpOr:  opcode = is64Bit ? kX86Or64RR : kX86Or32RR; break;
+      case kOpXor: opcode = is64Bit ? kX86Xor64RR : kX86Xor32RR; break;
       case kOp2Byte:
         // TODO: there are several instances of this check.  A utility function perhaps?
         // TODO: Similar to Arm's reg < 8 check.  Perhaps add attribute checks to RegStorage?
         // Use shifts instead of a byte operand if the source can't be byte accessed.
         if (r_src2.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
-          NewLIR2(kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg());
-          NewLIR2(kX86Sal32RI, r_dest_src1.GetReg(), 24);
-          return NewLIR2(kX86Sar32RI, r_dest_src1.GetReg(), 24);
+          NewLIR2(is64Bit ? kX86Mov64RR : kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg());
+          NewLIR2(is64Bit ? kX86Sal64RI : kX86Sal32RI, r_dest_src1.GetReg(), is64Bit ? 56 : 24);
+          return NewLIR2(is64Bit ? kX86Sar64RI : kX86Sar32RI, r_dest_src1.GetReg(),
+                         is64Bit ? 56 : 24);
         } else {
-          opcode = kX86Movsx8RR;
+          opcode = is64Bit ? kX86Bkpt : kX86Movsx8RR;
         }
         break;
-      case kOp2Short: opcode = kX86Movsx16RR; break;
-      case kOp2Char: opcode = kX86Movzx16RR; break;
-      case kOpMul: opcode = kX86Imul32RR; break;
+      case kOp2Short: opcode = is64Bit ? kX86Bkpt : kX86Movsx16RR; break;
+      case kOp2Char: opcode = is64Bit ? kX86Bkpt : kX86Movzx16RR; break;
+      case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RR; break;
       default:
         LOG(FATAL) << "Bad case in OpRegReg " << op;
         break;
@@ -354,16 +356,17 @@
 }
 
 LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) {
+  bool is64Bit = r_dest.Is64Bit();
   X86OpCode opcode = kX86Nop;
   switch (op) {
       // X86 binary opcodes
-    case kOpSub: opcode = kX86Sub32RM; break;
-    case kOpMov: opcode = kX86Mov32RM; break;
-    case kOpCmp: opcode = kX86Cmp32RM; break;
-    case kOpAdd: opcode = kX86Add32RM; break;
-    case kOpAnd: opcode = kX86And32RM; break;
-    case kOpOr:  opcode = kX86Or32RM; break;
-    case kOpXor: opcode = kX86Xor32RM; break;
+    case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break;
+    case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break;
+    case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break;
+    case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break;
+    case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break;
+    case kOpOr:  opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break;
+    case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break;
     case kOp2Byte: opcode = kX86Movsx8RM; break;
     case kOp2Short: opcode = kX86Movsx16RM; break;
     case kOp2Char: opcode = kX86Movzx16RM; break;
@@ -382,63 +385,68 @@
 LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) {
   DCHECK_NE(rl_dest.location, kLocPhysReg);
   int displacement = SRegOffset(rl_dest.s_reg_low);
+  bool is64Bit = rl_dest.wide != 0;
   X86OpCode opcode = kX86Nop;
   switch (op) {
-    case kOpSub: opcode = kX86Sub32MR; break;
-    case kOpMov: opcode = kX86Mov32MR; break;
-    case kOpCmp: opcode = kX86Cmp32MR; break;
-    case kOpAdd: opcode = kX86Add32MR; break;
-    case kOpAnd: opcode = kX86And32MR; break;
-    case kOpOr:  opcode = kX86Or32MR; break;
-    case kOpXor: opcode = kX86Xor32MR; break;
-    case kOpLsl: opcode = kX86Sal32MC; break;
-    case kOpLsr: opcode = kX86Shr32MC; break;
-    case kOpAsr: opcode = kX86Sar32MC; break;
+    case kOpSub: opcode = is64Bit ? kX86Sub64MR : kX86Sub32MR; break;
+    case kOpMov: opcode = is64Bit ? kX86Mov64MR : kX86Mov32MR; break;
+    case kOpCmp: opcode = is64Bit ? kX86Cmp64MR : kX86Cmp32MR; break;
+    case kOpAdd: opcode = is64Bit ? kX86Add64MR : kX86Add32MR; break;
+    case kOpAnd: opcode = is64Bit ? kX86And64MR : kX86And32MR; break;
+    case kOpOr:  opcode = is64Bit ? kX86Or64MR : kX86Or32MR; break;
+    case kOpXor: opcode = is64Bit ? kX86Xor64MR : kX86Xor32MR; break;
+    case kOpLsl: opcode = is64Bit ? kX86Sal64MC : kX86Sal32MC; break;
+    case kOpLsr: opcode = is64Bit ? kX86Shr64MC : kX86Shr32MC; break;
+    case kOpAsr: opcode = is64Bit ? kX86Sar64MC : kX86Sar32MC; break;
     default:
       LOG(FATAL) << "Bad case in OpMemReg " << op;
       break;
   }
   LIR *l = NewLIR3(opcode, rs_rX86_SP.GetReg(), displacement, r_value);
-  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
-  AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */);
+  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */);
+  AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */);
   return l;
 }
 
 LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegLocation rl_value) {
   DCHECK_NE(rl_value.location, kLocPhysReg);
+  bool is64Bit = r_dest.Is64Bit();
   int displacement = SRegOffset(rl_value.s_reg_low);
   X86OpCode opcode = kX86Nop;
   switch (op) {
-    case kOpSub: opcode = kX86Sub32RM; break;
-    case kOpMov: opcode = kX86Mov32RM; break;
-    case kOpCmp: opcode = kX86Cmp32RM; break;
-    case kOpAdd: opcode = kX86Add32RM; break;
-    case kOpAnd: opcode = kX86And32RM; break;
-    case kOpOr:  opcode = kX86Or32RM; break;
-    case kOpXor: opcode = kX86Xor32RM; break;
-    case kOpMul: opcode = kX86Imul32RM; break;
+    case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break;
+    case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break;
+    case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break;
+    case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break;
+    case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break;
+    case kOpOr:  opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break;
+    case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break;
+    case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RM; break;
     default:
       LOG(FATAL) << "Bad case in OpRegMem " << op;
       break;
   }
   LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP.GetReg(), displacement);
-  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
+  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */);
   return l;
 }
 
 LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1,
                              RegStorage r_src2) {
+  bool is64Bit = r_dest.Is64Bit();
   if (r_dest != r_src1 && r_dest != r_src2) {
     if (op == kOpAdd) {  // lea special case, except can't encode rbp as base
       if (r_src1 == r_src2) {
         OpRegCopy(r_dest, r_src1);
         return OpRegImm(kOpLsl, r_dest, 1);
       } else if (r_src1 != rs_rBP) {
-        return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src1.GetReg() /* base */,
-                       r_src2.GetReg() /* index */, 0 /* scale */, 0 /* disp */);
+        return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
+                       r_src1.GetReg() /* base */, r_src2.GetReg() /* index */,
+                       0 /* scale */, 0 /* disp */);
       } else {
-        return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src2.GetReg() /* base */,
-                       r_src1.GetReg() /* index */, 0 /* scale */, 0 /* disp */);
+        return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
+                       r_src2.GetReg() /* base */, r_src1.GetReg() /* index */,
+                       0 /* scale */, 0 /* disp */);
       }
     } else {
       OpRegCopy(r_dest, r_src1);
@@ -476,10 +484,10 @@
 }
 
 LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int value) {
-  if (op == kOpMul) {
+  if (op == kOpMul && !Gen64Bit()) {
     X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
     return NewLIR3(opcode, r_dest.GetReg(), r_src.GetReg(), value);
-  } else if (op == kOpAnd) {
+  } else if (op == kOpAnd && !Gen64Bit()) {
     if (value == 0xFF && r_src.Low4()) {
       return NewLIR2(kX86Movzx8RR, r_dest.GetReg(), r_src.GetReg());
     } else if (value == 0xFFFF) {
@@ -492,8 +500,9 @@
       return NewLIR5(kX86Lea32RA, r_dest.GetReg(),  r5sib_no_base /* base */,
                      r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
     } else if (op == kOpAdd) {  // lea add special case
-      return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */,
-                     rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, 0 /* scale */, value /* disp */);
+      return NewLIR5(Gen64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
+                     r_src.GetReg() /* base */, rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */,
+                     0 /* scale */, value /* disp */);
     }
     OpRegCopy(r_dest, r_src);
   }
@@ -556,7 +565,11 @@
 
         // Address the start of the method
         RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-        rl_method = LoadValue(rl_method, kCoreReg);
+        if (rl_method.wide) {
+          rl_method = LoadValueWide(rl_method, kCoreReg);
+        } else {
+          rl_method = LoadValue(rl_method, kCoreReg);
+        }
 
         // Load the proper value from the literal area.
         // We don't know the proper offset for the value, so pick one that will force
@@ -582,8 +595,20 @@
         }
       }
     } else {
-      res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
-      LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
+      if (r_dest.IsPair()) {
+        res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
+        LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
+      } else {
+        // TODO(64): Make the value parameter of LoadConstantNoClobber an int64_t.
+        if (val_lo < 0) {
+          val_hi += 1;
+        }
+        res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi);
+        NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32);
+        if (val_lo != 0) {
+          NewLIR2(kX86Add64RI, r_dest.GetReg(), val_lo);
+        }
+      }
     }
     return res;
 }
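
When the destination is a single 64-bit register, the LoadConstantWide path above builds the constant as "load the high word into the 32-bit register, shift left by 32, add the low word as a sign-extended imm32"; the val_hi += 1 compensates for that add subtracting 2^32 whenever the low word is negative. A sketch checking the identity:

#include <cassert>
#include <cstdint>

// Models: mov r32, val_hi ; shl r, 32 (kX86Sal64RI) ; add r, val_lo (kX86Add64RI).
static uint64_t BuildWideConstant(int64_t val) {
  int32_t val_lo = static_cast<int32_t>(val);                                  // Low32Bits(val)
  uint32_t val_hi = static_cast<uint32_t>(static_cast<uint64_t>(val) >> 32);   // High32Bits(val)
  if (val_lo < 0) {
    val_hi += 1;  // the sign-extended add below will subtract 2^32
  }
  uint64_t r = val_hi;                                        // 32-bit mov zero-extends
  r <<= 32;
  r += static_cast<uint64_t>(static_cast<int64_t>(val_lo));   // sign-extended imm32, wraps mod 2^64
  return r;
}

int main() {
  const int64_t samples[] = {0, -1, 0x00000001FFFFFFFFLL, INT64_MIN, INT64_MAX,
                             0x123456789ABCDEF0LL, -0x123456789ABCDEF0LL};
  for (int64_t v : samples) {
    assert(BuildWideConstant(v) == static_cast<uint64_t>(v));
  }
  return 0;
}
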
@@ -601,6 +626,8 @@
     case kDouble:
       if (r_dest.IsFloat()) {
         opcode = is_array ? kX86MovsdRA : kX86MovsdRM;
+      } else if (!pair) {
+        opcode = is_array ? kX86Mov64RA  : kX86Mov64RM;
       } else {
         opcode = is_array ? kX86Mov32RA  : kX86Mov32RM;
       }
@@ -742,13 +769,10 @@
     case kDouble:
       if (r_src.IsFloat()) {
         opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
+      } else if (!pair) {
+        opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
       } else {
-        if (Gen64Bit()) {
-          opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
-        } else {
-          // TODO(64): pair = true;
-          opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
-        }
+        opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
       }
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
@@ -855,7 +879,7 @@
 
   // Did we need a pointer to the method code?
   if (store_method_addr_) {
-    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, false);
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, Gen64Bit());
   } else {
     base_of_code_ = nullptr;
   }
@@ -971,6 +995,7 @@
       loc.location = kLocDalvikFrame;
     }
   }
+  DCHECK(CheckCorePoolSanity());
   return loc;
 }
 
@@ -984,7 +1009,7 @@
       loc.location = kLocDalvikFrame;
     }
   }
+  DCHECK(CheckCorePoolSanity());
   return loc;
 }
-
 }  // namespace art
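
The 64-bit immediate path added above builds the constant in a single 64-bit register: it loads val_hi, shifts it left by 32 (kX86Sal64RI), then adds val_lo (kX86Add64RI). Because the 32-bit add immediate is sign-extended to 64 bits, a negative val_lo would also subtract one from the high half, which is what the val_hi += 1 compensation undoes. A minimal sketch of the same decomposition in plain C++ (SplitImm64 is an illustrative helper, not an ART API):

#include <cstdint>
#include <cassert>

// Split a 64-bit constant into the (hi, lo) halves used by the
// mov/shl/add sequence above.  'lo' is applied with a sign-extending
// 32-bit add, so a negative 'lo' must be compensated by hi + 1.
static void SplitImm64(int64_t value, int32_t* hi, int32_t* lo) {
  *lo = static_cast<int32_t>(value);        // low 32 bits
  *hi = static_cast<int32_t>(value >> 32);  // high 32 bits
  if (*lo < 0) {
    *hi += 1;  // cancel the sign extension the add will introduce
  }
}

int main() {
  int32_t hi, lo;
  SplitImm64(INT64_C(0x12345678FFFFFFFF), &hi, &lo);
  // Reassemble the way the generated code does: (hi << 32) + sign_extend(lo).
  int64_t rebuilt = (static_cast<int64_t>(hi) << 32) + static_cast<int64_t>(lo);
  assert(rebuilt == INT64_C(0x12345678FFFFFFFF));
  return 0;
}
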
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index bb8df89..e550488 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -334,10 +334,18 @@
 extern X86NativeRegisterPool rX86_ARG1;
 extern X86NativeRegisterPool rX86_ARG2;
 extern X86NativeRegisterPool rX86_ARG3;
+#ifdef TARGET_REX_SUPPORT
+extern X86NativeRegisterPool rX86_ARG4;
+extern X86NativeRegisterPool rX86_ARG5;
+#endif
 extern X86NativeRegisterPool rX86_FARG0;
 extern X86NativeRegisterPool rX86_FARG1;
 extern X86NativeRegisterPool rX86_FARG2;
 extern X86NativeRegisterPool rX86_FARG3;
+extern X86NativeRegisterPool rX86_FARG4;
+extern X86NativeRegisterPool rX86_FARG5;
+extern X86NativeRegisterPool rX86_FARG6;
+extern X86NativeRegisterPool rX86_FARG7;
 extern X86NativeRegisterPool rX86_RET0;
 extern X86NativeRegisterPool rX86_RET1;
 extern X86NativeRegisterPool rX86_INVOKE_TGT;
@@ -347,10 +355,16 @@
 extern RegStorage rs_rX86_ARG1;
 extern RegStorage rs_rX86_ARG2;
 extern RegStorage rs_rX86_ARG3;
+extern RegStorage rs_rX86_ARG4;
+extern RegStorage rs_rX86_ARG5;
 extern RegStorage rs_rX86_FARG0;
 extern RegStorage rs_rX86_FARG1;
 extern RegStorage rs_rX86_FARG2;
 extern RegStorage rs_rX86_FARG3;
+extern RegStorage rs_rX86_FARG4;
+extern RegStorage rs_rX86_FARG5;
+extern RegStorage rs_rX86_FARG6;
+extern RegStorage rs_rX86_FARG7;
 extern RegStorage rs_rX86_RET0;
 extern RegStorage rs_rX86_RET1;
 extern RegStorage rs_rX86_INVOKE_TGT;
@@ -363,6 +377,9 @@
 const RegLocation x86_loc_c_return_wide
     {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
      RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG};
+const RegLocation x86_64_loc_c_return_wide
+    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
+     RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG};
 const RegLocation x86_loc_c_return_float
     {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
      RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG};
@@ -505,6 +522,7 @@
   UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
   UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
   kx86Cdq32Da,
+  kx86Cqo64Da,
   kX86Bswap32R,
   kX86Push32R, kX86Pop32R,
 #undef UnaryOpcode
@@ -518,8 +536,12 @@
   kX86MovssAR,
   Binary0fOpCode(kX86Cvtsi2sd),  // int to double
   Binary0fOpCode(kX86Cvtsi2ss),  // int to float
+  Binary0fOpCode(kX86Cvtsqi2sd),  // long to double
+  Binary0fOpCode(kX86Cvtsqi2ss),  // long to float
   Binary0fOpCode(kX86Cvttsd2si),  // truncating double to int
   Binary0fOpCode(kX86Cvttss2si),  // truncating float to int
+  Binary0fOpCode(kX86Cvttsd2sqi),  // truncating double to long
+  Binary0fOpCode(kX86Cvttss2sqi),  // truncating float to long
   Binary0fOpCode(kX86Cvtsd2si),  // rounding double to int
   Binary0fOpCode(kX86Cvtss2si),  // rounding float to int
   Binary0fOpCode(kX86Ucomisd),  // unordered double compare
@@ -587,11 +609,15 @@
   kX86MovhpsRM, kX86MovhpsRA,   // load packed single FP values from m64 to high quadword of xmm
   kX86MovhpsMR, kX86MovhpsAR,   // store packed single FP values from high quadword of xmm to m64
   Binary0fOpCode(kX86Movdxr),   // move into xmm from gpr
+  Binary0fOpCode(kX86Movqxr),   // move into xmm from 64 bit gpr
+  kX86MovqrxRR, kX86MovqrxMR, kX86MovqrxAR,  // move into 64 bit reg from xmm
   kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,  // move into reg from xmm
+  kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA,  // move 32 bit to 64 bit with sign extension
   kX86Set8R, kX86Set8M, kX86Set8A,  // set byte depending on condition operand
   kX86Mfence,                   // memory barrier
   Binary0fOpCode(kX86Imul16),   // 16bit multiply
   Binary0fOpCode(kX86Imul32),   // 32bit multiply
+  Binary0fOpCode(kX86Imul64),   // 64bit multiply
   kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,  // compare and exchange
   kX86LockCmpxchgMR, kX86LockCmpxchgAR,  // locked compare and exchange
   kX86LockCmpxchg8bM, kX86LockCmpxchg8bA,  // locked compare and exchange
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index bd6bc225..4324325 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -16,6 +16,7 @@
 
 #include "compiler_internals.h"
 #include "dataflow_iterator-inl.h"
+#include "utils/scoped_arena_containers.h"
 
 #define NOTVISITED (-1)
 
@@ -69,7 +70,8 @@
 }
 
 void MIRGraph::RecordDFSOrders(BasicBlock* block) {
-  std::vector<BasicBlock*> succ;
+  DCHECK(temp_scoped_alloc_.get() != nullptr);
+  ScopedArenaVector<BasicBlock*> succ(temp_scoped_alloc_->Adapter());
   MarkPreOrder(block);
   succ.push_back(block);
   while (!succ.empty()) {
@@ -176,7 +178,9 @@
     dom_post_order_traversal_->Reset();
   }
   ClearAllVisitedFlags();
-  std::vector<std::pair<BasicBlock*, ArenaBitVector::IndexIterator>> work_stack;
+  DCHECK(temp_scoped_alloc_.get() != nullptr);
+  ScopedArenaVector<std::pair<BasicBlock*, ArenaBitVector::IndexIterator>> work_stack(
+      temp_scoped_alloc_->Adapter());
   bb->visited = true;
   work_stack.push_back(std::make_pair(bb, bb->i_dominated->Indexes().begin()));
   while (!work_stack.empty()) {
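
RecordDFSOrders and the dominator post-order walk above now keep their worklists in a ScopedArenaVector backed by temp_scoped_alloc_, so the temporary storage comes from the compiler's scoped arena rather than the default heap allocator. A minimal sketch of the same explicit-stack, pre-order DFS pattern over a plain adjacency list (illustrative only, not the MIRGraph data structures):

#include <vector>

// Illustrative only: iterative pre-order DFS using an explicit worklist,
// the pattern RecordDFSOrders above follows (the real code keeps the
// worklist in a ScopedArenaVector so it lives in the scoped arena).
void PreOrderDfs(const std::vector<std::vector<int>>& succ,
                 int entry,
                 std::vector<int>* pre_order) {
  std::vector<bool> visited(succ.size(), false);
  std::vector<int> work;
  work.push_back(entry);
  visited[entry] = true;
  pre_order->push_back(entry);
  while (!work.empty()) {
    int bb = work.back();
    bool pushed = false;
    for (int s : succ[bb]) {
      if (!visited[s]) {
        visited[s] = true;
        pre_order->push_back(s);  // pre-order: number on first visit
        work.push_back(s);
        pushed = true;
        break;                    // descend into the first unvisited successor
      }
    }
    if (!pushed) {
      work.pop_back();            // all successors done, retreat
    }
  }
}
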
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 45abfcc..324f717 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -135,8 +135,10 @@
       } else {
         // Search dex file for localized ssb index, may fail if field's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
+        StackHandleScope<1> hs(Thread::Current());
         const DexFile::StringId* string_id =
-            dex_file->FindStringId(FieldHelper(resolved_field).GetDeclaringClassDescriptor());
+            dex_file->FindStringId(
+                FieldHelper(hs.NewHandle(resolved_field)).GetDeclaringClassDescriptor());
         if (string_id != nullptr) {
           const DexFile::TypeId* type_id =
              dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 8d4e283..1cfd194 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -35,6 +35,7 @@
 #include "driver/compiler_options.h"
 #include "jni_internal.h"
 #include "object_utils.h"
+#include "profiler.h"
 #include "runtime.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
@@ -57,10 +58,6 @@
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 
-#ifdef HAVE_ANDROID_OS
-#include "cutils/properties.h"
-#endif
-
 namespace art {
 
 static double Percentage(size_t x, size_t y) {
@@ -333,7 +330,7 @@
                                bool image, DescriptorSet* image_classes, size_t thread_count,
                                bool dump_stats, bool dump_passes, CumulativeLogger* timer,
                                std::string profile_file)
-    : profile_ok_(false), compiler_options_(compiler_options),
+    : profile_present_(false), compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_(Compiler::Create(this, compiler_kind)),
@@ -367,11 +364,6 @@
 
   CHECK_PTHREAD_CALL(pthread_key_create, (&tls_key_, NULL), "compiler tls key");
 
-  // Read the profile file if one is provided.
-  if (profile_file != "") {
-    profile_ok_ = ProfileHelper::LoadProfileMap(profile_map_, profile_file);
-  }
-
   dex_to_dex_compiler_ = reinterpret_cast<DexToDexCompilerFn>(ArtCompileDEX);
 
   compiler_->Init();
@@ -387,6 +379,16 @@
   if (compiler_options->GetGenerateGDBInformation()) {
     cfi_info_.reset(compiler_->GetCallFrameInformationInitialization(*this));
   }
+
+  // Read the profile file if one is provided.
+  if (!profile_file.empty()) {
+    profile_present_ = profile_file_.LoadFile(profile_file);
+    if (profile_present_) {
+      LOG(INFO) << "Using profile data form file " << profile_file;
+    } else {
+      LOG(INFO) << "Failed to load profile file " << profile_file;
+    }
+  }
 }
 
 std::vector<uint8_t>* CompilerDriver::DeduplicateCode(const std::vector<uint8_t>& code) {
@@ -2046,39 +2048,29 @@
   }
 
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
-  if (!profile_ok_) {
+  if (!profile_present_) {
     return false;
   }
-  // Methods that comprise topKPercentThreshold % of the total samples will be compiled.
-  double topKPercentThreshold = 90.0;
-#ifdef HAVE_ANDROID_OS
-  char buf[PROP_VALUE_MAX];
-  property_get("dalvik.vm.profile.compile_thr", buf, "90.0");
-  topKPercentThreshold = strtod(buf, nullptr);
-#endif
-  // Test for reasonable thresholds.
-  if (topKPercentThreshold < 10.0 || topKPercentThreshold > 90.0) {
-    topKPercentThreshold = 90.0;
-  }
-
-  // First find the method in the profile map.
-  ProfileMap::iterator i = profile_map_.find(method_name);
-  if (i == profile_map_.end()) {
+  // First find the method in the profile file.
+  ProfileFile::ProfileData data;
+  if (!profile_file_.GetProfileData(&data, method_name)) {
     // Not in profile, no information can be determined.
     VLOG(compiler) << "not compiling " << method_name << " because it's not in the profile";
     return true;
   }
-  const ProfileData& data = i->second;
 
+  // Methods that comprise top_k_threshold % of the total samples will be compiled.
   // Compare against the start of the topK percentage bucket just in case the threshold
   // falls inside a bucket.
-  bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent() <= topKPercentThreshold;
+  bool compile = data.GetTopKUsedPercentage() - data.GetUsedPercent()
+                 <= compiler_options_->GetTopKProfileThreshold();
   if (compile) {
     LOG(INFO) << "compiling method " << method_name << " because its usage is part of top "
-        << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%";
+        << data.GetTopKUsedPercentage() << "% with a percent of " << data.GetUsedPercent() << "%"
+        << " (topKThreshold=" << compiler_options_->GetTopKProfileThreshold() << ")";
   } else {
     VLOG(compiler) << "not compiling method " << method_name << " because it's not part of leading "
-        << topKPercentThreshold << "% samples)";
+        << compiler_options_->GetTopKProfileThreshold() << "% samples";
   }
   return !compile;
 }
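
SkipCompilation now takes the threshold from CompilerOptions instead of a system property: a method is compiled when the start of its cumulative top-K usage bucket, GetTopKUsedPercentage() - GetUsedPercent(), is at or below GetTopKProfileThreshold(). A minimal numeric sketch with made-up numbers (ShouldCompile is illustrative, not ART code):

#include <iostream>

// Illustrative only: the top-K decision used by SkipCompilation above.
// A method is compiled when the start of its cumulative-usage bucket
// (top_k_used - used) falls at or below the configured threshold.
static bool ShouldCompile(double top_k_used_percentage,
                          double used_percent,
                          double top_k_threshold) {
  return (top_k_used_percentage - used_percent) <= top_k_threshold;
}

int main() {
  // With the default threshold of 90.0, a method whose bucket starts at
  // 85% of cumulative samples is compiled; one starting at 95% is not.
  std::cout << ShouldCompile(88.0, 3.0, 90.0) << "\n";  // 1: 85 <= 90
  std::cout << ShouldCompile(97.0, 2.0, 90.0) << "\n";  // 0: 95 > 90
  return 0;
}
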
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 14ccb50..fad6798 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -143,7 +143,7 @@
   }
 
   bool ProfilePresent() const {
-    return profile_ok_;
+    return profile_present_;
   }
 
   // Are we compiling and creating an image file?
@@ -595,8 +595,8 @@
     return cfi_info_.get();
   }
 
-  ProfileMap profile_map_;
-  bool profile_ok_;
+  ProfileFile profile_file_;
+  bool profile_present_;
 
   // Should the compiler run on this method given profile information?
   bool SkipCompilation(const std::string& method_name);
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 20c6bc8..05a9ac7 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -24,7 +24,6 @@
   enum CompilerFilter {
     kVerifyNone,          // Skip verification and compile nothing except JNI stubs.
     kInterpretOnly,       // Compile nothing except JNI stubs.
-    kProfiled,            // Compile based on profile.
     kSpace,               // Maximize space savings.
     kBalanced,            // Try to get the best performance return on compilation investment.
     kSpeed,               // Maximize runtime performance.
@@ -33,7 +32,7 @@
 
   // Guide heuristics to determine whether to compile method if profile data not available.
 #if ART_SMALL_MODE
-  static const CompilerFilter kDefaultCompilerFilter = kProfiled;
+  static const CompilerFilter kDefaultCompilerFilter = kInterpretOnly;
 #else
   static const CompilerFilter kDefaultCompilerFilter = kSpeed;
 #endif
@@ -42,6 +41,7 @@
   static const size_t kDefaultSmallMethodThreshold = 60;
   static const size_t kDefaultTinyMethodThreshold = 20;
   static const size_t kDefaultNumDexMethodsThreshold = 900;
+  static constexpr double kDefaultTopKProfileThreshold = 90.0;
 
   CompilerOptions() :
     compiler_filter_(kDefaultCompilerFilter),
@@ -50,7 +50,8 @@
     small_method_threshold_(kDefaultSmallMethodThreshold),
     tiny_method_threshold_(kDefaultTinyMethodThreshold),
     num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
-    generate_gdb_information_(false)
+    generate_gdb_information_(false),
+    top_k_profile_threshold_(kDefaultTopKProfileThreshold)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(false)
 #endif
@@ -62,7 +63,8 @@
                   size_t small_method_threshold,
                   size_t tiny_method_threshold,
                   size_t num_dex_methods_threshold,
-                  bool generate_gdb_information
+                  bool generate_gdb_information,
+                  double top_k_profile_threshold
 #ifdef ART_SEA_IR_MODE
                   , bool sea_ir_mode
 #endif
@@ -73,7 +75,8 @@
     small_method_threshold_(small_method_threshold),
     tiny_method_threshold_(tiny_method_threshold),
     num_dex_methods_threshold_(num_dex_methods_threshold),
-    generate_gdb_information_(generate_gdb_information)
+    generate_gdb_information_(generate_gdb_information),
+    top_k_profile_threshold_(top_k_profile_threshold)
 #ifdef ART_SEA_IR_MODE
     , sea_ir_mode_(sea_ir_mode)
 #endif
@@ -132,6 +135,10 @@
     return num_dex_methods_threshold_;
   }
 
+  double GetTopKProfileThreshold() const {
+    return top_k_profile_threshold_;
+  }
+
 #ifdef ART_SEA_IR_MODE
   bool GetSeaIrMode();
 #endif
@@ -148,7 +155,8 @@
   size_t tiny_method_threshold_;
   size_t num_dex_methods_threshold_;
   bool generate_gdb_information_;
-
+  // When using a profile file, only the top K% of the profiled samples will be compiled.
+  double top_k_profile_threshold_;
 #ifdef ART_SEA_IR_MODE
   bool sea_ir_mode_;
 #endif
diff --git a/compiler/elf_stripper.cc b/compiler/elf_stripper.cc
index 8c06c9f..0b86ad0 100644
--- a/compiler/elf_stripper.cc
+++ b/compiler/elf_stripper.cc
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "base/logging.h"
+#include "base/stringprintf.h"
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "utils.h"
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index a7727c0..2c8166e 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_PRETTY_PRINTER_H_
 #define ART_COMPILER_OPTIMIZING_PRETTY_PRINTER_H_
 
+#include "base/stringprintf.h"
 #include "nodes.h"
 
 namespace art {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index f27da89..35149cf 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -708,6 +708,38 @@
   return result;
 }
 
+void ParseStringAfterChar(const std::string& s, char c, std::string* parsed_value) {
+  std::string::size_type colon = s.find(c);
+  if (colon == std::string::npos) {
+    Usage("Missing char %c in option %s\n", c, s.c_str());
+  }
+  // Add one to skip over the char we searched for.
+  *parsed_value = s.substr(colon + 1);
+}
+
+void ParseDouble(const std::string& option, char after_char,
+                 double min, double max, double* parsed_value) {
+  std::string substring;
+  ParseStringAfterChar(option, after_char, &substring);
+  bool sane_val = true;
+  double value;
+  if (false) {
+    // TODO: this doesn't seem to work on the emulator.  b/15114595
+    std::stringstream iss(substring);
+    iss >> value;
+    // Ensure we got a value, that there was no cruft after it, and that it is in a sensible range.
+    sane_val = iss.eof() && (value >= min) && (value <= max);
+  } else {
+    char* end = nullptr;
+    value = strtod(substring.c_str(), &end);
+    sane_val = *end == '\0' && value >= min && value <= max;
+  }
+  if (!sane_val) {
+    Usage("Invalid double value %s for option %s\n", substring.c_str(), option.c_str());
+  }
+  *parsed_value = value;
+}
+
 static int dex2oat(int argc, char** argv) {
 #if defined(__linux__) && defined(__arm__)
   int major, minor;
@@ -778,6 +810,7 @@
 
   // Profile file to use
   std::string profile_file;
+  double top_k_profile_threshold = CompilerOptions::kDefaultTopKProfileThreshold;
 
   bool is_host = false;
   bool dump_stats = false;
@@ -941,6 +974,8 @@
       VLOG(compiler) << "dex2oat: profile file is " << profile_file;
     } else if (option == "--no-profile-file") {
       // No profile
+    } else if (option.starts_with("--top-k-profile-threshold=")) {
+      ParseDouble(option.data(), '=', 10.0, 90.0, &top_k_profile_threshold);
     } else if (option == "--print-pass-names") {
       PassDriverMEOpts::PrintPassNames();
     } else if (option.starts_with("--disable-passes=")) {
@@ -1086,7 +1121,8 @@
                                    small_method_threshold,
                                    tiny_method_threshold,
                                    num_dex_methods_threshold,
-                                   generate_gdb_information
+                                   generate_gdb_information,
+                                   top_k_profile_threshold
 #ifdef ART_SEA_IR_MODE
                                    , compiler_options.sea_ir_ = true;
 #endif
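
dex2oat gains a --top-k-profile-threshold=<value> option, parsed by ParseDouble above and constrained to the 10.0-90.0 range before being handed to CompilerOptions. A standalone sketch of the same strtod-based validation (ParseBoundedDouble is an illustrative helper, not the dex2oat function):

#include <cstdlib>
#include <cstdio>
#include <string>

// Illustrative only: validate a double option value the way ParseDouble
// above does on its strtod path (whole string consumed, value in range).
static bool ParseBoundedDouble(const std::string& text, double min, double max,
                               double* out) {
  char* end = nullptr;
  double value = strtod(text.c_str(), &end);
  if (end == nullptr || *end != '\0' || value < min || value > max) {
    return false;
  }
  *out = value;
  return true;
}

int main() {
  double threshold = 90.0;
  if (ParseBoundedDouble("75.5", 10.0, 90.0, &threshold)) {
    printf("threshold=%f\n", threshold);  // accepted, as for --top-k-profile-threshold=75.5
  }
  if (!ParseBoundedDouble("95x", 10.0, 90.0, &threshold)) {
    printf("rejected\n");                 // trailing cruft / out of range
  }
  return 0;
}
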
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 456e3b5..e6a6860 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -258,6 +258,17 @@
     reg_in_opcode = true;
     target_specific = true;
     break;
+  case 0x63:
+    if (rex == 0x48) {
+      opcode << "movsxd";
+      has_modrm = true;
+      load = true;
+    } else {
+      // In 32-bit mode (!supports_rex_) this is ARPL. With no REX prefix the functionality is the
+      // same as 'mov', but use of the instruction is discouraged.
+      opcode << StringPrintf("unknown opcode '%02X'", *instr);
+    }
+    break;
   case 0x68: opcode << "push"; immediate_bytes = 4; break;
   case 0x69: opcode << "imul"; load = true; has_modrm = true; immediate_bytes = 4; break;
   case 0x6A: opcode << "push"; immediate_bytes = 1; break;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 183f667..d51179e 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -919,10 +919,11 @@
 
   static void PrintField(std::ostream& os, mirror::ArtField* field, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    FieldHelper fh(field);
-    const char* descriptor = fh.GetTypeDescriptor();
-    os << StringPrintf("%s: ", fh.GetName());
+    const char* descriptor = field->GetTypeDescriptor();
+    os << StringPrintf("%s: ", field->GetName());
     if (descriptor[0] != 'L' && descriptor[0] != '[') {
+      StackHandleScope<1> hs(Thread::Current());
+      FieldHelper fh(hs.NewHandle(field));
       mirror::Class* type = fh.GetType();
       if (type->IsPrimitiveLong()) {
         os << StringPrintf("%" PRId64 " (0x%" PRIx64 ")\n", field->Get64(obj), field->Get64(obj));
@@ -942,6 +943,8 @@
         os << StringPrintf("null   %s\n", PrettyDescriptor(descriptor).c_str());
       } else {
         // Grab the field type without causing resolution.
+        StackHandleScope<1> hs(Thread::Current());
+        FieldHelper fh(hs.NewHandle(field));
         mirror::Class* field_type = fh.GetType(false);
         if (field_type != NULL) {
           PrettyObjectValue(os, field_type, value);
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 8d08190..83cacac 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -20,6 +20,7 @@
 #include "quick/quick_method_frame_info.h"
 #include "registers_arm.h"
 #include "runtime.h"  // for Runtime::CalleeSaveType.
+#include "utils.h"
 
 namespace art {
 namespace arm {
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 0b7f268c..3be0faf 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -82,9 +82,9 @@
         "addl $4, %%esp"            // Pop referrer
         : "=a" (result)
           // Use the result from eax
-          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
-            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-            : );  // clobber.
+        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
+          // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+        : "memory");  // clobber.
     // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
     //       but compilation fails when declaring that.
 #elif defined(__arm__)
@@ -122,7 +122,7 @@
           // Use the result from r0
         : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
           [referrer] "r"(referrer)
-        : );  // clobber.
+        : "memory");  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         // Spill x0-x7 which we say we don't clobber. May contain args.
@@ -255,7 +255,8 @@
           "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
           "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
-          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");  // clobber.
+          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+          "memory");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -268,9 +269,10 @@
         ".cfi_adjust_cfa_offset -16\n\t"
         : "=a" (result)
           // Use the result from rax
-          : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer)
-            // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-            : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+        : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer)
+          // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
+        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+          "memory");  // clobber all
     // TODO: Should we clobber the other registers?
 #else
     LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
@@ -303,9 +305,9 @@
         "addl $4, %%esp"            // Pop referrer
         : "=a" (result)
           // Use the result from eax
-          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden)
-            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
-            : );  // clobber.
+        : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"m"(referrer), [hidden]"r"(hidden)
+          // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+        : "memory");  // clobber.
     // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
     //       but compilation fails when declaring that.
 #elif defined(__arm__)
@@ -343,9 +345,9 @@
         "mov %[result], r0\n\t"     // Save the result
         : [result] "=r" (result)
           // Use the result from r0
-          : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
-            [referrer] "r"(referrer), [hidden] "r"(hidden)
-            : );  // clobber.
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer), [hidden] "r"(hidden)
+        : "memory");  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         // Spill x0-x7 which we say we don't clobber. May contain args.
@@ -477,7 +479,8 @@
           "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
           "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
           "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
-          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");  // clobber.
+          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+          "memory");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -494,7 +497,8 @@
         // Use the result from rax
         : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer), [hidden] "m"(hidden)
         // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
-        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+        : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+          "memory");  // clobber all
     // TODO: Should we clobber the other registers?
 #else
     LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
@@ -1567,8 +1571,7 @@
       StackHandleScope<1> hs(self);
       Handle<mirror::ArtField> f(hs.NewHandle(fields->Get(i)));
 
-      FieldHelper fh(f.Get());
-      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      Primitive::Type type = f->GetTypeAsPrimitiveType();
       switch (type) {
         case Primitive::Type::kPrimInt:
           if (test_type == type) {
@@ -1584,7 +1587,7 @@
 
         case Primitive::Type::kPrimNot:
           // Don't try array.
-          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+          if (test_type == type && f->GetTypeDescriptor()[0] != '[') {
             GetSetObjStatic(&obj, &f, self, m.Get(), test);
           }
           break;
@@ -1603,8 +1606,7 @@
       StackHandleScope<1> hs(self);
       Handle<mirror::ArtField> f(hs.NewHandle(fields->Get(i)));
 
-      FieldHelper fh(f.Get());
-      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      Primitive::Type type = f->GetTypeAsPrimitiveType();
       switch (type) {
         case Primitive::Type::kPrimInt:
           if (test_type == type) {
@@ -1620,7 +1622,7 @@
 
         case Primitive::Type::kPrimNot:
           // Don't try array.
-          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+          if (test_type == type && f->GetTypeDescriptor()[0] != '[') {
             GetSetObjInstance(&obj, &f, self, m.Get(), test);
           }
           break;
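
The stub_test.cc changes above add "memory" to every clobber list, telling the compiler that the inline assembly (which calls into arbitrary ART stubs) may read or write memory, so cached values must be spilled before the asm and reloaded after it. A minimal, generic illustration of the clobber outside of ART (StoreFortyTwo is illustrative only):

#include <cstdio>

// Illustrative only: an asm statement that stores through a pointer must
// declare the "memory" clobber so the compiler flushes and reloads memory
// around it, just as the stub-invocation asm above must after calling
// arbitrary ART code.
static void StoreFortyTwo(int* p) {
#if defined(__x86_64__)
  __asm__ __volatile__("movl $42, (%0)" : : "r"(p) : "memory");
#else
  *p = 42;  // fallback for other architectures
#endif
}

int main() {
  int value = 0;
  StoreFortyTwo(&value);
  printf("%d\n", value);  // 42
  return 0;
}
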
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 0d9d388..1a60557 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -261,7 +261,7 @@
     // Helper signature is always
     // (method_idx, *this_object, *caller_method, *self, sp)
 
-    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %rdx  // pass caller Method*
+    movl FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %edx  // pass caller Method*
     movq %gs:THREAD_SELF_OFFSET, %rcx                      // pass Thread
     movq %rsp, %r8                                         // pass SP
 
@@ -897,7 +897,7 @@
 
 MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movq 8(%rsp), %rsi                 // pass referrer
+    movl 8(%rsp), %esi                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                        // arg0 is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
@@ -910,7 +910,7 @@
 
 MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movq 8(%rsp), %rdx                 // pass referrer
+    movl 8(%rsp), %edx                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                        // arg0 and arg1 are in rdi/rsi
     movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
@@ -923,7 +923,7 @@
 
 MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
-    movq 8(%rsp), %rcx                 // pass referrer
+    movl 8(%rsp), %ecx                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
     movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
@@ -953,7 +953,7 @@
 // This is singled out as the argument order is different.
 DEFINE_FUNCTION art_quick_set64_static
     movq %rsi, %rdx                    // pass new_val
-    movq 8(%rsp), %rsi                 // pass referrer
+    movl 8(%rsp), %esi                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                        // field_idx is in rdi
     movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
@@ -1008,7 +1008,7 @@
      * TODO: With proper hard-float support, this needs to be kept in sync with the quick compiler.
      */
 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
-    movq 16(%rsp), %rdi            // load caller Method*
+    movl 8(%rsp), %edi            // load caller Method*
     movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
     movd %xmm0, %rax               // get target method index stored in xmm0
     movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 9262db6..ed83a33 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -35,161 +35,14 @@
 
 class Mutex;
 
-#if ART_HAVE_STDATOMIC
-template<typename T>
-class Atomic : public std::atomic<T> {
- public:
-  COMPILE_ASSERT(sizeof(T) == sizeof(std::atomic<T>),
-                 std_atomic_size_differs_from_that_of_underlying_type);
-  COMPILE_ASSERT(alignof(T) == alignof(std::atomic<T>),
-                 std_atomic_alignment_differs_from_that_of_underlying_type);
-
-  Atomic<T>() : std::atomic<T>() { }
-
-  explicit Atomic<T>(T value) : std::atomic<T>(value) { }
-
-  // Load from memory without ordering or synchronization constraints.
-  T LoadRelaxed() const {
-    return this->load(std::memory_order_relaxed);
-  }
-
-  // Load from memory with a total ordering.
-  T LoadSequentiallyConsistent() const {
-    return this->load(std::memory_order_seq_cst);
-  }
-
-  // Store to memory without ordering or synchronization constraints.
-  void StoreRelaxed(T desired) {
-    this->store(desired, std::memory_order_relaxed);
-  }
-
-  // Store to memory with a total ordering.
-  void StoreSequentiallyConsistent(T desired) {
-    this->store(desired, std::memory_order_seq_cst);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. Doesn't
-  // imply ordering or synchronization constraints.
-  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
-    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. Prior writes
-  // made to other memory locations by the thread that did the release become visible in this
-  // thread.
-  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
-    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. prior writes
-  // to other memory locations become visible to the threads that do a consume or an acquire on the
-  // same location.
-  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
-    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
-  }
-
-  T FetchAndAddSequentiallyConsistent(const T value) {
-    return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
-  }
-
-  T FetchAndSubSequentiallyConsistent(const T value) {
-    return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
-  }
-
-  volatile T* Address() {
-    return reinterpret_cast<T*>(this);
-  }
-
-  static T MaxValue() {
-    return std::numeric_limits<T>::max();
-  }
-};
-#else
-template<typename T>
-class Atomic {
- public:
-  Atomic<T>() : value_(0) { }
-
-  explicit Atomic<T>(T value) : value_(value) { }
-
-  // Load from memory without ordering or synchronization constraints.
-  T LoadRelaxed() const {
-    return value_;
-  }
-
-  // Load from memory with a total ordering.
-  T LoadSequentiallyConsistent() const;
-
-  // Store to memory without ordering or synchronization constraints.
-  void StoreRelaxed(T desired) {
-    value_ = desired;
-  }
-
-  // Store to memory with a total ordering.
-  void StoreSequentiallyConsistent(T desired);
-
-  // Atomically replace the value with desired value if it matches the expected value. Doesn't
-  // imply ordering or synchronization constraints.
-  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
-    // TODO: make this relaxed.
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. Prior writes
-  // made to other memory locations by the thread that did the release become visible in this
-  // thread.
-  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
-    // TODO: make this acquire.
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  // Atomically replace the value with desired value if it matches the expected value. prior writes
-  // to other memory locations become visible to the threads that do a consume or an acquire on the
-  // same location.
-  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
-    // TODO: make this release.
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  volatile T* Address() {
-    return &value_;
-  }
-
-  T FetchAndAddSequentiallyConsistent(const T value) {
-    return __sync_fetch_and_add(&value_, value);  // Return old_value.
-  }
-
-  T FetchAndSubSequentiallyConsistent(const T value) {
-    return __sync_fetch_and_sub(&value_, value);  // Return old value.
-  }
-
-  T operator++() {  // Prefix operator.
-    return __sync_add_and_fetch(&value_, 1);  // Return new value.
-  }
-
-  T operator++(int) {  // Postfix operator.
-    return __sync_fetch_and_add(&value_, 1);  // Return old value.
-  }
-
-  T operator--() {  // Prefix operator.
-    return __sync_sub_and_fetch(&value_, 1);  // Return new value.
-  }
-
-  T operator--(int) {  // Postfix operator.
-    return __sync_fetch_and_sub(&value_, 1);  // Return old value.
-  }
-
-  static T MaxValue() {
-    return std::numeric_limits<T>::max();
-  }
-
- private:
-  T value_;
-};
-#endif
-
-typedef Atomic<int32_t> AtomicInteger;
-
+// QuasiAtomic encapsulates two separate facilities that we are
+// trying to move away from:  "quasiatomic" 64 bit operations
+// and custom memory fences.  For the time being, they remain
+// exposed.  Clients should be converted to use class Atomic below
+// whenever possible, and should eventually use C++11 atomics.
+// The two facilities that do not have a good C++11 analog are
+// ThreadFenceForConstructor and Atomic::*JavaData.
+//
 // NOTE: Two "quasiatomic" operations on the exact same memory address
 // are guaranteed to operate atomically with respect to each other,
 // but no guarantees are made about quasiatomic operations mixed with
@@ -286,6 +139,11 @@
 
   // Atomically compare the value at "addr" to "old_value", if equal replace it with "new_value"
   // and return true. Otherwise, don't swap, and return false.
+  // This is fully ordered, i.e. it has C++11 memory_order_seq_cst
+  // semantics (assuming all other accesses use a mutex if this one does).
+  // This has "strong" semantics; if it fails then it is guaranteed that
+  // at some point during the execution of Cas64, *addr was not equal to
+  // old_value.
   static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
     if (!kNeedSwapMutexes) {
       return __sync_bool_compare_and_swap(addr, old_value, new_value);
@@ -299,9 +157,37 @@
     return kNeedSwapMutexes;
   }
 
-  static void MembarLoadStore() {
+  #if ART_HAVE_STDATOMIC
+
+  static void ThreadFenceAcquire() {
+    std::atomic_thread_fence(std::memory_order_acquire);
+  }
+
+  static void ThreadFenceRelease() {
+    std::atomic_thread_fence(std::memory_order_release);
+  }
+
+  static void ThreadFenceForConstructor() {
+    #if defined(__aarch64__)
+      __asm__ __volatile__("dmb ishst" : : : "memory");
+    #else
+      std::atomic_thread_fence(std::memory_order_release);
+    #endif
+  }
+
+  static void ThreadFenceSequentiallyConsistent() {
+    std::atomic_thread_fence(std::memory_order_seq_cst);
+  }
+
+  #else
+
+  static void ThreadFenceAcquire() {
   #if defined(__arm__) || defined(__aarch64__)
     __asm__ __volatile__("dmb ish" : : : "memory");
+    // Could possibly use dmb ishld on aarch64
+    // But currently we also use this on volatile loads
+    // to enforce store atomicity.  Ishld is
+    // insufficient for that purpose.
   #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
@@ -311,9 +197,10 @@
   #endif
   }
 
-  static void MembarLoadLoad() {
+  static void ThreadFenceRelease() {
   #if defined(__arm__) || defined(__aarch64__)
     __asm__ __volatile__("dmb ish" : : : "memory");
+    // ishst doesn't order load followed by store.
   #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
@@ -323,7 +210,11 @@
   #endif
   }
 
-  static void MembarStoreStore() {
+  // Fence at the end of a constructor with final fields
+  // or allocation.  We believe this
+  // only has to order stores, and can thus be weaker than
+  // release on aarch64.
+  static void ThreadFenceForConstructor() {
   #if defined(__arm__) || defined(__aarch64__)
     __asm__ __volatile__("dmb ishst" : : : "memory");
   #elif defined(__i386__) || defined(__x86_64__)
@@ -335,7 +226,7 @@
   #endif
   }
 
-  static void MembarStoreLoad() {
+  static void ThreadFenceSequentiallyConsistent() {
   #if defined(__arm__) || defined(__aarch64__)
     __asm__ __volatile__("dmb ish" : : : "memory");
   #elif defined(__i386__) || defined(__x86_64__)
@@ -346,6 +237,7 @@
   #error Unexpected architecture
   #endif
   }
+  #endif
 
  private:
   static Mutex* GetSwapMutex(const volatile int64_t* addr);
@@ -360,19 +252,352 @@
   DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
 };
 
+#if ART_HAVE_STDATOMIC
+template<typename T>
+class Atomic : public std::atomic<T> {
+ public:
+  Atomic<T>() : std::atomic<T>() { }
+
+  explicit Atomic<T>(T value) : std::atomic<T>(value) { }
+
+  // Load from memory without ordering or synchronization constraints.
+  T LoadRelaxed() const {
+    return this->load(std::memory_order_relaxed);
+  }
+
+  // Word tearing allowed, but may race.
+  // TODO: Optimize?
+  // There has been some discussion of eventually disallowing word
+  // tearing for Java data loads.
+  T LoadJavaData() const {
+    return this->load(std::memory_order_relaxed);
+  }
+
+  // Load from memory with a total ordering.
+  // Corresponds exactly to a Java volatile load.
+  T LoadSequentiallyConsistent() const {
+    return this->load(std::memory_order_seq_cst);
+  }
+
+  // Store to memory without ordering or synchronization constraints.
+  void StoreRelaxed(T desired) {
+    this->store(desired, std::memory_order_relaxed);
+  }
+
+  // Word tearing allowed, but may race.
+  void StoreJavaData(T desired) {
+    this->store(desired, std::memory_order_relaxed);
+  }
+
+  // Store to memory with release ordering.
+  void StoreRelease(T desired) {
+    this->store(desired, std::memory_order_release);
+  }
+
+  // Store to memory with a total ordering.
+  void StoreSequentiallyConsistent(T desired) {
+    this->store(desired, std::memory_order_seq_cst);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value.
+  // Participates in total ordering of atomic operations.
+  bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
+    return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_seq_cst);
+  }
+
+  // The same, except it may fail spuriously.
+  bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_seq_cst);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Doesn't
+  // imply ordering or synchronization constraints.
+  bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
+    return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed);
+  }
+
+  // The same, except it may fail spuriously.
+  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // made to other memory locations by the thread that did the release become visible in this
+  // thread.
+  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_acquire);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
+    return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
+  }
+
+  T FetchAndAddSequentiallyConsistent(const T value) {
+    return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
+  }
+
+  T FetchAndSubSequentiallyConsistent(const T value) {
+    return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
+  }
+
+  volatile T* Address() {
+    return reinterpret_cast<T*>(this);
+  }
+
+  static T MaxValue() {
+    return std::numeric_limits<T>::max();
+  }
+};
+
+#else
+
+template<typename T> class Atomic;
+
+// Helper class for Atomic to deal separately with size 8 and small
+// objects.  Should not be used directly.
+
+template<int SZ, class T> struct AtomicHelper {
+  friend class Atomic<T>;
+
+ private:
+  COMPILE_ASSERT(sizeof(T) <= 4, bad_atomic_helper_arg);
+
+  static T LoadRelaxed(const volatile T* loc) {
+    // sizeof(T) <= 4
+    return *loc;
+  }
+
+  static void StoreRelaxed(volatile T* loc, T desired) {
+    // sizeof(T) <= 4
+    *loc = desired;
+  }
+
+  static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
+                                                  T expected_value, T desired_value) {
+    // sizeof(T) <= 4
+    return __sync_bool_compare_and_swap(loc, expected_value, desired_value);
+  }
+};
+
+template<class T> struct AtomicHelper<8, T> {
+  friend class Atomic<T>;
+
+ private:
+  COMPILE_ASSERT(sizeof(T) == 8, bad_large_atomic_helper_arg);
+
+  static T LoadRelaxed(const volatile T* loc) {
+    // sizeof(T) == 8
+    volatile const int64_t* loc_ptr =
+              reinterpret_cast<volatile const int64_t*>(loc);
+    return static_cast<T>(QuasiAtomic::Read64(loc_ptr));
+  }
+
+  static void StoreRelaxed(volatile T* loc, T desired) {
+    // sizeof(T) == 8
+    volatile int64_t* loc_ptr =
+                reinterpret_cast<volatile int64_t*>(loc);
+    QuasiAtomic::Write64(loc_ptr,
+                         static_cast<int64_t>(desired));
+  }
+
+
+  static bool CompareExchangeStrongSequentiallyConsistent(volatile T* loc,
+                                                  T expected_value, T desired_value) {
+    // sizeof(T) == 8
+    volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc);
+    return QuasiAtomic::Cas64(
+                 static_cast<int64_t>(reinterpret_cast<uintptr_t>(expected_value)),
+                 static_cast<int64_t>(reinterpret_cast<uintptr_t>(desired_value)), loc_ptr);
+  }
+};
+
+template<typename T>
+class Atomic {
+ private:
+  COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg);
+
+ public:
+  Atomic<T>() : value_(0) { }
+
+  explicit Atomic<T>(T value) : value_(value) { }
+
+  // Load from memory without ordering or synchronization constraints.
+  T LoadRelaxed() const {
+    return AtomicHelper<sizeof(T), T>::LoadRelaxed(&value_);
+  }
+
+  // Word tearing allowed, but may race.
+  T LoadJavaData() const {
+    return value_;
+  }
+
+  // Load from memory with a total ordering.
+  T LoadSequentiallyConsistent() const;
+
+  // Store to memory without ordering or synchronization constraints.
+  void StoreRelaxed(T desired) {
+    AtomicHelper<sizeof(T), T>::StoreRelaxed(&value_, desired);
+  }
+
+  // Word tearing allowed, but may race.
+  void StoreJavaData(T desired) {
+    value_ = desired;
+  }
+
+  // Store to memory with release ordering.
+  void StoreRelease(T desired);
+
+  // Store to memory with a total ordering.
+  void StoreSequentiallyConsistent(T desired);
+
+  // Atomically replace the value with desired value if it matches the expected value.
+  // Participates in total ordering of atomic operations.
+  bool CompareExchangeStrongSequentiallyConsistent(T expected_value, T desired_value) {
+    return AtomicHelper<sizeof(T), T>::
+        CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
+  }
+
+  // The same, but may fail spuriously.
+  bool CompareExchangeWeakSequentiallyConsistent(T expected_value, T desired_value) {
+    // TODO: Take advantage of the fact that it may fail spuriously.
+    return AtomicHelper<sizeof(T), T>::
+        CompareExchangeStrongSequentiallyConsistent(&value_, expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Doesn't
+  // imply ordering or synchronization constraints.
+  bool CompareExchangeStrongRelaxed(T expected_value, T desired_value) {
+    // TODO: make this relaxed.
+    return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
+  }
+
+  // The same, but may fail spuriously.
+  bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
+    // TODO: Take advantage of the fact that it may fail spuriously.
+    // TODO: make this relaxed.
+    return CompareExchangeStrongSequentiallyConsistent(expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior accesses
+  // made to other memory locations by the thread that did the release become visible in this
+  // thread.
+  bool CompareExchangeWeakAcquire(T expected_value, T desired_value) {
+    // TODO: make this acquire.
+    return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
+  }
+
+  // Atomically replace the value with desired value if it matches the expected value. Prior accesses
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeWeakRelease(T expected_value, T desired_value) {
+    // TODO: make this release.
+    return CompareExchangeWeakSequentiallyConsistent(expected_value, desired_value);
+  }
+
+  volatile T* Address() {
+    return &value_;
+  }
+
+  T FetchAndAddSequentiallyConsistent(const T value) {
+    if (sizeof(T) <= 4) {
+      return __sync_fetch_and_add(&value_, value);  // Return old value.
+    } else {
+      T expected;
+      do {
+        expected = LoadRelaxed();
+      } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected + value));
+      return expected;
+    }
+  }
+
+  T FetchAndSubSequentiallyConsistent(const T value) {
+    if (sizeof(T) <= 4) {
+      return __sync_fetch_and_sub(&value_, value);  // Return old value.
+    } else {
+      return FetchAndAddSequentiallyConsistent(-value);
+    }
+  }
+
+  T operator++() {  // Prefix operator.
+    if (sizeof(T) <= 4) {
+      return __sync_add_and_fetch(&value_, 1);  // Return new value.
+    } else {
+      return FetchAndAddSequentiallyConsistent(1) + 1;
+    }
+  }
+
+  T operator++(int) {  // Postfix operator.
+    return FetchAndAddSequentiallyConsistent(1);
+  }
+
+  T operator--() {  // Prefix operator.
+    if (sizeof(T) <= 4) {
+      return __sync_sub_and_fetch(&value_, 1);  // Return new value.
+    } else {
+      return FetchAndSubSequentiallyConsistent(1) - 1;
+    }
+  }
+
+  T operator--(int) {  // Postfix operator.
+    return FetchAndSubSequentiallyConsistent(1);
+  }
+
+  static T MaxValue() {
+    return std::numeric_limits<T>::max();
+  }
+
+
+ private:
+  volatile T value_;
+};
+#endif
+
+typedef Atomic<int32_t> AtomicInteger;
+
+COMPILE_ASSERT(sizeof(AtomicInteger) == sizeof(int32_t), weird_atomic_int_size);
+COMPILE_ASSERT(alignof(AtomicInteger) == alignof(int32_t),
+               atomic_int_alignment_differs_from_that_of_underlying_type);
+COMPILE_ASSERT(sizeof(Atomic<int64_t>) == sizeof(int64_t), weird_atomic_int64_size);
+#if defined(__LP64__)
+  COMPILE_ASSERT(alignof(Atomic<int64_t>) == alignof(int64_t),
+                 atomic_int64_alignment_differs_from_that_of_underlying_type);
+#endif
+// The above fails on x86-32.
+// This is OK, since we explicitly arrange for alignment of 8-byte fields.
+
+
 #if !ART_HAVE_STDATOMIC
 template<typename T>
 inline T Atomic<T>::LoadSequentiallyConsistent() const {
   T result = value_;
-  QuasiAtomic::MembarLoadLoad();
+  if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
+    QuasiAtomic::ThreadFenceAcquire();
+    // We optimistically assume this suffices for store atomicity.
+    // On ARMv8 we strengthen ThreadFenceAcquire to make that true.
+  }
   return result;
 }
 
 template<typename T>
+inline void Atomic<T>::StoreRelease(T desired) {
+  if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
+    QuasiAtomic::ThreadFenceRelease();
+  }
+  StoreRelaxed(desired);
+}
+
+template<typename T>
 inline void Atomic<T>::StoreSequentiallyConsistent(T desired) {
-  QuasiAtomic::MembarStoreStore();
-  value_ = desired;
-  QuasiAtomic::MembarStoreLoad();
+  if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
+    QuasiAtomic::ThreadFenceRelease();
+  }
+  StoreRelaxed(desired);
+  if (sizeof(T) != 8 || !QuasiAtomic::LongAtomicsUseMutexes()) {
+    QuasiAtomic::ThreadFenceSequentiallyConsistent();
+  }
 }
 
 #endif
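
On the ART_HAVE_STDATOMIC path the rewritten Atomic<T> simply forwards to std::atomic<T> with explicit memory orders: LoadSequentiallyConsistent() is load(seq_cst), StoreRelease() is store(release), FetchAndAddSequentiallyConsistent() is fetch_add(seq_cst), and so on. A small usage sketch written directly against std::atomic to show the corresponding calls (illustrative only):

#include <atomic>
#include <cstdint>
#include <cstdio>

int main() {
  std::atomic<int32_t> counter(0);
  counter.fetch_add(1, std::memory_order_seq_cst);   // FetchAndAddSequentiallyConsistent
  counter.store(5, std::memory_order_seq_cst);       // StoreSequentiallyConsistent
  int32_t expected = 5;
  bool swapped = counter.compare_exchange_strong(
      expected, 6, std::memory_order_seq_cst);       // CompareExchangeStrongSequentiallyConsistent
  printf("%d %d\n", static_cast<int>(counter.load(std::memory_order_seq_cst)),
         swapped ? 1 : 0);                            // prints "6 1"
  return 0;
}
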
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index a9472f7..d20eb17 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -25,6 +25,8 @@
 
 #include "cutils/atomic-inline.h"
 #include "cutils/trace.h"
+
+#include "base/stringprintf.h"
 #include "runtime.h"
 #include "thread.h"
 
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 11698e2..aeece74 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -331,7 +331,10 @@
         num_contenders_--;
       }
     } while (!done);
-    QuasiAtomic::MembarStoreLoad();
+    // We assert that no memory fence is needed here, since
+    // __sync_bool_compare_and_swap includes it.
+    // TODO: Change state_ to be an art::Atomic and use an intention-revealing CAS operation
+    // that exposes the ordering semantics.
     DCHECK_EQ(state_, 1);
     exclusive_owner_ = SafeGetTid(self);
 #else
@@ -364,7 +367,7 @@
         return false;
       }
     } while (!done);
-    QuasiAtomic::MembarStoreLoad();
+    // We again assert no memory fence is needed.
     DCHECK_EQ(state_, 1);
     exclusive_owner_ = SafeGetTid(self);
 #else
@@ -403,7 +406,7 @@
   do {
     int32_t cur_state = state_;
     if (LIKELY(cur_state == 1)) {
-      QuasiAtomic::MembarStoreStore();
+      // The __sync_bool_compare_and_swap enforces the necessary memory ordering.
       // We're no longer the owner.
       exclusive_owner_ = 0;
       // Change state to 0.
@@ -426,7 +429,6 @@
       }
     }
   } while (!done);
-  QuasiAtomic::MembarStoreLoad();
 #else
     CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
 #endif
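
The Mutex changes above drop the explicit MembarStoreLoad/MembarStoreStore calls because __sync_bool_compare_and_swap is a full barrier, so the CAS that acquires or releases the lock already provides the required ordering. A minimal sketch of that reliance (a toy spin lock, illustrative only and not the art::Mutex implementation):

#include <cstdio>

// Illustrative only: a tiny spin lock built on __sync_bool_compare_and_swap.
// The builtin is a full barrier, which is why the Mutex code above no longer
// needs separate fence calls around the CAS.
static int lock_state = 0;  // 0 = free, 1 = held

static void LockSpin() {
  while (!__sync_bool_compare_and_swap(&lock_state, 0, 1)) {
    // spin until the CAS succeeds; the CAS itself orders the critical section
  }
}

static void Unlock() {
  // The CAS back to 0 carries the needed ordering on release as well.
  __sync_bool_compare_and_swap(&lock_state, 1, 0);
}

int main() {
  LockSpin();
  printf("in critical section\n");
  Unlock();
  return 0;
}
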
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index cfd0c00..46c4389 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -195,8 +195,9 @@
    */
   void CheckFieldType(jvalue value, jfieldID fid, char prim, bool isStatic)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    mirror::ArtField* f = CheckFieldID(fid);
-    if (f == nullptr) {
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::ArtField> f(hs.NewHandle(CheckFieldID(fid)));
+    if (f.Get() == nullptr) {
       return;
     }
     mirror::Class* field_type = FieldHelper(f).GetType();
@@ -215,22 +216,24 @@
         } else {
           if (!obj->InstanceOf(field_type)) {
             JniAbortF(function_name_, "attempt to set field %s with value of wrong type: %s",
-                      PrettyField(f).c_str(), PrettyTypeOf(obj).c_str());
+                      PrettyField(f.Get()).c_str(), PrettyTypeOf(obj).c_str());
             return;
           }
         }
       }
     } else if (field_type != Runtime::Current()->GetClassLinker()->FindPrimitiveClass(prim)) {
       JniAbortF(function_name_, "attempt to set field %s with value of wrong type: %c",
-                PrettyField(f).c_str(), prim);
+                PrettyField(f.Get()).c_str(), prim);
       return;
     }
 
-    if (isStatic != f->IsStatic()) {
+    if (isStatic != f.Get()->IsStatic()) {
       if (isStatic) {
-        JniAbortF(function_name_, "accessing non-static field %s as static", PrettyField(f).c_str());
+        JniAbortF(function_name_, "accessing non-static field %s as static",
+                  PrettyField(f.Get()).c_str());
       } else {
-        JniAbortF(function_name_, "accessing static field %s as non-static", PrettyField(f).c_str());
+        JniAbortF(function_name_, "accessing static field %s as non-static",
+                  PrettyField(f.Get()).c_str());
       }
       return;
     }
@@ -256,8 +259,7 @@
       return;
     }
     mirror::Class* c = o->GetClass();
-    FieldHelper fh(f);
-    if (c->FindInstanceField(fh.GetName(), fh.GetTypeDescriptor()) == nullptr) {
+    if (c->FindInstanceField(f->GetName(), f->GetTypeDescriptor()) == nullptr) {
       JniAbortF(function_name_, "jfieldID %s not valid for an object of class %s",
                 PrettyField(f).c_str(), PrettyTypeOf(o).c_str());
     }
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index b9c42ee..330b110 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -502,29 +502,24 @@
       FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
 
   mirror::ArtField* pendingNext = java_lang_ref_Reference->GetInstanceField(0);
-  FieldHelper fh(pendingNext);
-  CHECK_STREQ(fh.GetName(), "pendingNext");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
+  CHECK_STREQ(pendingNext->GetName(), "pendingNext");
+  CHECK_STREQ(pendingNext->GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
 
   mirror::ArtField* queue = java_lang_ref_Reference->GetInstanceField(1);
-  fh.ChangeField(queue);
-  CHECK_STREQ(fh.GetName(), "queue");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/ref/ReferenceQueue;");
+  CHECK_STREQ(queue->GetName(), "queue");
+  CHECK_STREQ(queue->GetTypeDescriptor(), "Ljava/lang/ref/ReferenceQueue;");
 
   mirror::ArtField* queueNext = java_lang_ref_Reference->GetInstanceField(2);
-  fh.ChangeField(queueNext);
-  CHECK_STREQ(fh.GetName(), "queueNext");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
+  CHECK_STREQ(queueNext->GetName(), "queueNext");
+  CHECK_STREQ(queueNext->GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
 
   mirror::ArtField* referent = java_lang_ref_Reference->GetInstanceField(3);
-  fh.ChangeField(referent);
-  CHECK_STREQ(fh.GetName(), "referent");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/Object;");
+  CHECK_STREQ(referent->GetName(), "referent");
+  CHECK_STREQ(referent->GetTypeDescriptor(), "Ljava/lang/Object;");
 
   mirror::ArtField* zombie = java_lang_ref_FinalizerReference->GetInstanceField(2);
-  fh.ChangeField(zombie);
-  CHECK_STREQ(fh.GetName(), "zombie");
-  CHECK_STREQ(fh.GetTypeDescriptor(), "Ljava/lang/Object;");
+  CHECK_STREQ(zombie->GetName(), "zombie");
+  CHECK_STREQ(zombie->GetTypeDescriptor(), "Ljava/lang/Object;");
 
   // ensure all class_roots_ are initialized
   for (size_t i = 0; i < kClassRootsMax; i++) {
@@ -3896,10 +3891,8 @@
   bool operator()(mirror::ArtField* field1, mirror::ArtField* field2)
       NO_THREAD_SAFETY_ANALYSIS {
     // First come reference fields, then 64-bit, and finally 32-bit
-    FieldHelper fh1(field1);
-    Primitive::Type type1 = fh1.GetTypeAsPrimitiveType();
-    FieldHelper fh2(field2);
-    Primitive::Type type2 = fh2.GetTypeAsPrimitiveType();
+    Primitive::Type type1 = field1->GetTypeAsPrimitiveType();
+    Primitive::Type type2 = field2->GetTypeAsPrimitiveType();
     if (type1 != type2) {
       bool is_primitive1 = type1 != Primitive::kPrimNot;
       bool is_primitive2 = type2 != Primitive::kPrimNot;
@@ -3914,9 +3907,7 @@
       }
     }
     // same basic group? then sort by string.
-    const char* name1 = fh1.GetName();
-    const char* name2 = fh2.GetName();
-    return strcmp(name1, name2) < 0;
+    return strcmp(field1->GetName(), field2->GetName()) < 0;
   }
 };
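
The ordering this comparator enforces (references first, then 64-bit primitives, then 32-bit, ties broken by name) can be summarized with a stand-alone sketch; FieldInfo is a hypothetical stand-in for what the real code reads from mirror::ArtField via GetTypeAsPrimitiveType() and GetName():

    #include <string>

    struct FieldInfo {
      bool is_reference;
      int size_in_bytes;   // 8 for long/double, 4 for other primitives and references
      std::string name;
    };

    // Strict weak ordering used to group fields for layout: references first,
    // then wider primitives, then narrower ones; within a group, sort by name.
    bool FieldComesFirst(const FieldInfo& a, const FieldInfo& b) {
      if (a.is_reference != b.is_reference) {
        return a.is_reference;
      }
      if (a.size_in_bytes != b.size_in_bytes) {
        return a.size_in_bytes > b.size_in_bytes;
      }
      return a.name < b.name;
    }
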
 
@@ -3961,8 +3952,7 @@
   size_t num_reference_fields = 0;
   for (; current_field < num_fields; current_field++) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
-    FieldHelper fh(field);
-    Primitive::Type type = fh.GetTypeAsPrimitiveType();
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
     bool isPrimitive = type != Primitive::kPrimNot;
     if (isPrimitive) {
       break;  // past last reference, move on to the next phase
@@ -3980,8 +3970,7 @@
   if (current_field != num_fields && !IsAligned<8>(field_offset.Uint32Value())) {
     for (size_t i = 0; i < grouped_and_sorted_fields.size(); i++) {
       mirror::ArtField* field = grouped_and_sorted_fields[i];
-      FieldHelper fh(field);
-      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      Primitive::Type type = field->GetTypeAsPrimitiveType();
       CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
       if (type == Primitive::kPrimLong || type == Primitive::kPrimDouble) {
         continue;
@@ -4003,8 +3992,7 @@
   while (!grouped_and_sorted_fields.empty()) {
     mirror::ArtField* field = grouped_and_sorted_fields.front();
     grouped_and_sorted_fields.pop_front();
-    FieldHelper fh(field);
-    Primitive::Type type = fh.GetTypeAsPrimitiveType();
+    Primitive::Type type = field->GetTypeAsPrimitiveType();
     CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
     fields->Set<false>(current_field, field);
     field->SetOffset(field_offset);
@@ -4020,8 +4008,7 @@
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
     CHECK_EQ(num_reference_fields, num_fields) << PrettyClass(klass.Get());
-    FieldHelper fh(fields->Get(num_fields - 1));
-    CHECK_STREQ(fh.GetName(), "referent") << PrettyClass(klass.Get());
+    CHECK_STREQ(fields->Get(num_fields - 1)->GetName(), "referent") << PrettyClass(klass.Get());
     --num_reference_fields;
   }
 
@@ -4038,11 +4025,10 @@
                     << " offset="
                     << field->GetField32(MemberOffset(mirror::ArtField::OffsetOffset()));
       }
-      FieldHelper fh(field);
-      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      Primitive::Type type = field->GetTypeAsPrimitiveType();
       bool is_primitive = type != Primitive::kPrimNot;
       if (klass->DescriptorEquals("Ljava/lang/ref/Reference;") &&
-          strcmp("referent", fh.GetName()) == 0) {
+          strcmp("referent", field->GetName()) == 0) {
         is_primitive = true;  // We lied above, so we have to expect a lie here.
       }
       if (is_primitive) {
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index e397a5c..45ab33a 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -171,11 +171,12 @@
 
   void AssertField(mirror::Class* klass, mirror::ArtField* field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    FieldHelper fh(field);
     EXPECT_TRUE(field != NULL);
     EXPECT_TRUE(field->GetClass() != NULL);
     EXPECT_EQ(klass, field->GetDeclaringClass());
-    EXPECT_TRUE(fh.GetName() != NULL);
+    EXPECT_TRUE(field->GetName() != NULL);
+    StackHandleScope<1> hs(Thread::Current());
+    FieldHelper fh(hs.NewHandle(field));
     EXPECT_TRUE(fh.GetType() != NULL);
   }
 
@@ -269,11 +270,12 @@
 
     // Confirm that all instances fields are packed together at the start
     EXPECT_GE(klass->NumInstanceFields(), klass->NumReferenceInstanceFields());
-    FieldHelper fh;
+    StackHandleScope<1> hs(Thread::Current());
+    FieldHelper fh(hs.NewHandle<mirror::ArtField>(nullptr));
     for (size_t i = 0; i < klass->NumReferenceInstanceFields(); i++) {
       mirror::ArtField* field = klass->GetInstanceField(i);
       fh.ChangeField(field);
-      ASSERT_TRUE(!fh.IsPrimitiveType());
+      ASSERT_TRUE(!field->IsPrimitiveType());
       mirror::Class* field_type = fh.GetType();
       ASSERT_TRUE(field_type != NULL);
       ASSERT_TRUE(!field_type->IsPrimitive());
@@ -283,10 +285,10 @@
       fh.ChangeField(field);
       mirror::Class* field_type = fh.GetType();
       ASSERT_TRUE(field_type != NULL);
-      if (!fh.IsPrimitiveType() || !field_type->IsPrimitive()) {
+      if (!fh.GetField()->IsPrimitiveType() || !field_type->IsPrimitive()) {
         // While Reference.referent is not primitive, the ClassLinker
         // treats it as such so that the garbage collector won't scan it.
-        EXPECT_EQ(PrettyField(field), "java.lang.Object java.lang.ref.Reference.referent");
+        EXPECT_EQ(PrettyField(fh.GetField()), "java.lang.Object java.lang.ref.Reference.referent");
       }
     }
 
@@ -390,11 +392,9 @@
       error = true;
     }
 
-    FieldHelper fh;
     for (size_t i = 0; i < offsets.size(); i++) {
       mirror::ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
-      fh.ChangeField(field);
-      StringPiece field_name(fh.GetName());
+      StringPiece field_name(field->GetName());
       if (field_name != offsets[i].java_name) {
         error = true;
       }
@@ -403,8 +403,7 @@
       for (size_t i = 0; i < offsets.size(); i++) {
         CheckOffset& offset = offsets[i];
         mirror::ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
-        fh.ChangeField(field);
-        StringPiece field_name(fh.GetName());
+        StringPiece field_name(field->GetName());
         if (field_name != offsets[i].java_name) {
           LOG(ERROR) << "JAVA FIELD ORDER MISMATCH NEXT LINE:";
         }
@@ -731,15 +730,11 @@
   } else {
     EXPECT_EQ(4U, JavaLangObject->NumInstanceFields());
   }
-  FieldHelper fh(JavaLangObject->GetInstanceField(0));
-  EXPECT_STREQ(fh.GetName(), "shadow$_klass_");
-  fh.ChangeField(JavaLangObject->GetInstanceField(1));
-  EXPECT_STREQ(fh.GetName(), "shadow$_monitor_");
+  EXPECT_STREQ(JavaLangObject->GetInstanceField(0)->GetName(), "shadow$_klass_");
+  EXPECT_STREQ(JavaLangObject->GetInstanceField(1)->GetName(), "shadow$_monitor_");
   if (kUseBakerOrBrooksReadBarrier) {
-    fh.ChangeField(JavaLangObject->GetInstanceField(2));
-    EXPECT_STREQ(fh.GetName(), "shadow$_x_rb_ptr_");
-    fh.ChangeField(JavaLangObject->GetInstanceField(3));
-    EXPECT_STREQ(fh.GetName(), "shadow$_x_xpadding_");
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(2)->GetName(), "shadow$_x_rb_ptr_");
+    EXPECT_STREQ(JavaLangObject->GetInstanceField(3)->GetName(), "shadow$_x_xpadding_");
   }
 
   EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
@@ -850,29 +845,21 @@
   NullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Boolean;", class_loader);
-  FieldHelper fh(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Byte;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Character;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Double;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Float;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Integer;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Long;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Short;", class_loader);
-  fh.ChangeField(c->GetIFields()->Get(0));
-  EXPECT_STREQ("value", fh.GetName());
+  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
 }
 
 TEST_F(ClassLinkerTest, TwoClassLoadersOneClass) {
@@ -907,58 +894,49 @@
   EXPECT_EQ(9U, statics->NumStaticFields());
 
   mirror::ArtField* s0 = mirror::Class::FindStaticField(soa.Self(), statics, "s0", "Z");
-  FieldHelper fh(s0);
   EXPECT_STREQ(s0->GetClass()->GetDescriptor().c_str(), "Ljava/lang/reflect/ArtField;");
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimBoolean);
+  EXPECT_EQ(s0->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   EXPECT_EQ(true, s0->GetBoolean(statics.Get()));
   s0->SetBoolean<false>(statics.Get(), false);
 
   mirror::ArtField* s1 = mirror::Class::FindStaticField(soa.Self(), statics, "s1", "B");
-  fh.ChangeField(s1);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimByte);
+  EXPECT_EQ(s1->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   EXPECT_EQ(5, s1->GetByte(statics.Get()));
   s1->SetByte<false>(statics.Get(), 6);
 
   mirror::ArtField* s2 = mirror::Class::FindStaticField(soa.Self(), statics, "s2", "C");
-  fh.ChangeField(s2);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimChar);
+  EXPECT_EQ(s2->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   EXPECT_EQ('a', s2->GetChar(statics.Get()));
   s2->SetChar<false>(statics.Get(), 'b');
 
   mirror::ArtField* s3 = mirror::Class::FindStaticField(soa.Self(), statics, "s3", "S");
-  fh.ChangeField(s3);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimShort);
+  EXPECT_EQ(s3->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   EXPECT_EQ(-536, s3->GetShort(statics.Get()));
   s3->SetShort<false>(statics.Get(), -535);
 
   mirror::ArtField* s4 = mirror::Class::FindStaticField(soa.Self(), statics, "s4", "I");
-  fh.ChangeField(s4);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimInt);
+  EXPECT_EQ(s4->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   EXPECT_EQ(2000000000, s4->GetInt(statics.Get()));
   s4->SetInt<false>(statics.Get(), 2000000001);
 
   mirror::ArtField* s5 = mirror::Class::FindStaticField(soa.Self(), statics, "s5", "J");
-  fh.ChangeField(s5);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimLong);
+  EXPECT_EQ(s5->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   EXPECT_EQ(0x1234567890abcdefLL, s5->GetLong(statics.Get()));
   s5->SetLong<false>(statics.Get(), INT64_C(0x34567890abcdef12));
 
   mirror::ArtField* s6 = mirror::Class::FindStaticField(soa.Self(), statics, "s6", "F");
-  fh.ChangeField(s6);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimFloat);
+  EXPECT_EQ(s6->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   EXPECT_EQ(0.5, s6->GetFloat(statics.Get()));
   s6->SetFloat<false>(statics.Get(), 0.75);
 
   mirror::ArtField* s7 = mirror::Class::FindStaticField(soa.Self(), statics, "s7", "D");
-  fh.ChangeField(s7);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimDouble);
+  EXPECT_EQ(s7->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   EXPECT_EQ(16777217, s7->GetDouble(statics.Get()));
   s7->SetDouble<false>(statics.Get(), 16777219);
 
   mirror::ArtField* s8 = mirror::Class::FindStaticField(soa.Self(), statics, "s8",
                                                         "Ljava/lang/String;");
-  fh.ChangeField(s8);
-  EXPECT_TRUE(fh.GetTypeAsPrimitiveType() == Primitive::kPrimNot);
+  EXPECT_EQ(s8->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
   EXPECT_TRUE(s8->GetObject(statics.Get())->AsString()->Equals("android"));
   s8->SetObject<false>(s8->GetDeclaringClass(),
                        mirror::String::AllocFromModifiedUtf8(soa.Self(), "robot"));
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 07cb9d1..8e2340c 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1378,8 +1378,7 @@
 
 std::string Dbg::GetFieldName(JDWP::FieldId field_id)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtField* f = FromFieldId(field_id);
-  return FieldHelper(f).GetName();
+  return FromFieldId(field_id)->GetName();
 }
 
 /*
@@ -1454,10 +1453,9 @@
 
   for (size_t i = 0; i < instance_field_count + static_field_count; ++i) {
     mirror::ArtField* f = (i < instance_field_count) ? c->GetInstanceField(i) : c->GetStaticField(i - instance_field_count);
-    FieldHelper fh(f);
     expandBufAddFieldId(pReply, ToFieldId(f));
-    expandBufAddUtf8String(pReply, fh.GetName());
-    expandBufAddUtf8String(pReply, fh.GetTypeDescriptor());
+    expandBufAddUtf8String(pReply, f->GetName());
+    expandBufAddUtf8String(pReply, f->GetTypeDescriptor());
     if (with_generic) {
       static const char genericSignature[1] = "";
       expandBufAddUtf8String(pReply, genericSignature);
@@ -1623,7 +1621,7 @@
 void Dbg::OutputFieldValue(JDWP::FieldId field_id, const JValue* field_value,
                            JDWP::ExpandBuf* pReply) {
   mirror::ArtField* f = FromFieldId(field_id);
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
   OutputJValue(tag, field_value, pReply);
 }
 
@@ -1646,11 +1644,11 @@
 }
 
 JDWP::JdwpTag Dbg::GetFieldBasicTag(JDWP::FieldId field_id) {
-  return BasicTagFromDescriptor(FieldHelper(FromFieldId(field_id)).GetTypeDescriptor());
+  return BasicTagFromDescriptor(FromFieldId(field_id)->GetTypeDescriptor());
 }
 
 JDWP::JdwpTag Dbg::GetStaticFieldBasicTag(JDWP::FieldId field_id) {
-  return BasicTagFromDescriptor(FieldHelper(FromFieldId(field_id)).GetTypeDescriptor());
+  return BasicTagFromDescriptor(FromFieldId(field_id)->GetTypeDescriptor());
 }
 
 static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId ref_type_id, JDWP::ObjectId object_id,
@@ -1694,7 +1692,7 @@
     o = f->GetDeclaringClass();
   }
 
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
   JValue field_value;
   if (tag == JDWP::JT_VOID) {
     LOG(FATAL) << "Unknown tag: " << tag;
@@ -1743,7 +1741,7 @@
     o = f->GetDeclaringClass();
   }
 
-  JDWP::JdwpTag tag = BasicTagFromDescriptor(FieldHelper(f).GetTypeDescriptor());
+  JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
 
   if (IsPrimitiveTag(tag)) {
     if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) {
@@ -1761,7 +1759,14 @@
       return JDWP::ERR_INVALID_OBJECT;
     }
     if (v != NULL) {
-      mirror::Class* field_type = FieldHelper(f).GetType();
+      mirror::Class* field_type;
+      {
+        StackHandleScope<3> hs(Thread::Current());
+        HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v));
+        HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
+        HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
+        field_type = FieldHelper(h_f).GetType();
+      }
       if (!field_type->IsAssignableFrom(v->GetClass())) {
         return JDWP::ERR_INVALID_OBJECT;
       }
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 1d5032d..8270a2b 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -26,7 +26,6 @@
 #include "globals.h"
 #include "invoke_type.h"
 #include "jni.h"
-#include "mem_map.h"
 #include "modifiers.h"
 #include "safe_map.h"
 
@@ -41,6 +40,7 @@
   class DexCache;
 }  // namespace mirror
 class ClassLinker;
+class MemMap;
 class Signature;
 template<class T> class Handle;
 class StringPiece;
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 0494f22..0a71d62 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -20,6 +20,7 @@
 
 #include <iomanip>
 
+#include "base/stringprintf.h"
 #include "dex_file-inl.h"
 #include "utils.h"
 
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 5d20096..0df8211 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -20,6 +20,7 @@
 #include <unistd.h>
 
 #include "base/logging.h"
+#include "base/stringprintf.h"
 #include "base/stl_util.h"
 #include "utils.h"
 #include "instruction_set.h"
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 58b4286..d0ae746 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -327,8 +327,8 @@
       ThrowIllegalAccessErrorFinalField(referrer, resolved_field);
       return nullptr;  // Failure.
     } else {
-      FieldHelper fh(resolved_field);
-      if (UNLIKELY(fh.IsPrimitiveType() != is_primitive || fh.FieldSize() != expected_size)) {
+      if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
+                   resolved_field->FieldSize() != expected_size)) {
         ThrowLocation throw_location = self->GetCurrentLocationForThrow();
         DCHECK(throw_location.GetMethod() == referrer);
         self->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchFieldError;",
@@ -553,9 +553,8 @@
     // Illegal access.
     return NULL;
   }
-  FieldHelper fh(resolved_field);
-  if (UNLIKELY(fh.IsPrimitiveType() != is_primitive ||
-               fh.FieldSize() != expected_size)) {
+  if (UNLIKELY(resolved_field->IsPrimitiveType() != is_primitive ||
+               resolved_field->FieldSize() != expected_size)) {
     return NULL;
   }
   return resolved_field;
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 844367d..3178cde 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -197,7 +197,7 @@
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
                                           sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
-    if (LIKELY(!FieldHelper(field).IsPrimitiveType())) {
+    if (LIKELY(!field->IsPrimitiveType())) {
       // Compiled code can't use transactional mode.
       field->SetObj<false>(field->GetDeclaringClass(), new_value);
       return 0;  // success
@@ -226,8 +226,12 @@
     return 0;  // success
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
-  field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
-                                                          sizeof(int32_t));
+  {
+    StackHandleScope<1> hs(self);
+    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
+    field = FindFieldFromCode<InstancePrimitiveWrite, true>(field_idx, referrer, self,
+                                                            sizeof(int32_t));
+  }
   if (LIKELY(field != NULL)) {
     if (UNLIKELY(obj == NULL)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1d524cb..5374f22 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -172,8 +172,12 @@
   // | Padding         |
   // | RDI/Method*     |  <- sp
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
-  static constexpr size_t kNumQuickGprArgs = 5;  // 3 arguments passed in GPRs.
-  static constexpr size_t kNumQuickFprArgs = 8;  // 0 arguments passed in FPRs.
+#ifdef TARGET_REX_SUPPORT
+  static constexpr size_t kNumQuickGprArgs = 5;  // 5 arguments passed in GPRs.
+#else
+  static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs if r8..r15 not enabled.
+#endif
+  static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80;  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168;  // Offset of return address.
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 97d3c2f..026f5b9 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -29,7 +29,7 @@
 
 namespace mirror {
 class ArtMethod;
-}       // namespace mirror
+}  // namespace mirror
 
 class FaultHandler;
 
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 3cb8d94..c294bae 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -72,6 +72,12 @@
 }
 
 template<size_t kAlignment>
+std::string SpaceBitmap<kAlignment>::Dump() const {
+  return StringPrintf("%s: %p-%p", name_.c_str(), reinterpret_cast<void*>(HeapBegin()),
+                      reinterpret_cast<void*>(HeapLimit()));
+}
+
+template<size_t kAlignment>
 void SpaceBitmap<kAlignment>::Clear() {
   if (bitmap_begin_ != NULL) {
     // This returns the memory to the system.  Successive page faults will return zeroed memory.
@@ -180,11 +186,10 @@
   if (fields != NULL) {
     for (int32_t i = 0; i < fields->GetLength(); i++) {
       mirror::ArtField* field = fields->Get(i);
-      FieldHelper fh(field);
-      if (!fh.IsPrimitiveType()) {
+      if (!field->IsPrimitiveType()) {
         mirror::Object* value = field->GetObj(obj);
         if (value != NULL) {
-          WalkFieldsInOrder(visited, callback, value,  arg);
+          WalkFieldsInOrder(visited, callback, value, arg);
         }
       }
     }
@@ -210,8 +215,7 @@
     if (fields != NULL) {
       for (int32_t i = 0; i < fields->GetLength(); i++) {
         mirror::ArtField* field = fields->Get(i);
-        FieldHelper fh(field);
-        if (!fh.IsPrimitiveType()) {
+        if (!field->IsPrimitiveType()) {
           mirror::Object* value = field->GetObj(NULL);
           if (value != NULL) {
             WalkFieldsInOrder(visited, callback, value, arg);
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 50d15c6..0849171 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -26,7 +26,6 @@
 #include "base/mutex.h"
 #include "gc_allocator.h"
 #include "globals.h"
-#include "mem_map.h"
 #include "object_callbacks.h"
 
 namespace art {
@@ -34,6 +33,7 @@
 namespace mirror {
   class Object;
 }  // namespace mirror
+class MemMap;
 
 namespace gc {
 namespace accounting {
@@ -183,10 +183,7 @@
     name_ = name;
   }
 
-  std::string Dump() const {
-    return StringPrintf("%s: %p-%p", name_.c_str(), reinterpret_cast<void*>(HeapBegin()),
-                        reinterpret_cast<void*>(HeapLimit()));
-  }
+  std::string Dump() const;
 
   const void* GetObjectWordAddress(const mirror::Object* obj) const {
     uintptr_t addr = reinterpret_cast<uintptr_t>(obj);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 26f87ca..d7b673e 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -150,6 +150,7 @@
       verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
       verify_pre_sweeping_rosalloc_(verify_pre_sweeping_rosalloc),
       verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
+      last_gc_time_ns_(NanoTime()),
       allocation_rate_(0),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
        * causes a lot of GC since we do a GC for alloc whenever the stack is full. When heap
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 91f1718..33339f8 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -919,8 +919,6 @@
 
       rec->AddU2(0);  // empty const pool
 
-      FieldHelper fh;
-
       // Static fields
       if (sFieldCount == 0) {
         rec->AddU2((uint16_t)0);
@@ -932,11 +930,10 @@
 
         for (size_t i = 0; i < sFieldCount; ++i) {
           mirror::ArtField* f = thisClass->GetStaticField(i);
-          fh.ChangeField(f);
 
           size_t size;
-          HprofBasicType t = SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), &size);
-          rec->AddStringId(LookupStringId(fh.GetName()));
+          HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
+          rec->AddStringId(LookupStringId(f->GetName()));
           rec->AddU1(t);
           if (size == 1) {
             rec->AddU1(static_cast<uint8_t>(f->Get32(thisClass)));
@@ -957,9 +954,8 @@
       rec->AddU2((uint16_t)iFieldCount);
       for (int i = 0; i < iFieldCount; ++i) {
         mirror::ArtField* f = thisClass->GetInstanceField(i);
-        fh.ChangeField(f);
-        HprofBasicType t = SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), NULL);
-        rec->AddStringId(LookupStringId(fh.GetName()));
+        HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), NULL);
+        rec->AddStringId(LookupStringId(f->GetName()));
         rec->AddU1(t);
       }
     } else if (c->IsArrayClass()) {
@@ -1015,14 +1011,12 @@
       // Write the instance data;  fields for this class, followed by super class fields,
       // and so on. Don't write the klass or monitor fields of Object.class.
       mirror::Class* sclass = c;
-      FieldHelper fh;
       while (!sclass->IsObjectClass()) {
         int ifieldCount = sclass->NumInstanceFields();
         for (int i = 0; i < ifieldCount; ++i) {
           mirror::ArtField* f = sclass->GetInstanceField(i);
-          fh.ChangeField(f);
           size_t size;
-          SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), &size);
+          SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
           if (size == 1) {
             rec->AddU1(f->Get32(obj));
           } else if (size == 2) {
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 5630862..2dd2cd7 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,15 +17,15 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
+#include <stdint.h>
+#include <set>
+#include <list>
+
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "object_callbacks.h"
 
-#include <stdint.h>
-#include <set>
-#include <list>
-
 namespace art {
 namespace mirror {
   class ArtField;
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 19b85e4..a66bd94 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -334,12 +334,10 @@
     Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
     String* name = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
     ArtField* found = NULL;
-    FieldHelper fh;
     ObjectArray<ArtField>* fields = klass->GetIFields();
     for (int32_t i = 0; i < fields->GetLength() && found == NULL; ++i) {
       ArtField* f = fields->Get(i);
-      fh.ChangeField(f);
-      if (name->Equals(fh.GetName())) {
+      if (name->Equals(f->GetName())) {
         found = f;
       }
     }
@@ -347,8 +345,7 @@
       fields = klass->GetSFields();
       for (int32_t i = 0; i < fields->GetLength() && found == NULL; ++i) {
         ArtField* f = fields->Get(i);
-        fh.ChangeField(f);
-        if (name->Equals(fh.GetName())) {
+        if (name->Equals(f->GetName())) {
           found = f;
         }
       }
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 029af8d..6e136d6 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -363,9 +363,15 @@
       if (do_assignability_check && reg != nullptr) {
         // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
         // object in the destructor.
-        StackHandleScope<1> hs(self);
-        HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(&obj));
-        Class* field_class = FieldHelper(f).GetType();
+        Class* field_class;
+        {
+          StackHandleScope<3> hs(self);
+          HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
+          HandleWrapper<mirror::Object> h_reg(hs.NewHandleWrapper(&reg));
+          HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
+          FieldHelper fh(h_f);
+          field_class = fh.GetType();
+        }
         if (!reg->VerifierInstanceOf(field_class)) {
           // This should never happen.
           self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
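
The handle-wrapper pattern used above exists because FieldHelper::GetType() can resolve and even allocate classes, a point where a moving garbage collector may relocate f, reg, and obj; raw pointers held across such a call therefore have to be registered with a scope that writes the possibly-moved values back. A reduced sketch of the idea (PointerRestorer is a hypothetical name; the real classes are StackHandleScope and HandleWrapper used above):

    // Records the address of a raw pointer and restores the (possibly updated)
    // value when the wrapper goes out of scope. In the real runtime the held
    // value is additionally visible to the GC as a root while in scope.
    template <typename T>
    class PointerRestorer {
     public:
      explicit PointerRestorer(T** slot) : slot_(slot), value_(*slot) {}
      ~PointerRestorer() { *slot_ = value_; }
      T** root() { return &value_; }  // the slot a root visitor would update
     private:
      T** slot_;
      T* value_;
    };
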
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 99153c8..623d9c3 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -247,7 +247,7 @@
       // If access checks are required then the dex-to-dex compiler and analysis of
       // whether the class has final fields hasn't been performed. Conservatively
       // perform the memory barrier now.
-      QuasiAtomic::MembarStoreLoad();
+      QuasiAtomic::ThreadFenceForConstructor();
     }
     if (UNLIKELY(self->TestAllFlags())) {
       CheckSuspend(self);
@@ -266,7 +266,7 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(RETURN_VOID_BARRIER) {
-    QuasiAtomic::MembarStoreLoad();
+    QuasiAtomic::ThreadFenceForConstructor();
     JValue result;
     if (UNLIKELY(self->TestAllFlags())) {
       CheckSuspend(self);
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 3c7880c..d592a53 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -175,7 +175,7 @@
           // If access checks are required then the dex-to-dex compiler and analysis of
           // whether the class has final fields hasn't been performed. Conservatively
           // perform the memory barrier now.
-          QuasiAtomic::MembarStoreLoad();
+          QuasiAtomic::ThreadFenceForConstructor();
         }
         if (UNLIKELY(self->TestAllFlags())) {
           CheckSuspend(self);
@@ -191,7 +191,7 @@
         return result;
       }
       case Instruction::RETURN_VOID_BARRIER: {
-        QuasiAtomic::MembarStoreLoad();
+        QuasiAtomic::ThreadFenceForConstructor();
         JValue result;
         if (UNLIKELY(self->TestAllFlags())) {
           CheckSuspend(self);
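
RETURN_VOID_BARRIER exists so that writes performed in a constructor (final fields in particular) become visible before a reference to the new object can be published, and ThreadFenceForConstructor names that intent directly instead of using a generic store-load barrier. A rough C++11 analogue of the publication pattern (illustrative, not the interpreter's code):

    #include <atomic>

    struct Box { int final_value; };

    std::atomic<Box*> g_shared{nullptr};

    void ConstructAndPublish() {
      Box* b = new Box;
      b->final_value = 42;                                  // constructor writes
      std::atomic_thread_fence(std::memory_order_release);  // "constructor fence"
      g_shared.store(b, std::memory_order_relaxed);         // publish the reference
    }

A reader that loads g_shared with acquire ordering and observes b is then guaranteed to observe final_value == 42.
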
diff --git a/runtime/jni_internal-inl.h b/runtime/jni_internal-inl.h
new file mode 100644
index 0000000..6cf9a61
--- /dev/null
+++ b/runtime/jni_internal-inl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JNI_INTERNAL_INL_H_
+#define ART_RUNTIME_JNI_INTERNAL_INL_H_
+
+#include "jni_internal.h"
+
+#include "utils.h"
+
+namespace art {
+
+template<typename T>
+inline T JNIEnvExt::AddLocalReference(mirror::Object* obj) {
+  IndirectRef ref = locals.Add(local_ref_cookie, obj);
+
+  // TODO: fix this to understand PushLocalFrame, so we can turn it on.
+  if (false) {
+    if (check_jni) {
+      size_t entry_count = locals.Capacity();
+      if (entry_count > 16) {
+        locals.Dump(LOG(WARNING) << "Warning: more than 16 JNI local references: "
+            << entry_count << " (most recent was a " << PrettyTypeOf(obj) << ")\n");
+        // TODO: LOG(FATAL) in a later release?
+      }
+    }
+  }
+
+  return reinterpret_cast<T>(ref);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JNI_INTERNAL_INL_H_
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 4072da4..abb71b7 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -24,7 +24,6 @@
 #include "indirect_reference_table.h"
 #include "object_callbacks.h"
 #include "reference_table.h"
-#include "runtime.h"
 
 #include <iosfwd>
 #include <string>
@@ -45,6 +44,7 @@
 union JValue;
 class Libraries;
 class ParsedOptions;
+class Runtime;
 class ScopedObjectAccess;
 template<class T> class Handle;
 class Thread;
@@ -216,25 +216,6 @@
   DISALLOW_COPY_AND_ASSIGN(ScopedJniEnvLocalRefState);
 };
 
-template<typename T>
-inline T JNIEnvExt::AddLocalReference(mirror::Object* obj) {
-  IndirectRef ref = locals.Add(local_ref_cookie, obj);
-
-  // TODO: fix this to understand PushLocalFrame, so we can turn it on.
-  if (false) {
-    if (check_jni) {
-      size_t entry_count = locals.Capacity();
-      if (entry_count > 16) {
-        locals.Dump(LOG(WARNING) << "Warning: more than 16 JNI local references: "
-            << entry_count << " (most recent was a " << PrettyTypeOf(obj) << ")\n");
-        // TODO: LOG(FATAL) in a later release?
-      }
-    }
-  }
-
-  return reinterpret_cast<T>(ref);
-}
-
 }  // namespace art
 
 std::ostream& operator<<(std::ostream& os, const jobjectRefType& rhs);
diff --git a/runtime/mirror/art_field-inl.h b/runtime/mirror/art_field-inl.h
index ad24d0a..686fded 100644
--- a/runtime/mirror/art_field-inl.h
+++ b/runtime/mirror/art_field-inl.h
@@ -116,60 +116,52 @@
 }
 
 inline bool ArtField::GetBoolean(Object* object) {
-  DCHECK_EQ(Primitive::kPrimBoolean, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimBoolean, GetTypeAsPrimitiveType()) << PrettyField(this);
   return Get32(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetBoolean(Object* object, bool z) {
-  DCHECK_EQ(Primitive::kPrimBoolean, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimBoolean, GetTypeAsPrimitiveType()) << PrettyField(this);
   Set32<kTransactionActive>(object, z);
 }
 
 inline int8_t ArtField::GetByte(Object* object) {
-  DCHECK_EQ(Primitive::kPrimByte, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimByte, GetTypeAsPrimitiveType()) << PrettyField(this);
   return Get32(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetByte(Object* object, int8_t b) {
-  DCHECK_EQ(Primitive::kPrimByte, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimByte, GetTypeAsPrimitiveType()) << PrettyField(this);
   Set32<kTransactionActive>(object, b);
 }
 
 inline uint16_t ArtField::GetChar(Object* object) {
-  DCHECK_EQ(Primitive::kPrimChar, FieldHelper(this).GetTypeAsPrimitiveType())
-      << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimChar, GetTypeAsPrimitiveType()) << PrettyField(this);
   return Get32(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetChar(Object* object, uint16_t c) {
-  DCHECK_EQ(Primitive::kPrimChar, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimChar, GetTypeAsPrimitiveType()) << PrettyField(this);
   Set32<kTransactionActive>(object, c);
 }
 
 inline int16_t ArtField::GetShort(Object* object) {
-  DCHECK_EQ(Primitive::kPrimShort, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimShort, GetTypeAsPrimitiveType()) << PrettyField(this);
   return Get32(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetShort(Object* object, int16_t s) {
-  DCHECK_EQ(Primitive::kPrimShort, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimShort, GetTypeAsPrimitiveType()) << PrettyField(this);
   Set32<kTransactionActive>(object, s);
 }
 
 inline int32_t ArtField::GetInt(Object* object) {
   if (kIsDebugBuild) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
   }
   return Get32(object);
@@ -178,7 +170,7 @@
 template<bool kTransactionActive>
 inline void ArtField::SetInt(Object* object, int32_t i) {
   if (kIsDebugBuild) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
   }
   Set32<kTransactionActive>(object, i);
@@ -186,7 +178,7 @@
 
 inline int64_t ArtField::GetLong(Object* object) {
   if (kIsDebugBuild) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
   }
   return Get64(object);
@@ -195,15 +187,14 @@
 template<bool kTransactionActive>
 inline void ArtField::SetLong(Object* object, int64_t j) {
   if (kIsDebugBuild) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
   }
   Set64<kTransactionActive>(object, j);
 }
 
 inline float ArtField::GetFloat(Object* object) {
-  DCHECK_EQ(Primitive::kPrimFloat, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetI(Get32(object));
   return bits.GetF();
@@ -211,16 +202,14 @@
 
 template<bool kTransactionActive>
 inline void ArtField::SetFloat(Object* object, float f) {
-  DCHECK_EQ(Primitive::kPrimFloat, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetF(f);
   Set32<kTransactionActive>(object, bits.GetI());
 }
 
 inline double ArtField::GetDouble(Object* object) {
-  DCHECK_EQ(Primitive::kPrimDouble, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetJ(Get64(object));
   return bits.GetD();
@@ -228,26 +217,68 @@
 
 template<bool kTransactionActive>
 inline void ArtField::SetDouble(Object* object, double d) {
-  DCHECK_EQ(Primitive::kPrimDouble, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetD(d);
   Set64<kTransactionActive>(object, bits.GetJ());
 }
 
 inline Object* ArtField::GetObject(Object* object) {
-  DCHECK_EQ(Primitive::kPrimNot, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField(this);
   return GetObj(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetObject(Object* object, Object* l) {
-  DCHECK_EQ(Primitive::kPrimNot, FieldHelper(this).GetTypeAsPrimitiveType())
-       << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField(this);
   SetObj<kTransactionActive>(object, l);
 }
 
+inline const char* ArtField::GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  uint32_t field_index = GetDexFieldIndex();
+  if (UNLIKELY(GetDeclaringClass()->IsProxyClass())) {
+    DCHECK(IsStatic());
+    DCHECK_LT(field_index, 2U);
+    return field_index == 0 ? "interfaces" : "throws";
+  }
+  const DexFile* dex_file = GetDexFile();
+  return dex_file->GetFieldName(dex_file->GetFieldId(field_index));
+}
+
+inline const char* ArtField::GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  uint32_t field_index = GetDexFieldIndex();
+  if (UNLIKELY(GetDeclaringClass()->IsProxyClass())) {
+    DCHECK(IsStatic());
+    DCHECK_LT(field_index, 2U);
+    // 0 == Class[] interfaces; 1 == Class[][] throws;
+    return field_index == 0 ? "[Ljava/lang/Class;" : "[[Ljava/lang/Class;";
+  }
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+  return dex_file->GetFieldTypeDescriptor(field_id);
+}
+
+inline Primitive::Type ArtField::GetTypeAsPrimitiveType()
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return Primitive::GetType(GetTypeDescriptor()[0]);
+}
+
+inline bool ArtField::IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return GetTypeAsPrimitiveType() != Primitive::kPrimNot;
+}
+
+inline size_t ArtField::FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return Primitive::FieldSize(GetTypeAsPrimitiveType());
+}
+
+inline mirror::DexCache* ArtField::GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return GetDeclaringClass()->GetDexCache();
+}
+
+inline const DexFile* ArtField::GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  return GetDexCache()->GetDexFile();
+}
+
 }  // namespace mirror
 }  // namespace art
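
GetTypeAsPrimitiveType() only inspects the first character of the dex type descriptor, which is why it needs no dex-cache resolution. A reduced sketch of that mapping (the real lookup is Primitive::GetType, used above; PrimKind is a hypothetical enum):

    enum class PrimKind {
      kNot, kBoolean, kByte, kChar, kShort, kInt, kLong, kFloat, kDouble, kVoid
    };

    // The first character of a dex/JNI descriptor decides the primitive kind;
    // 'L...;' (classes) and '[' (arrays) are reference types.
    PrimKind KindFromDescriptorChar(char c) {
      switch (c) {
        case 'Z': return PrimKind::kBoolean;
        case 'B': return PrimKind::kByte;
        case 'C': return PrimKind::kChar;
        case 'S': return PrimKind::kShort;
        case 'I': return PrimKind::kInt;
        case 'J': return PrimKind::kLong;
        case 'F': return PrimKind::kFloat;
        case 'D': return PrimKind::kDouble;
        case 'V': return PrimKind::kVoid;
        default:  return PrimKind::kNot;
      }
    }
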
 
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
index b3b1b71..f2729f6 100644
--- a/runtime/mirror/art_field.cc
+++ b/runtime/mirror/art_field.cc
@@ -55,7 +55,7 @@
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
   if (kIsDebugBuild && Runtime::Current()->IsCompiler() &&
       !Runtime::Current()->UseCompileTimeClassPath()) {
-    Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
+    Primitive::Type type = GetTypeAsPrimitiveType();
     if (type == Primitive::kPrimDouble || type == Primitive::kPrimLong) {
       DCHECK_ALIGNED(num_bytes.Uint32Value(), 8);
     }
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 30cd180..4858613 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -139,6 +139,14 @@
   static ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Primitive::Type GetTypeAsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 42ef68a..a409093 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -517,11 +517,9 @@
 ArtField* Class::FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type) {
   // Is the field in this class?
   // Interfaces are not relevant because they can't contain instance fields.
-  FieldHelper fh;
   for (size_t i = 0; i < NumInstanceFields(); ++i) {
     ArtField* f = GetInstanceField(i);
-    fh.ChangeField(f);
-    if (name == fh.GetName() && type == fh.GetTypeDescriptor()) {
+    if (name == f->GetName() && type == f->GetTypeDescriptor()) {
       return f;
     }
   }
@@ -566,11 +564,9 @@
 
 ArtField* Class::FindDeclaredStaticField(const StringPiece& name, const StringPiece& type) {
   DCHECK(type != NULL);
-  FieldHelper fh;
   for (size_t i = 0; i < NumStaticFields(); ++i) {
     ArtField* f = GetStaticField(i);
-    fh.ChangeField(f);
-    if (name == fh.GetName() && type == fh.GetTypeDescriptor()) {
+    if (name == f->GetName() && type == f->GetTypeDescriptor()) {
       return f;
     }
   }
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 62ab2c1..567ce3e 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -405,11 +405,9 @@
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int32_t* word_addr = reinterpret_cast<const int32_t*>(raw_addr);
   if (UNLIKELY(kIsVolatile)) {
-    int32_t result = *(reinterpret_cast<volatile int32_t*>(const_cast<int32_t*>(word_addr)));
-    QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
-    return result;
+    return reinterpret_cast<const Atomic<int32_t>*>(word_addr)->LoadSequentiallyConsistent();
   } else {
-    return *word_addr;
+    return reinterpret_cast<const Atomic<int32_t>*>(word_addr)->LoadJavaData();
   }
 }
 
@@ -435,11 +433,9 @@
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr);
   if (kIsVolatile) {
-    QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
-    *word_addr = new_value;
-    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any volatile loads.
+    reinterpret_cast<Atomic<int32_t>*>(word_addr)->StoreSequentiallyConsistent(new_value);
   } else {
-    *word_addr = new_value;
+    reinterpret_cast<Atomic<int32_t>*>(word_addr)->StoreJavaData(new_value);
   }
 }
 
@@ -461,6 +457,7 @@
   }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr);
+
   return __sync_bool_compare_and_swap(addr, old_value, new_value);
 }
 
@@ -472,11 +469,9 @@
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr);
   if (kIsVolatile) {
-    int64_t result = QuasiAtomic::Read64(addr);
-    QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
-    return result;
+    return reinterpret_cast<const Atomic<int64_t>*>(addr)->LoadSequentiallyConsistent();
   } else {
-    return *addr;
+    return reinterpret_cast<const Atomic<int64_t>*>(addr)->LoadJavaData();
   }
 }
 
@@ -502,15 +497,9 @@
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   int64_t* addr = reinterpret_cast<int64_t*>(raw_addr);
   if (kIsVolatile) {
-    QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
-    QuasiAtomic::Write64(addr, new_value);
-    if (!QuasiAtomic::LongAtomicsUseMutexes()) {
-      QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any volatile loads.
-    } else {
-      // Fence from from mutex is enough.
-    }
+    reinterpret_cast<Atomic<int64_t>*>(addr)->StoreSequentiallyConsistent(new_value);
   } else {
-    *addr = new_value;
+    reinterpret_cast<Atomic<int64_t>*>(addr)->StoreJavaData(new_value);
   }
 }
 
@@ -546,7 +535,8 @@
   HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr);
   T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr);
   if (kIsVolatile) {
-    QuasiAtomic::MembarLoadLoad();  // Ensure loads don't re-order.
+    // TODO: Refactor to use a SequentiallyConsistent load instead.
+    QuasiAtomic::ThreadFenceAcquire();  // Ensure visibility of operations preceding store.
   }
   if (kVerifyFlags & kVerifyReads) {
     VerifyObject(result);
@@ -584,9 +574,11 @@
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr);
   if (kIsVolatile) {
-    QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
+    // TODO: Refactor to use a SequentiallyConsistent store instead.
+    QuasiAtomic::ThreadFenceRelease();  // Ensure that prior accesses are visible before store.
     objref_addr->Assign(new_value);
-    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any loads.
+    // Ensure this store occurs before any volatile loads.
+    QuasiAtomic::ThreadFenceSequentiallyConsistent();
   } else {
     objref_addr->Assign(new_value);
   }
@@ -598,8 +590,9 @@
   SetFieldObjectWithoutWriteBarrier<kTransactionActive, kCheckTransaction, kVerifyFlags,
       kIsVolatile>(field_offset, new_value);
   if (new_value != nullptr) {
-    CheckFieldAssignment(field_offset, new_value);
     Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
+    // TODO: CheckFieldAssignment could theoretically cause thread suspension; fix this.
+    CheckFieldAssignment(field_offset, new_value);
   }
 }
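
The field accessors above replace explicit membar pairs with operations on Atomic<T>. In C++11 terms the intent is roughly the following, assuming LoadJavaData/StoreJavaData correspond to relaxed accesses that still rule out word tearing (illustrative sketch, not the Object implementation):

    #include <atomic>
    #include <cstdint>

    int32_t LoadField(const std::atomic<int32_t>* addr, bool is_volatile) {
      return is_volatile
          ? addr->load(std::memory_order_seq_cst)    // LoadSequentiallyConsistent
          : addr->load(std::memory_order_relaxed);   // LoadJavaData: atomic but unordered
    }

    void StoreField(std::atomic<int32_t>* addr, int32_t value, bool is_volatile) {
      if (is_volatile) {
        addr->store(value, std::memory_order_seq_cst);   // StoreSequentiallyConsistent
      } else {
        addr->store(value, std::memory_order_relaxed);   // StoreJavaData
      }
    }
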
 
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 69e5a84..422a88b 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -204,7 +204,8 @@
       for (size_t i = 0; i < num_ref_ifields; ++i) {
         ArtField* field = fields->Get(i);
         if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
-          FieldHelper fh(field);
+          StackHandleScope<1> hs(Thread::Current());
+          FieldHelper fh(hs.NewHandle(field));
           CHECK(fh.GetType()->IsAssignableFrom(new_value->GetClass()));
           return;
         }
@@ -222,7 +223,8 @@
       for (size_t i = 0; i < num_ref_sfields; ++i) {
         ArtField* field = fields->Get(i);
         if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
-          FieldHelper fh(field);
+          StackHandleScope<1> hs(Thread::Current());
+          FieldHelper fh(hs.NewHandle(field));
           CHECK(fh.GetType()->IsAssignableFrom(new_value->GetClass()));
           return;
         }
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 442909d..c082443 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -103,6 +103,13 @@
   // avoids the barriers.
   LockWord GetLockWord(bool as_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetLockWord(LockWord new_val, bool as_volatile) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // All Cas operations defined here have C++11 memory_order_seq_cst ordering
+  // semantics: Preceding memory operations become visible to other threads
+  // before the CAS, and subsequent operations become visible after the CAS.
+  // The Cas operations defined here do not fail spuriously, i.e. they
+  // have C++11 "strong" semantics.
+  // TODO: In most, possibly all, cases, these assumptions are too strong.
+  // Confirm and weaken the implementation.
   bool CasLockWord(LockWord old_val, LockWord new_val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
@@ -316,6 +323,7 @@
 
  private:
   // Verify the type correctness of stores to fields.
+  // TODO: This can cause thread suspension and isn't moving GC safe.
   void CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void CheckFieldAssignment(MemberOffset field_offset, Object* new_value)
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 9b6e901..c7540dc 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -19,6 +19,7 @@
 
 #include "object_array.h"
 
+#include "base/stringprintf.h"
 #include "gc/heap.h"
 #include "mirror/art_field.h"
 #include "mirror/class.h"
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 58e6dd4..f73ef1e 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -694,7 +694,7 @@
       case LockWord::kUnlocked: {
         LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0));
         if (h_obj->CasLockWord(lock_word, thin_locked)) {
-          QuasiAtomic::MembarLoadLoad();
+          // CasLockWord enforces more than the acquire ordering we need here.
           return h_obj.Get();  // Success!
         }
         continue;  // Go again.
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 981ea0e..2c24e33 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -40,13 +40,10 @@
 #include "ScopedFd.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
+#include "utils.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
-#ifdef HAVE_ANDROID_OS
-#include "cutils/properties.h"
-#endif
-
 namespace art {
 
 // A smart pointer that provides read-only access to a Java string's UTF chars.
@@ -250,25 +247,6 @@
   }
 }
 
-static double GetDoubleProperty(const char* property, double minValue, double maxValue, double defaultValue) {
-#ifndef HAVE_ANDROID_OS
-  return defaultValue;
-#else
-  char buf[PROP_VALUE_MAX];
-  char* endptr;
-
-  property_get(property, buf, "");
-  double value = strtod(buf, &endptr);
-
-  if (value == 0 && endptr == buf) {
-    value = defaultValue;
-  } else if (value < minValue || value > maxValue) {
-    value = defaultValue;
-  }
-  return value;
-#endif
-}
-
 static jboolean IsDexOptNeededInternal(JNIEnv* env, const char* filename,
     const char* pkgname, const char* instruction_set, const jboolean defer) {
   const bool kVerboseLogging = false;  // Spammy logging.
@@ -350,7 +328,7 @@
   // If the 'defer' argument is true then this will be retried later.  In this case we
   // need to make sure that the profile file copy is not made so that we will get the
   // same result second time.
-  if (pkgname != nullptr) {
+  if (Runtime::Current()->GetProfilerOptions().IsEnabled() && (pkgname != nullptr)) {
     const std::string profile_file = GetDalvikCacheOrDie("profiles", false /* create_if_absent */)
         + std::string("/") + pkgname;
     const std::string profile_cache_dir = GetDalvikCacheOrDie("profile-cache",
@@ -379,42 +357,46 @@
       // There is a previous profile file.  Check if the profile has changed significantly.
       // A change in profile is considered significant if X% (change_thr property) of the top K%
       // (compile_thr property) samples has changed.
-
-      double topKThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.compile_thr", 10.0, 90.0, 90.0);
-      double changeThreshold = GetDoubleProperty("dalvik.vm.profiler.dex2oat.change_thr", 1.0, 90.0, 10.0);
-      double changePercent = 0.0;
-      std::set<std::string> newTopK, oldTopK;
-      bool newOk = ProfileHelper::LoadTopKSamples(newTopK, profile_file, topKThreshold);
-      bool oldOk = ProfileHelper::LoadTopKSamples(oldTopK, prev_profile_file, topKThreshold);
-      if (!newOk || !oldOk) {
+      double top_k_threshold = Runtime::Current()->GetProfilerOptions().GetTopKThreshold();
+      double change_threshold = Runtime::Current()->GetProfilerOptions().GetTopKChangeThreshold();
+      double change_percent = 0.0;
+      ProfileFile new_profile, old_profile;
+      bool new_ok = new_profile.LoadFile(profile_file);
+      bool old_ok = old_profile.LoadFile(prev_profile_file);
+      if (!new_ok || !old_ok) {
         if (kVerboseLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded Ignoring invalid profiles: "
-                    << (newOk ?  "" : profile_file) << " " << (oldOk ? "" : prev_profile_file);
+                    << (new_ok ?  "" : profile_file) << " " << (old_ok ? "" : prev_profile_file);
         }
-      } else if (newTopK.empty()) {
-        if (kVerboseLogging) {
-          LOG(INFO) << "DexFile_isDexOptNeeded empty profile: " << profile_file;
-        }
-        // If the new topK is empty we shouldn't optimize so we leave the changePercent at 0.0.
       } else {
-        std::set<std::string> diff;
-        std::set_difference(newTopK.begin(), newTopK.end(), oldTopK.begin(), oldTopK.end(),
-          std::inserter(diff, diff.end()));
-        // TODO: consider using the usedPercentage instead of the plain diff count.
-        changePercent = 100.0 * static_cast<double>(diff.size()) / static_cast<double>(newTopK.size());
-        if (kVerboseLogging) {
-          std::set<std::string>::iterator end = diff.end();
-          for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) {
-            LOG(INFO) << "DexFile_isDexOptNeeded new in topK: " << *it;
+        std::set<std::string> new_top_k, old_top_k;
+        new_profile.GetTopKSamples(new_top_k, top_k_threshold);
+        old_profile.GetTopKSamples(old_top_k, top_k_threshold);
+        if (new_top_k.empty()) {
+          if (kVerboseLogging) {
+            LOG(INFO) << "DexFile_isDexOptNeeded empty profile: " << profile_file;
+          }
+          // If the new topK is empty, we shouldn't optimize, so we leave the change_percent at 0.0.
+        } else {
+          std::set<std::string> diff;
+          std::set_difference(new_top_k.begin(), new_top_k.end(), old_top_k.begin(), old_top_k.end(),
+            std::inserter(diff, diff.end()));
+          // TODO: consider using the usedPercentage instead of the plain diff count.
+          change_percent = 100.0 * static_cast<double>(diff.size()) / static_cast<double>(new_top_k.size());
+          if (kVerboseLogging) {
+            std::set<std::string>::iterator end = diff.end();
+            for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) {
+              LOG(INFO) << "DexFile_isDexOptNeeded new in topK: " << *it;
+            }
           }
         }
       }
 
-      if (changePercent > changeThreshold) {
+      if (change_percent > change_threshold) {
         if (kReasonLogging) {
           LOG(INFO) << "DexFile_isDexOptNeeded size of new profile file " << profile_file <<
           " is significantly different from old profile file " << prev_profile_file << " (top "
-          << topKThreshold << "% samples changed in proportion of " << changePercent << "%)";
+          << top_k_threshold << "% samples changed in proportion of " << change_percent << "%)";
         }
         if (!defer) {
           CopyProfileFile(profile_file.c_str(), prev_profile_file.c_str());
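
For reference, the profile-change check that this hunk rewrites around the new ProfileFile API boils down to the following sketch (illustration only; ProfileFile and ProfilerOptions come from this change, while the free function and its name are hypothetical):

    // Sketch: decide whether the sampling profile changed enough to recompile.
    #include <algorithm>
    #include <iterator>
    #include <set>
    #include <string>

    bool ProfileChangedSignificantly(const std::string& profile_file,
                                     const std::string& prev_profile_file,
                                     const ProfilerOptions& options) {
      ProfileFile new_profile, old_profile;
      if (!new_profile.LoadFile(profile_file) || !old_profile.LoadFile(prev_profile_file)) {
        return false;  // Invalid profiles are ignored, as in the hunk above.
      }
      std::set<std::string> new_top_k, old_top_k;
      new_profile.GetTopKSamples(new_top_k, options.GetTopKThreshold());
      old_profile.GetTopKSamples(old_top_k, options.GetTopKThreshold());
      if (new_top_k.empty()) {
        return false;  // Nothing hot yet, so don't trigger recompilation.
      }
      std::set<std::string> diff;
      std::set_difference(new_top_k.begin(), new_top_k.end(),
                          old_top_k.begin(), old_top_k.end(),
                          std::inserter(diff, diff.end()));
      double change_percent =
          100.0 * static_cast<double>(diff.size()) / static_cast<double>(new_top_k.size());
      return change_percent > options.GetTopKChangeThreshold();
    }
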
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index a369365..f1a987f 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -509,15 +509,13 @@
  * process name.  We use this information to start up the sampling profiler for
  * for ART.
  */
-static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName, jstring appDir, jstring procName) {
+static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName,
+                                      jstring appDir, jstring procName) {
   const char *pkgNameChars = env->GetStringUTFChars(pkgName, NULL);
-  const char *appDirChars = env->GetStringUTFChars(appDir, NULL);
-  const char *procNameChars = env->GetStringUTFChars(procName, NULL);
-
   std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars);
-  Runtime::Current()->StartProfiler(profileFile.c_str(), procNameChars);
-  env->ReleaseStringUTFChars(appDir, appDirChars);
-  env->ReleaseStringUTFChars(procName, procNameChars);
+
+  Runtime::Current()->StartProfiler(profileFile.c_str());
+
   env->ReleaseStringUTFChars(pkgName, pkgNameChars);
 }
 
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 0d54772..3564dfd 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -90,12 +90,13 @@
 }
 
 static bool CheckReceiver(const ScopedFastNativeObjectAccess& soa, jobject j_rcvr,
-                          mirror::ArtField* f, mirror::Object** class_or_rcvr)
+                          mirror::ArtField** f, mirror::Object** class_or_rcvr)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   soa.Self()->AssertThreadSuspensionIsAllowable();
-  if (f->IsStatic()) {
-    StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::Class> h_klass(hs.NewHandle(f->GetDeclaringClass()));
+  if ((*f)->IsStatic()) {
+    StackHandleScope<2> hs(soa.Self());
+    HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(f));
+    Handle<mirror::Class> h_klass(hs.NewHandle((*f)->GetDeclaringClass()));
     if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(h_klass, true, true))) {
       DCHECK(soa.Self()->IsExceptionPending());
       *class_or_rcvr = nullptr;
@@ -106,7 +107,7 @@
   }
 
   *class_or_rcvr = soa.Decode<mirror::Object*>(j_rcvr);
-  mirror::Class* declaringClass = f->GetDeclaringClass();
+  mirror::Class* declaringClass = (*f)->GetDeclaringClass();
   if (!VerifyObjectIsClass(*class_or_rcvr, declaringClass)) {
     DCHECK(soa.Self()->IsExceptionPending());
     *class_or_rcvr = nullptr;
@@ -117,10 +118,9 @@
 
 static jobject Field_get(JNIEnv* env, jobject javaField, jobject javaObj, jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
-  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   mirror::Object* o = nullptr;
-  if (!CheckReceiver(soa, javaObj, f, &o)) {
+  if (!CheckReceiver(soa, javaObj, &f, &o)) {
     DCHECK(soa.Self()->IsExceptionPending());
     return nullptr;
   }
@@ -131,7 +131,7 @@
   }
   // We now don't expect suspension unless an exception is thrown.
   // Get the field's value, boxing if necessary.
-  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
+  Primitive::Type field_type = f->GetTypeAsPrimitiveType();
   JValue value;
   if (!GetFieldValue(soa, o, f, field_type, true, &value)) {
     DCHECK(soa.Self()->IsExceptionPending());
@@ -143,10 +143,9 @@
 static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj,
                                 char dst_descriptor, jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
-  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   mirror::Object* o = nullptr;
-  if (!CheckReceiver(soa, javaObj, f, &o)) {
+  if (!CheckReceiver(soa, javaObj, &f, &o)) {
     DCHECK(soa.Self()->IsExceptionPending());
     return JValue();
   }
@@ -159,7 +158,7 @@
 
   // We now don't expect suspension unless an exception is thrown.
   // Read the value.
-  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
+  Primitive::Type field_type = f->GetTypeAsPrimitiveType();
   JValue field_value;
   if (!GetFieldValue(soa, o, f, field_type, false, &field_value)) {
     DCHECK(soa.Self()->IsExceptionPending());
@@ -257,33 +256,29 @@
 static void Field_set(JNIEnv* env, jobject javaField, jobject javaObj, jobject javaValue,
                       jboolean accessible) {
   ScopedFastNativeObjectAccess soa(env);
-  CHECK(!kMovingFields) << "CheckReceiver may trigger thread suspension for initialization";
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   // Check that the receiver is non-null and an instance of the field's declaring class.
   mirror::Object* o = nullptr;
-  if (!CheckReceiver(soa, javaObj, f, &o)) {
+  if (!CheckReceiver(soa, javaObj, &f, &o)) {
     DCHECK(soa.Self()->IsExceptionPending());
     return;
   }
-  Primitive::Type field_prim_type;
   mirror::Class* field_type;
-  {
-    FieldHelper fh(f);
-    const char* field_type_desciptor = fh.GetTypeDescriptor();
-    field_prim_type = Primitive::GetType(field_type_desciptor[0]);
-    if (field_prim_type == Primitive::kPrimNot) {
-      StackHandleScope<1> hs(soa.Self());
-      HandleWrapper<mirror::Object> h(hs.NewHandleWrapper(&o));
-      // May cause resolution.
-      CHECK(!kMovingFields) << "Resolution may trigger thread suspension";
-      field_type = fh.GetType(true);
-      if (field_type == nullptr) {
-        DCHECK(soa.Self()->IsExceptionPending());
-        return;
-      }
-    } else {
-      field_type = Runtime::Current()->GetClassLinker()->FindPrimitiveClass(field_type_desciptor[0]);
+  const char* field_type_desciptor = f->GetTypeDescriptor();
+  Primitive::Type field_prim_type = Primitive::GetType(field_type_desciptor[0]);
+  if (field_prim_type == Primitive::kPrimNot) {
+    StackHandleScope<2> hs(soa.Self());
+    HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
+    HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
+    FieldHelper fh(h_f);
+    // May cause resolution.
+    field_type = fh.GetType(true);
+    if (field_type == nullptr) {
+      DCHECK(soa.Self()->IsExceptionPending());
+      return;
     }
+  } else {
+    field_type = Runtime::Current()->GetClassLinker()->FindPrimitiveClass(field_type_desciptor[0]);
   }
   // We now don't expect suspension unless an exception is thrown.
   // Unbox the value, if necessary.
@@ -306,10 +301,10 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::ArtField* f = mirror::ArtField::FromReflectedField(soa, javaField);
   mirror::Object* o = nullptr;
-  if (!CheckReceiver(soa, javaObj, f, &o)) {
+  if (!CheckReceiver(soa, javaObj, &f, &o)) {
     return;
   }
-  Primitive::Type field_type = FieldHelper(f).GetTypeAsPrimitiveType();
+  Primitive::Type field_type = f->GetTypeAsPrimitiveType();
   if (UNLIKELY(field_type == Primitive::kPrimNot)) {
     ThrowIllegalArgumentException(nullptr, StringPrintf("Not a primitive field: %s",
                                                         PrettyField(f).c_str()).c_str());
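
The recurring change in this file, passing the field as mirror::ArtField** and wrapping it in a HandleWrapper, exists so the caller's pointer stays valid if a moving GC relocates the field object during a suspension point. A rough sketch of the pattern (the function name is hypothetical):

    // Sketch: keep a caller-visible pointer valid across a suspension point.
    void DoSomethingThatMaySuspend(Thread* self, mirror::ArtField** f /* in/out */) {
      StackHandleScope<1> hs(self);
      HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(f));
      // ... work that may suspend the thread (class initialization, type
      //     resolution); the GC may move the field while we are suspended ...
      // When h_f goes out of scope it writes the (possibly moved) field back
      // through f, so the caller keeps using a valid pointer.
    }
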
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 764db5e..d23cfff 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -83,7 +83,7 @@
                                  jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetField32<false>(MemberOffset(offset), newValue);
 }
@@ -119,7 +119,7 @@
                                   jlong newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetField64<false>(MemberOffset(offset), newValue);
 }
@@ -161,7 +161,7 @@
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
-  QuasiAtomic::MembarStoreStore();
+  QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
   obj->SetFieldObject<false>(MemberOffset(offset), newValue);
 }
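
The three hunks above replace a store-store barrier with a release fence in front of the plain store, i.e. the usual fence-then-store publication pattern. A standalone sketch using std::atomic rather than ART's QuasiAtomic (illustration only):

    #include <atomic>

    int payload = 0;
    std::atomic<bool> published{false};

    void OrderedPublish() {
      payload = 42;                                         // Plain data store.
      std::atomic_thread_fence(std::memory_order_release);  // Like QuasiAtomic::ThreadFenceRelease().
      published.store(true, std::memory_order_relaxed);     // The "ordered" store itself.
    }
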
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 664ac89..a05ebe6 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -68,68 +68,33 @@
 
 class FieldHelper {
  public:
-  FieldHelper() : field_(nullptr) {}
-  explicit FieldHelper(mirror::ArtField* f) : field_(f) {}
+  explicit FieldHelper(Handle<mirror::ArtField> f) : field_(f) {}
 
   void ChangeField(mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(new_f != nullptr);
-    field_ = new_f;
+    field_.Assign(new_f);
   }
 
-  const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uint32_t field_index = field_->GetDexFieldIndex();
-    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-      DCHECK(field_->IsStatic());
-      DCHECK_LT(field_index, 2U);
-      return field_index == 0 ? "interfaces" : "throws";
-    }
-    const DexFile& dex_file = GetDexFile();
-    return dex_file.GetFieldName(dex_file.GetFieldId(field_index));
+  mirror::ArtField* GetField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return field_.Get();
   }
 
   mirror::Class* GetType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
     if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-      return GetClassLinker()->FindSystemClass(Thread::Current(), GetTypeDescriptor());
+      return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(),
+                                                                   field_->GetTypeDescriptor());
     }
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-    mirror::Class* type = GetDexCache()->GetResolvedType(field_id.type_idx_);
+    const DexFile* dex_file = field_->GetDexFile();
+    const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+    mirror::Class* type = field_->GetDexCache()->GetResolvedType(field_id.type_idx_);
     if (resolve && (type == nullptr)) {
-      type = GetClassLinker()->ResolveType(field_id.type_idx_, field_);
+      type = Runtime::Current()->GetClassLinker()->ResolveType(field_id.type_idx_, field_.Get());
       CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
     }
     return type;
   }
 
-  const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uint32_t field_index = field_->GetDexFieldIndex();
-    if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-      DCHECK(field_->IsStatic());
-      DCHECK_LT(field_index, 2U);
-      // 0 == Class[] interfaces; 1 == Class[][] throws;
-      return field_index == 0 ? "[Ljava/lang/Class;" : "[[Ljava/lang/Class;";
-    }
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-    return dex_file.GetFieldTypeDescriptor(field_id);
-  }
-
-  Primitive::Type GetTypeAsPrimitiveType()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return Primitive::GetType(GetTypeDescriptor()[0]);
-  }
-
-  bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Primitive::Type type = GetTypeAsPrimitiveType();
-    return type != Primitive::kPrimNot;
-  }
-
-  size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Primitive::Type type = GetTypeAsPrimitiveType();
-    return Primitive::FieldSize(type);
-  }
-
   // The returned const char* is only guaranteed to be valid for the lifetime of the FieldHelper.
   // If you need it longer, copy it into a std::string.
   const char* GetDeclaringClassDescriptor()
@@ -142,22 +107,13 @@
       declaring_class_descriptor_ = field_->GetDeclaringClass()->GetDescriptor();
       return declaring_class_descriptor_.c_str();
     }
-    const DexFile& dex_file = GetDexFile();
-    const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-    return dex_file.GetFieldDeclaringClassDescriptor(field_id);
+    const DexFile* dex_file = field_->GetDexFile();
+    const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+    return dex_file->GetFieldDeclaringClassDescriptor(field_id);
   }
 
  private:
-  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return field_->GetDeclaringClass()->GetDexCache();
-  }
-  ClassLinker* GetClassLinker() ALWAYS_INLINE {
-    return Runtime::Current()->GetClassLinker();
-  }
-  const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return *GetDexCache()->GetDexFile();
-  }
-  mirror::ArtField* field_;
+  Handle<mirror::ArtField> field_;
   std::string declaring_class_descriptor_;
 
   DISALLOW_COPY_AND_ASSIGN(FieldHelper);
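
With FieldHelper now holding a Handle<mirror::ArtField>, call sites that may reach a suspension point wrap the raw field first, much like the Field_set hunk earlier in this change. A condensed sketch (the wrapper function is hypothetical):

    // Sketch: resolving a field's type with the Handle-based FieldHelper.
    mirror::Class* ResolveFieldType(Thread* self, mirror::ArtField** f) {
      StackHandleScope<1> hs(self);
      HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(f));
      FieldHelper fh(h_f);      // New constructor: FieldHelper(Handle<mirror::ArtField>).
      return fh.GetType(true);  // May cause resolution and therefore thread suspension.
    }
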
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 72a868e..cff5ec3 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -15,13 +15,16 @@
  */
 
 #include "parsed_options.h"
-#include "utils.h"
+
 #ifdef HAVE_ANDROID_OS
 #include "cutils/properties.h"
 #endif
 
+#include "base/stringpiece.h"
 #include "debugger.h"
+#include "gc/heap.h"
 #include "monitor.h"
+#include "utils.h"
 
 namespace art {
 
@@ -248,12 +251,6 @@
   method_trace_file_ = "/data/method-trace-file.bin";
   method_trace_file_size_ = 10 * MB;
 
-  profile_ = false;
-  profile_period_s_ = 10;           // Seconds.
-  profile_duration_s_ = 20;          // Seconds.
-  profile_interval_us_ = 500;       // Microseconds.
-  profile_backoff_coefficient_ = 2.0;
-  profile_start_immediately_ = true;
   profile_clock_source_ = kDefaultProfilerClockSource;
 
   verify_ = true;
@@ -534,29 +531,38 @@
       Trace::SetDefaultClockSource(kProfilerClockSourceWall);
     } else if (option == "-Xprofile:dualclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceDual);
+    } else if (option == "-Xenable-profiler") {
+      profiler_options_.enabled_ = true;
     } else if (StartsWith(option, "-Xprofile-filename:")) {
       if (!ParseStringAfterChar(option, ':', &profile_output_filename_)) {
         return false;
       }
-      profile_ = true;
     } else if (StartsWith(option, "-Xprofile-period:")) {
-      if (!ParseUnsignedInteger(option, ':', &profile_period_s_)) {
+      if (!ParseUnsignedInteger(option, ':', &profiler_options_.period_s_)) {
         return false;
       }
     } else if (StartsWith(option, "-Xprofile-duration:")) {
-      if (!ParseUnsignedInteger(option, ':', &profile_duration_s_)) {
+      if (!ParseUnsignedInteger(option, ':', &profiler_options_.duration_s_)) {
         return false;
       }
     } else if (StartsWith(option, "-Xprofile-interval:")) {
-      if (!ParseUnsignedInteger(option, ':', &profile_interval_us_)) {
+      if (!ParseUnsignedInteger(option, ':', &profiler_options_.interval_us_)) {
         return false;
       }
     } else if (StartsWith(option, "-Xprofile-backoff:")) {
-      if (!ParseDouble(option, ':', 1.0, 10.0, &profile_backoff_coefficient_)) {
+      if (!ParseDouble(option, ':', 1.0, 10.0, &profiler_options_.backoff_coefficient_)) {
         return false;
       }
-    } else if (option == "-Xprofile-start-lazy") {
-      profile_start_immediately_ = false;
+    } else if (option == "-Xprofile-start-immediately") {
+      profiler_options_.start_immediately_ = true;
+    } else if (StartsWith(option, "-Xprofile-top-k-threshold:")) {
+      if (!ParseDouble(option, ':', 10.0, 90.0, &profiler_options_.top_k_threshold_)) {
+        return false;
+      }
+    } else if (StartsWith(option, "-Xprofile-top-k-change-threshold:")) {
+      if (!ParseDouble(option, ':', 10.0, 90.0, &profiler_options_.top_k_change_threshold_)) {
+        return false;
+      }
     } else if (StartsWith(option, "-implicit-checks:")) {
       std::string checks;
       if (!ParseStringAfterChar(option, ':', &checks)) {
@@ -791,11 +797,15 @@
   UsageMessage(stream, "  -Xmethod-trace\n");
   UsageMessage(stream, "  -Xmethod-trace-file:filename");
   UsageMessage(stream, "  -Xmethod-trace-file-size:integervalue\n");
+  UsageMessage(stream, "  -Xenable-profiler\n");
   UsageMessage(stream, "  -Xprofile-filename:filename\n");
   UsageMessage(stream, "  -Xprofile-period:integervalue\n");
   UsageMessage(stream, "  -Xprofile-duration:integervalue\n");
   UsageMessage(stream, "  -Xprofile-interval:integervalue\n");
   UsageMessage(stream, "  -Xprofile-backoff:doublevalue\n");
+  UsageMessage(stream, "  -Xprofile-start-immediately\n");
+  UsageMessage(stream, "  -Xprofile-top-k-threshold:doublevalue\n");
+  UsageMessage(stream, "  -Xprofile-top-k-change-threshold:doublevalue\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 25fc12a..d0f3c12 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -19,6 +19,7 @@
 
 #include <string>
 
+#include "gc/collector_type.h"
 #include "runtime.h"
 #include "trace.h"
 
@@ -77,13 +78,8 @@
   std::string compiler_executable_;
   std::vector<std::string> compiler_options_;
   std::vector<std::string> image_compiler_options_;
-  bool profile_;
+  ProfilerOptions profiler_options_;
   std::string profile_output_filename_;
-  uint32_t profile_period_s_;
-  uint32_t profile_duration_s_;
-  uint32_t profile_interval_us_;
-  double profile_backoff_coefficient_;
-  bool profile_start_immediately_;
   ProfilerClockSource profile_clock_source_;
   bool verify_;
   InstructionSet image_isa_;
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 5459ce3..bad79b3 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -53,7 +53,6 @@
 pthread_t BackgroundMethodSamplingProfiler::profiler_pthread_ = 0U;
 volatile bool BackgroundMethodSamplingProfiler::shutting_down_ = false;
 
-
 // TODO: this profiler runs regardless of the state of the machine.  Maybe we should use the
 // wakelock or something to modify the run characteristics.  This can be done when we
 // have some performance data after it's been used for a while.
@@ -75,8 +74,6 @@
   profiler->RecordMethod(method);
 }
 
-
-
 // A closure that is called by the thread checkpoint code.
 class SampleCheckpoint : public Closure {
  public:
@@ -128,6 +125,7 @@
 
   Thread* self = Thread::Current();
 
+  double backoff = 1.0;
   while (true) {
     if (ShuttingDown(self)) {
       break;
@@ -135,13 +133,13 @@
 
     {
       // wait until we need to run another profile
-      uint64_t delay_secs = profiler->period_s_ * profiler->backoff_factor_;
+      uint64_t delay_secs = profiler->options_.GetPeriodS() * backoff;
 
       // Add a startup delay to prevent all the profiles running at once.
       delay_secs += startup_delay;
 
       // Immediate startup for benchmarking?
-      if (profiler->start_immediately_ && startup_delay > 0) {
+      if (profiler->options_.GetStartImmediately() && startup_delay > 0) {
         delay_secs = 0;
       }
 
@@ -152,10 +150,7 @@
       profiler->period_condition_.TimedWait(self, delay_secs * 1000, 0);
 
       // Expand the backoff by its coefficient, but don't go beyond the max.
-      double new_backoff = profiler->backoff_factor_ * profiler->backoff_coefficient_;
-      if (new_backoff < kMaxBackoffSecs) {
-        profiler->backoff_factor_ = new_backoff;
-      }
+      backoff = std::min(backoff * profiler->options_.GetBackoffCoefficient(), kMaxBackoffSecs);
     }
 
     if (ShuttingDown(self)) {
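
With the defaults from profiler_options.h (period 10 s, backoff coefficient 2.0), the wait between profiling runs therefore grows as 10, 20, 40, 80, ... seconds until kMaxBackoffSecs caps it. The backoff is now a plain local, roughly:

    // Sketch of the backoff update done locally in RunProfilerThread:
    double backoff = 1.0;
    // ... each iteration of the profiler loop ...
    uint64_t delay_secs = profiler->options_.GetPeriodS() * backoff;          // 10, 20, 40, ...
    backoff = std::min(backoff * profiler->options_.GetBackoffCoefficient(),  // grows geometrically,
                       kMaxBackoffSecs);                                      // but never past the cap
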
@@ -164,11 +159,11 @@
 
 
     uint64_t start_us = MicroTime();
-    uint64_t end_us = start_us + profiler->duration_s_ * UINT64_C(1000000);
+    uint64_t end_us = start_us + profiler->options_.GetDurationS() * UINT64_C(1000000);
     uint64_t now_us = start_us;
 
-    VLOG(profiler) << "Starting profiling run now for " << PrettyDuration((end_us - start_us) * 1000);
-
+    VLOG(profiler) << "Starting profiling run now for "
+                   << PrettyDuration((end_us - start_us) * 1000);
 
     SampleCheckpoint check_point(profiler);
 
@@ -178,7 +173,7 @@
         break;
       }
 
-      usleep(profiler->interval_us_);    // Non-interruptible sleep.
+      usleep(profiler->options_.GetIntervalUs());    // Non-interruptible sleep.
 
       ThreadList* thread_list = runtime->GetThreadList();
 
@@ -230,7 +225,7 @@
 
 // Write out the profile file if we are generating a profile.
 uint32_t BackgroundMethodSamplingProfiler::WriteProfile() {
-  std::string full_name = profile_file_name_;
+  std::string full_name = output_filename_;
   VLOG(profiler) << "Saving profile to " << full_name;
 
   int fd = open(full_name.c_str(), O_RDWR);
@@ -285,45 +280,35 @@
   return num_methods;
 }
 
-// Start a profile thread with the user-supplied arguments.
-void BackgroundMethodSamplingProfiler::Start(int period, int duration,
-                  const std::string& profile_file_name, const std::string& procName,
-                  int interval_us,
-                  double backoff_coefficient, bool startImmediately) {
+bool BackgroundMethodSamplingProfiler::Start(
+    const std::string& output_filename, const ProfilerOptions& options) {
+  if (!options.IsEnabled()) {
+    LOG(INFO) << "Profiler disabled. To enable setprop dalvik.vm.profiler 1.";
+    return false;
+  }
+
+  CHECK(!output_filename.empty());
+
   Thread* self = Thread::Current();
   {
     MutexLock mu(self, *Locks::profiler_lock_);
     // Don't start two profiler threads.
     if (profiler_ != nullptr) {
-      return;
+      return true;
     }
   }
 
-  // Only on target...
-#ifdef HAVE_ANDROID_OS
-  // Switch off profiler if the dalvik.vm.profiler property has value 0.
-  char buf[PROP_VALUE_MAX];
-  property_get("dalvik.vm.profiler", buf, "0");
-  if (strcmp(buf, "0") == 0) {
-    LOG(INFO) << "Profiler disabled.  To enable setprop dalvik.vm.profiler 1";
-    return;
-  }
-#endif
-
-  LOG(INFO) << "Starting profile with period " << period << "s, duration " << duration <<
-      "s, interval " << interval_us << "us.  Profile file " << profile_file_name;
-
+  LOG(INFO) << "Starting profiler using output file: " << output_filename
+            << " and options: " << options;
   {
     MutexLock mu(self, *Locks::profiler_lock_);
-    profiler_ = new BackgroundMethodSamplingProfiler(period, duration, profile_file_name,
-                                      procName,
-                                      backoff_coefficient,
-                                      interval_us, startImmediately);
+    profiler_ = new BackgroundMethodSamplingProfiler(output_filename, options);
 
     CHECK_PTHREAD_CALL(pthread_create, (&profiler_pthread_, nullptr, &RunProfilerThread,
         reinterpret_cast<void*>(profiler_)),
                        "Profiler thread");
   }
+  return true;
 }
 
 
@@ -359,14 +344,10 @@
   Stop();
 }
 
-BackgroundMethodSamplingProfiler::BackgroundMethodSamplingProfiler(int period, int duration,
-                   const std::string& profile_file_name,
-                   const std::string& process_name,
-                   double backoff_coefficient, int interval_us, bool startImmediately)
-    : profile_file_name_(profile_file_name), process_name_(process_name),
-      period_s_(period), start_immediately_(startImmediately),
-      interval_us_(interval_us), backoff_factor_(1.0),
-      backoff_coefficient_(backoff_coefficient), duration_s_(duration),
+BackgroundMethodSamplingProfiler::BackgroundMethodSamplingProfiler(
+  const std::string& output_filename, const ProfilerOptions& options)
+    : output_filename_(output_filename),
+      options_(options),
       wait_lock_("Profile wait lock"),
       period_condition_("Profile condition", wait_lock_),
       profile_table_(wait_lock_),
@@ -443,7 +424,7 @@
   }
 }
 
-// Add a method to the profile table.  If it the first time the method
+// Add a method to the profile table.  If it's the first time the method
 // has been seen, add it with count=1, otherwise increment the count.
 void ProfileSampleResults::Put(mirror::ArtMethod* method) {
   lock_.Lock(Thread::Current());
@@ -468,7 +449,8 @@
   num_null_methods_ += previous_num_null_methods_;
   num_boot_methods_ += previous_num_boot_methods_;
 
-  VLOG(profiler) << "Profile: " << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
+  VLOG(profiler) << "Profile: "
+                 << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
   os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
   uint32_t num_methods = 0;
   for (int i = 0 ; i < kHashSize; i++) {
@@ -578,7 +560,7 @@
   }
 }
 
-bool ProfileHelper::LoadProfileMap(ProfileMap& profileMap, const std::string& fileName) {
+bool ProfileFile::LoadFile(const std::string& fileName) {
   LOG(VERBOSE) << "reading profile file " << fileName;
   struct stat st;
   int err = stat(fileName.c_str(), &st);
@@ -607,14 +589,11 @@
   std::vector<std::string> summary_info;
   Split(line, '/', summary_info);
   if (summary_info.size() != 3) {
-    // Bad summary info.  It should be count/total/bootpath.
+    // Bad summary info.  It should be total/null/boot.
     return false;
   }
-  // This is the number of hits in all methods.
-  uint32_t total_count = 0;
-  for (int i = 0 ; i < 3; ++i) {
-    total_count += atoi(summary_info[i].c_str());
-  }
+  // This is the number of hits in all profiled methods (excluding null and boot methods).
+  uint32_t total_count = atoi(summary_info[0].c_str());
 
   // Now read each line until the end of file.  Each line consists of 3 fields separated by '/'.
   // Store the info in descending order given by the most used methods.
@@ -629,7 +608,7 @@
     Split(line, '/', info);
     if (info.size() != 3) {
       // Malformed.
-      break;
+      return false;
     }
     int count = atoi(info[1].c_str());
     countSet.insert(std::make_pair(-count, info));
@@ -652,21 +631,24 @@
 
     // Add it to the profile map.
     ProfileData curData = ProfileData(methodname, count, size, usedPercent, topKPercentage);
-    profileMap[methodname] = curData;
+    profile_map_[methodname] = curData;
     prevData = &curData;
   }
   return true;
 }
 
-bool ProfileHelper::LoadTopKSamples(std::set<std::string>& topKSamples, const std::string& fileName,
-                                    double topKPercentage) {
-  ProfileMap profileMap;
-  bool loadOk = LoadProfileMap(profileMap, fileName);
-  if (!loadOk) {
+bool ProfileFile::GetProfileData(ProfileFile::ProfileData* data, const std::string& method_name) {
+  ProfileMap::iterator i = profile_map_.find(method_name);
+  if (i == profile_map_.end()) {
     return false;
   }
-  ProfileMap::iterator end = profileMap.end();
-  for (ProfileMap::iterator it = profileMap.begin(); it != end; it++) {
+  *data = i->second;
+  return true;
+}
+
+bool ProfileFile::GetTopKSamples(std::set<std::string>& topKSamples, double topKPercentage) {
+  ProfileMap::iterator end = profile_map_.end();
+  for (ProfileMap::iterator it = profile_map_.begin(); it != end; it++) {
     if (it->second.GetTopKUsedPercentage() < topKPercentage) {
       topKSamples.insert(it->first);
     }
diff --git a/runtime/profiler.h b/runtime/profiler.h
index 938fdb7..0b18dbb 100644
--- a/runtime/profiler.h
+++ b/runtime/profiler.h
@@ -28,6 +28,7 @@
 #include "base/mutex.h"
 #include "globals.h"
 #include "instrumentation.h"
+#include "profiler_options.h"
 #include "os.h"
 #include "safe_map.h"
 
@@ -62,17 +63,18 @@
  private:
   uint32_t Hash(mirror::ArtMethod* method);
   static constexpr int kHashSize = 17;
-  Mutex& lock_;                   // Reference to the main profiler lock - we don't need two of them.
-  uint32_t num_samples_;          // Total number of samples taken.
-  uint32_t num_null_methods_;     // Number of samples where can don't know the method.
-  uint32_t num_boot_methods_;     // Number of samples in the boot path.
+  Mutex& lock_;                  // Reference to the main profiler lock - we don't need two of them.
+  uint32_t num_samples_;         // Total number of samples taken.
+  uint32_t num_null_methods_;    // Number of samples where we don't know the method.
+  uint32_t num_boot_methods_;    // Number of samples in the boot path.
 
   typedef std::map<mirror::ArtMethod*, uint32_t> Map;   // Map of method vs its count.
   Map *table[kHashSize];
 
   struct PreviousValue {
     PreviousValue() : count_(0), method_size_(0) {}
-    PreviousValue(uint32_t count, uint32_t method_size) : count_(count), method_size_(method_size) {}
+    PreviousValue(uint32_t count, uint32_t method_size)
+      : count_(count), method_size_(method_size) {}
     uint32_t count_;
     uint32_t method_size_;
   };
@@ -101,9 +103,9 @@
 
 class BackgroundMethodSamplingProfiler {
  public:
-  static void Start(int period, int duration, const std::string& profile_filename,
-                    const std::string& procName, int interval_us,
-                    double backoff_coefficient, bool startImmediately)
+  // Start a profile thread with the user-supplied arguments.
+  // Returns true if the profile was started or if it was already running. Returns false otherwise.
+  static bool Start(const std::string& output_filename, const ProfilerOptions& options)
   LOCKS_EXCLUDED(Locks::mutator_lock_,
                  Locks::thread_list_lock_,
                  Locks::thread_suspend_count_lock_,
@@ -119,10 +121,8 @@
   }
 
  private:
-  explicit BackgroundMethodSamplingProfiler(int period, int duration,
-                                            const std::string& profile_filename,
-                                            const std::string& process_name,
-                                            double backoff_coefficient, int interval_us, bool startImmediately);
+  explicit BackgroundMethodSamplingProfiler(
+    const std::string& output_filename, const ProfilerOptions& options);
 
   // The sampling interval in microseconds is passed as an argument.
   static void* RunProfilerThread(void* arg) LOCKS_EXCLUDED(Locks::profiler_lock_);
@@ -141,35 +141,14 @@
   // Sampling thread, non-zero when sampling.
   static pthread_t profiler_pthread_;
 
-  // Some measure of the number of samples that are significant
+  // Some measure of the number of samples that are significant.
   static constexpr uint32_t kSignificantSamples = 10;
 
-  // File to write profile data out to.  Cannot be empty if we are profiling.
-  std::string profile_file_name_;
+  // The name of the file where profile data will be written.
+  std::string output_filename_;
+  // The options used to start the profiler.
+  const ProfilerOptions& options_;
 
-  // Process name.
-  std::string process_name_;
-
-  // Number of seconds between profile runs.
-  uint32_t period_s_;
-
-  // Most of the time we want to delay the profiler startup to prevent everything
-  // running at the same time (all processes).  This is the default, but if we
-  // want to override this, set the 'start_immediately_' to true.  This is done
-  // if the -Xprofile option is given on the command line.
-  bool start_immediately_;
-
-  uint32_t interval_us_;
-
-  // A backoff coefficent to adjust the profile period based on time.
-  double backoff_factor_;
-
-  // How much to increase the backoff by on each profile iteration.
-  double backoff_coefficient_;
-
-  // Duration of each profile run.  The profile file will be written at the end
-  // of each run.
-  uint32_t duration_s_;
 
   // Profile condition support.
   Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -188,52 +167,51 @@
   DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler);
 };
 
-// TODO: incorporate in ProfileSampleResults
-
-// Profile data.  This is generated from previous runs of the program and stored
+//
+// Contains profile data generated from previous runs of the program and stored
 // in a file.  It is used to determine whether to compile a particular method or not.
-class ProfileData {
+class ProfileFile {
  public:
-  ProfileData() : count_(0), method_size_(0), usedPercent_(0) {}
-  ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
-    double usedPercent, double topKUsedPercentage) :
-    method_name_(method_name), count_(count), method_size_(method_size),
-    usedPercent_(usedPercent), topKUsedPercentage_(topKUsedPercentage) {
-    // TODO: currently method_size_ and count_ are unused.
-    UNUSED(method_size_);
-    UNUSED(count_);
-  }
+  class ProfileData {
+   public:
+    ProfileData() : count_(0), method_size_(0), used_percent_(0) {}
+    ProfileData(const std::string& method_name, uint32_t count, uint32_t method_size,
+      double used_percent, double top_k_used_percentage) :
+      method_name_(method_name), count_(count), method_size_(method_size),
+      used_percent_(used_percent), top_k_used_percentage_(top_k_used_percentage) {
+      // TODO: currently method_size_ is unused
+      UNUSED(method_size_);
+    }
 
-  bool IsAbove(double v) const { return usedPercent_ >= v; }
-  double GetUsedPercent() const { return usedPercent_; }
-  uint32_t GetCount() const { return count_; }
-  double GetTopKUsedPercentage() const { return topKUsedPercentage_; }
+    double GetUsedPercent() const { return used_percent_; }
+    uint32_t GetCount() const { return count_; }
+    double GetTopKUsedPercentage() const { return top_k_used_percentage_; }
 
- private:
-  std::string method_name_;    // Method name.
-  uint32_t count_;             // Number of times it has been called.
-  uint32_t method_size_;       // Size of the method on dex instructions.
-  double usedPercent_;         // Percentage of how many times this method was called.
-  double topKUsedPercentage_;  // The percentage of the group that comprise K% of the total used
-                               // methods this methods belongs to.
-};
-
-// Profile data is stored in a map, indexed by the full method name.
-typedef std::map<std::string, ProfileData> ProfileMap;
-
-class ProfileHelper {
- private:
-  ProfileHelper();
+   private:
+    std::string method_name_;       // Method name.
+    uint32_t count_;                // Number of times it has been called.
+    uint32_t method_size_;          // Size of the method on dex instructions.
+    double used_percent_;           // Percentage of how many times this method was called.
+    double top_k_used_percentage_;  // The percentage of the group that comprises K% of the total
+                                    // used methods that this method belongs to.
+  };
 
  public:
-  // Read the profile data from the given file.  Calculates the percentage for each method.
-  // Returns false if there was no profile file or it was malformed.
-  static bool LoadProfileMap(ProfileMap& profileMap, const std::string& fileName);
+  // Loads profile data from the given file. The new data are merged with any existing data.
+  // Returns true if the file was loaded successfully and false otherwise.
+  bool LoadFile(const std::string& filename);
 
-  // Read the profile data from the given file and computes the group that comprise
-  // topKPercentage of the total used methods.
-  static bool LoadTopKSamples(std::set<std::string>& topKMethods, const std::string& fileName,
-                              double topKPercentage);
+  // Computes the group that comprises top_k_percentage of the total used methods.
+  bool GetTopKSamples(std::set<std::string>& top_k_methods, double top_k_percentage);
+
+  // If the given method has an entry in the profile table it updates the data
+  // and returns true. Otherwise returns false and leaves the data unchanged.
+  bool GetProfileData(ProfileData* data, const std::string& method_name);
+
+ private:
+  // Profile data is stored in a map, indexed by the full method name.
+  typedef std::map<std::string, ProfileData> ProfileMap;
+  ProfileMap profile_map_;
 };
 
 }  // namespace art
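
Besides the top-K query used by dalvik_system_DexFile.cc above, the new ProfileFile also supports per-method lookups. A small usage sketch (the caller and the threshold policy are illustrative):

    // Sketch: ask a loaded profile whether a method is "hot enough".
    bool MethodLooksHot(ProfileFile& profile, const std::string& method_name,
                        double used_percent_threshold) {
      ProfileFile::ProfileData data;
      if (!profile.GetProfileData(&data, method_name)) {
        return false;  // Method was never sampled.
      }
      return data.GetUsedPercent() >= used_percent_threshold;
    }
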
diff --git a/runtime/profiler_options.h b/runtime/profiler_options.h
new file mode 100644
index 0000000..08e32cc
--- /dev/null
+++ b/runtime/profiler_options.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_PROFILER_OPTIONS_H_
+#define ART_RUNTIME_PROFILER_OPTIONS_H_
+
+#include <string>
+#include <ostream>
+
+namespace art {
+
+class ProfilerOptions {
+ public:
+  static constexpr bool kDefaultEnabled = false;
+  static constexpr uint32_t kDefaultPeriodS = 10;
+  static constexpr uint32_t kDefaultDurationS = 20;
+  static constexpr uint32_t kDefaultIntervalUs = 500;
+  static constexpr double kDefaultBackoffCoefficient = 2.0;
+  static constexpr bool kDefaultStartImmediately = false;
+  static constexpr double kDefaultTopKThreshold = 90.0;
+  static constexpr double kDefaultChangeInTopKThreshold = 10.0;
+
+  ProfilerOptions() :
+    enabled_(kDefaultEnabled),
+    period_s_(kDefaultPeriodS),
+    duration_s_(kDefaultDurationS),
+    interval_us_(kDefaultIntervalUs),
+    backoff_coefficient_(kDefaultBackoffCoefficient),
+    start_immediately_(kDefaultStartImmediately),
+    top_k_threshold_(kDefaultTopKThreshold),
+    top_k_change_threshold_(kDefaultChangeInTopKThreshold) {}
+
+  ProfilerOptions(bool enabled,
+                 uint32_t period_s,
+                 uint32_t duration_s,
+                 uint32_t interval_us,
+                 double backoff_coefficient,
+                 bool start_immediately,
+                 double top_k_threshold,
+                 double top_k_change_threshold):
+    enabled_(enabled),
+    period_s_(period_s),
+    duration_s_(duration_s),
+    interval_us_(interval_us),
+    backoff_coefficient_(backoff_coefficient),
+    start_immediately_(start_immediately),
+    top_k_threshold_(top_k_threshold),
+    top_k_change_threshold_(top_k_change_threshold) {}
+
+  bool IsEnabled() const {
+    return enabled_;
+  }
+
+  uint32_t GetPeriodS() const {
+    return period_s_;
+  }
+
+  uint32_t GetDurationS() const {
+    return duration_s_;
+  }
+
+  uint32_t GetIntervalUs() const {
+    return interval_us_;
+  }
+
+  double GetBackoffCoefficient() const {
+    return backoff_coefficient_;
+  }
+
+  bool GetStartImmediately() const {
+    return start_immediately_;
+  }
+
+  double GetTopKThreshold() const {
+    return top_k_threshold_;
+  }
+
+  double GetTopKChangeThreshold() const {
+    return top_k_change_threshold_;
+  }
+
+ private:
+  friend std::ostream & operator<<(std::ostream &os, const ProfilerOptions& po) {
+    os << "enabled=" << po.enabled_
+       << ", period_s=" << po.period_s_
+       << ", duration_s=" << po.duration_s_
+       << ", interval_us=" << po.interval_us_
+       << ", backoff_coefficient=" << po.backoff_coefficient_
+       << ", start_immediately=" << po.start_immediately_
+       << ", top_k_threshold=" << po.top_k_threshold_
+       << ", top_k_change_threshold=" << po.top_k_change_threshold_;
+    return os;
+  }
+
+  friend class ParsedOptions;
+
+  // Whether or not the applications should be profiled.
+  bool enabled_;
+  // Generate profile every n seconds.
+  uint32_t period_s_;
+  // Run profile for n seconds.
+  uint32_t duration_s_;
+  // Microseconds between samples.
+  uint32_t interval_us_;
+  // Coefficient for the exponential backoff.
+  double backoff_coefficient_;
+  // Whether the profile should start upon app startup or be delayed by some random offset.
+  bool start_immediately_;
+  // Top K% of samples that are considered relevant when deciding if the app should be recompiled.
+  double top_k_threshold_;
+  // How much the top K% samples need to change in order for the app to be recompiled.
+  double top_k_change_threshold_;
+};
+
+}  // namespace art
+
+
+#endif  // ART_RUNTIME_PROFILER_OPTIONS_H_
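
A quick sketch of how the new options holder behaves with its defaults (the function is hypothetical; operator<< is the friend defined above):

    #include <iostream>

    void DumpDefaultProfilerOptions() {
      art::ProfilerOptions options;  // enabled=false, period_s=10, duration_s=20, interval_us=500, ...
      if (!options.IsEnabled()) {
        std::cout << "profiler off by default: " << options << std::endl;
      }
    }
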
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 9724bcc..093c129 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -140,52 +140,60 @@
 TEST_F(ProxyTest, ProxyFieldHelper) {
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Interfaces");
-  StackHandleScope<1> hs(soa.Self());
+  StackHandleScope<9> hs(soa.Self());
   Handle<mirror::ClassLoader> class_loader(
       hs.NewHandle(soa.Decode<mirror::ClassLoader*>(jclass_loader)));
 
-  mirror::Class* I = class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader);
-  mirror::Class* J = class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader);
-  ASSERT_TRUE(I != nullptr);
-  ASSERT_TRUE(J != nullptr);
-  std::vector<mirror::Class*> interfaces;
-  interfaces.push_back(I);
-  interfaces.push_back(J);
+  Handle<mirror::Class> I(hs.NewHandle(
+      class_linker_->FindClass(soa.Self(), "LInterfaces$I;", class_loader)));
+  Handle<mirror::Class> J(hs.NewHandle(
+      class_linker_->FindClass(soa.Self(), "LInterfaces$J;", class_loader)));
+  ASSERT_TRUE(I.Get() != nullptr);
+  ASSERT_TRUE(J.Get() != nullptr);
 
-  mirror::Class* proxyClass = GenerateProxyClass(soa, jclass_loader, "$Proxy1234", interfaces);
-  ASSERT_TRUE(proxyClass != nullptr);
+  Handle<mirror::Class> proxyClass;
+  {
+    std::vector<mirror::Class*> interfaces;
+    interfaces.push_back(I.Get());
+    interfaces.push_back(J.Get());
+    proxyClass = hs.NewHandle(GenerateProxyClass(soa, jclass_loader, "$Proxy1234", interfaces));
+  }
+
+  ASSERT_TRUE(proxyClass.Get() != nullptr);
   ASSERT_TRUE(proxyClass->IsProxyClass());
   ASSERT_TRUE(proxyClass->IsInitialized());
 
-  mirror::ObjectArray<mirror::ArtField>* instance_fields = proxyClass->GetIFields();
-  EXPECT_TRUE(instance_fields == nullptr);
+  Handle<mirror::ObjectArray<mirror::ArtField>> instance_fields(
+      hs.NewHandle(proxyClass->GetIFields()));
+  EXPECT_TRUE(instance_fields.Get() == nullptr);
 
-  mirror::ObjectArray<mirror::ArtField>* static_fields = proxyClass->GetSFields();
-  ASSERT_TRUE(static_fields != nullptr);
+  Handle<mirror::ObjectArray<mirror::ArtField>> static_fields(
+      hs.NewHandle(proxyClass->GetSFields()));
+  ASSERT_TRUE(static_fields.Get() != nullptr);
   ASSERT_EQ(2, static_fields->GetLength());
 
-  mirror::Class* interfacesFieldClass = class_linker_->FindSystemClass(soa.Self(),
-                                                                       "[Ljava/lang/Class;");
-  ASSERT_TRUE(interfacesFieldClass != nullptr);
-  mirror::Class* throwsFieldClass = class_linker_->FindSystemClass(soa.Self(),
-                                                                   "[[Ljava/lang/Class;");
-  ASSERT_TRUE(throwsFieldClass != nullptr);
+  Handle<mirror::Class> interfacesFieldClass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Class;")));
+  ASSERT_TRUE(interfacesFieldClass.Get() != nullptr);
+  Handle<mirror::Class> throwsFieldClass(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Class;")));
+  ASSERT_TRUE(throwsFieldClass.Get() != nullptr);
 
   // Test "Class[] interfaces" field.
-  FieldHelper fh(static_fields->Get(0));
-  EXPECT_EQ("interfaces", std::string(fh.GetName()));
-  EXPECT_EQ("[Ljava/lang/Class;", std::string(fh.GetTypeDescriptor()));
-  EXPECT_EQ(interfacesFieldClass, fh.GetType());
+  FieldHelper fh(hs.NewHandle(static_fields->Get(0)));
+  EXPECT_EQ("interfaces", std::string(fh.GetField()->GetName()));
+  EXPECT_EQ("[Ljava/lang/Class;", std::string(fh.GetField()->GetTypeDescriptor()));
+  EXPECT_EQ(interfacesFieldClass.Get(), fh.GetType());
   EXPECT_EQ("L$Proxy1234;", std::string(fh.GetDeclaringClassDescriptor()));
-  EXPECT_FALSE(fh.IsPrimitiveType());
+  EXPECT_FALSE(fh.GetField()->IsPrimitiveType());
 
   // Test "Class[][] throws" field.
   fh.ChangeField(static_fields->Get(1));
-  EXPECT_EQ("throws", std::string(fh.GetName()));
-  EXPECT_EQ("[[Ljava/lang/Class;", std::string(fh.GetTypeDescriptor()));
-  EXPECT_EQ(throwsFieldClass, fh.GetType());
+  EXPECT_EQ("throws", std::string(fh.GetField()->GetName()));
+  EXPECT_EQ("[[Ljava/lang/Class;", std::string(fh.GetField()->GetTypeDescriptor()));
+  EXPECT_EQ(throwsFieldClass.Get(), fh.GetType());
   EXPECT_EQ("L$Proxy1234;", std::string(fh.GetDeclaringClassDescriptor()));
-  EXPECT_FALSE(fh.IsPrimitiveType());
+  EXPECT_FALSE(fh.GetField()->IsPrimitiveType());
 }
 
 }  // namespace art
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 68b10cc..89058c8 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -44,6 +44,7 @@
 #include "atomic.h"
 #include "class_linker.h"
 #include "debugger.h"
+#include "fault_handler.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
@@ -123,12 +124,7 @@
       abort_(nullptr),
       stats_enabled_(false),
       running_on_valgrind_(RUNNING_ON_VALGRIND > 0),
-      profile_(false),
-      profile_period_s_(0),
-      profile_duration_s_(0),
-      profile_interval_us_(0),
-      profile_backoff_coefficient_(0),
-      profile_start_immediately_(true),
+      profiler_started_(false),
       method_trace_(false),
       method_trace_file_size_(0),
       instrumentation_(),
@@ -166,7 +162,7 @@
     shutting_down_ = true;
   }
   // Shut down background profiler before the runtime exits.
-  if (profile_) {
+  if (profiler_started_) {
     BackgroundMethodSamplingProfiler::Shutdown();
   }
 
@@ -416,17 +412,16 @@
   }
 
   VLOG(startup) << "Runtime::Start exiting";
-
   finished_starting_ = true;
 
-  if (profile_) {
-    // User has asked for a profile using -Xprofile
+  if (profiler_options_.IsEnabled() && !profile_output_filename_.empty()) {
+    // User has asked for a profile using -Xenable-profiler.
     // Create the profile file if it doesn't exist.
     int fd = open(profile_output_filename_.c_str(), O_RDWR|O_CREAT|O_EXCL, 0660);
     if (fd >= 0) {
       close(fd);
     }
-    StartProfiler(profile_output_filename_.c_str(), "");
+    StartProfiler(profile_output_filename_.c_str());
   }
 
   return true;
@@ -664,15 +659,9 @@
   method_trace_file_ = options->method_trace_file_;
   method_trace_file_size_ = options->method_trace_file_size_;
 
-  // Extract the profile options.
-  // TODO: move into a Trace options struct?
-  profile_period_s_ = options->profile_period_s_;
-  profile_duration_s_ = options->profile_duration_s_;
-  profile_interval_us_ = options->profile_interval_us_;
-  profile_backoff_coefficient_ = options->profile_backoff_coefficient_;
-  profile_start_immediately_ = options->profile_start_immediately_;
-  profile_ = options->profile_;
   profile_output_filename_ = options->profile_output_filename_;
+  profiler_options_ = options->profiler_options_;
+
   // TODO: move this to just be an Trace::Start argument
   Trace::SetDefaultClockSource(options->profile_clock_source_);
 
@@ -1125,9 +1114,10 @@
   method_verifiers_.erase(it);
 }
 
-void Runtime::StartProfiler(const char* appDir, const char* procName) {
-  BackgroundMethodSamplingProfiler::Start(profile_period_s_, profile_duration_s_, appDir,
-      procName, profile_interval_us_, profile_backoff_coefficient_, profile_start_immediately_);
+void Runtime::StartProfiler(const char* profile_output_filename) {
+  profile_output_filename_ = profile_output_filename;
+  profiler_started_ =
+    BackgroundMethodSamplingProfiler::Start(profile_output_filename_, profiler_options_);
 }
 
 // Transaction support.
diff --git a/runtime/runtime.h b/runtime/runtime.h
index afb5aa7..8776a59 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -25,25 +25,21 @@
 #include <utility>
 #include <vector>
 
-#include "base/macros.h"
-#include "base/stringpiece.h"
-#include "gc/collector_type.h"
-#include "gc/heap.h"
-#include "globals.h"
-#include "instruction_set.h"
 #include "instrumentation.h"
+#include "instruction_set.h"
 #include "jobject_comparator.h"
 #include "object_callbacks.h"
+#include "offsets.h"
+#include "profiler_options.h"
 #include "quick/quick_method_frame_info.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
-#include "fault_handler.h"
 
 namespace art {
 
 namespace gc {
   class Heap;
-}
+}  // namespace gc
 namespace mirror {
   class ArtMethod;
   class ClassLoader;
@@ -64,7 +60,10 @@
 class JavaVMExt;
 class MonitorList;
 class MonitorPool;
+class NullPointerHandler;
 class SignalCatcher;
+class StackOverflowHandler;
+class SuspensionHandler;
 class ThreadList;
 class Trace;
 class Transaction;
@@ -114,6 +113,10 @@
     return image_compiler_options_;
   }
 
+  const ProfilerOptions& GetProfilerOptions() const {
+    return profiler_options_;
+  }
+
   // Starts a runtime, which may cause threads to be started and code to run.
   bool Start() UNLOCK_FUNCTION(Locks::mutator_lock_);
 
@@ -386,7 +389,7 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
-  void StartProfiler(const char* appDir, const char* procName);
+  void StartProfiler(const char* profile_output_filename);
   void UpdateProfilerState(int state);
 
   // Transaction support.
@@ -559,15 +562,9 @@
 
   const bool running_on_valgrind_;
 
-  // Runtime profile support.
-  bool profile_;
   std::string profile_output_filename_;
-  uint32_t profile_period_s_;           // Generate profile every n seconds.
-  uint32_t profile_duration_s_;         // Run profile for n seconds.
-  uint32_t profile_interval_us_;        // Microseconds between samples.
-  double profile_backoff_coefficient_;  // Coefficient to exponential backoff.
-  bool profile_start_immediately_;      // Whether the profile should start upon app
-                                        // startup or be delayed by some random offset.
+  ProfilerOptions profiler_options_;
+  bool profiler_started_;
 
   bool method_trace_;
   std::string method_trace_file_;
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index d56495e..7ce68c6 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_SCOPED_THREAD_STATE_CHANGE_H_
 
 #include "base/casts.h"
+#include "jni_internal-inl.h"
 #include "thread-inl.h"
 #include "verify_object.h"
 
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index fc886d5..b1180bd 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -21,9 +21,11 @@
 
 #include <pthread.h>
 
+#include "cutils/atomic-inline.h"
+
 #include "base/casts.h"
 #include "base/mutex-inl.h"
-#include "cutils/atomic-inline.h"
+#include "gc/heap.h"
 #include "jni_internal.h"
 
 namespace art {
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 3645ed2..a03b389 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -115,48 +115,48 @@
   // Lookup fields.
   mirror::ArtField* booleanField = h_klass->FindDeclaredStaticField("booleanField", "Z");
   ASSERT_TRUE(booleanField != nullptr);
-  ASSERT_EQ(FieldHelper(booleanField).GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
+  ASSERT_EQ(booleanField->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   ASSERT_EQ(booleanField->GetBoolean(h_klass.Get()), false);
 
   mirror::ArtField* byteField = h_klass->FindDeclaredStaticField("byteField", "B");
   ASSERT_TRUE(byteField != nullptr);
-  ASSERT_EQ(FieldHelper(byteField).GetTypeAsPrimitiveType(), Primitive::kPrimByte);
+  ASSERT_EQ(byteField->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   ASSERT_EQ(byteField->GetByte(h_klass.Get()), 0);
 
   mirror::ArtField* charField = h_klass->FindDeclaredStaticField("charField", "C");
   ASSERT_TRUE(charField != nullptr);
-  ASSERT_EQ(FieldHelper(charField).GetTypeAsPrimitiveType(), Primitive::kPrimChar);
+  ASSERT_EQ(charField->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   ASSERT_EQ(charField->GetChar(h_klass.Get()), 0u);
 
   mirror::ArtField* shortField = h_klass->FindDeclaredStaticField("shortField", "S");
   ASSERT_TRUE(shortField != nullptr);
-  ASSERT_EQ(FieldHelper(shortField).GetTypeAsPrimitiveType(), Primitive::kPrimShort);
+  ASSERT_EQ(shortField->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   ASSERT_EQ(shortField->GetShort(h_klass.Get()), 0);
 
   mirror::ArtField* intField = h_klass->FindDeclaredStaticField("intField", "I");
   ASSERT_TRUE(intField != nullptr);
-  ASSERT_EQ(FieldHelper(intField).GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  ASSERT_EQ(intField->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   ASSERT_EQ(intField->GetInt(h_klass.Get()), 0);
 
   mirror::ArtField* longField = h_klass->FindDeclaredStaticField("longField", "J");
   ASSERT_TRUE(longField != nullptr);
-  ASSERT_EQ(FieldHelper(longField).GetTypeAsPrimitiveType(), Primitive::kPrimLong);
+  ASSERT_EQ(longField->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   ASSERT_EQ(longField->GetLong(h_klass.Get()), static_cast<int64_t>(0));
 
   mirror::ArtField* floatField = h_klass->FindDeclaredStaticField("floatField", "F");
   ASSERT_TRUE(floatField != nullptr);
-  ASSERT_EQ(FieldHelper(floatField).GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
+  ASSERT_EQ(floatField->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   ASSERT_EQ(floatField->GetFloat(h_klass.Get()), static_cast<float>(0.0f));
 
   mirror::ArtField* doubleField = h_klass->FindDeclaredStaticField("doubleField", "D");
   ASSERT_TRUE(doubleField != nullptr);
-  ASSERT_EQ(FieldHelper(doubleField).GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
+  ASSERT_EQ(doubleField->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   ASSERT_EQ(doubleField->GetDouble(h_klass.Get()), static_cast<double>(0.0));
 
   mirror::ArtField* objectField = h_klass->FindDeclaredStaticField("objectField",
                                                                       "Ljava/lang/Object;");
   ASSERT_TRUE(objectField != nullptr);
-  ASSERT_EQ(FieldHelper(objectField).GetTypeAsPrimitiveType(), Primitive::kPrimNot);
+  ASSERT_EQ(objectField->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
   ASSERT_EQ(objectField->GetObject(h_klass.Get()), nullptr);
 
   // Create a java.lang.Object instance to set objectField.
@@ -214,48 +214,48 @@
   // Lookup fields.
   mirror::ArtField* booleanField = h_klass->FindDeclaredInstanceField("booleanField", "Z");
   ASSERT_TRUE(booleanField != nullptr);
-  ASSERT_EQ(FieldHelper(booleanField).GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
+  ASSERT_EQ(booleanField->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   ASSERT_EQ(booleanField->GetBoolean(h_instance.Get()), false);
 
   mirror::ArtField* byteField = h_klass->FindDeclaredInstanceField("byteField", "B");
   ASSERT_TRUE(byteField != nullptr);
-  ASSERT_EQ(FieldHelper(byteField).GetTypeAsPrimitiveType(), Primitive::kPrimByte);
+  ASSERT_EQ(byteField->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   ASSERT_EQ(byteField->GetByte(h_instance.Get()), 0);
 
   mirror::ArtField* charField = h_klass->FindDeclaredInstanceField("charField", "C");
   ASSERT_TRUE(charField != nullptr);
-  ASSERT_EQ(FieldHelper(charField).GetTypeAsPrimitiveType(), Primitive::kPrimChar);
+  ASSERT_EQ(charField->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   ASSERT_EQ(charField->GetChar(h_instance.Get()), 0u);
 
   mirror::ArtField* shortField = h_klass->FindDeclaredInstanceField("shortField", "S");
   ASSERT_TRUE(shortField != nullptr);
-  ASSERT_EQ(FieldHelper(shortField).GetTypeAsPrimitiveType(), Primitive::kPrimShort);
+  ASSERT_EQ(shortField->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   ASSERT_EQ(shortField->GetShort(h_instance.Get()), 0);
 
   mirror::ArtField* intField = h_klass->FindDeclaredInstanceField("intField", "I");
   ASSERT_TRUE(intField != nullptr);
-  ASSERT_EQ(FieldHelper(intField).GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  ASSERT_EQ(intField->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   ASSERT_EQ(intField->GetInt(h_instance.Get()), 0);
 
   mirror::ArtField* longField = h_klass->FindDeclaredInstanceField("longField", "J");
   ASSERT_TRUE(longField != nullptr);
-  ASSERT_EQ(FieldHelper(longField).GetTypeAsPrimitiveType(), Primitive::kPrimLong);
+  ASSERT_EQ(longField->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   ASSERT_EQ(longField->GetLong(h_instance.Get()), static_cast<int64_t>(0));
 
   mirror::ArtField* floatField = h_klass->FindDeclaredInstanceField("floatField", "F");
   ASSERT_TRUE(floatField != nullptr);
-  ASSERT_EQ(FieldHelper(floatField).GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
+  ASSERT_EQ(floatField->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   ASSERT_EQ(floatField->GetFloat(h_instance.Get()), static_cast<float>(0.0f));
 
   mirror::ArtField* doubleField = h_klass->FindDeclaredInstanceField("doubleField", "D");
   ASSERT_TRUE(doubleField != nullptr);
-  ASSERT_EQ(FieldHelper(doubleField).GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
+  ASSERT_EQ(doubleField->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   ASSERT_EQ(doubleField->GetDouble(h_instance.Get()), static_cast<double>(0.0));
 
   mirror::ArtField* objectField = h_klass->FindDeclaredInstanceField("objectField",
                                                                         "Ljava/lang/Object;");
   ASSERT_TRUE(objectField != nullptr);
-  ASSERT_EQ(FieldHelper(objectField).GetTypeAsPrimitiveType(), Primitive::kPrimNot);
+  ASSERT_EQ(objectField->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
   ASSERT_EQ(objectField->GetObject(h_instance.Get()), nullptr);
 
   // Create a java.lang.Object instance to set objectField.
diff --git a/runtime/utils.cc b/runtime/utils.cc
index f562252..ef2047b 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -290,15 +290,15 @@
   if (f == NULL) {
     return "null";
   }
-  FieldHelper fh(f);
   std::string result;
   if (with_type) {
-    result += PrettyDescriptor(fh.GetTypeDescriptor());
+    result += PrettyDescriptor(f->GetTypeDescriptor());
     result += ' ';
   }
-  result += PrettyDescriptor(fh.GetDeclaringClassDescriptor());
+  StackHandleScope<1> hs(Thread::Current());
+  result += PrettyDescriptor(FieldHelper(hs.NewHandle(f)).GetDeclaringClassDescriptor());
   result += '.';
-  result += fh.GetName();
+  result += f->GetName();
   return result;
 }
 
@@ -552,6 +552,18 @@
   }
 }
 
+std::string PrintableChar(uint16_t ch) {
+  std::string result;
+  result += '\'';
+  if (NeedsEscaping(ch)) {
+    StringAppendF(&result, "\\u%04x", ch);
+  } else {
+    result += ch;
+  }
+  result += '\'';
+  return result;
+}
+
 std::string PrintableString(const std::string& utf) {
   std::string result;
   result += '"';
diff --git a/runtime/utils.h b/runtime/utils.h
index 4a9236a..6a4198f 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -23,11 +23,14 @@
 #include <vector>
 
 #include "base/logging.h"
-#include "base/stringprintf.h"
 #include "globals.h"
 #include "instruction_set.h"
 #include "primitive.h"
 
+#ifdef HAVE_ANDROID_OS
+#include "cutils/properties.h"
+#endif
+
 namespace art {
 
 class DexFile;
@@ -201,17 +204,7 @@
   return (ch < ' ' || ch > '~');
 }
 
-static inline std::string PrintableChar(uint16_t ch) {
-  std::string result;
-  result += '\'';
-  if (NeedsEscaping(ch)) {
-    StringAppendF(&result, "\\u%04x", ch);
-  } else {
-    result += ch;
-  }
-  result += '\'';
-  return result;
-}
+std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
 // Java escapes are used for non-ASCII characters.
diff --git a/runtime/verifier/instruction_flags.h b/runtime/verifier/instruction_flags.h
index e50ba13..f8abca0 100644
--- a/runtime/verifier/instruction_flags.h
+++ b/runtime/verifier/instruction_flags.h
@@ -17,11 +17,11 @@
 #ifndef ART_RUNTIME_VERIFIER_INSTRUCTION_FLAGS_H_
 #define ART_RUNTIME_VERIFIER_INSTRUCTION_FLAGS_H_
 
-#include "base/logging.h"
-
 #include <stdint.h>
 #include <string>
 
+#include "base/logging.h"
+
 namespace art {
 namespace verifier {
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index b5c07aa..c7bb20c 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -507,6 +507,11 @@
   return *failure_message;
 }
 
+std::ostream& MethodVerifier::LogVerifyInfo() {
+  return info_messages_ << "VFY: " << PrettyMethod(dex_method_idx_, *dex_file_)
+                        << '[' << reinterpret_cast<void*>(work_insn_idx_) << "] : ";
+}
+
 void MethodVerifier::PrependToLastFailMessage(std::string prepend) {
   size_t failure_num = failure_messages_.size();
   DCHECK_NE(failure_num, 0U);
@@ -3513,13 +3518,17 @@
   }
   const RegType* field_type = nullptr;
   if (field != NULL) {
-    FieldHelper fh(field);
-    mirror::Class* field_type_class = fh.GetType(can_load_classes_);
+    Thread* self = Thread::Current();
+    mirror::Class* field_type_class;
+    {
+      StackHandleScope<1> hs(self);
+      HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
+      field_type_class = FieldHelper(h_field).GetType(can_load_classes_);
+    }
     if (field_type_class != nullptr) {
-      field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+      field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
                                          field_type_class->CannotBeAssignedFromOtherTypes());
     } else {
-      Thread* self = Thread::Current();
       DCHECK(!can_load_classes_ || self->IsExceptionPending());
       self->ClearException();
     }
@@ -3580,10 +3589,15 @@
                                       << " from other class " << GetDeclaringClass();
       return;
     }
-    FieldHelper fh(field);
-    mirror::Class* field_type_class = fh.GetType(can_load_classes_);
+    mirror::Class* field_type_class;
+    {
+      StackHandleScope<1> hs(Thread::Current());
+      HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
+      FieldHelper fh(h_field);
+      field_type_class = fh.GetType(can_load_classes_);
+    }
     if (field_type_class != nullptr) {
-      field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+      field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
                                          field_type_class->CannotBeAssignedFromOtherTypes());
     } else {
       Thread* self = Thread::Current();
@@ -3643,18 +3657,23 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Cannot infer field from " << inst->Name();
     return;
   }
-  FieldHelper fh(field);
-  mirror::Class* field_type_class = fh.GetType(can_load_classes_);
+  mirror::Class* field_type_class;
+  {
+    StackHandleScope<1> hs(Thread::Current());
+    HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
+    FieldHelper fh(h_field);
+    field_type_class = fh.GetType(can_load_classes_);
+  }
   const RegType* field_type;
   if (field_type_class != nullptr) {
-    field_type = &reg_types_.FromClass(fh.GetTypeDescriptor(), field_type_class,
+    field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
                                        field_type_class->CannotBeAssignedFromOtherTypes());
   } else {
     Thread* self = Thread::Current();
     DCHECK(!can_load_classes_ || self->IsExceptionPending());
     self->ClearException();
     field_type = &reg_types_.FromDescriptor(field->GetDeclaringClass()->GetClassLoader(),
-                                            fh.GetTypeDescriptor(), false);
+                                            field->GetTypeDescriptor(), false);
   }
   DCHECK(field_type != nullptr);
   const uint32_t vregA = inst->VRegA_22c();
@@ -3698,7 +3717,7 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Cannot infer field from " << inst->Name();
     return;
   }
-  const char* descriptor = FieldHelper(field).GetTypeDescriptor();
+  const char* descriptor = field->GetTypeDescriptor();
   mirror::ClassLoader* loader = field->GetDeclaringClass()->GetClassLoader();
   const RegType& field_type = reg_types_.FromDescriptor(loader, descriptor, false);
   if (field != NULL) {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index a23e80d..451c9e2 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -29,9 +29,8 @@
 #include "dex_instruction.h"
 #include "instruction_flags.h"
 #include "method_reference.h"
-#include "mirror/object.h"
 #include "reg_type.h"
-#include "reg_type_cache-inl.h"
+#include "reg_type_cache.h"
 #include "register_line.h"
 #include "safe_map.h"
 
@@ -170,10 +169,7 @@
   std::ostream& Fail(VerifyError error);
 
   // Log for verification information.
-  std::ostream& LogVerifyInfo() {
-    return info_messages_ << "VFY: " << PrettyMethod(dex_method_idx_, *dex_file_)
-                          << '[' << reinterpret_cast<void*>(work_insn_idx_) << "] : ";
-  }
+  std::ostream& LogVerifyInfo();
 
   // Dump the failures encountered by the verifier.
   std::ostream& DumpFailures(std::ostream& os);
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index a23b8c4..64001d3 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -17,18 +17,18 @@
 #ifndef ART_RUNTIME_VERIFIER_REG_TYPE_H_
 #define ART_RUNTIME_VERIFIER_REG_TYPE_H_
 
-#include "base/macros.h"
-#include "globals.h"
-#include "object_callbacks.h"
-#include "primitive.h"
-
-#include "jni.h"
-
 #include <limits>
 #include <stdint.h>
 #include <set>
 #include <string>
 
+#include "jni.h"
+
+#include "base/macros.h"
+#include "globals.h"
+#include "object_callbacks.h"
+#include "primitive.h"
+
 namespace art {
 namespace mirror {
 class Class;
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index b3a2847..0989cd0 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -18,7 +18,9 @@
 #define ART_RUNTIME_VERIFIER_REGISTER_LINE_INL_H_
 
 #include "register_line.h"
+
 #include "method_verifier.h"
+#include "reg_type_cache-inl.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index a3e3e3b..d21f39b 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -16,6 +16,7 @@
 
 #include "register_line.h"
 
+#include "base/stringprintf.h"
 #include "dex_instruction-inl.h"
 #include "method_verifier.h"
 #include "register_line-inl.h"
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index dade203..57c7517 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -51,7 +51,7 @@
 class RegisterLine {
  public:
   static RegisterLine* Create(size_t num_regs, MethodVerifier* verifier) {
-    uint8_t* memory = new uint8_t[sizeof(RegisterLine) + (num_regs * sizeof(uint16_t))];
+    void* memory = operator new(sizeof(RegisterLine) + (num_regs * sizeof(uint16_t)));
     RegisterLine* rl = new (memory) RegisterLine(num_regs, verifier);
     return rl;
   }
diff --git a/test/700-LoadArgRegs/expected.txt b/test/700-LoadArgRegs/expected.txt
new file mode 100644
index 0000000..4908e5b
--- /dev/null
+++ b/test/700-LoadArgRegs/expected.txt
@@ -0,0 +1,75 @@
+11
+21, 22
+31, 32, 33
+41, 42, 43, 44
+51, 52, 53, 54, 55
+61, 62, 63, 64, 65, 66
+71, 72, 73, 74, 75, 76, 77
+81, 82, 83, 84, 85, 86, 87, 88
+91, 92, 93, 94, 95, 96, 97, 98, 99
+101, 102, 103, 104, 105, 106, 107, 108, 109, 110
+111, 112, 113, 114, 115, 116, 117, 118, 119, 1110, 1111
+121, 122, 123, 124, 125, 126, 127, 128, 129, 1210, 1211, 1212
+61, 62, 63, 64, 65, 66
+true
+true, false
+true, false, true
+true, false, true, false
+true, false, true, false, true
+true, false, true, false, true, false
+true, false, true, false, true, false, true
+a
+a, b
+a, b, c
+a, b, c, d
+a, b, c, d, e
+a, b, c, d, e, f
+a, b, c, d, e, f, g
+11
+11, b
+11, b, true
+11, b, true, 12
+11, b, true, 12, e
+11, b, true, 12, e, false
+11, b, true, 12, e, false, 13
+1.1
+2.1, 2.2
+3.1, 3.2, 3.3
+4.1, 4.2, 4.3, 4.4
+5.1, 5.2, 5.3, 5.4, 5.5
+6.1, 6.2, 6.3, 6.4, 6.5, 6.6
+7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
+8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8
+9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9
+10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9, 10.1
+1.01
+2.01, 2.02
+3.01, 3.02, 3.03
+4.01, 4.02, 4.03, 4.04
+5.01, 5.02, 5.03, 5.04, 5.05
+6.01, 6.02, 6.03, 6.04, 6.05, 6.06
+7.01, 7.02, 7.03, 7.04, 7.05, 7.06, 7.07
+8.01, 8.02, 8.03, 8.04, 8.05, 8.06, 8.07, 8.08
+9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09
+-1.1, 9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09
+10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08, 10.09, 10.01
+100011
+100021, 100022
+100031, 100032, 100033
+100041, 100042, 100043, 100044
+100051, 100052, 100053, 100054, 100055
+100061, 100062, 100063, 100064, 100065, 100066
+100071, 100072, 100073, 100074, 100075, 100076, 100077
+100081, 100082, 100083, 100084, 100085, 100086, 100087, 100088
+100091, 100092, 100093, 100094, 100095, 100096, 100097, 100098, 100099
+100100100100011
+-11
+-21, -22
+-31, -32, -33
+-41, -42, -43, -44
+-51, -52, -53, -54, -55
+-61, -62, -63, -64, -65, -66
+-71, -72, -73, -74, -75, -76, -77
+-81, -82, -83, -84, -85, -86, -87, -88
+-91, -92, -93, -94, -95, -96, -97, -98, -99
+-1, -91, -92, -93, -94, -95, -96, -97, -98, -99
diff --git a/test/700-LoadArgRegs/info.txt b/test/700-LoadArgRegs/info.txt
new file mode 100644
index 0000000..dcaa46e
--- /dev/null
+++ b/test/700-LoadArgRegs/info.txt
@@ -0,0 +1 @@
+Simple tests for passing int/boolean/Object/float/long/double arguments.
diff --git a/test/700-LoadArgRegs/src/Main.java b/test/700-LoadArgRegs/src/Main.java
new file mode 100644
index 0000000..281ab16
--- /dev/null
+++ b/test/700-LoadArgRegs/src/Main.java
@@ -0,0 +1,288 @@
+public class Main {
+
+  static void testI1(int p1) {
+     System.out.println(p1);
+  }
+  static void testI2(int p1, int p2) {
+     System.out.println(p1+", "+p2);
+  }
+  static void testI3(int p1, int p2, int p3) {
+     System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testI4(int p1, int p2, int p3, int p4) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testI5(int p1, int p2, int p3, int p4, int p5) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testI6(int p1, int p2, int p3, int p4, int p5, int p6) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testI7(int p1, int p2, int p3, int p4, int p5, int p6, int p7) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+  static void testI8(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8);
+  }
+  static void testI9(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9);
+  }
+  static void testI10(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10);
+  }
+  static void testI11(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10, int p11) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10+", "+p11);
+  }
+  static void testI12(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10, int p11, int p12) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10+", "+p11+", "+p12);
+  }
+  void testI6_nonstatic(int p1, int p2, int p3, int p4, int p5, int p6) {
+     System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+
+  static void testB1(boolean p1) {
+    System.out.println(p1);
+  }
+  static void testB2(boolean p1, boolean p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testB3(boolean p1, boolean p2, boolean p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testB4(boolean p1, boolean p2, boolean p3, boolean p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testB5(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testB6(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5, boolean p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testB7(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5, boolean p6, boolean p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testO1(Object p1) {
+    System.out.println(p1);
+  }
+  static void testO2(Object p1, Object p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testO3(Object p1, Object p2, Object p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testO4(Object p1, Object p2, Object p3, Object p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testO5(Object p1, Object p2, Object p3, Object p4, Object p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testO6(Object p1, Object p2, Object p3, Object p4, Object p5, Object p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testO7(Object p1, Object p2, Object p3, Object p4, Object p5, Object p6, Object p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testIOB1(int p1) {
+    System.out.println(p1);
+  }
+  static void testIOB2(int p1, Object p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testIOB3(int p1, Object p2, boolean p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testIOB4(int p1, Object p2, boolean p3, int p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testIOB5(int p1, Object p2, boolean p3, int p4, Object p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testIOB6(int p1, Object p2, boolean p3, int p4, Object p5, boolean p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testIOB7(int p1, Object p2, boolean p3, int p4, Object p5, boolean p6, int p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+
+  static void testF1(float p1) {
+    System.out.println(p1);
+  }
+  static void testF2(float p1, float p2) {
+    System.out.println(p1+", "+p2);
+  }
+  static void testF3(float p1, float p2, float p3) {
+    System.out.println(p1+", "+p2+", "+p3);
+  }
+  static void testF4(float p1, float p2, float p3, float p4) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4);
+  }
+  static void testF5(float p1, float p2, float p3, float p4, float p5) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+  }
+  static void testF6(float p1, float p2, float p3, float p4, float p5, float p6) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+  }
+  static void testF7(float p1, float p2, float p3, float p4, float p5, float p6, float p7) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+  }
+  static void testF8(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8);
+  }
+  static void testF9(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8, float p9) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9);
+  }
+  static void testF10(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8, float p9, float p10) {
+    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10);
+  }
+
+  static void testD1 (double p1) { System.out.println(p1); }
+  static void testD2 (double p1, double p2) { System.out.println(p1+", "+p2); }
+  static void testD3 (double p1, double p2, double p3) { System.out.println(p1+", "+p2+", "+p3); }
+  static void testD4 (double p1, double p2, double p3, double p4) { System.out.println(p1+", "+p2+", "+p3+", "+p4); }
+  static void testD5 (double p1, double p2, double p3, double p4, double p5) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); }
+  static void testD6 (double p1, double p2, double p3, double p4, double p5, double p6) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); }
+  static void testD7 (double p1, double p2, double p3, double p4, double p5, double p6, double p7) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); }
+  static void testD8 (double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); }
+  static void testD9 (double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+  static void testD9f (float p0, double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9) { System.out.println(p0+", "+p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+  static void testD10(double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9, double p10) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10); }
+
+  static void testI() {
+    testI1(11);
+    testI2(21, 22);
+    testI3(31, 32, 33);
+    testI4(41, 42, 43, 44);
+    testI5(51, 52, 53, 54, 55);
+    testI6(61, 62, 63, 64, 65, 66);
+    testI7(71, 72, 73, 74, 75, 76, 77);
+    testI8(81, 82, 83, 84, 85, 86, 87, 88);
+    testI9(91, 92, 93, 94, 95, 96, 97, 98, 99);
+    testI10(101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+    testI11(111, 112, 113, 114, 115, 116, 117, 118, 119, 1110, 1111);
+    testI12(121, 122, 123, 124, 125, 126, 127, 128, 129, 1210, 1211, 1212);
+    new Main().testI6_nonstatic(61, 62, 63, 64, 65, 66);
+  }
+
+  static void testB() {
+    testB1(true);
+    testB2(true, false);
+    testB3(true, false, true);
+    testB4(true, false, true, false);
+    testB5(true, false, true, false, true);
+    testB6(true, false, true, false, true, false);
+    testB7(true, false, true, false, true, false, true);
+  }
+
+  static void testO() {
+    testO1("a");
+    testO2("a", "b");
+    testO3("a", "b", "c");
+    testO4("a", "b", "c", "d");
+    testO5("a", "b", "c", "d", "e");
+    testO6("a", "b", "c", "d", "e", "f");
+    testO7("a", "b", "c", "d", "e", "f", "g");
+  }
+
+  static void testIOB() {
+    testIOB1(11);
+    testIOB2(11, "b");
+    testIOB3(11, "b", true);
+    testIOB4(11, "b", true, 12);
+    testIOB5(11, "b", true, 12, "e");
+    testIOB6(11, "b", true, 12, "e", false);
+    testIOB7(11, "b", true, 12, "e", false, 13);
+  }
+
+  static void testF() {
+    testF1(1.1f);
+    testF2(2.1f, 2.2f);
+    testF3(3.1f, 3.2f, 3.3f);
+    testF4(4.1f, 4.2f, 4.3f, 4.4f);
+    testF5(5.1f, 5.2f, 5.3f, 5.4f, 5.5f);
+    testF6(6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f);
+    testF7(7.1f, 7.2f, 7.3f, 7.4f, 7.5f, 7.6f, 7.7f);
+    testF8(8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f);
+    testF9(9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f);
+    testF10(10.1f, 10.2f, 10.3f, 10.4f, 10.5f, 10.6f, 10.7f, 10.8f, 10.9f, 10.1f);
+  }
+
+  static void testD() {
+
+    testD1(1.01);
+    testD2(2.01, 2.02);
+    testD3(3.01, 3.02, 3.03);
+    testD4(4.01, 4.02, 4.03, 4.04);
+    testD5(5.01, 5.02, 5.03, 5.04, 5.05);
+    testD6(6.01, 6.02, 6.03, 6.04, 6.05, 6.06);
+    testD7(7.01, 7.02, 7.03, 7.04, 7.05, 7.06, 7.07);
+    testD8(8.01, 8.02, 8.03, 8.04, 8.05, 8.06, 8.07, 8.08);
+    testD9(9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09);
+    testD9f(-1.1f, 9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09);
+
+    // TODO: 10.01 as first arg fails: 10.009994506835938
+    testD10(10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08, 10.09, 10.01);
+  }
+
+  static void testL1(long p1) { System.out.println(p1); }
+//  static void testL2x(long p1, long p2) { testL2(p1+p2, p2); }  // TODO(64) GenAddLong 64BIT_TEMP
+  static void testL2(long p1, long p2) { System.out.println(p1+", "+p2); }
+  static void testL3(long p1, long p2, long p3) { System.out.println(p1+", "+p2+", "+p3); }
+  static void testL4(long p1, long p2, long p3, long p4) { System.out.println(p1+", "+p2+", "+p3+", "+p4); }
+  static void testL5(long p1, long p2, long p3, long p4, long p5) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); }
+  static void testL6(long p1, long p2, long p3, long p4, long p5, long p6) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); }
+  static void testL7(long p1, long p2, long p3, long p4, long p5, long p6, long p7) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); }
+  static void testL8(long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); }
+  static void testL9(long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+
+  static void testL9i(int p0, long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { System.out.println(p0+", "+p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+
+  static void testL() {
+    // testL2x(100021, 100022);
+    testL1(100011);
+    testL2(100021, 100022);
+    testL3(100031, 100032, 100033);
+    testL4(100041, 100042, 100043, 100044);
+    testL5(100051, 100052, 100053, 100054, 100055);
+    testL6(100061, 100062, 100063, 100064, 100065, 100066);
+    testL7(100071, 100072, 100073, 100074, 100075, 100076, 100077);
+    testL8(100081, 100082, 100083, 100084, 100085, 100086, 100087, 100088);
+    testL9(100091, 100092, 100093, 100094, 100095, 100096, 100097, 100098, 100099);
+  }
+
+  static void testLL() {
+    testL1(100100100100011L);
+
+    testL1(-11L);
+    testL2(-21L, -22L);
+    testL3(-31L, -32L, -33L);
+    testL4(-41L, -42L, -43L, -44L);
+    testL5(-51L, -52L, -53L, -54L, -55L);
+    testL6(-61L, -62L, -63L, -64L, -65L, -66L);
+    testL7(-71L, -72L, -73L, -74L, -75L, -76L, -77L);
+    testL8(-81L, -82L, -83L, -84L, -85L, -86L, -87L, -88L);
+    testL9(-91L, -92L, -93L, -94L, -95L, -96L, -97L, -98L, -99L);
+    testL9i(-1, -91L, -92L, -93L, -94L, -95L, -96L, -97L, -98L, -99L);
+
+    // TODO(64) GenAddLong 64BIT_TEMP
+    // testL2x(100100100100011L, 1L);
+    // testL2x(100100100100011L, 100100100100011L);
+  }
+
+  public static void main(String[] args) throws Exception {
+
+    testI();
+    testB();
+    testO();
+    testIOB();
+    testF();
+
+    testD();
+
+    testL();
+
+    testLL();
+
+  }
+}
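
Note: the existing suite mixes types only pairwise (testL9i mixes int with longs, testD9f mixes a float with doubles). A possible follow-up, not part of this change, would be a method in the same style that interleaves int, long, float, and double parameters so core and FP argument registers are exercised together. The sketch below is illustrative only; the class and method names are hypothetical and do not exist in the tree.

// Hypothetical extension sketch (not part of this commit): interleaves
// int/long/float/double parameters in one call, using the same println
// pattern as test/700-LoadArgRegs/src/Main.java above.
public class MixedArgsSketch {
  static void testMixed(int p1, long p2, float p3, double p4,
                        int p5, long p6, float p7, double p8) {
    System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8);
  }

  public static void main(String[] args) {
    // Expected output: 1, 2, 3.1, 4.01, 5, 6, 7.1, 8.01
    testMixed(1, 2L, 3.1f, 4.01, 5, 6L, 7.1f, 8.01);
  }
}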