Use the thumb2 assembler for the optimizing compiler.

The ARM backend of the optimizing compiler now generates Thumb2 code:
kArm is mapped to kThumb2, because runtime functionality such as the
implicit stack overflow checks assumes thumb2. The code generator uses
a Thumb2Assembler configured to always emit 32 bit branches, the long
compare sequence loads its result register before the unsigned `cmp`
since LoadImmediate may change the status flags, and the codegen test
now sets the bottom bit of the entry address when running Thumb code.

Change-Id: I2b058f4433504dc3299c06f5cb0b5ab12f34aa82
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index ed07129..470ddfd 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -23,7 +23,8 @@
 # rule name such as test-art-host-oat-optimizing-HelloWorld64.
 ART_TEST_KNOWN_BROKEN := \
   test-art-host-oat-optimizing-SignalTest64 \
-  test-art-host-oat-optimizing-SignalTest32
+  test-art-host-oat-optimizing-SignalTest32 \
+  test-art-target-oat-optimizing-SignalTest32
 
 # List of known failing tests that when executed won't cause test execution to not finish.
 # The test name must be the full rule name such as test-art-host-oat-optimizing-HelloWorld64.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 73c2d48..9d17fb1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -97,7 +97,8 @@
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
-      move_resolver_(graph->GetArena(), this) {}
+      move_resolver_(graph->GetArena(), this),
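+      // Passing true forces the assembler to emit 32 bit branch instructions,
+      // so branches never change size after they are emitted.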
+      assembler_(true) {}
 
 size_t CodeGeneratorARM::FrameEntrySpillSize() const {
   return kNumberOfPushedRegistersAtEntry * kArmWordSize;
@@ -948,9 +949,11 @@
              ShifterOperand(right.AsRegisterPairHigh()));  // Signed compare.
       __ b(&less, LT);
       __ b(&greater, GT);
+      // Do LoadImmediate before any `cmp`, as LoadImmediate might affect
+      // the status flags.
+      __ LoadImmediate(output, 0);
       __ cmp(left.AsRegisterPairLow(),
              ShifterOperand(right.AsRegisterPairLow()));  // Unsigned compare.
-      __ LoadImmediate(output, 0);
       __ b(&done, EQ);
       __ b(&less, CC);
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1b5974f..3cc16aa 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -20,7 +20,7 @@
 #include "code_generator.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
-#include "utils/arm/assembler_arm32.h"
+#include "utils/arm/assembler_thumb2.h"
 
 namespace art {
 namespace arm {
@@ -172,7 +172,7 @@
   }
 
   virtual InstructionSet GetInstructionSet() const OVERRIDE {
-    return InstructionSet::kArm;
+    return InstructionSet::kThumb2;
   }
 
  private:
@@ -184,7 +184,7 @@
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
   ParallelMoveResolverARM move_resolver_;
-  Arm32Assembler assembler_;
+  Thumb2Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 7ec0c84..bfdc30f 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -48,10 +48,17 @@
 };
 
 #if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
-static void Run(const InternalCodeAllocator& allocator, bool has_result, int32_t expected) {
+static void Run(const InternalCodeAllocator& allocator,
+                const CodeGenerator& codegen,
+                bool has_result,
+                int32_t expected) {
   typedef int32_t (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+  if (codegen.GetInstructionSet() == kThumb2) {
+    // Thumb code must be entered with the bottom bit of the address set (interworking).
+    f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
+  }
   int32_t result = f();
   if (has_result) {
     CHECK_EQ(result, expected);
@@ -71,19 +78,19 @@
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, kX86);
   codegen->CompileBaseline(&allocator);
 #if defined(__i386__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 
   codegen = CodeGenerator::Create(&arena, graph, kArm);
   codegen->CompileBaseline(&allocator);
 #if defined(__arm__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 
   codegen = CodeGenerator::Create(&arena, graph, kX86_64);
   codegen->CompileBaseline(&allocator);
 #if defined(__x86_64__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 }
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b621e51..8a5077b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -79,13 +79,14 @@
                                                jobject class_loader,
                                                const DexFile& dex_file) const {
   InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
-  // The optimizing compiler currently does not have a Thumb2 assembler.
-  if (instruction_set == kThumb2) {
-    instruction_set = kArm;
+  // Always use the thumb2 assembler: some runtime functionality (like implicit stack
+  // overflow checks) assumes thumb2.
+  if (instruction_set == kArm) {
+    instruction_set = kThumb2;
   }
 
   // Do not attempt to compile on architectures we do not support.
-  if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kArm) {
+  if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kThumb2) {
     return nullptr;
   }
 
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 8a34928..5fe8246 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -169,9 +169,7 @@
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
 
     case MOV:
-      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
-        return true;
-      }
+      // TODO: Support immediates of 12 bits or fewer.
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
     case MVN:
     default:
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 604f59e..2ce4fd2 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -877,6 +877,7 @@
            rn_shift = 8;
         } else {
           thumb_opcode = 0b1010;
+          rd = rn;
           rn = so.GetRegister();
         }
 
@@ -1470,6 +1471,7 @@
     // branch the size may change if it so happens that other branches change size that change
     // the distance to the target and that distance puts this branch over the limit for 16 bits.
     if (size == Branch::k16Bit) {
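+      // Forced 32 bit branches are always sized k32Bit, so this path must not be taken.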
+      DCHECK(!force_32bit_branches_);
       Emit16(0);          // Space for a 16 bit branch.
     } else {
       Emit32(0);            // Space for a 32 bit branch.
@@ -1477,7 +1479,7 @@
   } else {
     // Branch is to an unbound label.  Emit space for it.
     uint16_t branch_id = AddBranch(branch_type, pc, cond);    // Unresolved branch.
-    if (force_32bit_) {
+    if (force_32bit_branches_ || force_32bit_) {
       Emit16(static_cast<uint16_t>(label->position_));    // Emit current label link.
       Emit16(0);                   // another 16 bits.
     } else {
@@ -2073,6 +2075,7 @@
     uint32_t branch_location = branch->GetLocation();
     uint16_t next = buffer_.Load<uint16_t>(branch_location);       // Get next in chain.
     if (changed) {
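+      // A forced 32 bit branch never changes size, so no hole should be needed here.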
+      DCHECK(!force_32bit_branches_);
       MakeHoleForBranch(branch->GetLocation(), 2);
       if (branch->IsCompareAndBranch()) {
         // A cbz/cbnz instruction has changed size.  There is no valid encoding for
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 5f24e4e..f1ae3d8 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -29,10 +29,13 @@
 namespace art {
 namespace arm {
 
-
 class Thumb2Assembler FINAL : public ArmAssembler {
  public:
-  Thumb2Assembler() : force_32bit_(false), it_cond_index_(kNoItCondition), next_condition_(AL) {
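+  // When force_32bit_branches is true, every branch is emitted as a 32 bit
+  // instruction and branch sizes are never changed when binding labels.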
+  Thumb2Assembler(bool force_32bit_branches = false)
+      : force_32bit_branches_(force_32bit_branches),
+        force_32bit_(false),
+        it_cond_index_(kNoItCondition),
+        next_condition_(AL) {
   }
 
   virtual ~Thumb2Assembler() {
@@ -49,6 +52,10 @@
     return force_32bit_;
   }
 
+  bool IsForced32BitBranches() const {
+    return force_32bit_branches_;
+  }
+
   void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
     EmitBranches();
     Assembler::FinalizeInstructions(region);
@@ -412,7 +419,8 @@
   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount, bool setcc = false);
   void EmitShift(Register rd, Register rn, Shift shift, Register rm, bool setcc = false);
 
-  bool force_32bit_;      // Force the assembler to use 32 bit thumb2 instructions.
+  bool force_32bit_branches_; // Force the assembler to use 32 bit branch instructions.
+  bool force_32bit_;          // Force the assembler to use 32 bit thumb2 instructions.
 
   // IfThen conditions.  Used to check that conditional instructions match the preceding IT.
   Condition it_conditions_[4];
@@ -605,6 +613,9 @@
    private:
     // Calculate the size of the branch instruction based on its type and offset.
     Size CalculateSize() const {
+      if (assembler_->IsForced32BitBranches()) {
+        return k32Bit;
+      }
       if (target_ == kUnresolved) {
         if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) {
           return k32Bit;