ARM64: Enable implicit suspend checks and madvise()...

... away the alternate signal stack in the entrypoint.

Also fix 706-checker-scheduler to check the correct LDR.
During development of implicit suspend checks, the LDR
was emitted in the loop header but the final version
emits it at the back edge.

Also make `GenerateSuspendCheck()` consistently emit
the branch to `successor` if not null.

Test: run-gtests.sh
Test: testrunner.py --target --64 --optimizing
Bug: 38383823
Bug: 209235730
Change-Id: Ie3e2513d1b22522219f0f8a4ab8bcfd8921e8cdf
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1002f2c..cc57f6a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1987,6 +1987,9 @@
   if (codegen_->CanUseImplicitSuspendCheck()) {
     __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister));
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+    if (successor != nullptr) {
+      __ B(codegen_->GetLabelOf(successor));
+    }
     return;
   }
 
@@ -3583,9 +3586,7 @@
   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
-    if (!codegen_->CanUseImplicitSuspendCheck()) {
-      return;  // `GenerateSuspendCheck()` emitted the jump.
-    }
+    return;  // `GenerateSuspendCheck()` emitted the jump.
   }
   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 48b2c93..b8271d8 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -833,8 +833,7 @@
     // Set the compilation target's implicit checks options.
     switch (compiler_options_->GetInstructionSet()) {
       case InstructionSet::kArm64:
-        // TODO: Investigate implicit suspend check regressions. Bug: 209235730, 213121241.
-        compiler_options_->implicit_suspend_checks_ = false;
+        compiler_options_->implicit_suspend_checks_ = true;
         FALLTHROUGH_INTENDED;
       case InstructionSet::kArm:
       case InstructionSet::kThumb2:
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 6145d9a..a3db9f6 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1286,10 +1286,11 @@
     bx     lr
 END art_quick_test_suspend
 
+    .extern artImplicitSuspendFromCode
 ENTRY art_quick_implicit_suspend
     mov    r0, rSELF
     SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
-    bl     artTestSuspendFromCode             @ (Thread*)
+    bl     artImplicitSuspendFromCode         @ (Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     REFRESH_MARKING_REGISTER
     bx     lr
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 3683fdd..7fb6ff0 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1624,12 +1624,12 @@
     /*
      * Redirection point from implicit suspend check fault handler.
      */
-    .extern artTestSuspendFromCode
+    .extern artImplicitSuspendFromCode
 ENTRY art_quick_implicit_suspend
                                         // Save callee saves for stack crawl.
     SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
     mov    x0, xSELF
-    bl     artTestSuspendFromCode       // (Thread*)
+    bl     artImplicitSuspendFromCode   // (Thread*)
     RESTORE_SAVE_EVERYTHING_FRAME
     REFRESH_MARKING_REGISTER
     REFRESH_SUSPEND_CHECK_REGISTER
diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
index d8c1ee2..93422cf 100644
--- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
@@ -22,11 +22,17 @@
 namespace art {
 
 extern "C" void artTestSuspendFromCode(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Called when suspend count check value is 0 and thread->suspend_count_ != 0
+  // Called when there is a pending checkpoint or suspend request.
   ScopedQuickEntrypointChecks sqec(self);
   self->CheckSuspend();
 }
 
+extern "C" void artImplicitSuspendFromCode(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Called from `art_quick_implicit_suspend` for a pending checkpoint or suspend request.
+  ScopedQuickEntrypointChecks sqec(self);
+  self->CheckSuspend(/*implicit=*/ true);
+}
+
 extern "C" void artCompileOptimized(ArtMethod* method, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
diff --git a/runtime/oat.h b/runtime/oat.h
index e44187e..36ef459 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: ARM64: Disable implicit suspend checks.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '8', '\0' } };
+  // Last oat version changed reason: ARM64: Enable implicit suspend checks; madvise().
+  static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '9', '\0' } };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 7048eaa..54e9d38 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1663,8 +1663,7 @@
   // Change the implicit checks flags based on runtime architecture.
   switch (kRuntimeISA) {
     case InstructionSet::kArm64:
-      // TODO: Investigate implicit suspend check regressions. Bug: 209235730, 213121241.
-      implicit_suspend_checks_ = false;
+      implicit_suspend_checks_ = true;
       FALLTHROUGH_INTENDED;
     case InstructionSet::kArm:
     case InstructionSet::kThumb2:
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index fc8e6cb..3c1e7a0 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -46,7 +46,7 @@
   PoisonObjectPointers();
 }
 
-inline void Thread::CheckSuspend() {
+inline void Thread::CheckSuspend(bool implicit) {
   DCHECK_EQ(Thread::Current(), this);
   while (true) {
     StateAndFlags state_and_flags = GetStateAndFlags(std::memory_order_relaxed);
@@ -55,12 +55,18 @@
     } else if (state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest)) {
       RunCheckpointFunction();
     } else if (state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest)) {
-      FullSuspendCheck();
+      FullSuspendCheck(implicit);
+      implicit = false;  // We do not need to `MadviseAwayAlternateSignalStack()` anymore.
     } else {
       DCHECK(state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest));
       RunEmptyCheckpoint();
     }
   }
+  if (implicit) {
+    // For implicit suspend check we want to `madvise()` away
+    // the alternate signal stack to avoid wasting memory.
+    MadviseAwayAlternateSignalStack();
+  }
 }
 
 inline void Thread::CheckEmptyCheckpointFromWeakRefAccess(BaseMutex* cond_var_mutex) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7988f88..25d493f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1861,12 +1861,22 @@
   }
 }
 
-void Thread::FullSuspendCheck() {
+// Handles a pending suspend request for the current thread by transitioning it to
+// `kSuspended` and back to runnable. For an implicit suspend check (`implicit` is true)
+// the alternate signal stack is `madvise()`d away while the thread is still suspended.
+void Thread::FullSuspendCheck(bool implicit) {
   ScopedTrace trace(__FUNCTION__);
   VLOG(threads) << this << " self-suspending";
   // Make thread appear suspended to other threads, release mutator_lock_.
   // Transition to suspended and back to runnable, re-acquire share on mutator_lock_.
-  ScopedThreadSuspension(this, ThreadState::kSuspended);  // NOLINT
+  // Keep the guard in a named local: an unnamed temporary is destroyed at the end of its
+  // statement, so the thread would already be runnable again when `madvise()` is called.
+  ScopedThreadSuspension sts(this, ThreadState::kSuspended);  // NOLINT
+  if (implicit) {
+    // For implicit suspend check we want to `madvise()` away
+    // the alternate signal stack to avoid wasting memory.
+    MadviseAwayAlternateSignalStack();
+  }
   VLOG(threads) << this << " self-reviving";
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 3c358d8..1085a56 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -230,7 +230,7 @@
   void AllowThreadSuspension() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Process pending thread suspension request and handle if pending.
-  void CheckSuspend() REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckSuspend(bool implicit = false) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Process a pending empty checkpoint if pending.
   void CheckEmptyCheckpointFromWeakRefAccess(BaseMutex* cond_var_mutex);
@@ -367,7 +367,7 @@
 
   // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
   // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
-  void FullSuspendCheck()
+  void FullSuspendCheck(bool implicit = false)
       REQUIRES(!Locks::thread_suspend_count_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -1479,6 +1479,7 @@
 
   void SetUpAlternateSignalStack();
   void TearDownAlternateSignalStack();
+  void MadviseAwayAlternateSignalStack();
 
   ALWAYS_INLINE void TransitionToSuspendedAndRunCheckpoints(ThreadState new_state)
       REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_)
diff --git a/runtime/thread_android.cc b/runtime/thread_android.cc
index f333400..fadfc09 100644
--- a/runtime/thread_android.cc
+++ b/runtime/thread_android.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include <signal.h>
+#include <sys/mman.h>
+
 #include "thread.h"
 
 namespace art {
@@ -26,4 +29,20 @@
   // Bionic does this for us.
 }
 
+// Best-effort release of the pages backing the calling thread's alternate signal stack.
+// Failure to release them only costs memory, so it is never treated as fatal.
+void Thread::MadviseAwayAlternateSignalStack() {
+  stack_t old_ss;
+  int result = sigaltstack(nullptr, &old_ss);
+  CHECK_EQ(result, 0);
+  // With `SS_DISABLE` there is no alternate stack to release; with `SS_ONSTACK` we would be
+  // discarding the stack we are currently running on.
+  if ((old_ss.ss_flags & (SS_DISABLE | SS_ONSTACK)) != 0) {
+    return;
+  }
+  // Deliberately ignore the result: `MADV_FREE` fails with `EINVAL` on kernels older than
+  // Linux 4.5 and for a stack that is not page-aligned; this must not abort the runtime.
+  madvise(old_ss.ss_sp, old_ss.ss_size, MADV_FREE);
+}
+
 }  // namespace art
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index 3ed4276..afce796 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -70,4 +70,8 @@
   delete[] allocated_signal_stack;
 }
 
+void Thread::MadviseAwayAlternateSignalStack() {
+  // We do not `madvise()` away the alternate signal stack on host.
+}
+
 }  // namespace art
diff --git a/test/706-checker-scheduler/src/Main.java b/test/706-checker-scheduler/src/Main.java
index dc07a08..1b8377d 100644
--- a/test/706-checker-scheduler/src/Main.java
+++ b/test/706-checker-scheduler/src/Main.java
@@ -610,10 +610,10 @@
   /// CHECK:     beq
 
   /// CHECK-START-ARM64: void Main.testCrossItersDependencies() disassembly (after)
-  /// CHECK:     ldr
   /// CHECK:     sub
   /// CHECK:     add
   /// CHECK:     add
+  /// CHECK:     ldr
   /// CHECK:     b
   private static void testCrossItersDependencies() {
     int[] data = {1, 2, 3, 0};