bootstat: refine "Kernel panic - not syncing"

Provide some easy kernel panic subreasons mined from last kmesg,
generates a canonical boot reason (system boot reason) that
may aid triage.

Notably report kernel_panic,hung if [khungtaskd] triggers on
a livelock condition, forms a signals on the dashboards.

Helper function getSubreason modified to optionally enable checking
for a single quote resulting in a refactoring to ease maintenance of
the termination detection in the face of single bit errors heuristics.

Test: boot_reason_test.sh
Bug: 63736262
Bug: 33808187
Change-Id: I7fdd1e57e7a26095738175074306f0d2d59b1d69
diff --git a/bootstat/boot_reason_test.sh b/bootstat/boot_reason_test.sh
index 1043df1..b194bbe 100755
--- a/bootstat/boot_reason_test.sh
+++ b/bootstat/boot_reason_test.sh
@@ -808,6 +808,63 @@
   exitPstore
 }
 
+[ "USAGE: test_kernel_panic_subreason
+
+kernel_panic_subreason test:
+- echo SysRq : Trigger a crash : 'test' | adb shell su root tee /dev/kmsg
+- echo c | adb shell su root tee /proc/sysrq-trigger
+- (wait until screen is up, boot has completed)
+- adb shell getprop sys.boot.reason
+- NB: should report kernel_panic,sysrq,test" ]
+test_kernel_panic_subreason() {
+  checkDebugBuild || return
+  duration_test ">90"
+  panic_msg="kernel_panic,sysrq,test"
+  enterPstore
+  if [ ${?} != 0 ]; then
+    echo "         or functional bootloader" >&2
+    panic_msg="\(kernel_panic,sysrq,test\|kernel_panic\)"
+    pstore_ok=true
+  fi
+  echo "SysRq : Trigger a crash : 'test'" | adb shell su root tee /dev/kmsg
+  echo c | adb shell su root tee /proc/sysrq-trigger >/dev/null
+  wait_for_screen
+  EXPECT_PROPERTY sys.boot.reason ${panic_msg}
+  EXPECT_PROPERTY persist.sys.boot.reason ${panic_msg}
+  report_bootstat_logs kernel_panic,sysrq,test \
+    "-bootstat: Unknown boot reason: kernel_panic,sysrq,test"
+  exitPstore
+}
+
+[ "USAGE: test_kernel_panic_hung
+
+kernel_panic_hung test:
+- echo Kernel panic - not synching: hung_task: blocked tasks |
+  adb shell su root tee /dev/kmsg
+- adb reboot warm
+- (wait until screen is up, boot has completed)
+- adb shell getprop sys.boot.reason
+- NB: should report kernel_panic,hung" ]
+test_kernel_panic_hung() {
+  checkDebugBuild || return
+  duration_test
+  panic_msg="kernel_panic,hung"
+  enterPstore
+  if [ ${?} != 0 ]; then
+    echo "         or functional bootloader" >&2
+    panic_msg="\(kernel_panic,hung\|reboot,hung\)"
+    pstore_ok=true
+  fi
+  echo "Kernel panic - not syncing: hung_task: blocked tasks" |
+    adb shell su root tee /dev/kmsg
+  adb reboot warm
+  wait_for_screen
+  EXPECT_PROPERTY sys.boot.reason ${panic_msg}
+  EXPECT_PROPERTY persist.sys.boot.reason ${panic_msg}
+  report_bootstat_logs kernel_panic,hung
+  exitPstore
+}
+
 [ "USAGE: test_warm
 
 warm test
@@ -1067,12 +1124,12 @@
   if [ -z "${2}" ]; then
     # Hard coded should shell fail to find them above (search/permission issues)
     eval set properties ota cold factory_reset hard battery unknown \
-             kernel_panic warm thermal_shutdown userrequested_shutdown \
-             shell_reboot adb_reboot Its_Just_So_Hard_reboot \
-             bootloader_normal bootloader_watchdog bootloader_kernel_panic \
-             bootloader_oem_powerkey bootloader_wdog_reset \
-             bootloader_wdog_reset bootloader_wdog_reset bootloader_hard \
-             bootloader_recovery
+             kernel_panic kernel_panic_subreason kernel_panic_hung warm \
+             thermal_shutdown userrequested_shutdown shell_reboot adb_reboot \
+             Its_Just_So_Hard_reboot bootloader_normal bootloader_watchdog \
+             bootloader_kernel_panic bootloader_oem_powerkey \
+             bootloader_wdog_reset bootloader_wdog_reset bootloader_wdog_reset \
+             bootloader_hard bootloader_recovery
   fi
   if [ X"nothing" = X"${1}" ]; then
     shift 1
diff --git a/bootstat/bootstat.cpp b/bootstat/bootstat.cpp
index 29988c8..e60e6be 100644
--- a/bootstat/bootstat.cpp
+++ b/bootstat/bootstat.cpp
@@ -295,6 +295,13 @@
     {"reboot,tool", 151},
     {"reboot,wdt", 152},
     {"reboot,unknown", 153},
+    {"kernel_panic,audit", 154},
+    {"kernel_panic,atomic", 155},
+    {"kernel_panic,hung", 156},
+    {"kernel_panic,hung,rcu", 157},
+    {"kernel_panic,init", 158},
+    {"kernel_panic,oom", 159},
+    {"kernel_panic,stack", 160},
 };
 
 // Converts a string value representing the reason the system booted to an
@@ -519,9 +526,100 @@
                  [](char c) { return ::isprint(c) ? c : '?'; });
 }
 
-// Pull out and correct quoted (') subreason, pos just beyond first quote.
-// Check subreasons for reboot,<subreason> and kernel_panic,sysrq,<subreason>
-std::string getSubreason(const std::string& content, size_t pos) {
+// Check subreasons for reboot,<subreason> kernel_panic,sysrq,<subreason> or
+// kernel_panic,<subreason>.
+//
+// If quoted flag is set, pull out and correct single quoted ('), newline (\n)
+// or unprintable character terminated subreason, pos is supplied just beyond
+// first quote.  if quoted false, pull out and correct newline (\n) or
+// unprintable character terminated subreason.
+//
+// Heuristics to find termination is painted into a corner:
+
+// single bit error for quote ' that we can block.  It is acceptable for
+// the others 7, g in reason.  2/9 chance will miss the terminating quote,
+// but there is always the terminating newline that usually immediately
+// follows to fortify our chances.
+bool likely_single_quote(char c) {
+  switch (static_cast<uint8_t>(c)) {
+    case '\'':         // '\''
+    case '\'' ^ 0x01:  // '&'
+    case '\'' ^ 0x02:  // '%'
+    case '\'' ^ 0x04:  // '#'
+    case '\'' ^ 0x08:  // '/'
+      return true;
+    case '\'' ^ 0x10:  // '7'
+      break;
+    case '\'' ^ 0x20:  // '\a' (unprintable)
+      return true;
+    case '\'' ^ 0x40:  // 'g'
+      break;
+    case '\'' ^ 0x80:  // 0xA7 (unprintable)
+      return true;
+  }
+  return false;
+}
+
+// ::isprint(c) and likely_space() will prevent us from being called for
+// fundamentally printable entries, except for '\r' and '\b'.
+//
+// Except for * and J, single bit errors for \n, all others are non-
+// printable so easy catch.  It is _acceptable_ for *, J or j to exist in
+// the reason string, so 2/9 chance we will miss the terminating newline.
+//
+// NB: J might not be acceptable, except if at the beginning or preceded
+//     with a space, '(' or any of the quotes and their BER aliases.
+// NB: * might not be acceptable, except if at the beginning or preceded
+//     with a space, another *, or any of the quotes or their BER aliases.
+//
+// To reduce the chances to closer to 1/9 is too complicated for the gain.
+bool likely_newline(char c) {
+  switch (static_cast<uint8_t>(c)) {
+    case '\n':         // '\n' (unprintable)
+    case '\n' ^ 0x01:  // '\r' (unprintable)
+    case '\n' ^ 0x02:  // '\b' (unprintable)
+    case '\n' ^ 0x04:  // 0x0E (unprintable)
+    case '\n' ^ 0x08:  // 0x02 (unprintable)
+    case '\n' ^ 0x10:  // 0x1A (unprintable)
+      return true;
+    case '\n' ^ 0x20:  // '*'
+    case '\n' ^ 0x40:  // 'J'
+      break;
+    case '\n' ^ 0x80:  // 0x8A (unprintable)
+      return true;
+  }
+  return false;
+}
+
+// ::isprint(c) will prevent us from being called for all the printable
+// matches below.  If we let unprintables through because of this, they
+// get converted to underscore (_) by the validation phase.
+bool likely_space(char c) {
+  switch (static_cast<uint8_t>(c)) {
+    case ' ':          // ' '
+    case ' ' ^ 0x01:   // '!'
+    case ' ' ^ 0x02:   // '"'
+    case ' ' ^ 0x04:   // '$'
+    case ' ' ^ 0x08:   // '('
+    case ' ' ^ 0x10:   // '0'
+    case ' ' ^ 0x20:   // '\0' (unprintable)
+    case ' ' ^ 0x40:   // 'P'
+    case ' ' ^ 0x80:   // 0xA0 (unprintable)
+    case '\t':         // '\t'
+    case '\t' ^ 0x01:  // '\b' (unprintable) (likely_newline counters)
+    case '\t' ^ 0x02:  // '\v' (unprintable)
+    case '\t' ^ 0x04:  // '\r' (unprintable) (likely_newline counters)
+    case '\t' ^ 0x08:  // 0x01 (unprintable)
+    case '\t' ^ 0x10:  // 0x19 (unprintable)
+    case '\t' ^ 0x20:  // ')'
+    case '\t' ^ 0x40:  // '1'
+    case '\t' ^ 0x80:  // 0x89 (unprintable)
+      return true;
+  }
+  return false;
+}
+
+std::string getSubreason(const std::string& content, size_t pos, bool quoted) {
   static constexpr size_t max_reason_length = 256;
 
   std::string subReason(content.substr(pos, max_reason_length));
@@ -529,20 +627,24 @@
   for (const auto& s : knownReasons) {
     correctForBitErrorOrUnderline(subReason, s);
   }
+  std::string terminator(quoted ? "'" : "");
   for (const auto& m : kBootReasonMap) {
     if (m.first.length() <= strlen("cold")) continue;  // too short?
-    if (correctForBitErrorOrUnderline(subReason, m.first + "'")) continue;
+    if (correctForBitErrorOrUnderline(subReason, m.first + terminator)) continue;
     if (m.first.length() <= strlen("reboot,cold")) continue;  // short?
     if (android::base::StartsWith(m.first, "reboot,")) {
-      correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("reboot,")) + "'");
+      correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("reboot,")) + terminator);
     } else if (android::base::StartsWith(m.first, "kernel_panic,sysrq,")) {
-      correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("kernel_panic,sysrq,")) + "'");
+      correctForBitErrorOrUnderline(subReason,
+                                    m.first.substr(strlen("kernel_panic,sysrq,")) + terminator);
+    } else if (android::base::StartsWith(m.first, "kernel_panic,")) {
+      correctForBitErrorOrUnderline(subReason, m.first.substr(strlen("kernel_panic,")) + terminator);
     }
   }
   for (pos = 0; pos < subReason.length(); ++pos) {
     char c = subReason[pos];
-    // #, &, %, / are common single bit error for ' that we can block
-    if (!::isprint(c) || (c == '\'') || (c == '#') || (c == '&') || (c == '%') || (c == '/')) {
+    if (!(::isprint(c) || likely_space(c)) || likely_newline(c) ||
+        (quoted && likely_single_quote(c))) {
       subReason.erase(pos);
       break;
     }
@@ -561,7 +663,7 @@
     static constexpr char sysrqSubreason[] = "SysRq : Trigger a crash : '";
     auto pos = console.rfind(sysrqSubreason);
     if (pos != std::string::npos) {
-      ret += "," + getSubreason(console, pos + strlen(sysrqSubreason));
+      ret += "," + getSubreason(console, pos + strlen(sysrqSubreason), /* quoted */ true);
     }
     return true;
   }
@@ -574,6 +676,43 @@
     ret = "kernel_panic,bug";
     return true;
   }
+
+  std::string panic("Kernel panic - not syncing: ");
+  auto pos = console.rfind(panic);
+  if (pos != std::string::npos) {
+    static const std::vector<std::pair<const std::string, const std::string>> panicReasons = {
+        {"Out of memory", "oom"},
+        {"out of memory", "oom"},
+        {"Oh boy, that early out of memory", "oom"},  // omg
+        {"BUG!", "bug"},
+        {"hung_task: blocked tasks", "hung"},
+        {"audit: ", "audit"},
+        {"scheduling while atomic", "atomic"},
+        {"Attempted to kill init!", "init"},
+        {"Requested init", "init"},
+        {"No working init", "init"},
+        {"Could not decompress init", "init"},
+        {"RCU Stall", "hung,rcu"},
+        {"stack-protector", "stack"},
+        {"kernel stack overflow", "stack"},
+        {"Corrupt kernel stack", "stack"},
+        {"low stack detected", "stack"},
+        {"corrupted stack end", "stack"},
+    };
+
+    ret = "kernel_panic";
+    for (auto& s : panicReasons) {
+      if (console.find(panic + s.first, pos) != std::string::npos) {
+        ret += "," + s.second;
+        return true;
+      }
+    }
+    auto reason = getSubreason(console, pos + panic.length(), /* newline */ false);
+    if (reason.length() > 3) {
+      ret += "," + reason;
+    }
+    return true;
+  }
   return false;
 }
 
@@ -689,7 +828,7 @@
       static const char cmd[] = "reboot: Restarting system with command '";
       size_t pos = console.rfind(cmd);
       if (pos != std::string::npos) {
-        std::string subReason(getSubreason(content, pos + strlen(cmd)));
+        std::string subReason(getSubreason(content, pos + strlen(cmd), /* quoted */ true));
         if (subReason != "") {  // Will not land "reboot" as that is too blunt.
           if (isKernelRebootReason(subReason)) {
             ret = "reboot," + subReason;  // User space can't talk kernel reasons.