Add timeout for dump_file.

It turns out dump_file is used on a number of /proc and system files.
In one case, the read of a file stalled and caused a bugreport to
hang forever. A kernel bug could still cause the read to stall forever,
but that is now less likely.

Also, change the return type of nanotime to uint64_t.

Testing:
- Created a named fifo and verified that dump_file fails with a timeout.
- Created a large /data/anr/traces.txt to verify that large files still
  dump properly and that the additional NONBLOCK parameter doesn't cause
  a problem.
- Created a dummy /data/tombstones/tombstone_00 to verify that the
  dump of these files still works.
- Compared a dump from the old dumpstate with one from the new dumpstate
  to verify that nothing is obviously different.

Bug: 19117030
Change-Id: I0d3dd27583c853cdaccd2fd278748cb5f9ccd4fb
diff --git a/cmds/dumpstate/utils.c b/cmds/dumpstate/utils.c
index 7ad9cf0..27b9fb1 100644
--- a/cmds/dumpstate/utils.c
+++ b/cmds/dumpstate/utils.c
@@ -53,6 +53,12 @@
         NULL,
 };
 
+static uint64_t nanotime() {
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (uint64_t)ts.tv_sec * NANOS_PER_SEC + ts.tv_nsec;
+}
+
 void for_each_userid(void (*func)(int), const char *header) {
     DIR *d;
     struct dirent *de;
@@ -98,7 +104,7 @@
 
         sprintf(cmdpath,"/proc/%d/cmdline", pid);
         memset(cmdline, 0, sizeof(cmdline));
-        if ((fd = open(cmdpath, O_RDONLY)) < 0) {
+        if ((fd = TEMP_FAILURE_RETRY(open(cmdpath, O_RDONLY))) < 0) {
             strcpy(cmdline, "N/A");
         } else {
             read(fd, cmdline, sizeof(cmdline) - 1);
@@ -149,7 +155,7 @@
 
         sprintf(commpath,"/proc/%d/comm", tid);
         memset(comm, 0, sizeof(comm));
-        if ((fd = open(commpath, O_RDONLY)) < 0) {
+        if ((fd = TEMP_FAILURE_RETRY(open(commpath, O_RDONLY))) < 0) {
             strcpy(comm, "N/A");
         } else {
             char *c;
@@ -180,7 +186,7 @@
     memset(buffer, 0, sizeof(buffer));
 
     sprintf(path, "/proc/%d/wchan", tid);
-    if ((fd = open(path, O_RDONLY)) < 0) {
+    if ((fd = TEMP_FAILURE_RETRY(open(path, O_RDONLY))) < 0) {
         printf("Failed to open '%s' (%s)\n", path, strerror(errno));
         return;
     }
@@ -250,22 +256,7 @@
     run_command(title, 10, SU_PATH, "root", "showmap", arg, NULL);
 }
 
-/* prints the contents of a file */
-int dump_file(const char *title, const char *path) {
-    int fd = open(path, O_RDONLY);
-    if (fd < 0) {
-        int err = errno;
-        if (title) printf("------ %s (%s) ------\n", title, path);
-        printf("*** %s: %s\n", path, strerror(err));
-        if (title) printf("\n");
-        return -1;
-    }
-    return dump_file_from_fd(title, path, fd);
-}
-
-int dump_file_from_fd(const char *title, const char *path, int fd) {
-    char buffer[32768];
-
+static int _dump_file_from_fd(const char *title, const char *path, int fd) {
     if (title) printf("------ %s (%s", title, path);
 
     if (title) {
@@ -279,26 +270,76 @@
         printf(") ------\n");
     }
 
-    int newline = 0;
-    for (;;) {
-        int ret = read(fd, buffer, sizeof(buffer));
-        if (ret > 0) {
-            newline = (buffer[ret - 1] == '\n');
-            ret = fwrite(buffer, ret, 1, stdout);
+    bool newline = false;
+    fd_set read_set;
+    struct timeval tm;
+    while (1) {
+        FD_ZERO(&read_set);
+        FD_SET(fd, &read_set);
+        /* Timeout if no data is read for 30 seconds. */
+        tm.tv_sec = 30;
+        tm.tv_usec = 0;
+        uint64_t elapsed = nanotime();
+        int ret = TEMP_FAILURE_RETRY(select(fd + 1, &read_set, NULL, NULL, &tm));
+        if (ret == -1) {
+            printf("*** %s: select failed: %s\n", path, strerror(errno));
+            newline = true;
+            break;
+        } else if (ret == 0) {
+            elapsed = nanotime() - elapsed;
+            printf("*** %s: Timed out after %.3fs\n", path,
+                   (float) elapsed / NANOS_PER_SEC);
+            newline = true;
+            break;
+        } else {
+            char buffer[65536];
+            ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, sizeof(buffer)));
+            if (bytes_read > 0) {
+                fwrite(buffer, bytes_read, 1, stdout);
+                newline = (buffer[bytes_read-1] == '\n');
+            } else {
+                if (bytes_read == -1) {
+                    printf("*** %s: Failed to read from fd: %s", path, strerror(errno));
+                    newline = true;
+                }
+                break;
+            }
         }
-        if (ret <= 0) break;
     }
-    close(fd);
+    TEMP_FAILURE_RETRY(close(fd));
 
     if (!newline) printf("\n");
     if (title) printf("\n");
     return 0;
 }
 
-static int64_t nanotime() {
-    struct timespec ts;
-    clock_gettime(CLOCK_MONOTONIC, &ts);
-    return (int64_t)ts.tv_sec * NANOS_PER_SEC + ts.tv_nsec;
+/* prints the contents of a file */
+int dump_file(const char *title, const char *path) {
+    int fd = TEMP_FAILURE_RETRY(open(path, O_RDONLY | O_NONBLOCK | O_CLOEXEC));
+    if (fd < 0) {
+        int err = errno;
+        if (title) printf("------ %s (%s) ------\n", title, path);
+        printf("*** %s: %s\n", path, strerror(err));
+        if (title) printf("\n");
+        return -1;
+    }
+    return _dump_file_from_fd(title, path, fd);
+}
+
+/* fd must have been opened with the flag O_NONBLOCK. With this flag set,
+ * it's possible to avoid issues where opening the file itself can get
+ * stuck.
+ */
+int dump_file_from_fd(const char *title, const char *path, int fd) {
+    int flags = fcntl(fd, F_GETFL);
+    if (flags == -1) {
+        printf("*** %s: failed to get flags on fd %d: %s\n", path, fd, strerror(errno));
+        return -1;
+    } else if (!(flags & O_NONBLOCK)) {
+        printf("*** %s: fd must have O_NONBLOCK set.\n", path);
+        return -1;
+    }
+    return _dump_file_from_fd(title, path, fd);
 }
 
 bool waitpid_with_timeout(pid_t pid, int timeout_seconds, int* status) {
@@ -348,7 +389,7 @@
 /* forks a command and waits for it to finish */
 int run_command(const char *title, int timeout_seconds, const char *command, ...) {
     fflush(stdout);
-    int64_t start = nanotime();
+    uint64_t start = nanotime();
     pid_t pid = fork();
 
     /* handle error case */
@@ -550,7 +591,8 @@
     }
 
     /* create a new, empty traces.txt file to receive stack dumps */
-    int fd = open(traces_path, O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW, 0666);  /* -rw-rw-rw- */
+    int fd = TEMP_FAILURE_RETRY(open(traces_path, O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW,
+                                     0666));  /* -rw-rw-rw- */
     if (fd < 0) {
         fprintf(stderr, "%s: %s\n", traces_path, strerror(errno));
         return NULL;
@@ -600,7 +642,7 @@
         if (!strncmp(data, "/system/bin/app_process", strlen("/system/bin/app_process"))) {
             /* skip zygote -- it won't dump its stack anyway */
             snprintf(path, sizeof(path), "/proc/%d/cmdline", pid);
-            int cfd = open(path, O_RDONLY);
+            int cfd = TEMP_FAILURE_RETRY(open(path, O_RDONLY));
             len = read(cfd, data, sizeof(data) - 1);
             close(cfd);
             if (len <= 0) {
@@ -612,7 +654,7 @@
             }
 
             ++dalvik_found;
-            int64_t start = nanotime();
+            uint64_t start = nanotime();
             if (kill(pid, SIGQUIT)) {
                 fprintf(stderr, "kill(%d, SIGQUIT): %s\n", pid, strerror(errno));
                 continue;
@@ -642,7 +684,7 @@
                 fprintf(stderr, "lseek: %s\n", strerror(errno));
             } else {
                 static uint16_t timeout_failures = 0;
-                int64_t start = nanotime();
+                uint64_t start = nanotime();
 
                 /* If 3 backtrace dumps fail in a row, consider debuggerd dead. */
                 if (timeout_failures == 3) {