Add timeout for dump_file.
It turns out dump_file is used on a number of /proc and system files.
In one case, the read of a file stalled and caused a bugreport to
hang forever. It's still possible if there is a kernel bug that this
could stall forever, but less likely.
Also, change the return type of nanotime to uint64_t.
Testing:
- Created a named fifo and verified that dump_file fails with a timeout.
- Created a large /data/anr/traces.txt to verify that large files still
dump properly and that the additional NONBLOCK parameter doesn't cause
a problem.
- Created a dummy /data/tombstones/tombstone_00 to verify that the
dump of these files still works.
- Compared a dump using the old dumpstate to the new dumpstate to verify
nothing obviously different.
Bug: 19117030
Change-Id: I0d3dd27583c853cdaccd2fd278748cb5f9ccd4fb
diff --git a/cmds/dumpstate/utils.c b/cmds/dumpstate/utils.c
index 7ad9cf0..27b9fb1 100644
--- a/cmds/dumpstate/utils.c
+++ b/cmds/dumpstate/utils.c
@@ -53,6 +53,12 @@
NULL,
};
+static uint64_t nanotime() {
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return (uint64_t)ts.tv_sec * NANOS_PER_SEC + ts.tv_nsec;
+}
+
void for_each_userid(void (*func)(int), const char *header) {
DIR *d;
struct dirent *de;
@@ -98,7 +104,7 @@
sprintf(cmdpath,"/proc/%d/cmdline", pid);
memset(cmdline, 0, sizeof(cmdline));
- if ((fd = open(cmdpath, O_RDONLY)) < 0) {
+ if ((fd = TEMP_FAILURE_RETRY(open(cmdpath, O_RDONLY))) < 0) {
strcpy(cmdline, "N/A");
} else {
read(fd, cmdline, sizeof(cmdline) - 1);
@@ -149,7 +155,7 @@
sprintf(commpath,"/proc/%d/comm", tid);
memset(comm, 0, sizeof(comm));
- if ((fd = open(commpath, O_RDONLY)) < 0) {
+ if ((fd = TEMP_FAILURE_RETRY(open(commpath, O_RDONLY))) < 0) {
strcpy(comm, "N/A");
} else {
char *c;
@@ -180,7 +186,7 @@
memset(buffer, 0, sizeof(buffer));
sprintf(path, "/proc/%d/wchan", tid);
- if ((fd = open(path, O_RDONLY)) < 0) {
+ if ((fd = TEMP_FAILURE_RETRY(open(path, O_RDONLY))) < 0) {
printf("Failed to open '%s' (%s)\n", path, strerror(errno));
return;
}
@@ -250,22 +256,7 @@
run_command(title, 10, SU_PATH, "root", "showmap", arg, NULL);
}
-/* prints the contents of a file */
-int dump_file(const char *title, const char *path) {
- int fd = open(path, O_RDONLY);
- if (fd < 0) {
- int err = errno;
- if (title) printf("------ %s (%s) ------\n", title, path);
- printf("*** %s: %s\n", path, strerror(err));
- if (title) printf("\n");
- return -1;
- }
- return dump_file_from_fd(title, path, fd);
-}
-
-int dump_file_from_fd(const char *title, const char *path, int fd) {
- char buffer[32768];
-
+static int _dump_file_from_fd(const char *title, const char *path, int fd) {
if (title) printf("------ %s (%s", title, path);
if (title) {
@@ -279,26 +270,76 @@
printf(") ------\n");
}
- int newline = 0;
- for (;;) {
- int ret = read(fd, buffer, sizeof(buffer));
- if (ret > 0) {
- newline = (buffer[ret - 1] == '\n');
- ret = fwrite(buffer, ret, 1, stdout);
+ bool newline = false;
+ fd_set read_set;
+ struct timeval tm;
+ while (1) {
+ FD_ZERO(&read_set);
+ FD_SET(fd, &read_set);
+ /* Timeout if no data is read for 30 seconds. */
+ tm.tv_sec = 30;
+ tm.tv_usec = 0;
+ uint64_t elapsed = nanotime();
+ int ret = TEMP_FAILURE_RETRY(select(fd + 1, &read_set, NULL, NULL, &tm));
+ if (ret == -1) {
+ printf("*** %s: select failed: %s\n", path, strerror(errno));
+ newline = true;
+ break;
+ } else if (ret == 0) {
+ elapsed = nanotime() - elapsed;
+ printf("*** %s: Timed out after %.3fs\n", path,
+ (float) elapsed / NANOS_PER_SEC);
+ newline = true;
+ break;
+ } else {
+ char buffer[65536];
+ ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, sizeof(buffer)));
+ if (bytes_read > 0) {
+ fwrite(buffer, bytes_read, 1, stdout);
+ newline = (buffer[bytes_read-1] == '\n');
+ } else {
+ if (bytes_read == -1) {
+ printf("*** %s: Failed to read from fd: %s", path, strerror(errno));
+ newline = true;
+ }
+ break;
+ }
}
- if (ret <= 0) break;
}
- close(fd);
+ TEMP_FAILURE_RETRY(close(fd));
if (!newline) printf("\n");
if (title) printf("\n");
return 0;
}
-static int64_t nanotime() {
- struct timespec ts;
- clock_gettime(CLOCK_MONOTONIC, &ts);
- return (int64_t)ts.tv_sec * NANOS_PER_SEC + ts.tv_nsec;
+/* prints the contents of a file */
+int dump_file(const char *title, const char *path) {
+ int fd = TEMP_FAILURE_RETRY(open(path, O_RDONLY | O_NONBLOCK | O_CLOEXEC));
+ if (fd < 0) {
+ int err = errno;
+ if (title) printf("------ %s (%s) ------\n", title, path);
+ printf("*** %s: %s\n", path, strerror(err));
+ if (title) printf("\n");
+ return -1;
+ }
+ return _dump_file_from_fd(title, path, fd);
+}
+
+/* fd must have been opened with the flag O_NONBLOCK. With this flag set,
+ * it's possible to avoid issues where opening the file itself can get
+ * stuck.
+ */
+int dump_file_from_fd(const char *title, const char *path, int fd) {
+ int flags = fcntl(fd, F_GETFL);
+ if (flags == -1) {
+ printf("*** %s: failed to get flags on fd %d: %s\n", path, fd, strerror(errno));
+ return -1;
+ } else if (!(flags & O_NONBLOCK)) {
+ printf("*** %s: fd must have O_NONBLOCK set.\n", path);
+ return -1;
+ }
+ return _dump_file_from_fd(title, path, fd);
}
bool waitpid_with_timeout(pid_t pid, int timeout_seconds, int* status) {
@@ -348,7 +389,7 @@
/* forks a command and waits for it to finish */
int run_command(const char *title, int timeout_seconds, const char *command, ...) {
fflush(stdout);
- int64_t start = nanotime();
+ uint64_t start = nanotime();
pid_t pid = fork();
/* handle error case */
@@ -550,7 +591,8 @@
}
/* create a new, empty traces.txt file to receive stack dumps */
- int fd = open(traces_path, O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW, 0666); /* -rw-rw-rw- */
+ int fd = TEMP_FAILURE_RETRY(open(traces_path, O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW,
+ 0666)); /* -rw-rw-rw- */
if (fd < 0) {
fprintf(stderr, "%s: %s\n", traces_path, strerror(errno));
return NULL;
@@ -600,7 +642,7 @@
if (!strncmp(data, "/system/bin/app_process", strlen("/system/bin/app_process"))) {
/* skip zygote -- it won't dump its stack anyway */
snprintf(path, sizeof(path), "/proc/%d/cmdline", pid);
- int cfd = open(path, O_RDONLY);
+ int cfd = TEMP_FAILURE_RETRY(open(path, O_RDONLY));
len = read(cfd, data, sizeof(data) - 1);
close(cfd);
if (len <= 0) {
@@ -612,7 +654,7 @@
}
++dalvik_found;
- int64_t start = nanotime();
+ uint64_t start = nanotime();
if (kill(pid, SIGQUIT)) {
fprintf(stderr, "kill(%d, SIGQUIT): %s\n", pid, strerror(errno));
continue;
@@ -642,7 +684,7 @@
fprintf(stderr, "lseek: %s\n", strerror(errno));
} else {
static uint16_t timeout_failures = 0;
- int64_t start = nanotime();
+ uint64_t start = nanotime();
/* If 3 backtrace dumps fail in a row, consider debuggerd dead. */
if (timeout_failures == 3) {