Let's reinvent storage, yet again!

Now that we're treating storage as a runtime permission, we need to
grant read/write access without killing the app.  This is really
tricky, since we had been using GIDs for access control, and they're
set in stone once Zygote drops privileges.

The only thing left that can change dynamically is the filesystem
itself, so let's do that.  This means changing the FUSE daemon to
present itself as three different views:

/mnt/runtime_default/foo - view for apps with no access
/mnt/runtime_read/foo - view for apps with read access
/mnt/runtime_write/foo - view for apps with write access

There is still a single location for all the backing files, and
filesystem permissions are derived the same way for each view, but
the file modes are masked off differently for each mountpoint.

During fork, Zygote wires up the appropriate storage access into
an isolated mount namespace based on the current app permissions.  When
the app is granted permissions dynamically at runtime, the system
asks vold to jump into the existing mount namespace and bind mount
the newly granted access model into place.

Bug: 21858077
Change-Id: Iade538e4bc7af979fe20095f74416e8a0f165a4a
diff --git a/VolumeManager.cpp b/VolumeManager.cpp
index f1667f2..6caa5c0 100755
--- a/VolumeManager.cpp
+++ b/VolumeManager.cpp
@@ -26,6 +26,7 @@
 #include <sys/mount.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <sys/wait.h>
 #include <unistd.h>
 
 #include <linux/kdev_t.h>
@@ -480,6 +481,108 @@
     return 0;
 }
 
+// Rebinds /storage to the |mode| view ("default", "read" or "write"; anything
+// else leaves it unmounted) in the mount namespace of each /proc entry owned by
+// |uid|.  Returns -1 if /proc or the root namespace is unreadable, else 0.
+int VolumeManager::remountUid(uid_t uid, const std::string& mode) {
+    LOG(DEBUG) << "Remounting " << uid << " as mode " << mode;
+
+    DIR* dir;
+    struct dirent* de;
+    char rootName[PATH_MAX];
+    char pidName[PATH_MAX];
+    int pidFd;
+    int nsFd;
+    struct stat sb;
+    pid_t child;
+    ssize_t len;
+
+    if (!(dir = opendir("/proc"))) {
+        PLOG(ERROR) << "Failed to opendir";
+        return -1;
+    }
+
+    // Figure out root namespace to compare against below.  readlinkat() does
+    // not NUL-terminate, so reserve a byte and terminate before strcmp().
+    len = readlinkat(dirfd(dir), "1/ns/mnt", rootName, PATH_MAX - 1);
+    if (len == -1) {
+        PLOG(ERROR) << "Failed to readlink";
+        closedir(dir);
+        return -1;
+    }
+    rootName[len] = '\0';
+
+    // Poke through all running PIDs looking for apps running as UID
+    while ((de = readdir(dir))) {
+        nsFd = -1;
+        pidFd = openat(dirfd(dir), de->d_name, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+        if (pidFd < 0) {
+            goto next;
+        }
+        if (fstat(pidFd, &sb) != 0) {
+            PLOG(WARNING) << "Failed to stat " << de->d_name;
+            goto next;
+        }
+        if (sb.st_uid != uid) {
+            goto next;
+        }
+
+        // Matches so far, but refuse to touch if in root namespace
+        LOG(DEBUG) << "Found matching PID " << de->d_name;
+        len = readlinkat(pidFd, "ns/mnt", pidName, PATH_MAX - 1);
+        if (len == -1) {
+            PLOG(WARNING) << "Failed to read namespace for " << de->d_name;
+            goto next;
+        }
+        pidName[len] = '\0';
+        if (!strcmp(rootName, pidName)) {
+            LOG(WARNING) << "Skipping due to root namespace";
+            goto next;
+        }
+
+        // We purposefully leave the namespace open across the fork
+        nsFd = openat(pidFd, "ns/mnt", O_RDONLY);
+        if (nsFd < 0) {
+            PLOG(WARNING) << "Failed to open namespace";
+            goto next;
+        }
+
+        if (!(child = fork())) {
+            if (setns(nsFd, CLONE_NEWNS) != 0) {
+                PLOG(ERROR) << "Failed to setns";
+                _exit(1);
+            }
+
+            // Unmount current view and replace with requested view
+            umount2("/storage", MNT_FORCE);
+            if (mode != "default" && mode != "read" && mode != "write") {
+                // Sane default of no storage visible
+                _exit(0);
+            }
+            const std::string storageSource = "/mnt/runtime_" + mode;
+            if (TEMP_FAILURE_RETRY(mount(storageSource.c_str(), "/storage",
+                    NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) {
+                PLOG(WARNING) << "Failed to mount " << storageSource;
+                // Must _exit(): returning would resume the caller in the child
+                _exit(1);
+            }
+            _exit(0);
+        }
+
+        if (child == -1) {
+            PLOG(ERROR) << "Failed to fork";
+        } else {
+            TEMP_FAILURE_RETRY(waitpid(child, nullptr, 0));
+        }
+
+next:
+        if (nsFd >= 0) close(nsFd);
+        if (pidFd >= 0) close(pidFd);
+    }
+    closedir(dir);
+    return 0;
+}
+
 int VolumeManager::reset() {
     // Tear down all existing disks/volumes and start from a blank slate so
     // newly connected framework hears all events.