Let's reinvent storage, yet again!

Now that we're treating storage as a runtime permission, we need to
grant read/write access without killing the app.  This is really
tricky, since we had been using GIDs for access control, and they're
set in stone once Zygote drops privileges.

The only thing left that can change dynamically is the filesystem
itself, so let's do that.  This means changing the FUSE daemon to
present itself as three different views:

/mnt/runtime_default/foo - view for apps with no access
/mnt/runtime_read/foo - view for apps with read access
/mnt/runtime_write/foo - view for apps with write access

There is still a single location for all the backing files, and
filesystem permissions are derived the same way for each view, but
the file modes are masked off differently for each mountpoint.

During fork, Zygote wires up the appropriate storage view into an
isolated mount namespace based on the app's current permissions.  When
the app is granted permissions dynamically at runtime, the system
asks vold to jump into the existing mount namespace and bind mount
the newly granted access model into place.

Bug: 21858077
Change-Id: Iade538e4bc7af979fe20095f74416e8a0f165a4a
diff --git a/CommandListener.cpp b/CommandListener.cpp
index ee99479..32be05c 100644
--- a/CommandListener.cpp
+++ b/CommandListener.cpp
@@ -263,6 +263,12 @@
         // forget_partition [partGuid]
         std::string partGuid(argv[2]);
         return sendGenericOkFail(cli, vm->forgetPartition(partGuid));
+
+    } else if (cmd == "remount_uid" && argc > 3) {
+        // remount_uid [uid] [none|default|read|write]
+        uid_t uid = atoi(argv[2]);
+        std::string mode(argv[3]);
+        return sendGenericOkFail(cli, vm->remountUid(uid, mode));
     }
 
     return cli->sendMsg(ResponseCode::CommandSyntaxError, nullptr, false);
diff --git a/EmulatedVolume.cpp b/EmulatedVolume.cpp
index c2a7d9c..e906fa7 100644
--- a/EmulatedVolume.cpp
+++ b/EmulatedVolume.cpp
@@ -39,37 +39,45 @@
 EmulatedVolume::EmulatedVolume(const std::string& rawPath) :
         VolumeBase(Type::kEmulated), mFusePid(0) {
     setId("emulated");
-    mFusePath = "/storage/emulated";
     mRawPath = rawPath;
+    mLabel = "emulated";
 }
 
 EmulatedVolume::EmulatedVolume(const std::string& rawPath, dev_t device,
         const std::string& fsUuid) : VolumeBase(Type::kEmulated), mFusePid(0) {
     setId(StringPrintf("emulated:%u,%u", major(device), minor(device)));
-    mFusePath = StringPrintf("/storage/%s", fsUuid.c_str());
     mRawPath = rawPath;
+    mLabel = fsUuid;
 }
 
 EmulatedVolume::~EmulatedVolume() {
 }
 
 status_t EmulatedVolume::doMount() {
-    if (fs_prepare_dir(mFusePath.c_str(), 0700, AID_ROOT, AID_ROOT)) {
-        PLOG(ERROR) << getId() << " failed to create mount point " << mFusePath;
+    mFuseDefault = StringPrintf("/mnt/runtime_default/%s", mLabel.c_str());
+    mFuseRead = StringPrintf("/mnt/runtime_read/%s", mLabel.c_str());
+    mFuseWrite = StringPrintf("/mnt/runtime_write/%s", mLabel.c_str());
+
+    setInternalPath(mRawPath);
+    setPath(StringPrintf("/storage/%s", mLabel.c_str()));
+
+    if (fs_prepare_dir(mFuseDefault.c_str(), 0700, AID_ROOT, AID_ROOT) ||
+            fs_prepare_dir(mFuseRead.c_str(), 0700, AID_ROOT, AID_ROOT) ||
+            fs_prepare_dir(mFuseWrite.c_str(), 0700, AID_ROOT, AID_ROOT)) {
+        PLOG(ERROR) << getId() << " failed to create mount points";
         return -errno;
     }
 
-    setInternalPath(mRawPath);
-    setPath(mFusePath);
+    dev_t before = GetDevice(mFuseWrite);
 
     if (!(mFusePid = fork())) {
-        // TODO: protect when not mounted as visible
         if (execl(kFusePath, kFusePath,
                 "-u", "1023", // AID_MEDIA_RW
                 "-g", "1023", // AID_MEDIA_RW
-                "-l",
+                "-m",
+                "-w",
                 mRawPath.c_str(),
-                mFusePath.c_str(),
+                mLabel.c_str(),
                 NULL)) {
             PLOG(ERROR) << "Failed to exec";
         }
@@ -83,6 +91,11 @@
         return -errno;
     }
 
+    while (before == GetDevice(mFuseWrite)) {
+        LOG(VERBOSE) << "Waiting for FUSE to spin up...";
+        usleep(50000); // 50ms
+    }
+
     return OK;
 }
 
@@ -93,13 +106,17 @@
         mFusePid = 0;
     }
 
-    ForceUnmount(mFusePath);
-    ForceUnmount(mRawPath);
+    ForceUnmount(mFuseDefault);
+    ForceUnmount(mFuseRead);
+    ForceUnmount(mFuseWrite);
 
-    if (TEMP_FAILURE_RETRY(rmdir(mFusePath.c_str()))) {
-        PLOG(ERROR) << getId() << " failed to rmdir mount point " << mFusePath;
-        return -errno;
-    }
+    rmdir(mFuseDefault.c_str());
+    rmdir(mFuseRead.c_str());
+    rmdir(mFuseWrite.c_str());
+
+    mFuseDefault.clear();
+    mFuseRead.clear();
+    mFuseWrite.clear();
 
     return OK;
 }
diff --git a/EmulatedVolume.h b/EmulatedVolume.h
index 04d4508..09686c1 100644
--- a/EmulatedVolume.h
+++ b/EmulatedVolume.h
@@ -46,10 +46,13 @@
     status_t doUnmount() override;
 
 private:
-    /* Mount point of raw storage */
     std::string mRawPath;
-    /* Mount point of visible storage */
-    std::string mFusePath;
+    std::string mLabel;
+
+    std::string mFuseDefault;
+    std::string mFuseRead;
+    std::string mFuseWrite;
+
     /* PID of FUSE wrapper */
     pid_t mFusePid;
 
diff --git a/PublicVolume.cpp b/PublicVolume.cpp
index d17853b..e4fdb86 100644
--- a/PublicVolume.cpp
+++ b/PublicVolume.cpp
@@ -111,16 +111,19 @@
     }
 
     mRawPath = StringPrintf("/mnt/media_rw/%s", stableName.c_str());
-    mFusePath = StringPrintf("/storage/%s", stableName.c_str());
-    setInternalPath(mRawPath);
-    setPath(mFusePath);
 
-    if (fs_prepare_dir(mRawPath.c_str(), 0700, AID_ROOT, AID_ROOT)) {
-        PLOG(ERROR) << getId() << " failed to create mount point " << mRawPath;
-        return -errno;
-    }
-    if (fs_prepare_dir(mFusePath.c_str(), 0700, AID_ROOT, AID_ROOT)) {
-        PLOG(ERROR) << getId() << " failed to create mount point " << mFusePath;
+    mFuseDefault = StringPrintf("/mnt/runtime_default/%s", stableName.c_str());
+    mFuseRead = StringPrintf("/mnt/runtime_read/%s", stableName.c_str());
+    mFuseWrite = StringPrintf("/mnt/runtime_write/%s", stableName.c_str());
+
+    setInternalPath(mRawPath);
+    setPath(StringPrintf("/storage/%s", stableName.c_str()));
+
+    if (fs_prepare_dir(mRawPath.c_str(), 0700, AID_ROOT, AID_ROOT) ||
+            fs_prepare_dir(mFuseDefault.c_str(), 0700, AID_ROOT, AID_ROOT) ||
+            fs_prepare_dir(mFuseRead.c_str(), 0700, AID_ROOT, AID_ROOT) ||
+            fs_prepare_dir(mFuseWrite.c_str(), 0700, AID_ROOT, AID_ROOT)) {
+        PLOG(ERROR) << getId() << " failed to create mount points";
         return -errno;
     }
 
@@ -134,25 +137,18 @@
         initAsecStage();
     }
 
-    // TODO: teach FUSE daemon to protect itself with user-specific GID
+    dev_t before = GetDevice(mFuseWrite);
+
     if (!(mFusePid = fork())) {
         if (!(getMountFlags() & MountFlags::kVisible)) {
-            // TODO: mount so that only system apps can access
-            if (execl(kFusePath, kFusePath,
-                    "-u", "1023", // AID_MEDIA_RW
-                    "-g", "1023", // AID_MEDIA_RW
-                    mRawPath.c_str(),
-                    mFusePath.c_str(),
-                    NULL)) {
-                PLOG(ERROR) << "Failed to exec";
-            }
+            // TODO: do we need to wrap this device?
         } else if (getMountFlags() & MountFlags::kPrimary) {
             if (execl(kFusePath, kFusePath,
                     "-u", "1023", // AID_MEDIA_RW
                     "-g", "1023", // AID_MEDIA_RW
-                    "-d",
+                    "-w",
                     mRawPath.c_str(),
-                    mFusePath.c_str(),
+                    stableName.c_str(),
                     NULL)) {
                 PLOG(ERROR) << "Failed to exec";
             }
@@ -160,10 +156,8 @@
             if (execl(kFusePath, kFusePath,
                     "-u", "1023", // AID_MEDIA_RW
                     "-g", "1023", // AID_MEDIA_RW
-                    "-w", "1023", // AID_MEDIA_RW
-                    "-d",
                     mRawPath.c_str(),
-                    mFusePath.c_str(),
+                    stableName.c_str(),
                     NULL)) {
                 PLOG(ERROR) << "Failed to exec";
             }
@@ -178,6 +172,11 @@
         return -errno;
     }
 
+    while (before == GetDevice(mFuseWrite)) {
+        LOG(VERBOSE) << "Waiting for FUSE to spin up...";
+        usleep(50000); // 50ms
+    }
+
     return OK;
 }
 
@@ -189,17 +188,20 @@
     }
 
     ForceUnmount(kAsecPath);
-    ForceUnmount(mFusePath);
+
+    ForceUnmount(mFuseDefault);
+    ForceUnmount(mFuseRead);
+    ForceUnmount(mFuseWrite);
     ForceUnmount(mRawPath);
 
-    if (TEMP_FAILURE_RETRY(rmdir(mRawPath.c_str()))) {
-        PLOG(ERROR) << getId() << " failed to rmdir mount point " << mRawPath;
-    }
-    if (TEMP_FAILURE_RETRY(rmdir(mFusePath.c_str()))) {
-        PLOG(ERROR) << getId() << " failed to rmdir mount point " << mFusePath;
-    }
+    rmdir(mFuseDefault.c_str());
+    rmdir(mFuseRead.c_str());
+    rmdir(mFuseWrite.c_str());
+    rmdir(mRawPath.c_str());
 
-    mFusePath.clear();
+    mFuseDefault.clear();
+    mFuseRead.clear();
+    mFuseWrite.clear();
     mRawPath.clear();
 
     return OK;
diff --git a/PublicVolume.h b/PublicVolume.h
index fd400f2..3aa7a73 100644
--- a/PublicVolume.h
+++ b/PublicVolume.h
@@ -59,8 +59,11 @@
     std::string mDevPath;
     /* Mount point of raw partition */
     std::string mRawPath;
-    /* Mount point of FUSE wrapper */
-    std::string mFusePath;
+
+    std::string mFuseDefault;
+    std::string mFuseRead;
+    std::string mFuseWrite;
+
     /* PID of FUSE wrapper */
     pid_t mFusePid;
 
diff --git a/Utils.cpp b/Utils.cpp
index 2ccd45f..ec9c906 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -522,5 +522,15 @@
     return StringPrintf("%s/expand_%s.key", kKeyPath, partGuid.c_str());
 }
 
+// Returns the st_dev of |path|, or 0 (after logging) if stat() fails.
+dev_t GetDevice(const std::string& path) {
+    struct stat info;
+    if (stat(path.c_str(), &info) == 0) {
+        return info.st_dev;
+    }
+    PLOG(WARNING) << "Failed to stat " << path;
+    return 0;
+}
+
 }  // namespace vold
 }  // namespace android
diff --git a/Utils.h b/Utils.h
index 8d6bf38..ce0f7c8 100644
--- a/Utils.h
+++ b/Utils.h
@@ -90,6 +90,8 @@
 
 std::string BuildKeyPath(const std::string& partGuid);
 
+dev_t GetDevice(const std::string& path);
+
 }  // namespace vold
 }  // namespace android
 
diff --git a/VolumeBase.cpp b/VolumeBase.cpp
index 7d733cb..4dcdb0e 100644
--- a/VolumeBase.cpp
+++ b/VolumeBase.cpp
@@ -219,7 +219,6 @@
     }
 
     setState(State::kEjecting);
-
     for (auto vol : mVolumes) {
         if (vol->destroy()) {
             LOG(WARNING) << getId() << " failed to destroy " << vol->getId()
diff --git a/VolumeManager.cpp b/VolumeManager.cpp
index f1667f2..6caa5c0 100755
--- a/VolumeManager.cpp
+++ b/VolumeManager.cpp
@@ -26,6 +26,7 @@
 #include <sys/mount.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <sys/wait.h>
 #include <unistd.h>
 
 #include <linux/kdev_t.h>
@@ -480,6 +481,123 @@
     return 0;
 }
 
+// Switch the storage view seen by every running process owned by |uid|.
+// Each /proc entry owned by |uid| that is not in PID 1's mount namespace
+// gets a forked helper which enters that namespace, unmounts /storage and
+// bind mounts /mnt/runtime_{default,read,write} there according to |mode|;
+// any other mode (e.g. "none") stops after the unmount.  Returns -1 if
+// /proc or PID 1's namespace can't be read, otherwise 0 (per-process
+// failures are only logged).
+int VolumeManager::remountUid(uid_t uid, const std::string& mode) {
+    LOG(DEBUG) << "Remounting " << uid << " as mode " << mode;
+
+    DIR* dir;
+    struct dirent* de;
+    char rootName[PATH_MAX];
+    char pidName[PATH_MAX];
+    ssize_t len;
+    int pidFd;
+    int nsFd;
+    struct stat sb;
+    pid_t child;
+
+    if (!(dir = opendir("/proc"))) {
+        PLOG(ERROR) << "Failed to opendir";
+        return -1;
+    }
+
+    // Figure out root namespace to compare against below; readlinkat()
+    // does not NUL-terminate, so do that by hand before any strcmp()
+    len = readlinkat(dirfd(dir), "1/ns/mnt", rootName, PATH_MAX - 1);
+    if (len == -1) {
+        PLOG(ERROR) << "Failed to readlink";
+        closedir(dir);
+        return -1;
+    }
+    rootName[len] = '\0';
+
+    // Poke through all running PIDs looking for apps running as UID
+    while ((de = readdir(dir))) {
+        nsFd = -1;
+
+        pidFd = openat(dirfd(dir), de->d_name, O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+        if (pidFd < 0) {
+            goto next;
+        }
+        if (fstat(pidFd, &sb) != 0) {
+            PLOG(WARNING) << "Failed to stat " << de->d_name;
+            goto next;
+        }
+        if (sb.st_uid != uid) {
+            goto next;
+        }
+
+        // Matches so far, but refuse to touch if in root namespace
+        LOG(DEBUG) << "Found matching PID " << de->d_name;
+        len = readlinkat(pidFd, "ns/mnt", pidName, PATH_MAX - 1);
+        if (len == -1) {
+            PLOG(WARNING) << "Failed to read namespace for " << de->d_name;
+            goto next;
+        }
+        pidName[len] = '\0';
+        if (!strcmp(rootName, pidName)) {
+            LOG(WARNING) << "Skipping due to root namespace";
+            goto next;
+        }
+
+        // We purposefully leave the namespace open across the fork
+        nsFd = openat(pidFd, "ns/mnt", O_RDONLY);
+        if (nsFd < 0) {
+            PLOG(WARNING) << "Failed to open namespace";
+            goto next;
+        }
+
+        if (!(child = fork())) {
+            // Child: always leave via _exit(); returning from here would
+            // let a second copy of this process keep running the caller
+            if (setns(nsFd, CLONE_NEWNS) != 0) {
+                PLOG(ERROR) << "Failed to setns";
+                _exit(1);
+            }
+
+            // Unmount current view and replace with requested view
+            umount2("/storage", MNT_FORCE);
+
+            std::string storageSource;
+            if (mode == "default") {
+                storageSource = "/mnt/runtime_default";
+            } else if (mode == "read") {
+                storageSource = "/mnt/runtime_read";
+            } else if (mode == "write") {
+                storageSource = "/mnt/runtime_write";
+            } else {
+                // Sane default of no storage visible
+                _exit(0);
+            }
+            if (TEMP_FAILURE_RETRY(mount(storageSource.c_str(), "/storage",
+                    NULL, MS_BIND | MS_REC | MS_SLAVE, NULL)) == -1) {
+                PLOG(WARNING) << "Failed to mount " << storageSource;
+                _exit(1);
+            }
+            _exit(0);
+        }
+
+        if (child == -1) {
+            PLOG(ERROR) << "Failed to fork";
+            goto next;
+        } else {
+            TEMP_FAILURE_RETRY(waitpid(child, nullptr, 0));
+        }
+
+next:
+        // Only close what was actually opened on this iteration
+        if (nsFd >= 0) close(nsFd);
+        if (pidFd >= 0) close(pidFd);
+    }
+    closedir(dir);
+    return 0;
+}
+
 int VolumeManager::reset() {
     // Tear down all existing disks/volumes and start from a blank slate so
     // newly connected framework hears all events.
diff --git a/VolumeManager.h b/VolumeManager.h
index 8620d25..6c094fc 100644
--- a/VolumeManager.h
+++ b/VolumeManager.h
@@ -129,6 +129,8 @@
 
     int setPrimary(const std::shared_ptr<android::vold::VolumeBase>& vol);
 
+    int remountUid(uid_t uid, const std::string& mode);
+
     /* Reset all internal state, typically during framework boot */
     int reset();
     /* Prepare for device shutdown, safely unmounting all devices */