vfs: add i_op->atomic_open()
Add a new inode operation which is called on the last component of an open.
Using this the filesystem can look up, possibly create and open the file in one
atomic operation. If it cannot perform this (e.g. the file type turned out to
be wrong) it may signal this by returning NULL instead of an open struct file
pointer.
i_op->atomic_open() is only called if the last component is negative or needs
lookup. Handling cached positive dentries here doesn't add much value: these
can be opened using f_op->open(). If the cached file turns out to be invalid,
the open can be retried, this time using ->atomic_open() with a fresh dentry.
For now leave the old way of using open intents in lookup and revalidate in
place. This will be removed once all the users are converted.
David Howells noticed that if ->atomic_open() opens the file but does not create
it, handle_truncate() will be called on it even if it is not a regular file.
Fix this by checking the file type in this case too.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/fs/namei.c b/fs/namei.c
index ccb0eb1..9e11ae8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2196,6 +2196,176 @@
return flag;
}
+static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
+{
+ int error = security_path_mknod(dir, dentry, mode, 0);
+ if (error)
+ return error;
+
+ error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+ if (error)
+ return error;
+
+ return security_inode_create(dir->dentry->d_inode, dentry, mode);
+}
+
+static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
+ struct path *path, const struct open_flags *op,
+ int *want_write, bool need_lookup,
+ bool *created)
+{
+ struct inode *dir = nd->path.dentry->d_inode;
+ unsigned open_flag = open_to_namei_flags(op->open_flag);
+ umode_t mode;
+ int error;
+ int acc_mode;
+ struct opendata od;
+ struct file *filp;
+ int create_error = 0;
+ struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
+
+ BUG_ON(dentry->d_inode);
+
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(dir))) {
+ filp = ERR_PTR(-ENOENT);
+ goto out;
+ }
+
+ mode = op->mode & S_IALLUGO;
+ if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
+ mode &= ~current_umask();
+
+ if (open_flag & O_EXCL) {
+ open_flag &= ~O_TRUNC;
+ *created = true;
+ }
+
+ /*
+ * Checking write permission is tricky, bacuse we don't know if we are
+ * going to actually need it: O_CREAT opens should work as long as the
+ * file exists. But checking existence breaks atomicity. The trick is
+ * to check access and if not granted clear O_CREAT from the flags.
+ *
+ * Another problem is returing the "right" error value (e.g. for an
+ * O_EXCL open we want to return EEXIST not EROFS).
+ */
+ if ((open_flag & (O_CREAT | O_TRUNC)) ||
+ (open_flag & O_ACCMODE) != O_RDONLY) {
+ error = mnt_want_write(nd->path.mnt);
+ if (!error) {
+ *want_write = 1;
+ } else if (!(open_flag & O_CREAT)) {
+ /*
+ * No O_CREATE -> atomicity not a requirement -> fall
+ * back to lookup + open
+ */
+ goto no_open;
+ } else if (open_flag & (O_EXCL | O_TRUNC)) {
+ /* Fall back and fail with the right error */
+ create_error = error;
+ goto no_open;
+ } else {
+ /* No side effects, safe to clear O_CREAT */
+ create_error = error;
+ open_flag &= ~O_CREAT;
+ }
+ }
+
+ if (open_flag & O_CREAT) {
+ error = may_o_create(&nd->path, dentry, op->mode);
+ if (error) {
+ create_error = error;
+ if (open_flag & O_EXCL)
+ goto no_open;
+ open_flag &= ~O_CREAT;
+ }
+ }
+
+ if (nd->flags & LOOKUP_DIRECTORY)
+ open_flag |= O_DIRECTORY;
+
+ od.dentry = DENTRY_NOT_SET;
+ od.mnt = nd->path.mnt;
+ od.filp = &nd->intent.open.file;
+ filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode,
+ created);
+ if (IS_ERR(filp)) {
+ if (WARN_ON(od.dentry != DENTRY_NOT_SET))
+ dput(od.dentry);
+
+ if (create_error && PTR_ERR(filp) == -ENOENT)
+ filp = ERR_PTR(create_error);
+ goto out;
+ }
+
+ acc_mode = op->acc_mode;
+ if (*created) {
+ fsnotify_create(dir, dentry);
+ acc_mode = MAY_OPEN;
+ }
+
+ if (!filp) {
+ if (WARN_ON(od.dentry == DENTRY_NOT_SET)) {
+ filp = ERR_PTR(-EIO);
+ goto out;
+ }
+ if (od.dentry) {
+ dput(dentry);
+ dentry = od.dentry;
+ }
+ goto looked_up;
+ }
+
+ /*
+ * We didn't have the inode before the open, so check open permission
+ * here.
+ */
+ error = may_open(&filp->f_path, acc_mode, open_flag);
+ if (error)
+ goto out_fput;
+
+ error = open_check_o_direct(filp);
+ if (error)
+ goto out_fput;
+
+out:
+ dput(dentry);
+ return filp;
+
+out_fput:
+ fput(filp);
+ filp = ERR_PTR(error);
+ goto out;
+
+no_open:
+ if (need_lookup) {
+ dentry = lookup_real(dir, dentry, nd);
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+
+ if (create_error) {
+ int open_flag = op->open_flag;
+
+ filp = ERR_PTR(create_error);
+ if ((open_flag & O_EXCL)) {
+ if (!dentry->d_inode)
+ goto out;
+ } else if (!dentry->d_inode) {
+ goto out;
+ } else if ((open_flag & O_TRUNC) &&
+ S_ISREG(dentry->d_inode->i_mode)) {
+ goto out;
+ }
+ /* will fail later, go on to get the right error */
+ }
+ }
+looked_up:
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
+ return NULL;
+}
+
/*
* Lookup, maybe create and open the last component
*
@@ -2219,6 +2389,15 @@
if (IS_ERR(dentry))
return ERR_CAST(dentry);
+ /* Cached positive dentry: will open in f_op->open */
+ if (!need_lookup && dentry->d_inode)
+ goto out_no_open;
+
+ if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
+ return atomic_open(nd, dentry, path, op, want_write,
+ need_lookup, created);
+ }
+
if (need_lookup) {
BUG_ON(dentry->d_inode);
@@ -2251,6 +2430,7 @@
if (error)
goto out_dput;
}
+out_no_open:
path->dentry = dentry;
path->mnt = nd->path.mnt;
return NULL;
@@ -2344,8 +2524,16 @@
filp = lookup_open(nd, path, op, &want_write, &created);
mutex_unlock(&dir->d_inode->i_mutex);
- if (IS_ERR(filp))
- goto out;
+ if (filp) {
+ if (IS_ERR(filp))
+ goto out;
+
+ if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
+ will_truncate = 0;
+
+ audit_inode(pathname, filp->f_path.dentry);
+ goto opened;
+ }
if (created) {
/* Don't check for write permission, don't truncate */
@@ -2361,6 +2549,16 @@
*/
audit_inode(pathname, path->dentry);
+ /*
+ * If atomic_open() acquired write access it is dropped now due to
+ * possible mount and symlink following (this might be optimized away if
+ * necessary...)
+ */
+ if (want_write) {
+ mnt_drop_write(nd->path.mnt);
+ want_write = 0;
+ }
+
error = -EEXIST;
if (open_flag & O_EXCL)
goto exit_dput;
@@ -2444,6 +2642,7 @@
retried = true;
goto retry_lookup;
}
+opened:
if (!IS_ERR(filp)) {
error = ima_file_check(filp, op->acc_mode);
if (error) {