orangefs: Fix revalidate.
Previously, it would update a live inode. This was fixed, but it did not
ever check that the inode attributes in the dcache are correct. This
checks all inode attributes and rejects any that are not correct, which
causes a lookup and thus a new getattr.
Perhaps inode_operations->permission should replace or augment some of
this.
There is no actual caching, and this does a rather excessive amount of
network operations back to the filesystem server.
Signed-off-by: Martin Brandenburg <martin@omnibond.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
index 0419981..e8fb79d 100644
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -43,24 +43,34 @@
err = service_operation(new_op, "orangefs_lookup",
get_interruptible_flag(parent_inode));
- if (err)
- goto out_drop;
- if (new_op->downcall.status != 0 ||
- !match_handle(new_op->downcall.resp.lookup.refn.khandle, inode)) {
- gossip_debug(GOSSIP_DCACHE_DEBUG,
- "%s:%s:%d "
- "lookup failure |%s| or no match |%s|.\n",
- __FILE__,
- __func__,
- __LINE__,
- new_op->downcall.status ? "true" : "false",
- match_handle(new_op->downcall.resp.lookup.refn.khandle,
- inode) ? "false" : "true");
- gossip_debug(GOSSIP_DCACHE_DEBUG,
- "%s:%s:%d revalidate failed\n",
- __FILE__, __func__, __LINE__);
- goto out_drop;
+ /* Positive dentry: reject if error or not the same inode. */
+ if (inode) {
+ if (err) {
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s:%s:%d lookup failure.\n",
+ __FILE__, __func__, __LINE__);
+ goto out_drop;
+ }
+ if (!match_handle(new_op->downcall.resp.lookup.refn.khandle,
+ inode)) {
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s:%s:%d no match.\n",
+ __FILE__, __func__, __LINE__);
+ goto out_drop;
+ }
+
+ /* Negative dentry: reject if success or error other than ENOENT. */
+ } else {
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: negative dentry.\n",
+ __func__);
+ if (!err || err != -ENOENT) {
+ if (new_op->downcall.status != 0)
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s:%s:%d lookup failure.\n",
+ __FILE__, __func__, __LINE__);
+ goto out_drop;
+ }
}
ret = 1;
@@ -70,6 +80,8 @@
dput(parent_dentry);
return ret;
out_drop:
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n",
+ __FILE__, __func__, __LINE__);
d_drop(dentry);
goto out_release_op;
}
@@ -81,8 +93,7 @@
*/
static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct inode *inode;
- int ret = 0;
+ int ret;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -90,29 +101,42 @@
gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: called on dentry %p.\n",
__func__, dentry);
- /* find inode from dentry */
- if (!dentry->d_inode) {
- gossip_debug(GOSSIP_DCACHE_DEBUG,
- "%s: negative dentry.\n",
- __func__);
- goto out;
- }
-
- gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: inode valid.\n", __func__);
- inode = dentry->d_inode;
-
/* skip root handle lookups. */
- if (is_root_handle(inode)) {
- ret = 1;
- goto out;
+ if (dentry->d_inode && is_root_handle(dentry->d_inode))
+ return 1;
+
+ /*
+ * If this passes, the positive dentry still exists or the negative
+ * dentry still does not exist.
+ */
+ if (!orangefs_revalidate_lookup(dentry)) {
+ d_drop(dentry);
+ return 0;
}
- /* lookup the object. */
- if (orangefs_revalidate_lookup(dentry))
- ret = 1;
+ /* We do not need to continue with negative dentries. */
+ if (!dentry->d_inode)
+ goto out;
+
+ /* Now we must perform a getattr to validate the inode contents. */
+ ret = orangefs_inode_getattr(dentry->d_inode,
+ ORANGEFS_ATTR_SYS_ALL_NOHINT, 1);
+ if (ret < 0) {
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d getattr failure.\n",
+ __FILE__, __func__, __LINE__);
+ d_drop(dentry);
+ return 0;
+ }
+ if (ret == 0) {
+ d_drop(dentry);
+ return 0;
+ }
out:
- return ret;
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s: negative dentry or positive dentry and inode valid.\n",
+ __func__);
+ return 1;
}
const struct dentry_operations orangefs_dentry_operations = {
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index c585063..7e6fe8d 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -467,7 +467,7 @@
/* Make sure generic_write_checks sees an up to date inode size. */
if (file->f_flags & O_APPEND) {
rc = orangefs_inode_getattr(file->f_mapping->host,
- ORANGEFS_ATTR_SYS_SIZE);
+ ORANGEFS_ATTR_SYS_SIZE, 0);
if (rc) {
gossip_err("%s: orangefs_inode_getattr failed, rc:%zd:.\n",
__func__, rc);
@@ -681,7 +681,7 @@
* NOTE: We are only interested in file size here,
* so we set mask accordingly.
*/
- ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_SIZE);
+ ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_SIZE, 0);
if (ret) {
gossip_debug(GOSSIP_FILE_DEBUG,
"%s:%s:%d calling make bad inode\n",
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 4724c92b..040cd95 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -273,7 +273,7 @@
* fields/attributes of the inode would be refreshed. So again, we
* dont have too much of a choice but refresh all the attributes.
*/
- ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT);
+ ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0);
if (ret == 0) {
generic_fillattr(inode, kstat);
/* override block size reported to stat */
@@ -392,7 +392,7 @@
if (!inode || !(inode->i_state & I_NEW))
return inode;
- error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT);
+ error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0);
if (error) {
iget_failed(inode);
return ERR_PTR(error);
@@ -437,7 +437,7 @@
orangefs_set_inode(inode, ref);
inode->i_ino = hash; /* needed for stat etc */
- error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT);
+ error = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_ALL_NOHINT, 0);
if (error)
goto out_iput;
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 9c87676..3e25855 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -561,7 +561,7 @@
size_t size,
int flags);
-int orangefs_inode_getattr(struct inode *inode, __u32 mask);
+int orangefs_inode_getattr(struct inode *inode, __u32 mask, int check);
int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index 035f050..6cf29a4 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -353,11 +353,91 @@
return 0;
}
+static int compare_attributes_to_inode(struct inode *inode,
+ struct ORANGEFS_sys_attr_s *attrs,
+ char *symname)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ loff_t inode_size, rounded_up_size;
+
+ /* Compare file size. */
+
+ switch (attrs->objtype) {
+ case ORANGEFS_TYPE_METAFILE:
+ if(inode->i_flags != orangefs_inode_flags(attrs))
+ return 0;
+ inode_size = attrs->size;
+ rounded_up_size = inode_size + (4096 - (inode_size % 4096));
+ if (inode->i_bytes != inode_size ||
+ inode->i_blocks != rounded_up_size/512)
+ return 0;
+ break;
+ case ORANGEFS_TYPE_SYMLINK:
+ if (symname && strlen(symname) != inode->i_size)
+ return 0;
+ break;
+ default:
+ if (inode->i_size != PAGE_CACHE_SIZE &&
+ inode_get_bytes(inode) != PAGE_CACHE_SIZE)
+ return 0;
+ }
+
+ /* Compare general attributes. */
+
+ if (!uid_eq(inode->i_uid, make_kuid(&init_user_ns, attrs->owner)) ||
+ !gid_eq(inode->i_gid, make_kgid(&init_user_ns, attrs->group)) ||
+ inode->i_atime.tv_sec != attrs->atime ||
+ inode->i_mtime.tv_sec != attrs->mtime ||
+ inode->i_ctime.tv_sec != attrs->ctime ||
+ inode->i_atime.tv_nsec != 0 ||
+ inode->i_mtime.tv_nsec != 0 ||
+ inode->i_ctime.tv_nsec != 0)
+ return 0;
+
+ if ((inode->i_mode & ~(S_ISVTX|S_IFREG|S_IFDIR|S_IFLNK)) !=
+ orangefs_inode_perms(attrs))
+ return 0;
+
+ if (is_root_handle(inode))
+ if (!(inode->i_mode & S_ISVTX))
+ return 0;
+
+ /* Compare file type. */
+
+ switch (attrs->objtype) {
+ case ORANGEFS_TYPE_METAFILE:
+ if (!(inode->i_mode & S_IFREG))
+ return 0;
+ break;
+ case ORANGEFS_TYPE_DIRECTORY:
+ if (!(inode->i_mode & S_IFDIR))
+ return 0;
+ if (inode->i_nlink != 1)
+ return 0;
+ break;
+ case ORANGEFS_TYPE_SYMLINK:
+ if (!(inode->i_mode & S_IFLNK))
+ return 0;
+ if (orangefs_inode && symname)
+ if (strcmp(orangefs_inode->link_target, symname))
+ return 0;
+ break;
+ default:
+ gossip_err("orangefs: compare_attributes_to_inode: got invalid attribute type %x\n",
+ attrs->objtype);
+
+ }
+
+ return 1;
+}
+
/*
- * issues a orangefs getattr request and fills in the appropriate inode
- * attributes if successful. returns 0 on success; -errno otherwise
+ * Issues a orangefs getattr request and fills in the appropriate inode
+ * attributes if successful. When check is 0, returns 0 on success and -errno
+ * otherwise. When check is 1, returns 1 on success where the inode is valid
+ * and 0 on success where the inode is stale and -errno otherwise.
*/
-int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask)
+int orangefs_inode_getattr(struct inode *inode, __u32 getattr_mask, int check)
{
struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
struct orangefs_kernel_op_s *new_op;
@@ -379,27 +459,46 @@
if (ret != 0)
goto out;
- if (copy_attributes_to_inode(inode,
- &new_op->downcall.resp.getattr.attributes,
- new_op->downcall.resp.getattr.link_target)) {
- gossip_err("%s: failed to copy attributes\n", __func__);
- ret = -ENOENT;
- goto out;
- }
+ if (check) {
+ ret = compare_attributes_to_inode(inode,
+ &new_op->downcall.resp.getattr.attributes,
+ new_op->downcall.resp.getattr.link_target);
- /*
- * Store blksize in orangefs specific part of inode structure; we are
- * only going to use this to report to stat to make sure it doesn't
- * perturb any inode related code paths.
- */
- if (new_op->downcall.resp.getattr.attributes.objtype ==
- ORANGEFS_TYPE_METAFILE) {
- orangefs_inode->blksize =
- new_op->downcall.resp.getattr.attributes.blksize;
+ if (new_op->downcall.resp.getattr.attributes.objtype ==
+ ORANGEFS_TYPE_METAFILE) {
+ if (orangefs_inode->blksize !=
+ new_op->downcall.resp.getattr.attributes.blksize)
+ ret = 0;
+ } else {
+ if (orangefs_inode->blksize != 1 << inode->i_blkbits)
+ ret = 0;
+ }
} else {
- /* mimic behavior of generic_fillattr() for other types. */
- orangefs_inode->blksize = (1 << inode->i_blkbits);
+ if (copy_attributes_to_inode(inode,
+ &new_op->downcall.resp.getattr.attributes,
+ new_op->downcall.resp.getattr.link_target)) {
+ gossip_err("%s: failed to copy attributes\n", __func__);
+ ret = -ENOENT;
+ goto out;
+ }
+ /*
+ * Store blksize in orangefs specific part of inode structure;
+ * we are only going to use this to report to stat to make sure
+ * it doesn't perturb any inode related code paths.
+ */
+ if (new_op->downcall.resp.getattr.attributes.objtype ==
+ ORANGEFS_TYPE_METAFILE) {
+ orangefs_inode->blksize = new_op->downcall.resp.
+ getattr.attributes.blksize;
+ } else {
+ /*
+ * mimic behavior of generic_fillattr() for other file
+ * types.
+ */
+ orangefs_inode->blksize = (1 << inode->i_blkbits);
+
+ }
}
out: