nfsd: support ext4 i_version
ext4 supports a real NFSv4 change attribute, which is bumped whenever
the ctime would be updated, including times when two updates arrive
within a jiffy of each other. (Note that although ext4 has space for
nanosecond-precision ctime, the real resolution is lower: it actually
uses jiffies as the time-source.) This ensures clients will invalidate
their caches when they need to.
There is some fear that keeping the i_version up-to-date could have
performance drawbacks, so for now it's turned on only by a mount option.
We hope to do something better eventually.
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: Theodore Tso <tytso@mit.edu>
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 4a71fcd..12d36a7 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1490,13 +1490,41 @@
memcpy(p, ptr, nbytes); \
p += XDR_QUADLEN(nbytes); \
}} while (0)
-#define WRITECINFO(c) do { \
- *p++ = htonl(c.atomic); \
- *p++ = htonl(c.before_ctime_sec); \
- *p++ = htonl(c.before_ctime_nsec); \
- *p++ = htonl(c.after_ctime_sec); \
- *p++ = htonl(c.after_ctime_nsec); \
-} while (0)
+
+static void write32(__be32 **p, u32 n)
+{
+ *(*p)++ = n;
+}
+
+static void write64(__be32 **p, u64 n)
+{
+ write32(p, (u32)(n >> 32));
+ write32(p, (u32)n);
+}
+
+static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
+{
+ if (IS_I_VERSION(inode)) {
+ write64(p, inode->i_version);
+ } else {
+ write32(p, stat->ctime.tv_sec);
+ write32(p, stat->ctime.tv_nsec);
+ }
+}
+
+static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
+{
+ write32(p, c->atomic);
+ if (c->change_supported) {
+ write64(p, c->before_change);
+ write64(p, c->after_change);
+ } else {
+ write32(p, c->before_ctime_sec);
+ write32(p, c->before_ctime_nsec);
+ write32(p, c->after_ctime_sec);
+ write32(p, c->after_ctime_nsec);
+ }
+}
#define RESERVE_SPACE(nbytes) do { \
p = resp->p; \
@@ -1849,16 +1877,9 @@
WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
}
if (bmval0 & FATTR4_WORD0_CHANGE) {
- /*
- * Note: This _must_ be consistent with the scheme for writing
- * change_info, so any changes made here must be reflected there
- * as well. (See xdr4.h:set_change_info() and the WRITECINFO()
- * macro above.)
- */
if ((buflen -= 8) < 0)
goto out_resource;
- WRITE32(stat.ctime.tv_sec);
- WRITE32(stat.ctime.tv_nsec);
+ write_change(&p, &stat, dentry->d_inode);
}
if (bmval0 & FATTR4_WORD0_SIZE) {
if ((buflen -= 8) < 0)
@@ -2364,7 +2385,7 @@
if (!nfserr) {
RESERVE_SPACE(32);
- WRITECINFO(create->cr_cinfo);
+ write_cinfo(&p, &create->cr_cinfo);
WRITE32(2);
WRITE32(create->cr_bmval[0]);
WRITE32(create->cr_bmval[1]);
@@ -2475,7 +2496,7 @@
if (!nfserr) {
RESERVE_SPACE(20);
- WRITECINFO(link->li_cinfo);
+ write_cinfo(&p, &link->li_cinfo);
ADJUST_ARGS();
}
return nfserr;
@@ -2493,7 +2514,7 @@
nfsd4_encode_stateid(resp, &open->op_stateid);
RESERVE_SPACE(40);
- WRITECINFO(open->op_cinfo);
+ write_cinfo(&p, &open->op_cinfo);
WRITE32(open->op_rflags);
WRITE32(2);
WRITE32(open->op_bmval[0]);
@@ -2771,7 +2792,7 @@
if (!nfserr) {
RESERVE_SPACE(20);
- WRITECINFO(remove->rm_cinfo);
+ write_cinfo(&p, &remove->rm_cinfo);
ADJUST_ARGS();
}
return nfserr;
@@ -2784,8 +2805,8 @@
if (!nfserr) {
RESERVE_SPACE(40);
- WRITECINFO(rename->rn_sinfo);
- WRITECINFO(rename->rn_tinfo);
+ write_cinfo(&p, &rename->rn_sinfo);
+ write_cinfo(&p, &rename->rn_tinfo);
ADJUST_ARGS();
}
return nfserr;