ceph: sync read/write considers page cache
Before we do either a sync read or a sync write, we need to make
sure that any dirty data in the page cache has been flushed first.
In the case of a sync write we also invalidate the relevant pages,
so that subsequent reads and writes reflect the newly written data.
Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 43bd2f2..bbf1ccf 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -409,7 +409,7 @@
i++;
}
while (len >= PAGE_CACHE_SIZE) {
- dout("zeroing %d %p\n", i, pages[i]);
+ dout("zeroing %d %p len=%d\n", i, pages[i], len);
zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
len -= PAGE_CACHE_SIZE;
i++;
@@ -542,13 +542,16 @@
* but it will at least behave sensibly when they are
* in sequence.
*/
- filemap_write_and_wait(inode->i_mapping);
} else {
pages = alloc_page_vector(num_pages);
}
if (IS_ERR(pages))
return PTR_ERR(pages);
+ ret = filemap_write_and_wait(inode->i_mapping);
+ if (ret < 0)
+ goto done;
+
ret = striped_read(inode, off, len, pages, num_pages);
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
@@ -556,6 +559,7 @@
if (ret >= 0)
*poff = off + ret;
+done:
if (file->f_flags & O_DIRECT)
put_page_vector(pages, num_pages);
else
@@ -617,6 +621,16 @@
else
pos = *offset;
+ ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
+ if (ret < 0)
+ return ret;
+
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + left) >> PAGE_CACHE_SHIFT);
+ if (ret < 0)
+ dout("invalidate_inode_pages2_range returned %d\n", ret);
+
flags = CEPH_OSD_FLAG_ORDERSNAP |
CEPH_OSD_FLAG_ONDISK |
CEPH_OSD_FLAG_WRITE;