Btrfs: properly honor wbc->nr_to_write changes
When btrfs fills a delayed allocation, it tries to increase
the wbc nr_to_write to cover a big part of allocation. The
theory is that we're doing contiguous IO and writing a few
more blocks will save seeks overall at a very low cost.
The problem is that extent_write_cache_pages could ignore
the new higher nr_to_write if nr_to_write had already gone
down to zero. We fix that by rechecking the nr_to_write
for every page that is processed in the pagevec.
This updates the math around bumping the nr_to_write value
to make sure we don't leave a tiny amount of IO hanging
around for the very end of a new extent.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a102422..7e16c6d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2182,7 +2182,7 @@
delalloc_end = 0;
page_started = 0;
if (!epd->extent_locked) {
- u64 delalloc_to_write;
+ u64 delalloc_to_write = 0;
/*
* make sure the wbc mapping index is at least updated
* to this page.
@@ -2202,16 +2202,24 @@
tree->ops->fill_delalloc(inode, page, delalloc_start,
delalloc_end, &page_started,
&nr_written);
- delalloc_to_write = (delalloc_end -
- max_t(u64, page_offset(page),
- delalloc_start) + 1) >>
- PAGE_CACHE_SHIFT;
- if (wbc->nr_to_write < delalloc_to_write) {
- wbc->nr_to_write = min_t(long, 8192,
- delalloc_to_write);
- }
+ /*
+ * delalloc_end is already one less than the total
+ * length, so we don't subtract one from
+ * PAGE_CACHE_SIZE
+ */
+ delalloc_to_write += (delalloc_end - delalloc_start +
+ PAGE_CACHE_SIZE) >>
+ PAGE_CACHE_SHIFT;
delalloc_start = delalloc_end + 1;
}
+ if (wbc->nr_to_write < delalloc_to_write) {
+ int thresh = 8192;
+
+ if (delalloc_to_write < thresh * 2)
+ thresh = delalloc_to_write;
+ wbc->nr_to_write = min_t(u64, delalloc_to_write,
+ thresh);
+ }
/* did the fill delalloc function already unlock and start
* the IO?
@@ -2388,6 +2396,7 @@
{
int ret = 0;
int done = 0;
+ int nr_to_write_done = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t index;
@@ -2407,7 +2416,7 @@
scanned = 1;
}
retry:
- while (!done && (index <= end) &&
+ while (!done && !nr_to_write_done && (index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY, min(end - index,
(pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2458,8 +2467,15 @@
unlock_page(page);
ret = 0;
}
- if (ret || wbc->nr_to_write <= 0)
+ if (ret)
done = 1;
+
+ /*
+ * the filesystem may choose to bump up nr_to_write.
+ * We have to make sure to honor the new nr_to_write
+ * at any time
+ */
+ nr_to_write_done = wbc->nr_to_write <= 0;
}
pagevec_release(&pvec);
cond_resched();