Btrfs: set page->private to the eb
We spend a lot of time looking up extent buffers from pages when we could just
store the pointer to the eb the page is associated with in page->private. This
patch does just that, and it makes things a little simpler and reduces a bit of
CPU overhead involved with doing metadata IO. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 68fc93e..bc88649 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -403,39 +403,28 @@
struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 found_start;
- unsigned long len;
struct extent_buffer *eb;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (page->private == EXTENT_PAGE_PRIVATE)
- goto out;
- if (!page->private) {
- WARN_ON(1);
- goto out;
- }
- len = page->private >> 2;
- WARN_ON(len == 0);
-
- eb = find_extent_buffer(tree, start, len);
+ eb = (struct extent_buffer *)page->private;
+ if (page != eb->pages[0])
+ return 0;
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
WARN_ON(1);
- goto err;
+ return 0;
}
if (eb->pages[0] != page) {
WARN_ON(1);
- goto err;
+ return 0;
}
if (!PageUptodate(page)) {
WARN_ON(1);
- goto err;
+ return 0;
}
csum_tree_block(root, eb, 0);
-err:
- free_extent_buffer(eb);
-out:
return 0;
}
@@ -566,7 +555,6 @@
struct extent_io_tree *tree;
u64 found_start;
int found_level;
- unsigned long len;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
int ret = 0;
@@ -576,13 +564,8 @@
goto out;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- len = page->private >> 2;
+ eb = (struct extent_buffer *)page->private;
- eb = find_eb_for_page(tree, page, max(root->leafsize, root->nodesize));
- if (!eb) {
- ret = -EIO;
- goto out;
- }
reads_done = atomic_dec_and_test(&eb->pages_reading);
if (!reads_done)
goto err;
@@ -631,7 +614,6 @@
if (ret && eb)
clear_extent_buffer_uptodate(tree, eb, NULL);
- free_extent_buffer(eb);
out:
return ret;
}
@@ -640,31 +622,17 @@
struct page *page, u64 start, u64 end,
int mirror_num, struct extent_state *state)
{
- struct extent_io_tree *tree;
- unsigned long len;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (page->private == EXTENT_PAGE_PRIVATE)
- goto out;
- if (!page->private)
- goto out;
-
- len = page->private >> 2;
- WARN_ON(len == 0);
-
- eb = alloc_extent_buffer(tree, start, len);
- if (eb == NULL)
- goto out;
+ eb = (struct extent_buffer *)page->private;
+ if (page != eb->pages[0])
+ return -EIO;
if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
btree_readahead_hook(root, eb, eb->start, -EIO);
}
- free_extent_buffer(eb);
-
-out:
return -EIO; /* we fixed nothing */
}
@@ -955,10 +923,8 @@
static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
- struct extent_io_tree *tree;
struct extent_map_tree *map;
- struct extent_buffer *eb;
- struct btrfs_root *root;
+ struct extent_io_tree *tree;
int ret;
if (PageWriteback(page) || PageDirty(page))
@@ -967,13 +933,6 @@
tree = &BTRFS_I(page->mapping->host)->io_tree;
map = &BTRFS_I(page->mapping->host)->extent_tree;
- root = BTRFS_I(page->mapping->host)->root;
- if (page->private == EXTENT_PAGE_PRIVATE) {
- eb = find_eb_for_page(tree, page, max(root->leafsize, root->nodesize));
- free_extent_buffer(eb);
- if (eb)
- return 0;
- }
/*
* We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
* slab allocation from alloc_extent_state down the callchain where
@@ -985,14 +944,7 @@
if (!ret)
return 0;
- ret = try_release_extent_buffer(tree, page);
- if (ret == 1) {
- ClearPagePrivate(page);
- set_page_private(page, 0);
- page_cache_release(page);
- }
-
- return ret;
+ return try_release_extent_buffer(tree, page);
}
static void btree_invalidatepage(struct page *page, unsigned long offset)
@@ -3219,17 +3171,21 @@
{
struct inode *inode = page->mapping->host;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_buffer *eb;
- unsigned long len;
- u64 bytenr = page_offset(page);
- if (page->private == EXTENT_PAGE_PRIVATE)
+ /*
+ * We culled this eb but the page is still hanging out on the mapping,
+ * carry on.
+ */
+ if (!PagePrivate(page))
goto out;
- len = page->private >> 2;
- eb = find_extent_buffer(io_tree, bytenr, len);
- if (!eb)
+ eb = (struct extent_buffer *)page->private;
+ if (!eb) {
+ WARN_ON(1);
+ goto out;
+ }
+ if (page != eb->pages[0])
goto out;
if (!btrfs_try_tree_write_lock(eb)) {
@@ -3248,7 +3204,6 @@
}
btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
out:
if (!trylock_page(page)) {
flush_fn(data);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c6c9ce4..0381b60 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2473,6 +2473,17 @@
return ret;
}
+void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
+{
+ if (!PagePrivate(page)) {
+ SetPagePrivate(page);
+ page_cache_get(page);
+ set_page_private(page, (unsigned long)eb);
+ } else {
+ WARN_ON(page->private != (unsigned long)eb);
+ }
+}
+
void set_page_extent_mapped(struct page *page)
{
if (!PagePrivate(page)) {
@@ -2482,12 +2493,6 @@
}
}
-static void set_page_extent_head(struct page *page, unsigned long len)
-{
- WARN_ON(!PagePrivate(page));
- set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
-}
-
/*
* basic readpage implementation. Locked extent state structs are inserted
* into the tree that are removed when the IO is done (by the end_io
@@ -3585,6 +3590,7 @@
return NULL;
eb->start = start;
eb->len = len;
+ eb->tree = tree;
rwlock_init(&eb->lock);
atomic_set(&eb->write_locks, 0);
atomic_set(&eb->read_locks, 0);
@@ -3637,8 +3643,31 @@
do {
index--;
page = extent_buffer_page(eb, index);
- if (page)
+ if (page) {
+ spin_lock(&page->mapping->private_lock);
+ /*
+ * We do this since we'll remove the pages after we've
+ * removed the eb from the radix tree, so we could race
+ * and have this page now attached to the new eb. So
+ * only clear page_private if it's still connected to
+ * this eb.
+ */
+ if (PagePrivate(page) &&
+ page->private == (unsigned long)eb) {
+ /*
+ * We need to make sure we haven't been attached
+ * to a new eb.
+ */
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ /* One for the page private */
+ page_cache_release(page);
+ }
+ spin_unlock(&page->mapping->private_lock);
+
+ /* One for when we alloced the page */
page_cache_release(page);
+ }
} while (index != start_idx);
}
@@ -3683,6 +3712,32 @@
WARN_ON(1);
goto free_eb;
}
+
+ spin_lock(&mapping->private_lock);
+ if (PagePrivate(p)) {
+ /*
+ * We could have already allocated an eb for this page
+ * and attached one so let's see if we can get a ref on
+ * the existing eb, and if we can we know it's good and
+ * we can just return that one, else we know we can just
+ * overwrite page->private.
+ */
+ exists = (struct extent_buffer *)p->private;
+ if (atomic_inc_not_zero(&exists->refs)) {
+ spin_unlock(&mapping->private_lock);
+ unlock_page(p);
+ goto free_eb;
+ }
+
+ /*
+ * Do this so attach doesn't complain and we need to
+ * drop the ref the old guy had.
+ */
+ ClearPagePrivate(p);
+ page_cache_release(p);
+ }
+ attach_extent_buffer_page(eb, p);
+ spin_unlock(&mapping->private_lock);
mark_page_accessed(p);
eb->pages[i] = p;
if (!PageUptodate(p))
@@ -3705,7 +3760,6 @@
if (ret == -EEXIST) {
exists = radix_tree_lookup(&tree->buffer,
start >> PAGE_CACHE_SHIFT);
- /* add one reference for the caller */
atomic_inc(&exists->refs);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
@@ -3725,12 +3779,9 @@
* after the extent buffer is in the radix tree so
* it doesn't get lost
*/
- set_page_extent_mapped(eb->pages[0]);
- set_page_extent_head(eb->pages[0], eb->len);
SetPageChecked(eb->pages[0]);
for (i = 1; i < num_pages; i++) {
p = extent_buffer_page(eb, i);
- set_page_extent_mapped(p);
ClearPageChecked(p);
unlock_page(p);
}
@@ -3794,10 +3845,6 @@
lock_page(page);
WARN_ON(!PagePrivate(page));
- set_page_extent_mapped(page);
- if (i == 0)
- set_page_extent_head(page, eb->len);
-
clear_page_dirty_for_io(page);
spin_lock_irq(&page->mapping->tree_lock);
if (!PageDirty(page)) {
@@ -4010,9 +4057,6 @@
atomic_set(&eb->pages_reading, num_reads);
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
- set_page_extent_mapped(page);
- if (i == 0)
- set_page_extent_head(page, eb->len);
if (!PageUptodate(page)) {
ClearPageError(page);
err = __extent_read_full_page(tree, page,
@@ -4395,22 +4439,19 @@
struct extent_buffer *eb =
container_of(head, struct extent_buffer, rcu_head);
- btrfs_release_extent_buffer(eb);
+ __free_extent_buffer(eb);
}
int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
{
u64 start = page_offset(page);
- struct extent_buffer *eb;
+ struct extent_buffer *eb = (struct extent_buffer *)page->private;
int ret = 1;
- spin_lock(&tree->buffer_lock);
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (!eb) {
- spin_unlock(&tree->buffer_lock);
- return ret;
- }
+ if (!PagePrivate(page) || !eb)
+ return 1;
+ spin_lock(&tree->buffer_lock);
if (atomic_read(&eb->refs) > 1 ||
test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
ret = 0;
@@ -4426,6 +4467,7 @@
goto out;
}
radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ btrfs_release_extent_buffer_page(eb, 0);
out:
spin_unlock(&tree->buffer_lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e38a3d..83e432d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -127,6 +127,7 @@
unsigned long map_start;
unsigned long map_len;
unsigned long bflags;
+ struct extent_io_tree *tree;
atomic_t refs;
atomic_t pages_reading;
struct list_head leak_list;