// SPDX-License-Identifier: GPL-2.0

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/fsverity.h>
#include "misc.h"
#include "extent_io.h"
#include "extent-io-tree.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"
#include "subpage.h"
#include "zoned.h"
#include "block-group.h"

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set btrfs_bioset;

static inline bool extent_state_in_tree(const struct extent_state *state)
{
	return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

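/*
 * Track a newly allocated object on the given leak list, protected by @lock.
 * Paired with btrfs_leak_debug_del() when the object is freed.
 */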
static inline void btrfs_leak_debug_add(spinlock_t *lock,
					struct list_head *new,
					struct list_head *head)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	list_add(new, head);
	spin_unlock_irqrestore(lock, flags);
}

static inline void btrfs_leak_debug_del(spinlock_t *lock,
					struct list_head *entry)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	list_del(entry);
	spin_unlock_irqrestore(lock, flags);
}

void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
{
	struct extent_buffer *eb;
	unsigned long flags;

	/*
	 * If we didn't get into open_ctree our allocated_ebs will not be
	 * initialized, so just skip this.
	 */
	if (!fs_info->allocated_ebs.next)
		return;

	spin_lock_irqsave(&fs_info->eb_leak_lock, flags);
	while (!list_empty(&fs_info->allocated_ebs)) {
		eb = list_first_entry(&fs_info->allocated_ebs,
				      struct extent_buffer, leak_list);
		pr_err(
	"BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n",
		       eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
		       btrfs_header_owner(eb));
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags);
}

static inline void btrfs_extent_state_leak_debug_check(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
		       state->start, state->end, state->state,
		       extent_state_in_tree(state),
		       refcount_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);
	}
}

#define btrfs_debug_check_extent_io_range(tree, start, end)		\
	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
		struct extent_io_tree *tree, u64 start, u64 end)
{
	struct inode *inode = tree->private_data;
	u64 isize;

	if (!inode || !is_data_inode(inode))
		return;

	isize = i_size_read(inode);
	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
		btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
		    "%s: ino %llu isize %llu odd range [%llu,%llu]",
			caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
	}
}
#else
#define btrfs_leak_debug_add(lock, new, head)	do {} while (0)
#define btrfs_leak_debug_del(lock, entry)	do {} while (0)
#define btrfs_extent_state_leak_debug_check()	do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)	do {} while (0)
#endif

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct btrfs_bio_ctrl bio_ctrl;
	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
	unsigned int extent_locked:1;

	/* tells the submit_bio code to use REQ_SYNC */
	unsigned int sync_io:1;
};

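/*
 * Record a bit change on @state in @changeset: bump the changed byte count
 * and add the state's range to the changeset's ulist. A NULL changeset, or a
 * change that is a no-op for the given bits, is silently ignored.
 */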
static int add_extent_changeset(struct extent_state *state, u32 bits,
				 struct extent_changeset *changeset,
				 int set)
{
	int ret;

	if (!changeset)
		return 0;
	if (set && (state->state & bits) == bits)
		return 0;
	if (!set && (state->state & bits) == 0)
		return 0;
	changeset->bytes_changed += state->end - state->start + 1;
	ret = ulist_add(&changeset->range_changed, state->start, state->end,
			GFP_ATOMIC);
	return ret;
}

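/*
 * Hand a fully built bio to the data or the metadata submission hook,
 * depending on the kind of inode behind the bio's io tree. Returns a
 * negative errno if the lower layer rejects the bio.
 */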
int __must_check submit_one_bio(struct bio *bio, int mirror_num,
				unsigned long bio_flags)
{
	blk_status_t ret = 0;
	struct extent_io_tree *tree = bio->bi_private;

	bio->bi_private = NULL;

	/* Caller should ensure the bio has at least some range added */
	ASSERT(bio->bi_iter.bi_size);
	if (is_data_inode(tree->private_data))
		ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
					    bio_flags);
	else
		ret = btrfs_submit_metadata_bio(tree->private_data, bio,
						mirror_num, bio_flags);

	return blk_status_to_errno(ret);
}

/* Cleanup unsubmitted bios */
static void end_write_bio(struct extent_page_data *epd, int ret)
{
	struct bio *bio = epd->bio_ctrl.bio;

	if (bio) {
		bio->bi_status = errno_to_blk_status(ret);
		bio_endio(bio);
		epd->bio_ctrl.bio = NULL;
	}
}

/*
 * Submit bio from extent page data via submit_one_bio
 *
 * Return 0 if everything is OK.
 * Return <0 for error.
 */
static int __must_check flush_write_bio(struct extent_page_data *epd)
{
	int ret = 0;
	struct bio *bio = epd->bio_ctrl.bio;

	if (bio) {
		ret = submit_one_bio(bio, 0, 0);
		/*
		 * Clean up of epd->bio is handled by its endio function.
		 * And endio is either triggered by successful bio execution
		 * or the error handler of submit bio hook.
		 * So at this point, no matter what happened, we don't need
		 * to clean up epd->bio.
		 */
		epd->bio_ctrl.bio = NULL;
	}
	return ret;
}

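/* Create the slab cache used for struct extent_state allocations. */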
int __init extent_state_cache_init(void)
{
	extent_state_cache = kmem_cache_create("btrfs_extent_state",
			sizeof(struct extent_state), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!extent_state_cache)
		return -ENOMEM;
	return 0;
}

int __init extent_io_init(void)
{
	extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
			sizeof(struct extent_buffer), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!extent_buffer_cache)
		return -ENOMEM;

	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
			offsetof(struct btrfs_bio, bio),
			BIOSET_NEED_BVECS))
		goto free_buffer_cache;

	if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
		goto free_bioset;

	return 0;

free_bioset:
	bioset_exit(&btrfs_bioset);

free_buffer_cache:
	kmem_cache_destroy(extent_buffer_cache);
	extent_buffer_cache = NULL;
	return -ENOMEM;
}

void __cold extent_state_cache_exit(void)
{
	btrfs_extent_state_leak_debug_check();
	kmem_cache_destroy(extent_state_cache);
}

void __cold extent_io_exit(void)
{
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(extent_buffer_cache);
	bioset_exit(&btrfs_bioset);
}

/*
 * For the file_extent_tree, we want to hold the inode lock when we lookup and
 * update the disk_i_size, but lockdep will complain because our io_tree we hold
 * the tree lock and get the inode lock when setting delalloc. These two things
 * are unrelated, so make a class for the file_extent_tree so we don't get the
 * two locking patterns mixed up.
 */
static struct lock_class_key file_extent_tree_class;

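/*
 * Initialize an extent_io_tree for the given @owner. @private_data is the
 * owning inode for inode io trees and may be NULL for other owners.
 */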
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree, unsigned int owner,
			 void *private_data)
{
	tree->fs_info = fs_info;
	tree->state = RB_ROOT;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	tree->private_data = private_data;
	tree->owner = owner;
	if (owner == IO_TREE_INODE_FILE_EXTENT)
		lockdep_set_class(&tree->lock, &file_extent_tree_class);
}

void extent_io_tree_release(struct extent_io_tree *tree)
{
	spin_lock(&tree->lock);
	/*
	 * Do a single barrier for the waitqueue_active check here, the state
	 * of the waitqueue should not change once extent_io_tree_release is
	 * called.
	 */
	smp_mb();
	while (!RB_EMPTY_ROOT(&tree->state)) {
		struct rb_node *node;
		struct extent_state *state;

		node = rb_first(&tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		rb_erase(&state->rb_node, &tree->state);
		RB_CLEAR_NODE(&state->rb_node);
		/*
		 * btree io trees aren't supposed to have tasks waiting for
		 * changes in the flags of extent states ever.
		 */
		ASSERT(!waitqueue_active(&state->wq));
		free_extent_state(state);

		cond_resched_lock(&tree->lock);
	}
	spin_unlock(&tree->lock);
}

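/*
 * Allocate a new extent_state with a single reference. Returns NULL if the
 * slab allocation fails.
 */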
static struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	/*
	 * The given mask might be not appropriate for the slab allocator,
	 * drop the unsupported bits
	 */
	mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->failrec = NULL;
	RB_CLEAR_NODE(&state->rb_node);
	btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
	refcount_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	trace_alloc_extent_state(state, mask, _RET_IP_);
	return state;
}

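/* Drop a reference on @state and free it once the last reference is gone. */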
void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (refcount_dec_and_test(&state->refs)) {
		WARN_ON(extent_state_in_tree(state));
		btrfs_leak_debug_del(&leak_lock, &state->leak_list);
		trace_free_extent_state(state, _RET_IP_);
		kmem_cache_free(extent_state_cache, state);
	}
}

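/*
 * Link @node into the rb-tree rooted at @root, starting the descent at
 * @search_start (or at the root when NULL), or directly at the precomputed
 * slot @p_in/@parent_in. Returns the existing node whose range already covers
 * @offset, or NULL when the insert succeeded.
 */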
static struct rb_node *tree_insert(struct rb_root *root,
				   struct rb_node *search_start,
				   u64 offset,
				   struct rb_node *node,
				   struct rb_node ***p_in,
				   struct rb_node **parent_in)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	if (p_in && parent_in) {
		p = *p_in;
		parent = *parent_in;
		goto do_insert;
	}

	p = search_start ? &search_start : &root->rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

do_insert:
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

/**
 * Search @tree for an entry that contains @offset. Such entry would have
 * entry->start <= offset && entry->end >= offset.
 *
 * @tree:       the tree to search
 * @offset:     offset that should fall within an entry in @tree
 * @next_ret:   pointer to the first entry whose range ends after @offset
 * @prev_ret:   pointer to the first entry whose range begins before @offset
 * @p_ret:      pointer where new node should be anchored (used when inserting an
 *	        entry in the tree)
 * @parent_ret: points to entry which would have been the parent of the entry,
 *              containing @offset
 *
 * This function returns a pointer to the entry that contains @offset byte
 * address. If no such entry exists, then NULL is returned and the other
 * pointer arguments to the function are filled, otherwise the found entry is
 * returned and other pointers are left untouched.
 */
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				      struct rb_node **next_ret,
				      struct rb_node **prev_ret,
				      struct rb_node ***p_ret,
				      struct rb_node **parent_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node **n = &root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (*n) {
		prev = *n;
		entry = rb_entry(prev, struct tree_entry, rb_node);
		prev_entry = entry;

		if (offset < entry->start)
			n = &(*n)->rb_left;
		else if (offset > entry->end)
			n = &(*n)->rb_right;
		else
			return *n;
	}

	if (p_ret)
		*p_ret = n;
	if (parent_ret)
		*parent_ret = prev;

	if (next_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
		prev = orig_prev;
	}

	if (prev_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
	}
	return NULL;
}

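/*
 * Find the extent state covering @offset, or the next one after it when there
 * is no exact match, remembering the insertion slot in @p_ret and @parent_ret
 * for a following insert_state() call.
 */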
static inline struct rb_node *
tree_search_for_insert(struct extent_io_tree *tree,
		       u64 offset,
		       struct rb_node ***p_ret,
		       struct rb_node **parent_ret)
{
	struct rb_node *next = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret);
	if (!ret)
		return next;
	return ret;
}

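/* Same as tree_search_for_insert() but without recording the insertion slot. */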
static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	return tree_search_for_insert(tree, offset, NULL, NULL);
}

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree. Extents with EXTENT_IO in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static void merge_state(struct extent_io_tree *tree,
			struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		return;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			if (tree->private_data &&
			    is_data_inode(tree->private_data))
				btrfs_merge_delalloc_extent(tree->private_data,
							    state, other);
			state->start = other->start;
			rb_erase(&other->rb_node, &tree->state);
			RB_CLEAR_NODE(&other->rb_node);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			if (tree->private_data &&
			    is_data_inode(tree->private_data))
				btrfs_merge_delalloc_extent(tree->private_data,
							    state, other);
			state->end = other->end;
			rb_erase(&other->rb_node, &tree->state);
			RB_CLEAR_NODE(&other->rb_node);
			free_extent_state(other);
		}
	}
}

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state, u32 *bits,
			   struct extent_changeset *changeset);

/*
 * insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			struct rb_node ***p,
			struct rb_node **parent,
			u32 *bits, struct extent_changeset *changeset)
{
	struct rb_node *node;

	if (end < start) {
		btrfs_err(tree->fs_info,
			"insert state: end < start %llu %llu", end, start);
		WARN_ON(1);
	}
	state->start = start;
	state->end = end;

	set_state_bits(tree, state, bits, changeset);

	node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		btrfs_err(tree->fs_info,
		       "found node %llu %llu on insert of %llu %llu",
		       found->start, found->end, start, end);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_split_delalloc_extent(tree->private_data, orig, split);

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
			   &prealloc->rb_node, NULL, NULL);
	if (node) {
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}

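/* Return the in-tree successor of @state, or NULL if it is the last entry. */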
static struct extent_state *next_state(struct extent_state *state)
{
	struct rb_node *next = rb_next(&state->rb_node);
	if (next)
		return rb_entry(next, struct extent_state, rb_node);
	else
		return NULL;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
					    struct extent_state *state,
					    u32 *bits, int wake,
					    struct extent_changeset *changeset)
{
	struct extent_state *next;
	u32 bits_to_clear = *bits & ~EXTENT_CTLBITS;
	int ret;

	if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		WARN_ON(range > tree->dirty_bytes);
		tree->dirty_bytes -= range;
	}

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_clear_delalloc_extent(tree->private_data, state, bits);

	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
	BUG_ON(ret < 0);
	state->state &= ~bits_to_clear;
	if (wake)
		wake_up(&state->wq);
	if (state->state == 0) {
		next = next_state(state);
		if (extent_state_in_tree(state)) {
			rb_erase(&state->rb_node, &tree->state);
			RB_CLEAR_NODE(&state->rb_node);
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
		next = next_state(state);
	}
	return next;
}

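/* Ensure a preallocated extent_state exists, allocating one atomically if needed. */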
static struct extent_state *
alloc_extent_state_atomic(struct extent_state *prealloc)
{
	if (!prealloc)
		prealloc = alloc_extent_state(GFP_ATOMIC);

	return prealloc;
}

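/* Fatal error path used when the extent tree changed underneath a locked caller. */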
static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
	btrfs_panic(tree->fs_info, err,
	"locking error: extent tree was modified by another thread while locked");
}

/*
 * clear some bits on a range in the tree. This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns 0 on success and < 0 on error.
 */
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, int wake, int delete,
		       struct extent_state **cached_state,
		       gfp_t mask, struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *cached;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	u64 last_end;
	int err;
	int clear = 0;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);

	if (bits & EXTENT_DELALLOC)
		bits |= EXTENT_NORESERVE;

	if (delete)
		bits |= ~EXTENT_CTLBITS;

	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
		clear = 1;
again:
	if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover too any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state) {
		cached = *cached_state;

		if (clear) {
			*cached_state = NULL;
			cached_state = NULL;
		}

		if (cached && extent_state_in_tree(cached) &&
		    cached->start <= start && cached->end > start) {
			if (clear)
				refcount_dec(&cached->refs);
			state = cached;
			goto hit_next;
		}
		if (clear)
			free_extent_state(cached);
	}
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);
	last_end = state->end;

	/* the state doesn't have the wanted bits, go ahead */
	if (!(state->state & bits)) {
		state = next_state(state);
		goto next;
	}

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again. It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state = clear_state_bit(tree, state, &bits, wake,
						changeset);
			goto next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		if (wake)
			wake_up(&state->wq);

		clear_state_bit(tree, prealloc, &bits, wake, changeset);

		prealloc = NULL;
		goto out;
	}

	state = clear_state_bit(tree, state, &bits, wake, changeset);
next:
	if (last_end == (u64)-1)
		goto out;
	start = last_end + 1;
	if (start <= end && state && !need_resched())
		goto hit_next;

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return 0;

}

static void wait_on_state(struct extent_io_tree *tree,
			  struct extent_state *state)
		__releases(tree->lock)
		__acquires(tree->lock)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock(&tree->lock);
	schedule();
	spin_lock(&tree->lock);
	finish_wait(&state->wq, &wait);
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
			    u32 bits)
{
	struct extent_state *state;
	struct rb_node *node;

	btrfs_debug_check_extent_io_range(tree, start, end);

	spin_lock(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
process_node:
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			refcount_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (!cond_resched_lock(&tree->lock)) {
			node = rb_next(node);
			goto process_node;
		}
	}
out:
	spin_unlock(&tree->lock);
}

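/*
 * Set bits on an existing extent state: update dirty byte accounting, notify
 * the inode delalloc code for data inodes and record the change in
 * @changeset.
 */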
static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   u32 *bits, struct extent_changeset *changeset)
{
	u32 bits_to_set = *bits & ~EXTENT_CTLBITS;
	int ret;

	if (tree->private_data && is_data_inode(tree->private_data))
		btrfs_set_delalloc_extent(tree->private_data, state, bits);

	if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		tree->dirty_bytes += range;
	}
	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
	BUG_ON(ret < 0);
	state->state |= bits_to_set;
}

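/*
 * Cache @state in @cached_ptr (taking an extra reference) if nothing is
 * cached yet and the state carries one of the requested @flags, or if no
 * flags were requested at all.
 */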
static void cache_state_if_flags(struct extent_state *state,
				 struct extent_state **cached_ptr,
				 unsigned flags)
{
	if (cached_ptr && !(*cached_ptr)) {
		if (!flags || (state->state & flags)) {
			*cached_ptr = state;
			refcount_inc(&state->refs);
		}
	}
}

static void cache_state(struct extent_state *state,
			struct extent_state **cached_ptr)
{
	return cache_state_if_flags(state, cached_ptr,
				    EXTENT_LOCKED | EXTENT_BOUNDARY);
}

/*
 * set some bits on a range in the tree. This may require allocations or
 * sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If any of the exclusive bits are set, this will fail with -EEXIST if some
 * part of the range already has the desired bits set. The start of the
 * existing range is returned in failed_start in this case.
 *
 * [start, end] is inclusive This takes the tree lock.
 */
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
		   u32 exclusive_bits, u64 *failed_start,
		   struct extent_state **cached_state, gfp_t mask,
		   struct extent_changeset *changeset)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	struct rb_node **p;
	struct rb_node *parent;
	int err = 0;
	u64 last_start;
	u64 last_end;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);

	if (exclusive_bits)
		ASSERT(failed_start);
	else
		ASSERT(failed_start == NULL);
again:
	if (!prealloc && gfpflags_allow_blocking(mask)) {
		/*
		 * Don't care for allocation failure here because we might end
		 * up not needing the pre-allocated extent state at all, which
		 * is the case if we only have in the tree extent states that
		 * cover our input range and don't cover too any other range.
		 * If we end up needing a new extent state we allocate it later.
		 */
		prealloc = alloc_extent_state(mask);
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state)) {
			node = &state->rb_node;
			goto hit_next;
		}
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search_for_insert(tree, start, &p, &parent);
	if (!node) {
		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = insert_state(tree, prealloc, start, end,
				   &p, &parent, &bits, changeset);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		if (state->state & exclusive_bits) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}

		set_state_bits(tree, state, &bits, changeset);
		cache_state(state, cached_state);
		merge_state(tree, state);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		state = next_state(state);
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again. It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		/*
		 * If this extent already has all the bits we want set, then
		 * skip it, not necessary to split it or do anything with it.
		 */
		if ((state->state & bits) == bits) {
			start = state->end + 1;
			cache_state(state, cached_state);
			goto search_again;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);

		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, &bits, changeset);
			cache_state(state, cached_state);
			merge_state(tree, state);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			state = next_state(state);
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
		err = insert_state(tree, prealloc, start, this_end,
				   NULL, NULL, &bits, changeset);
		if (err)
			extent_io_tree_panic(tree, err);

		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (state->state & exclusive_bits) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}

		prealloc = alloc_extent_state_atomic(prealloc);
		BUG_ON(!prealloc);
		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, &bits, changeset);
		cache_state(prealloc, cached_state);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	spin_unlock(&tree->lock);
	if (gfpflags_allow_blocking(mask))
		cond_resched();
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

}

/**
 * convert_extent_bit - convert all bits in a given range from one bit to
 * another
 * @tree:	the io tree to search
 * @start:	the start offset in bytes
 * @end:	the end offset in bytes (inclusive)
 * @bits:	the bits to set in this range
 * @clear_bits:	the bits to clear in this range
 * @cached_state:	state that we're going to cache
 *
 * This will go through and set bits for the given range. If any states exist
 * already in this range they are set with the given bit and cleared of the
 * clear_bits. This is only meant to be used by things that are mergeable, ie
 * converting from say DELALLOC to DIRTY. This is not meant to be used with
 * boundary bits like LOCK.
 *
 * All allocations are done with GFP_NOFS.
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       u32 bits, u32 clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	struct rb_node **p;
	struct rb_node *parent;
	int err = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state)) {
			node = &state->rb_node;
			goto hit_next;
		}
	}

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search_for_insert(tree, start, &p, &parent);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001256 if (!node) {
1257 prealloc = alloc_extent_state_atomic(prealloc);
Liu Bo1cf4ffd2011-12-07 20:08:40 -05001258 if (!prealloc) {
1259 err = -ENOMEM;
1260 goto out;
1261 }
Filipe David Borba Manana12cfbad2013-11-26 15:41:47 +00001262 err = insert_state(tree, prealloc, start, end,
Qu Wenruod38ed272015-10-12 14:53:37 +08001263 &p, &parent, &bits, NULL);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001264 if (err)
1265 extent_io_tree_panic(tree, err);
Filipe David Borba Mananac42ac0b2013-11-26 15:01:34 +00001266 cache_state(prealloc, cached_state);
1267 prealloc = NULL;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001268 goto out;
1269 }
1270 state = rb_entry(node, struct extent_state, rb_node);
1271hit_next:
1272 last_start = state->start;
1273 last_end = state->end;
1274
1275 /*
1276 * | ---- desired range ---- |
1277 * | state |
1278 *
1279 * Just lock what we found and keep going
1280 */
1281 if (state->start == start && state->end <= end) {
Qu Wenruod38ed272015-10-12 14:53:37 +08001282 set_state_bits(tree, state, &bits, NULL);
Josef Bacike6138872012-09-27 17:07:30 -04001283 cache_state(state, cached_state);
Qu Wenruofefdc552015-10-12 15:35:38 +08001284 state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001285 if (last_end == (u64)-1)
1286 goto out;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001287 start = last_end + 1;
Liu Bod1ac6e42012-05-10 18:10:39 +08001288 if (start < end && state && state->start == start &&
1289 !need_resched())
1290 goto hit_next;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001291 goto search_again;
1292 }
1293
1294 /*
1295 * | ---- desired range ---- |
1296 * | state |
1297 * or
1298 * | ------------- state -------------- |
1299 *
1300 * We need to split the extent we found, and may flip bits on
1301 * second half.
1302 *
1303 * If the extent we found extends past our
1304 * range, we just split and search again. It'll get split
1305 * again the next time though.
1306 *
1307 * If the extent we found is inside our range, we set the
1308 * desired bit on it.
1309 */
1310 if (state->start < start) {
1311 prealloc = alloc_extent_state_atomic(prealloc);
Liu Bo1cf4ffd2011-12-07 20:08:40 -05001312 if (!prealloc) {
1313 err = -ENOMEM;
1314 goto out;
1315 }
Josef Bacik462d6fa2011-09-26 13:56:12 -04001316 err = split_state(tree, state, prealloc, start);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001317 if (err)
1318 extent_io_tree_panic(tree, err);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001319 prealloc = NULL;
1320 if (err)
1321 goto out;
1322 if (state->end <= end) {
Qu Wenruod38ed272015-10-12 14:53:37 +08001323 set_state_bits(tree, state, &bits, NULL);
Josef Bacike6138872012-09-27 17:07:30 -04001324 cache_state(state, cached_state);
Qu Wenruofefdc552015-10-12 15:35:38 +08001325 state = clear_state_bit(tree, state, &clear_bits, 0,
1326 NULL);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001327 if (last_end == (u64)-1)
1328 goto out;
1329 start = last_end + 1;
Liu Bod1ac6e42012-05-10 18:10:39 +08001330 if (start < end && state && state->start == start &&
1331 !need_resched())
1332 goto hit_next;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001333 }
1334 goto search_again;
1335 }
1336 /*
1337 * | ---- desired range ---- |
1338 * | state | or | state |
1339 *
1340 * There's a hole, we need to insert something in it and
1341 * ignore the extent we found.
1342 */
1343 if (state->start > start) {
1344 u64 this_end;
1345 if (end < last_start)
1346 this_end = end;
1347 else
1348 this_end = last_start - 1;
1349
1350 prealloc = alloc_extent_state_atomic(prealloc);
Liu Bo1cf4ffd2011-12-07 20:08:40 -05001351 if (!prealloc) {
1352 err = -ENOMEM;
1353 goto out;
1354 }
Josef Bacik462d6fa2011-09-26 13:56:12 -04001355
1356 /*
 1357		 * Avoid freeing 'prealloc' if it can be merged with
1358 * the later extent.
1359 */
1360 err = insert_state(tree, prealloc, start, this_end,
Qu Wenruod38ed272015-10-12 14:53:37 +08001361 NULL, NULL, &bits, NULL);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001362 if (err)
1363 extent_io_tree_panic(tree, err);
Josef Bacike6138872012-09-27 17:07:30 -04001364 cache_state(prealloc, cached_state);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001365 prealloc = NULL;
1366 start = this_end + 1;
1367 goto search_again;
1368 }
1369 /*
1370 * | ---- desired range ---- |
1371 * | state |
1372 * We need to split the extent, and set the bit
1373 * on the first half
1374 */
1375 if (state->start <= end && state->end > end) {
1376 prealloc = alloc_extent_state_atomic(prealloc);
Liu Bo1cf4ffd2011-12-07 20:08:40 -05001377 if (!prealloc) {
1378 err = -ENOMEM;
1379 goto out;
1380 }
Josef Bacik462d6fa2011-09-26 13:56:12 -04001381
1382 err = split_state(tree, state, prealloc, end + 1);
Jeff Mahoneyc2d904e2011-10-03 23:22:32 -04001383 if (err)
1384 extent_io_tree_panic(tree, err);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001385
Qu Wenruod38ed272015-10-12 14:53:37 +08001386 set_state_bits(tree, prealloc, &bits, NULL);
Josef Bacike6138872012-09-27 17:07:30 -04001387 cache_state(prealloc, cached_state);
Qu Wenruofefdc552015-10-12 15:35:38 +08001388 clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
Josef Bacik462d6fa2011-09-26 13:56:12 -04001389 prealloc = NULL;
1390 goto out;
1391 }
1392
Josef Bacik462d6fa2011-09-26 13:56:12 -04001393search_again:
1394 if (start > end)
1395 goto out;
1396 spin_unlock(&tree->lock);
David Sterba210aa272016-04-26 23:54:39 +02001397 cond_resched();
Filipe Mananac8fd3de2014-10-13 12:28:39 +01001398 first_iteration = false;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001399 goto again;
Josef Bacik462d6fa2011-09-26 13:56:12 -04001400
1401out:
1402 spin_unlock(&tree->lock);
1403 if (prealloc)
1404 free_extent_state(prealloc);
1405
1406 return err;
1407}
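
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source): converting a mergeable bit over a range, as suggested by the
 * DELALLOC -> DIRTY example in the comment above. 'tree', 'start' and 'end'
 * are assumed to come from the caller; the cached state handed back via
 * 'cached' is dropped with free_extent_state() when no longer needed.
 *
 *	struct extent_state *cached = NULL;
 *	int ret;
 *
 *	ret = convert_extent_bit(tree, start, end, EXTENT_DIRTY,
 *				 EXTENT_DELALLOC, &cached);
 *	free_extent_state(cached);
 */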
1408
Chris Masond1310b22008-01-24 16:13:08 -05001409/* wrappers around set/clear extent bit */
Qu Wenruod38ed272015-10-12 14:53:37 +08001410int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
Qu Wenruof97e27e2020-11-13 20:51:40 +08001411 u32 bits, struct extent_changeset *changeset)
Qu Wenruod38ed272015-10-12 14:53:37 +08001412{
1413 /*
1414 * We don't support EXTENT_LOCKED yet, as current changeset will
1415 * record any bits changed, so for EXTENT_LOCKED case, it will
1416 * either fail with -EEXIST or changeset will record the whole
1417 * range.
1418 */
1419 BUG_ON(bits & EXTENT_LOCKED);
1420
Nikolay Borisov1cab5e72020-11-05 11:08:00 +02001421 return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
1422 changeset);
Qu Wenruod38ed272015-10-12 14:53:37 +08001423}
1424
Nikolay Borisov4ca73652019-03-27 14:24:10 +02001425int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
Qu Wenruof97e27e2020-11-13 20:51:40 +08001426 u32 bits)
Nikolay Borisov4ca73652019-03-27 14:24:10 +02001427{
Nikolay Borisov1cab5e72020-11-05 11:08:00 +02001428 return set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
1429 GFP_NOWAIT, NULL);
Nikolay Borisov4ca73652019-03-27 14:24:10 +02001430}
1431
Qu Wenruofefdc552015-10-12 15:35:38 +08001432int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
Qu Wenruof97e27e2020-11-13 20:51:40 +08001433 u32 bits, int wake, int delete,
David Sterbaae0f1622017-10-31 16:37:52 +01001434 struct extent_state **cached)
Qu Wenruofefdc552015-10-12 15:35:38 +08001435{
1436 return __clear_extent_bit(tree, start, end, bits, wake, delete,
David Sterbaae0f1622017-10-31 16:37:52 +01001437 cached, GFP_NOFS, NULL);
Qu Wenruofefdc552015-10-12 15:35:38 +08001438}
1439
Qu Wenruofefdc552015-10-12 15:35:38 +08001440int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
Qu Wenruof97e27e2020-11-13 20:51:40 +08001441 u32 bits, struct extent_changeset *changeset)
Qu Wenruofefdc552015-10-12 15:35:38 +08001442{
1443 /*
1444 * Don't support EXTENT_LOCKED case, same reason as
1445 * set_record_extent_bits().
1446 */
1447 BUG_ON(bits & EXTENT_LOCKED);
1448
David Sterbaf734c442016-04-26 23:54:39 +02001449 return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
Qu Wenruofefdc552015-10-12 15:35:38 +08001450 changeset);
1451}
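
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source): the _record_ variants above feed an extent_changeset so the
 * caller can see exactly which bytes changed. This assumes the
 * extent_changeset_alloc()/extent_changeset_free() helpers declared
 * elsewhere in btrfs; EXTENT_LOCKED must not be passed here, as noted above.
 *
 *	struct extent_changeset *changeset = extent_changeset_alloc();
 *
 *	if (changeset) {
 *		set_record_extent_bits(tree, start, end, EXTENT_DELALLOC,
 *				       changeset);
 *		... consume changeset->bytes_changed ...
 *		extent_changeset_free(changeset);
 *	}
 */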
1452
Chris Masond352ac62008-09-29 15:18:18 -04001453/*
 1454 * Either insert or lock the state struct between start and end. Use mask
 1455 * to tell us if waiting is desired.
1456 */
Chris Mason1edbb732009-09-02 13:24:36 -04001457int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
David Sterbaff13db42015-12-03 14:30:40 +01001458 struct extent_state **cached_state)
Chris Masond1310b22008-01-24 16:13:08 -05001459{
1460 int err;
1461 u64 failed_start;
David Sterba9ee49a042015-01-14 19:52:13 +01001462
Chris Masond1310b22008-01-24 16:13:08 -05001463 while (1) {
Nikolay Borisov1cab5e72020-11-05 11:08:00 +02001464 err = set_extent_bit(tree, start, end, EXTENT_LOCKED,
1465 EXTENT_LOCKED, &failed_start,
1466 cached_state, GFP_NOFS, NULL);
Jeff Mahoneyd0082372012-03-01 14:57:19 +01001467 if (err == -EEXIST) {
Chris Masond1310b22008-01-24 16:13:08 -05001468 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
1469 start = failed_start;
Jeff Mahoneyd0082372012-03-01 14:57:19 +01001470 } else
Chris Masond1310b22008-01-24 16:13:08 -05001471 break;
Chris Masond1310b22008-01-24 16:13:08 -05001472 WARN_ON(start > end);
1473 }
1474 return err;
1475}
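
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source): the usual lock/unlock pairing around a byte range, reusing the
 * cached state so the unlock does not have to search the tree again.
 * 'tree', 'start' and 'end' are assumed to come from the caller.
 *
 *	struct extent_state *cached_state = NULL;
 *
 *	lock_extent_bits(tree, start, end, &cached_state);
 *	... work on [start, end] while it is locked ...
 *	unlock_extent_cached(tree, start, end, &cached_state);
 */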
Chris Masond1310b22008-01-24 16:13:08 -05001476
Jeff Mahoneyd0082372012-03-01 14:57:19 +01001477int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
Josef Bacik25179202008-10-29 14:49:05 -04001478{
1479 int err;
1480 u64 failed_start;
1481
Nikolay Borisov1cab5e72020-11-05 11:08:00 +02001482 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
1483 &failed_start, NULL, GFP_NOFS, NULL);
Yan Zheng66435582008-10-30 14:19:50 -04001484 if (err == -EEXIST) {
1485 if (failed_start > start)
1486 clear_extent_bit(tree, start, failed_start - 1,
David Sterbaae0f1622017-10-31 16:37:52 +01001487 EXTENT_LOCKED, 1, 0, NULL);
Josef Bacik25179202008-10-29 14:49:05 -04001488 return 0;
Yan Zheng66435582008-10-30 14:19:50 -04001489 }
Josef Bacik25179202008-10-29 14:49:05 -04001490 return 1;
1491}
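
/*
 * Illustrative note (editor's addition, not part of the original source):
 * try_lock_extent() returns 1 when the whole range was locked and 0 when
 * another holder was hit, in which case any partially taken lock has
 * already been undone above. A non-blocking caller might do:
 *
 *	if (!try_lock_extent(tree, start, end))
 *		return -EAGAIN;
 */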
Josef Bacik25179202008-10-29 14:49:05 -04001492
David Sterbabd1fa4f2015-12-03 13:08:59 +01001493void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
Chris Mason4adaa612013-03-26 13:07:00 -04001494{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001495 unsigned long index = start >> PAGE_SHIFT;
1496 unsigned long end_index = end >> PAGE_SHIFT;
Chris Mason4adaa612013-03-26 13:07:00 -04001497 struct page *page;
1498
1499 while (index <= end_index) {
1500 page = find_get_page(inode->i_mapping, index);
1501 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1502 clear_page_dirty_for_io(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001503 put_page(page);
Chris Mason4adaa612013-03-26 13:07:00 -04001504 index++;
1505 }
Chris Mason4adaa612013-03-26 13:07:00 -04001506}
1507
David Sterbaf6311572015-12-03 13:08:59 +01001508void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
Chris Mason4adaa612013-03-26 13:07:00 -04001509{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001510 unsigned long index = start >> PAGE_SHIFT;
1511 unsigned long end_index = end >> PAGE_SHIFT;
Chris Mason4adaa612013-03-26 13:07:00 -04001512 struct page *page;
1513
1514 while (index <= end_index) {
1515 page = find_get_page(inode->i_mapping, index);
1516 BUG_ON(!page); /* Pages should be in the extent_io_tree */
Chris Mason4adaa612013-03-26 13:07:00 -04001517 __set_page_dirty_nobuffers(page);
Konstantin Khebnikov8d386332015-02-11 15:26:55 -08001518 account_page_redirty(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001519 put_page(page);
Chris Mason4adaa612013-03-26 13:07:00 -04001520 index++;
1521 }
Chris Mason4adaa612013-03-26 13:07:00 -04001522}
1523
Chris Masond352ac62008-09-29 15:18:18 -04001524/* find the first state struct with 'bits' set after 'start', and
 1525 * return it. tree->lock must be held. NULL will be returned if
1526 * nothing was found after 'start'
1527 */
Eric Sandeen48a3b632013-04-25 20:41:01 +00001528static struct extent_state *
Qu Wenruof97e27e2020-11-13 20:51:40 +08001529find_first_extent_bit_state(struct extent_io_tree *tree, u64 start, u32 bits)
Chris Masond7fc6402008-02-18 12:12:38 -05001530{
1531 struct rb_node *node;
1532 struct extent_state *state;
1533
1534 /*
1535 * this search will find all the extents that end after
1536 * our range starts.
1537 */
1538 node = tree_search(tree, start);
Chris Masond3977122009-01-05 21:25:51 -05001539 if (!node)
Chris Masond7fc6402008-02-18 12:12:38 -05001540 goto out;
Chris Masond7fc6402008-02-18 12:12:38 -05001541
Chris Masond3977122009-01-05 21:25:51 -05001542 while (1) {
Chris Masond7fc6402008-02-18 12:12:38 -05001543 state = rb_entry(node, struct extent_state, rb_node);
Chris Masond3977122009-01-05 21:25:51 -05001544 if (state->end >= start && (state->state & bits))
Chris Masond7fc6402008-02-18 12:12:38 -05001545 return state;
Chris Masond3977122009-01-05 21:25:51 -05001546
Chris Masond7fc6402008-02-18 12:12:38 -05001547 node = rb_next(node);
1548 if (!node)
1549 break;
1550 }
1551out:
1552 return NULL;
1553}
Chris Masond7fc6402008-02-18 12:12:38 -05001554
Chris Masond352ac62008-09-29 15:18:18 -04001555/*
Qu Wenruo03509b72020-10-21 14:24:50 +08001556 * Find the first offset in the io tree with one or more @bits set.
Xiao Guangrong69261c42011-07-14 03:19:45 +00001557 *
Qu Wenruo03509b72020-10-21 14:24:50 +08001558 * Note: If there are multiple bits set in @bits, any of them will match.
1559 *
1560 * Return 0 if we find something, and update @start_ret and @end_ret.
1561 * Return 1 if we found nothing.
Xiao Guangrong69261c42011-07-14 03:19:45 +00001562 */
1563int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
Qu Wenruof97e27e2020-11-13 20:51:40 +08001564 u64 *start_ret, u64 *end_ret, u32 bits,
Josef Bacike6138872012-09-27 17:07:30 -04001565 struct extent_state **cached_state)
Xiao Guangrong69261c42011-07-14 03:19:45 +00001566{
1567 struct extent_state *state;
1568 int ret = 1;
1569
1570 spin_lock(&tree->lock);
Josef Bacike6138872012-09-27 17:07:30 -04001571 if (cached_state && *cached_state) {
1572 state = *cached_state;
Filipe Manana27a35072014-07-06 20:09:59 +01001573 if (state->end == start - 1 && extent_state_in_tree(state)) {
Liu Bo9688e9a2018-08-23 03:14:53 +08001574 while ((state = next_state(state)) != NULL) {
Josef Bacike6138872012-09-27 17:07:30 -04001575 if (state->state & bits)
1576 goto got_it;
Josef Bacike6138872012-09-27 17:07:30 -04001577 }
1578 free_extent_state(*cached_state);
1579 *cached_state = NULL;
1580 goto out;
1581 }
1582 free_extent_state(*cached_state);
1583 *cached_state = NULL;
1584 }
1585
Xiao Guangrong69261c42011-07-14 03:19:45 +00001586 state = find_first_extent_bit_state(tree, start, bits);
Josef Bacike6138872012-09-27 17:07:30 -04001587got_it:
Xiao Guangrong69261c42011-07-14 03:19:45 +00001588 if (state) {
Filipe Mananae38e2ed2014-10-13 12:28:38 +01001589 cache_state_if_flags(state, cached_state, 0);
Xiao Guangrong69261c42011-07-14 03:19:45 +00001590 *start_ret = state->start;
1591 *end_ret = state->end;
1592 ret = 0;
1593 }
Josef Bacike6138872012-09-27 17:07:30 -04001594out:
Xiao Guangrong69261c42011-07-14 03:19:45 +00001595 spin_unlock(&tree->lock);
1596 return ret;
1597}
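
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source): walking the ranges that have any of the requested bits set,
 * starting from an assumed caller-provided offset. A return value of 0
 * means [found_start, found_end] carries at least one of the bits.
 *
 *	u64 cur = start;
 *	u64 found_start, found_end;
 *
 *	while (!find_first_extent_bit(tree, cur, &found_start, &found_end,
 *				      EXTENT_DIRTY, NULL)) {
 *		... handle [found_start, found_end] ...
 *		cur = found_end + 1;
 *	}
 */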
1598
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001599/**
Nikolay Borisov3bed2da2021-01-22 11:58:03 +02001600 * Find a contiguous area of bits
1601 *
1602 * @tree: io tree to check
1603 * @start: offset to start the search from
1604 * @start_ret: the first offset we found with the bits set
1605 * @end_ret: the final contiguous range of the bits that were set
1606 * @bits: bits to look for
Josef Bacik41a2ee72020-01-17 09:02:21 -05001607 *
1608 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
1609 * to set bits appropriately, and then merge them again. During this time it
1610 * will drop the tree->lock, so use this helper if you want to find the actual
1611 * contiguous area for given bits. We will search to the first bit we find, and
1612 * then walk down the tree until we find a non-contiguous area. The area
1613 * returned will be the full contiguous area with the bits set.
1614 */
1615int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
Qu Wenruof97e27e2020-11-13 20:51:40 +08001616 u64 *start_ret, u64 *end_ret, u32 bits)
Josef Bacik41a2ee72020-01-17 09:02:21 -05001617{
1618 struct extent_state *state;
1619 int ret = 1;
1620
1621 spin_lock(&tree->lock);
1622 state = find_first_extent_bit_state(tree, start, bits);
1623 if (state) {
1624 *start_ret = state->start;
1625 *end_ret = state->end;
1626 while ((state = next_state(state)) != NULL) {
1627 if (state->start > (*end_ret + 1))
1628 break;
1629 *end_ret = state->end;
1630 }
1631 ret = 0;
1632 }
1633 spin_unlock(&tree->lock);
1634 return ret;
1635}
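
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source): unlike find_first_extent_bit(), this keeps walking adjacent
 * states, so the returned range is the full contiguous area with the bits
 * set even if set_extent_bit()/clear_extent_bit() split it temporarily.
 * 'tree' and 'offset' are assumed to come from the caller.
 *
 *	u64 range_start, range_end;
 *
 *	if (!find_contiguous_extent_bit(tree, offset, &range_start,
 *					&range_end, EXTENT_DIRTY)) {
 *		... [range_start, range_end] is fully contiguous ...
 *	}
 */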
1636
1637/**
Nikolay Borisov3bed2da2021-01-22 11:58:03 +02001638 * Find the first range that has @bits not set. This range could start before
1639 * @start.
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001640 *
Nikolay Borisov3bed2da2021-01-22 11:58:03 +02001641 * @tree: the tree to search
1642 * @start: offset at/after which the found extent should start
1643 * @start_ret: records the beginning of the range
1644 * @end_ret: records the end of the range (inclusive)
1645 * @bits: the set of bits which must be unset
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001646 *
1647 * Since unallocated range is also considered one which doesn't have the bits
1648 * set it's possible that @end_ret contains -1, this happens in case the range
1649 * spans (last_range_end, end of device]. In this case it's up to the caller to
1650 * trim @end_ret to the appropriate size.
1651 */
1652void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
Qu Wenruof97e27e2020-11-13 20:51:40 +08001653 u64 *start_ret, u64 *end_ret, u32 bits)
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001654{
1655 struct extent_state *state;
1656 struct rb_node *node, *prev = NULL, *next;
1657
1658 spin_lock(&tree->lock);
1659
1660 /* Find first extent with bits cleared */
1661 while (1) {
1662 node = __etree_search(tree, start, &next, &prev, NULL, NULL);
Nikolay Borisov5750c372020-01-27 11:59:26 +02001663 if (!node && !next && !prev) {
1664 /*
1665 * Tree is completely empty, send full range and let
1666 * caller deal with it
1667 */
1668 *start_ret = 0;
1669 *end_ret = -1;
1670 goto out;
1671 } else if (!node && !next) {
1672 /*
1673 * We are past the last allocated chunk, set start at
1674 * the end of the last extent.
1675 */
1676 state = rb_entry(prev, struct extent_state, rb_node);
1677 *start_ret = state->end + 1;
1678 *end_ret = -1;
1679 goto out;
1680 } else if (!node) {
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001681 node = next;
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001682 }
Nikolay Borisov1eaebb32019-06-03 13:06:02 +03001683 /*
1684 * At this point 'node' either contains 'start' or start is
1685 * before 'node'
1686 */
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001687 state = rb_entry(node, struct extent_state, rb_node);
Nikolay Borisov1eaebb32019-06-03 13:06:02 +03001688
1689 if (in_range(start, state->start, state->end - state->start + 1)) {
1690 if (state->state & bits) {
1691 /*
1692 * |--range with bits sets--|
1693 * |
1694 * start
1695 */
1696 start = state->end + 1;
1697 } else {
1698 /*
1699 * 'start' falls within a range that doesn't
1700 * have the bits set, so take its start as
1701 * the beginning of the desired range
1702 *
1703 * |--range with bits cleared----|
1704 * |
1705 * start
1706 */
1707 *start_ret = state->start;
1708 break;
1709 }
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001710 } else {
Nikolay Borisov1eaebb32019-06-03 13:06:02 +03001711 /*
1712 * |---prev range---|---hole/unset---|---node range---|
1713 * |
1714 * start
1715 *
1716 * or
1717 *
1718 * |---hole/unset--||--first node--|
1719 * 0 |
1720 * start
1721 */
1722 if (prev) {
1723 state = rb_entry(prev, struct extent_state,
1724 rb_node);
1725 *start_ret = state->end + 1;
1726 } else {
1727 *start_ret = 0;
1728 }
Nikolay Borisov45bfcfc2019-03-27 14:24:17 +02001729 break;
1730 }
1731 }
1732
1733 /*
1734 * Find the longest stretch from start until an entry which has the
1735 * bits set
1736 */
1737 while (1) {
1738 state = rb_entry(node, struct extent_state, rb_node);
1739 if (state->end >= start && !(state->state & bits)) {
1740 *end_ret = state->end;
1741 } else {
1742 *end_ret = state->start - 1;
1743 break;
1744 }
1745
1746 node = rb_next(node);
1747 if (!node)
1748 break;
1749 }
1750out:
1751 spin_unlock(&tree->lock);
1752}
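
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source): looking for a "hole" where the bits are not set. As documented
 * above, the result may start before 'offset' and 'hole_end' may be -1 if
 * the hole extends past the last tracked range, so the caller trims it;
 * 'device_size' below is a stand-in for whatever upper bound applies.
 *
 *	u64 hole_start, hole_end;
 *
 *	find_first_clear_extent_bit(tree, offset, &hole_start, &hole_end,
 *				    EXTENT_DIRTY);
 *	hole_end = min(hole_end, device_size - 1);
 */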
1753
Xiao Guangrong69261c42011-07-14 03:19:45 +00001754/*
Chris Masond352ac62008-09-29 15:18:18 -04001755 * Find a contiguous range of bytes in the file marked as delalloc, not
 1756 * more than 'max_bytes'. 'start' and 'end' are used to return the range.
 1757 *
Lu Fengqi3522e902018-11-29 11:33:38 +08001758 * true is returned if we find something, false if nothing was in the tree.
Chris Masond352ac62008-09-29 15:18:18 -04001759 */
Josef Bacik083e75e2019-09-23 10:05:20 -04001760bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
1761 u64 *end, u64 max_bytes,
1762 struct extent_state **cached_state)
Chris Masond1310b22008-01-24 16:13:08 -05001763{
1764 struct rb_node *node;
1765 struct extent_state *state;
1766 u64 cur_start = *start;
Lu Fengqi3522e902018-11-29 11:33:38 +08001767 bool found = false;
Chris Masond1310b22008-01-24 16:13:08 -05001768 u64 total_bytes = 0;
1769
Chris Masoncad321a2008-12-17 14:51:42 -05001770 spin_lock(&tree->lock);
Chris Masonc8b97812008-10-29 14:49:59 -04001771
Chris Masond1310b22008-01-24 16:13:08 -05001772 /*
1773 * this search will find all the extents that end after
1774 * our range starts.
1775 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001776 node = tree_search(tree, cur_start);
Peter2b114d12008-04-01 11:21:40 -04001777 if (!node) {
Lu Fengqi3522e902018-11-29 11:33:38 +08001778 *end = (u64)-1;
Chris Masond1310b22008-01-24 16:13:08 -05001779 goto out;
1780 }
1781
Chris Masond3977122009-01-05 21:25:51 -05001782 while (1) {
Chris Masond1310b22008-01-24 16:13:08 -05001783 state = rb_entry(node, struct extent_state, rb_node);
Zheng Yan5b21f2e2008-09-26 10:05:38 -04001784 if (found && (state->start != cur_start ||
1785 (state->state & EXTENT_BOUNDARY))) {
Chris Masond1310b22008-01-24 16:13:08 -05001786 goto out;
1787 }
1788 if (!(state->state & EXTENT_DELALLOC)) {
1789 if (!found)
1790 *end = state->end;
1791 goto out;
1792 }
Josef Bacikc2a128d2010-02-02 21:19:11 +00001793 if (!found) {
Chris Masond1310b22008-01-24 16:13:08 -05001794 *start = state->start;
Josef Bacikc2a128d2010-02-02 21:19:11 +00001795 *cached_state = state;
Elena Reshetovab7ac31b2017-03-03 10:55:19 +02001796 refcount_inc(&state->refs);
Josef Bacikc2a128d2010-02-02 21:19:11 +00001797 }
Lu Fengqi3522e902018-11-29 11:33:38 +08001798 found = true;
Chris Masond1310b22008-01-24 16:13:08 -05001799 *end = state->end;
1800 cur_start = state->end + 1;
1801 node = rb_next(node);
Chris Masond1310b22008-01-24 16:13:08 -05001802 total_bytes += state->end - state->start + 1;
Josef Bacik7bf811a52013-10-07 22:11:09 -04001803 if (total_bytes >= max_bytes)
Josef Bacik573aeca2013-08-30 14:38:49 -04001804 break;
Josef Bacik573aeca2013-08-30 14:38:49 -04001805 if (!node)
Chris Masond1310b22008-01-24 16:13:08 -05001806 break;
1807 }
1808out:
Chris Masoncad321a2008-12-17 14:51:42 -05001809 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001810 return found;
1811}
1812
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001813/*
1814 * Process one page for __process_pages_contig().
1815 *
1816 * Return >0 if we hit @page == @locked_page.
1817 * Return 0 if we updated the page status.
 1818 * Return -EAGAIN if we need to try again.
 1819 * (For the PAGE_LOCK case, when the page is no longer dirty or no longer belongs to the mapping.)
1820 */
Qu Wenruoe38992b2021-05-31 16:50:42 +08001821static int process_one_page(struct btrfs_fs_info *fs_info,
1822 struct address_space *mapping,
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001823 struct page *page, struct page *locked_page,
Qu Wenruoe38992b2021-05-31 16:50:42 +08001824 unsigned long page_ops, u64 start, u64 end)
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001825{
Qu Wenruoe38992b2021-05-31 16:50:42 +08001826 u32 len;
1827
1828 ASSERT(end + 1 - start != 0 && end + 1 - start < U32_MAX);
1829 len = end + 1 - start;
1830
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001831 if (page_ops & PAGE_SET_ORDERED)
Qu Wenruob945a462021-05-31 16:50:46 +08001832 btrfs_page_clamp_set_ordered(fs_info, page, start, len);
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001833 if (page_ops & PAGE_SET_ERROR)
Qu Wenruoe38992b2021-05-31 16:50:42 +08001834 btrfs_page_clamp_set_error(fs_info, page, start, len);
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001835 if (page_ops & PAGE_START_WRITEBACK) {
Qu Wenruoe38992b2021-05-31 16:50:42 +08001836 btrfs_page_clamp_clear_dirty(fs_info, page, start, len);
1837 btrfs_page_clamp_set_writeback(fs_info, page, start, len);
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001838 }
1839 if (page_ops & PAGE_END_WRITEBACK)
Qu Wenruoe38992b2021-05-31 16:50:42 +08001840 btrfs_page_clamp_clear_writeback(fs_info, page, start, len);
Qu Wenruoa33a8e92021-05-31 16:50:47 +08001841
1842 if (page == locked_page)
1843 return 1;
1844
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001845 if (page_ops & PAGE_LOCK) {
Qu Wenruo1e1de382021-05-31 16:50:44 +08001846 int ret;
1847
1848 ret = btrfs_page_start_writer_lock(fs_info, page, start, len);
1849 if (ret)
1850 return ret;
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001851 if (!PageDirty(page) || page->mapping != mapping) {
Qu Wenruo1e1de382021-05-31 16:50:44 +08001852 btrfs_page_end_writer_lock(fs_info, page, start, len);
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001853 return -EAGAIN;
1854 }
1855 }
1856 if (page_ops & PAGE_UNLOCK)
Qu Wenruo1e1de382021-05-31 16:50:44 +08001857 btrfs_page_end_writer_lock(fs_info, page, start, len);
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001858 return 0;
1859}
1860
Liu Boda2c7002017-02-10 16:41:05 +01001861static int __process_pages_contig(struct address_space *mapping,
1862 struct page *locked_page,
Qu Wenruo98af9ab2021-05-31 16:50:37 +08001863 u64 start, u64 end, unsigned long page_ops,
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001864 u64 *processed_end)
1865{
Qu Wenruoe38992b2021-05-31 16:50:42 +08001866 struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001867 pgoff_t start_index = start >> PAGE_SHIFT;
1868 pgoff_t end_index = end >> PAGE_SHIFT;
1869 pgoff_t index = start_index;
1870 unsigned long nr_pages = end_index - start_index + 1;
1871 unsigned long pages_processed = 0;
1872 struct page *pages[16];
1873 int err = 0;
1874 int i;
1875
1876 if (page_ops & PAGE_LOCK) {
1877 ASSERT(page_ops == PAGE_LOCK);
1878 ASSERT(processed_end && *processed_end == start);
1879 }
1880
1881 if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
1882 mapping_set_error(mapping, -EIO);
1883
1884 while (nr_pages > 0) {
1885 int found_pages;
1886
1887 found_pages = find_get_pages_contig(mapping, index,
1888 min_t(unsigned long,
1889 nr_pages, ARRAY_SIZE(pages)), pages);
1890 if (found_pages == 0) {
1891 /*
 1892			 * Finding nothing at @index is only expected when we
 1893			 * are going to lock these pages (the PAGE_LOCK case).
1894 */
1895 ASSERT(page_ops & PAGE_LOCK);
1896 err = -EAGAIN;
1897 goto out;
1898 }
1899
1900 for (i = 0; i < found_pages; i++) {
1901 int process_ret;
1902
Qu Wenruoe38992b2021-05-31 16:50:42 +08001903 process_ret = process_one_page(fs_info, mapping,
1904 pages[i], locked_page, page_ops,
1905 start, end);
Qu Wenruoed8f13b2021-05-31 16:50:38 +08001906 if (process_ret < 0) {
1907 for (; i < found_pages; i++)
1908 put_page(pages[i]);
1909 err = -EAGAIN;
1910 goto out;
1911 }
1912 put_page(pages[i]);
1913 pages_processed++;
1914 }
1915 nr_pages -= found_pages;
1916 index += found_pages;
1917 cond_resched();
1918 }
1919out:
1920 if (err && processed_end) {
1921 /*
1922 * Update @processed_end. I know this is awful since it has
1923 * two different return value patterns (inclusive vs exclusive).
1924 *
 1925		 * But the exclusive pattern is necessary if @start is 0, as
 1926		 * otherwise we would underflow and the check against
 1927		 * processed_end would not work as expected.
1928 */
1929 if (pages_processed)
1930 *processed_end = min(end,
1931 ((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
1932 else
1933 *processed_end = start;
1934 }
1935 return err;
1936}
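
/*
 * Worked example for the @processed_end computation above (editor's
 * addition, not part of the original source), assuming 4K pages: with
 * start_index == 2 and pages_processed == 3 the candidate value is
 * ((2 + 3) << 12) - 1 == 20479, i.e. the last byte of page index 4,
 * which is then clamped to 'end' so we never report past the requested
 * range; with pages_processed == 0 it falls back to 'start'.
 */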
Liu Boda2c7002017-02-10 16:41:05 +01001937
Jeff Mahoney143bede2012-03-01 14:56:26 +01001938static noinline void __unlock_for_delalloc(struct inode *inode,
1939 struct page *locked_page,
1940 u64 start, u64 end)
Chris Masonc8b97812008-10-29 14:49:59 -04001941{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001942 unsigned long index = start >> PAGE_SHIFT;
1943 unsigned long end_index = end >> PAGE_SHIFT;
Chris Masonc8b97812008-10-29 14:49:59 -04001944
Liu Bo76c00212017-02-10 16:42:14 +01001945 ASSERT(locked_page);
Chris Masonc8b97812008-10-29 14:49:59 -04001946 if (index == locked_page->index && end_index == index)
Jeff Mahoney143bede2012-03-01 14:56:26 +01001947 return;
Chris Masonc8b97812008-10-29 14:49:59 -04001948
Qu Wenruo98af9ab2021-05-31 16:50:37 +08001949 __process_pages_contig(inode->i_mapping, locked_page, start, end,
Liu Bo76c00212017-02-10 16:42:14 +01001950 PAGE_UNLOCK, NULL);
Chris Masonc8b97812008-10-29 14:49:59 -04001951}
1952
1953static noinline int lock_delalloc_pages(struct inode *inode,
1954 struct page *locked_page,
1955 u64 delalloc_start,
1956 u64 delalloc_end)
1957{
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001958 unsigned long index = delalloc_start >> PAGE_SHIFT;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03001959 unsigned long end_index = delalloc_end >> PAGE_SHIFT;
Qu Wenruo98af9ab2021-05-31 16:50:37 +08001960 u64 processed_end = delalloc_start;
Chris Masonc8b97812008-10-29 14:49:59 -04001961 int ret;
Chris Masonc8b97812008-10-29 14:49:59 -04001962
Liu Bo76c00212017-02-10 16:42:14 +01001963 ASSERT(locked_page);
Chris Masonc8b97812008-10-29 14:49:59 -04001964 if (index == locked_page->index && index == end_index)
1965 return 0;
1966
Qu Wenruo98af9ab2021-05-31 16:50:37 +08001967 ret = __process_pages_contig(inode->i_mapping, locked_page, delalloc_start,
1968 delalloc_end, PAGE_LOCK, &processed_end);
1969 if (ret == -EAGAIN && processed_end > delalloc_start)
Liu Bo76c00212017-02-10 16:42:14 +01001970 __unlock_for_delalloc(inode, locked_page, delalloc_start,
Qu Wenruo98af9ab2021-05-31 16:50:37 +08001971 processed_end);
Chris Masonc8b97812008-10-29 14:49:59 -04001972 return ret;
1973}
1974
1975/*
Lu Fengqi3522e902018-11-29 11:33:38 +08001976 * Find and lock a contiguous range of bytes in the file marked as delalloc, no
Qu Wenruo2749f7e2021-09-27 15:22:07 +08001977 * more than @max_bytes.
Chris Masonc8b97812008-10-29 14:49:59 -04001978 *
Qu Wenruo2749f7e2021-09-27 15:22:07 +08001979 * @start: The original start bytenr to search.
1980 * Will store the extent range start bytenr.
1981 * @end: The original end bytenr of the search range
1982 * Will store the extent range end bytenr.
1983 *
1984 * Return true if we find a delalloc range which starts inside the original
1985 * range, and @start/@end will store the delalloc range start/end.
1986 *
1987 * Return false if we can't find any delalloc range which starts inside the
1988 * original range, and @start/@end will be the non-delalloc range start/end.
Chris Masonc8b97812008-10-29 14:49:59 -04001989 */
Johannes Thumshirnce9f9672018-11-19 10:38:17 +01001990EXPORT_FOR_TESTS
Lu Fengqi3522e902018-11-29 11:33:38 +08001991noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
Josef Bacik294e30f2013-10-09 12:00:56 -04001992 struct page *locked_page, u64 *start,
Nikolay Borisov917aace2018-10-26 14:43:20 +03001993 u64 *end)
Chris Masonc8b97812008-10-29 14:49:59 -04001994{
Goldwyn Rodrigues99780592019-06-21 10:02:54 -05001995 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
Qu Wenruo2749f7e2021-09-27 15:22:07 +08001996 const u64 orig_start = *start;
1997 const u64 orig_end = *end;
Nikolay Borisov917aace2018-10-26 14:43:20 +03001998 u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
Chris Masonc8b97812008-10-29 14:49:59 -04001999 u64 delalloc_start;
2000 u64 delalloc_end;
Lu Fengqi3522e902018-11-29 11:33:38 +08002001 bool found;
Chris Mason9655d292009-09-02 15:22:30 -04002002 struct extent_state *cached_state = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04002003 int ret;
2004 int loops = 0;
2005
Qu Wenruo2749f7e2021-09-27 15:22:07 +08002006 /* Caller should pass a valid @end to indicate the search range end */
2007 ASSERT(orig_end > orig_start);
2008
2009 /* The range should at least cover part of the page */
2010 ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
2011 orig_end <= page_offset(locked_page)));
Chris Masonc8b97812008-10-29 14:49:59 -04002012again:
2013 /* step one, find a bunch of delalloc bytes starting at start */
2014 delalloc_start = *start;
2015 delalloc_end = 0;
Josef Bacik083e75e2019-09-23 10:05:20 -04002016 found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
2017 max_bytes, &cached_state);
Qu Wenruo2749f7e2021-09-27 15:22:07 +08002018 if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
Chris Masonc8b97812008-10-29 14:49:59 -04002019 *start = delalloc_start;
Qu Wenruo2749f7e2021-09-27 15:22:07 +08002020
2021 /* @delalloc_end can be -1, never go beyond @orig_end */
2022 *end = min(delalloc_end, orig_end);
Josef Bacikc2a128d2010-02-02 21:19:11 +00002023 free_extent_state(cached_state);
Lu Fengqi3522e902018-11-29 11:33:38 +08002024 return false;
Chris Masonc8b97812008-10-29 14:49:59 -04002025 }
2026
2027 /*
Chris Mason70b99e62008-10-31 12:46:39 -04002028 * start comes from the offset of locked_page. We have to lock
2029 * pages in order, so we can't process delalloc bytes before
2030 * locked_page
2031 */
Chris Masond3977122009-01-05 21:25:51 -05002032 if (delalloc_start < *start)
Chris Mason70b99e62008-10-31 12:46:39 -04002033 delalloc_start = *start;
Chris Mason70b99e62008-10-31 12:46:39 -04002034
2035 /*
Chris Masonc8b97812008-10-29 14:49:59 -04002036 * make sure to limit the number of pages we try to lock down
Chris Masonc8b97812008-10-29 14:49:59 -04002037 */
Josef Bacik7bf811a52013-10-07 22:11:09 -04002038 if (delalloc_end + 1 - delalloc_start > max_bytes)
2039 delalloc_end = delalloc_start + max_bytes - 1;
Chris Masond3977122009-01-05 21:25:51 -05002040
Chris Masonc8b97812008-10-29 14:49:59 -04002041 /* step two, lock all the pages after the page that has start */
2042 ret = lock_delalloc_pages(inode, locked_page,
2043 delalloc_start, delalloc_end);
Nikolay Borisov9bfd61d2018-10-26 14:43:21 +03002044 ASSERT(!ret || ret == -EAGAIN);
Chris Masonc8b97812008-10-29 14:49:59 -04002045 if (ret == -EAGAIN) {
 2046		/* Some of the pages are gone, let's avoid looping by
2047 * shortening the size of the delalloc range we're searching
2048 */
Chris Mason9655d292009-09-02 15:22:30 -04002049 free_extent_state(cached_state);
Chris Mason7d788742014-05-21 05:49:54 -07002050 cached_state = NULL;
Chris Masonc8b97812008-10-29 14:49:59 -04002051 if (!loops) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002052 max_bytes = PAGE_SIZE;
Chris Masonc8b97812008-10-29 14:49:59 -04002053 loops = 1;
2054 goto again;
2055 } else {
Lu Fengqi3522e902018-11-29 11:33:38 +08002056 found = false;
Chris Masonc8b97812008-10-29 14:49:59 -04002057 goto out_failed;
2058 }
2059 }
Chris Masonc8b97812008-10-29 14:49:59 -04002060
2061 /* step three, lock the state bits for the whole range */
David Sterbaff13db42015-12-03 14:30:40 +01002062 lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04002063
2064 /* then test to make sure it is all still delalloc */
2065 ret = test_range_bit(tree, delalloc_start, delalloc_end,
Chris Mason9655d292009-09-02 15:22:30 -04002066 EXTENT_DELALLOC, 1, cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04002067 if (!ret) {
Chris Mason9655d292009-09-02 15:22:30 -04002068 unlock_extent_cached(tree, delalloc_start, delalloc_end,
David Sterbae43bbe52017-12-12 21:43:52 +01002069 &cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04002070 __unlock_for_delalloc(inode, locked_page,
2071 delalloc_start, delalloc_end);
2072 cond_resched();
2073 goto again;
2074 }
Chris Mason9655d292009-09-02 15:22:30 -04002075 free_extent_state(cached_state);
Chris Masonc8b97812008-10-29 14:49:59 -04002076 *start = delalloc_start;
2077 *end = delalloc_end;
2078out_failed:
2079 return found;
2080}
2081
Nikolay Borisovad7ff172020-06-03 08:55:06 +03002082void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
Nikolay Borisov74e91942019-07-17 16:18:16 +03002083 struct page *locked_page,
Qu Wenruof97e27e2020-11-13 20:51:40 +08002084 u32 clear_bits, unsigned long page_ops)
Liu Bo873695b2017-02-02 17:49:22 -08002085{
Nikolay Borisovad7ff172020-06-03 08:55:06 +03002086 clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL);
Liu Bo873695b2017-02-02 17:49:22 -08002087
Nikolay Borisovad7ff172020-06-03 08:55:06 +03002088 __process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
Qu Wenruo98af9ab2021-05-31 16:50:37 +08002089 start, end, page_ops, NULL);
Liu Bo873695b2017-02-02 17:49:22 -08002090}
2091
Chris Masond352ac62008-09-29 15:18:18 -04002092/*
 2093 * Count the number of bytes in the tree that have the given bit(s)
2094 * set. This can be fairly slow, except for EXTENT_DIRTY which is
2095 * cached. The total number found is returned.
2096 */
Chris Masond1310b22008-01-24 16:13:08 -05002097u64 count_range_bits(struct extent_io_tree *tree,
2098 u64 *start, u64 search_end, u64 max_bytes,
Qu Wenruof97e27e2020-11-13 20:51:40 +08002099 u32 bits, int contig)
Chris Masond1310b22008-01-24 16:13:08 -05002100{
2101 struct rb_node *node;
2102 struct extent_state *state;
2103 u64 cur_start = *start;
2104 u64 total_bytes = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05002105 u64 last = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002106 int found = 0;
2107
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05302108 if (WARN_ON(search_end <= cur_start))
Chris Masond1310b22008-01-24 16:13:08 -05002109 return 0;
Chris Masond1310b22008-01-24 16:13:08 -05002110
Chris Masoncad321a2008-12-17 14:51:42 -05002111 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002112 if (cur_start == 0 && bits == EXTENT_DIRTY) {
2113 total_bytes = tree->dirty_bytes;
2114 goto out;
2115 }
2116 /*
2117 * this search will find all the extents that end after
2118 * our range starts.
2119 */
Chris Mason80ea96b2008-02-01 14:51:59 -05002120 node = tree_search(tree, cur_start);
Chris Masond3977122009-01-05 21:25:51 -05002121 if (!node)
Chris Masond1310b22008-01-24 16:13:08 -05002122 goto out;
Chris Masond1310b22008-01-24 16:13:08 -05002123
Chris Masond3977122009-01-05 21:25:51 -05002124 while (1) {
Chris Masond1310b22008-01-24 16:13:08 -05002125 state = rb_entry(node, struct extent_state, rb_node);
2126 if (state->start > search_end)
2127 break;
Chris Masonec29ed52011-02-23 16:23:20 -05002128 if (contig && found && state->start > last + 1)
2129 break;
2130 if (state->end >= cur_start && (state->state & bits) == bits) {
Chris Masond1310b22008-01-24 16:13:08 -05002131 total_bytes += min(search_end, state->end) + 1 -
2132 max(cur_start, state->start);
2133 if (total_bytes >= max_bytes)
2134 break;
2135 if (!found) {
Josef Bacikaf60bed2011-05-04 11:11:17 -04002136 *start = max(cur_start, state->start);
Chris Masond1310b22008-01-24 16:13:08 -05002137 found = 1;
2138 }
Chris Masonec29ed52011-02-23 16:23:20 -05002139 last = state->end;
2140 } else if (contig && found) {
2141 break;
Chris Masond1310b22008-01-24 16:13:08 -05002142 }
2143 node = rb_next(node);
2144 if (!node)
2145 break;
2146 }
2147out:
Chris Masoncad321a2008-12-17 14:51:42 -05002148 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002149 return total_bytes;
2150}
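
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source), mirroring how clean_io_failure() below probes the failure tree
 * (an assumed caller-provided 'failure_tree'): count at most one dirty
 * byte at or after '*first' to learn whether any failure record exists.
 *
 *	u64 first = 0;
 *	u64 bytes;
 *
 *	bytes = count_range_bits(failure_tree, &first, (u64)-1, 1,
 *				 EXTENT_DIRTY, 0);
 *	if (!bytes)
 *		... nothing recorded ...
 */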
Christoph Hellwigb2950862008-12-02 09:54:17 -05002151
Chris Masond352ac62008-09-29 15:18:18 -04002152/*
 2153 * Set the failure record for a given byte offset in the tree. If there isn't
 2154 * an extent_state there already, -ENOENT is returned.
2155 */
Josef Bacikb3f167a2019-09-23 10:05:21 -04002156int set_state_failrec(struct extent_io_tree *tree, u64 start,
2157 struct io_failure_record *failrec)
Chris Masond1310b22008-01-24 16:13:08 -05002158{
2159 struct rb_node *node;
2160 struct extent_state *state;
2161 int ret = 0;
2162
Chris Masoncad321a2008-12-17 14:51:42 -05002163 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002164 /*
2165 * this search will find all the extents that end after
2166 * our range starts.
2167 */
Chris Mason80ea96b2008-02-01 14:51:59 -05002168 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04002169 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05002170 ret = -ENOENT;
2171 goto out;
2172 }
2173 state = rb_entry(node, struct extent_state, rb_node);
2174 if (state->start != start) {
2175 ret = -ENOENT;
2176 goto out;
2177 }
David Sterba47dc1962016-02-11 13:24:13 +01002178 state->failrec = failrec;
Chris Masond1310b22008-01-24 16:13:08 -05002179out:
Chris Masoncad321a2008-12-17 14:51:42 -05002180 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002181 return ret;
2182}
2183
Nikolay Borisov2279a272020-07-02 15:23:28 +03002184struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start)
Chris Masond1310b22008-01-24 16:13:08 -05002185{
2186 struct rb_node *node;
2187 struct extent_state *state;
Nikolay Borisov2279a272020-07-02 15:23:28 +03002188 struct io_failure_record *failrec;
Chris Masond1310b22008-01-24 16:13:08 -05002189
Chris Masoncad321a2008-12-17 14:51:42 -05002190 spin_lock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002191 /*
2192 * this search will find all the extents that end after
2193 * our range starts.
2194 */
Chris Mason80ea96b2008-02-01 14:51:59 -05002195 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04002196 if (!node) {
Nikolay Borisov2279a272020-07-02 15:23:28 +03002197 failrec = ERR_PTR(-ENOENT);
Chris Masond1310b22008-01-24 16:13:08 -05002198 goto out;
2199 }
2200 state = rb_entry(node, struct extent_state, rb_node);
2201 if (state->start != start) {
Nikolay Borisov2279a272020-07-02 15:23:28 +03002202 failrec = ERR_PTR(-ENOENT);
Chris Masond1310b22008-01-24 16:13:08 -05002203 goto out;
2204 }
Nikolay Borisov2279a272020-07-02 15:23:28 +03002205
2206 failrec = state->failrec;
Chris Masond1310b22008-01-24 16:13:08 -05002207out:
Chris Masoncad321a2008-12-17 14:51:42 -05002208 spin_unlock(&tree->lock);
Nikolay Borisov2279a272020-07-02 15:23:28 +03002209 return failrec;
Chris Masond1310b22008-01-24 16:13:08 -05002210}
2211
2212/*
 2213 * Searches a range in the state tree for a given mask.
Chris Mason70dec802008-01-29 09:59:12 -05002214 * If 'filled' == 1, this returns 1 only if every extent in the range
Chris Masond1310b22008-01-24 16:13:08 -05002215 * has the bits set. Otherwise, 1 is returned if any bit in the
2216 * range is found set.
2217 */
2218int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
Qu Wenruof97e27e2020-11-13 20:51:40 +08002219 u32 bits, int filled, struct extent_state *cached)
Chris Masond1310b22008-01-24 16:13:08 -05002220{
2221 struct extent_state *state = NULL;
2222 struct rb_node *node;
2223 int bitset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002224
Chris Masoncad321a2008-12-17 14:51:42 -05002225 spin_lock(&tree->lock);
Filipe Manana27a35072014-07-06 20:09:59 +01002226 if (cached && extent_state_in_tree(cached) && cached->start <= start &&
Josef Bacikdf98b6e2011-06-20 14:53:48 -04002227 cached->end > start)
Chris Mason9655d292009-09-02 15:22:30 -04002228 node = &cached->rb_node;
2229 else
2230 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -05002231 while (node && start <= end) {
2232 state = rb_entry(node, struct extent_state, rb_node);
2233
2234 if (filled && state->start > start) {
2235 bitset = 0;
2236 break;
2237 }
2238
2239 if (state->start > end)
2240 break;
2241
2242 if (state->state & bits) {
2243 bitset = 1;
2244 if (!filled)
2245 break;
2246 } else if (filled) {
2247 bitset = 0;
2248 break;
2249 }
Chris Mason46562ce2009-09-23 20:23:16 -04002250
2251 if (state->end == (u64)-1)
2252 break;
2253
Chris Masond1310b22008-01-24 16:13:08 -05002254 start = state->end + 1;
2255 if (start > end)
2256 break;
2257 node = rb_next(node);
2258 if (!node) {
2259 if (filled)
2260 bitset = 0;
2261 break;
2262 }
2263 }
Chris Masoncad321a2008-12-17 14:51:42 -05002264 spin_unlock(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05002265 return bitset;
2266}
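
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * source), matching the check in find_lock_delalloc_range() above: with
 * filled == 1 the whole range must still be delalloc for this to return 1.
 *
 *	if (test_range_bit(tree, delalloc_start, delalloc_end,
 *			   EXTENT_DELALLOC, 1, cached_state))
 *		... the entire range is still marked delalloc ...
 */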
Chris Masond1310b22008-01-24 16:13:08 -05002267
Josef Bacik7870d082017-05-05 11:57:15 -04002268int free_io_failure(struct extent_io_tree *failure_tree,
2269 struct extent_io_tree *io_tree,
2270 struct io_failure_record *rec)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002271{
2272 int ret;
2273 int err = 0;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002274
David Sterba47dc1962016-02-11 13:24:13 +01002275 set_state_failrec(failure_tree, rec->start, NULL);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002276 ret = clear_extent_bits(failure_tree, rec->start,
2277 rec->start + rec->len - 1,
David Sterba91166212016-04-26 23:54:39 +02002278 EXTENT_LOCKED | EXTENT_DIRTY);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002279 if (ret)
2280 err = ret;
2281
Josef Bacik7870d082017-05-05 11:57:15 -04002282 ret = clear_extent_bits(io_tree, rec->start,
David Woodhouse53b381b2013-01-29 18:40:14 -05002283 rec->start + rec->len - 1,
David Sterba91166212016-04-26 23:54:39 +02002284 EXTENT_DAMAGED);
David Woodhouse53b381b2013-01-29 18:40:14 -05002285 if (ret && !err)
2286 err = ret;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002287
2288 kfree(rec);
2289 return err;
2290}
2291
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002292/*
 2293 * This bypasses the standard btrfs submit functions deliberately, as
 2294 * the standard behavior is to write all copies in a raid setup. Here we only
 2295 * want to write the one bad copy, so we do the mapping for ourselves and issue
 2296 * submit_bio directly.
Stefan Behrens3ec706c2012-11-05 15:46:42 +01002297 * To avoid any synchronization issues, wait for the data after writing, which
 2298 * actually prevents the read that triggered the error from finishing.
 2299 * Currently, there can be no more than two copies of every data bit. Thus,
 2300 * exactly one rewrite is required.
2301 */
Qu Wenruo38d5e542021-09-03 20:45:14 +08002302static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
2303 u64 length, u64 logical, struct page *page,
2304 unsigned int pg_offset, int mirror_num)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002305{
2306 struct bio *bio;
2307 struct btrfs_device *dev;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002308 u64 map_length = 0;
2309 u64 sector;
Qu Wenruo4c664612021-09-15 15:17:16 +08002310 struct btrfs_io_context *bioc = NULL;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002311 int ret;
2312
Linus Torvalds1751e8a2017-11-27 13:05:09 -08002313 ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002314 BUG_ON(!mirror_num);
2315
Johannes Thumshirn554aed72021-12-07 06:28:36 -08002316 if (btrfs_repair_one_zone(fs_info, logical))
2317 return 0;
Naohiro Aotaf7ef5282021-02-04 19:22:16 +09002318
Qu Wenruoc3a3b192021-09-15 15:17:18 +08002319 bio = btrfs_bio_alloc(1);
Kent Overstreet4f024f32013-10-11 15:44:27 -07002320 bio->bi_iter.bi_size = 0;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002321 map_length = length;
2322
Filipe Mananab5de8d02016-05-27 22:21:27 +01002323 /*
Qu Wenruo4c664612021-09-15 15:17:16 +08002324 * Avoid races with device replace and make sure our bioc has devices
Filipe Mananab5de8d02016-05-27 22:21:27 +01002325	 * associated with its stripes that don't go away while we are doing the
2326 * read repair operation.
2327 */
2328 btrfs_bio_counter_inc_blocked(fs_info);
Nikolay Borisove4ff5fb2017-07-19 10:48:42 +03002329 if (btrfs_is_parity_mirror(fs_info, logical, length)) {
Liu Boc7253282017-03-29 10:53:58 -07002330 /*
2331 * Note that we don't use BTRFS_MAP_WRITE because it's supposed
2332 * to update all raid stripes, but here we just want to correct
2333 * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
2334 * stripe's dev and sector.
2335 */
2336 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
Qu Wenruo4c664612021-09-15 15:17:16 +08002337 &map_length, &bioc, 0);
Liu Boc7253282017-03-29 10:53:58 -07002338 if (ret) {
2339 btrfs_bio_counter_dec(fs_info);
2340 bio_put(bio);
2341 return -EIO;
2342 }
Qu Wenruo4c664612021-09-15 15:17:16 +08002343 ASSERT(bioc->mirror_num == 1);
Liu Boc7253282017-03-29 10:53:58 -07002344 } else {
2345 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
Qu Wenruo4c664612021-09-15 15:17:16 +08002346 &map_length, &bioc, mirror_num);
Liu Boc7253282017-03-29 10:53:58 -07002347 if (ret) {
2348 btrfs_bio_counter_dec(fs_info);
2349 bio_put(bio);
2350 return -EIO;
2351 }
Qu Wenruo4c664612021-09-15 15:17:16 +08002352 BUG_ON(mirror_num != bioc->mirror_num);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002353 }
Liu Boc7253282017-03-29 10:53:58 -07002354
Qu Wenruo4c664612021-09-15 15:17:16 +08002355 sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
Kent Overstreet4f024f32013-10-11 15:44:27 -07002356 bio->bi_iter.bi_sector = sector;
Qu Wenruo4c664612021-09-15 15:17:16 +08002357 dev = bioc->stripes[bioc->mirror_num - 1].dev;
2358 btrfs_put_bioc(bioc);
Anand Jainebbede42017-12-04 12:54:52 +08002359 if (!dev || !dev->bdev ||
2360 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
Filipe Mananab5de8d02016-05-27 22:21:27 +01002361 btrfs_bio_counter_dec(fs_info);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002362 bio_put(bio);
2363 return -EIO;
2364 }
Christoph Hellwig74d46992017-08-23 19:10:32 +02002365 bio_set_dev(bio, dev->bdev);
Christoph Hellwig70fd7612016-11-01 07:40:10 -06002366 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
Miao Xieffdd2012014-09-12 18:44:00 +08002367 bio_add_page(bio, page, length, pg_offset);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002368
Mike Christie4e49ea42016-06-05 14:31:41 -05002369 if (btrfsic_submit_bio_wait(bio)) {
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002370 /* try to remap that extent elsewhere? */
Filipe Mananab5de8d02016-05-27 22:21:27 +01002371 btrfs_bio_counter_dec(fs_info);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002372 bio_put(bio);
Stefan Behrens442a4f62012-05-25 16:06:08 +02002373 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002374 return -EIO;
2375 }
2376
David Sterbab14af3b2015-10-08 10:43:10 +02002377 btrfs_info_rl_in_rcu(fs_info,
2378 "read error corrected: ino %llu off %llu (dev %s sector %llu)",
Josef Bacik6ec656b2017-05-05 11:57:14 -04002379 ino, start,
Miao Xie1203b682014-09-12 18:44:01 +08002380 rcu_str_deref(dev->name), sector);
Filipe Mananab5de8d02016-05-27 22:21:27 +01002381 btrfs_bio_counter_dec(fs_info);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002382 bio_put(bio);
2383 return 0;
2384}
2385
David Sterba2b489662020-04-29 03:04:10 +02002386int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
Josef Bacikea466792012-03-26 21:57:36 -04002387{
David Sterba20a1fbf92019-03-20 11:23:44 +01002388 struct btrfs_fs_info *fs_info = eb->fs_info;
Josef Bacikea466792012-03-26 21:57:36 -04002389 u64 start = eb->start;
David Sterbacc5e31a2018-03-01 18:20:27 +01002390 int i, num_pages = num_extent_pages(eb);
Chris Masond95603b2012-04-12 15:55:15 -04002391 int ret = 0;
Josef Bacikea466792012-03-26 21:57:36 -04002392
David Howellsbc98a422017-07-17 08:45:34 +01002393 if (sb_rdonly(fs_info->sb))
Ilya Dryomov908960c2013-11-03 19:06:39 +02002394 return -EROFS;
2395
Josef Bacikea466792012-03-26 21:57:36 -04002396 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02002397 struct page *p = eb->pages[i];
Miao Xie1203b682014-09-12 18:44:01 +08002398
Josef Bacik6ec656b2017-05-05 11:57:14 -04002399 ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
Miao Xie1203b682014-09-12 18:44:01 +08002400 start - page_offset(p), mirror_num);
Josef Bacikea466792012-03-26 21:57:36 -04002401 if (ret)
2402 break;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03002403 start += PAGE_SIZE;
Josef Bacikea466792012-03-26 21:57:36 -04002404 }
2405
2406 return ret;
2407}
2408
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002409/*
2410 * each time an IO finishes, we do a fast check in the IO failure tree
2411 * to see if we need to process or clean up an io_failure_record
2412 */
Josef Bacik7870d082017-05-05 11:57:15 -04002413int clean_io_failure(struct btrfs_fs_info *fs_info,
2414 struct extent_io_tree *failure_tree,
2415 struct extent_io_tree *io_tree, u64 start,
2416 struct page *page, u64 ino, unsigned int pg_offset)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002417{
2418 u64 private;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002419 struct io_failure_record *failrec;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002420 struct extent_state *state;
2421 int num_copies;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002422 int ret;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002423
2424 private = 0;
Josef Bacik7870d082017-05-05 11:57:15 -04002425 ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
2426 EXTENT_DIRTY, 0);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002427 if (!ret)
2428 return 0;
2429
Nikolay Borisov2279a272020-07-02 15:23:28 +03002430 failrec = get_state_failrec(failure_tree, start);
2431 if (IS_ERR(failrec))
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002432 return 0;
2433
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002434 BUG_ON(!failrec->this_mirror);
2435
David Howellsbc98a422017-07-17 08:45:34 +01002436 if (sb_rdonly(fs_info->sb))
Ilya Dryomov908960c2013-11-03 19:06:39 +02002437 goto out;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002438
Josef Bacik7870d082017-05-05 11:57:15 -04002439 spin_lock(&io_tree->lock);
2440 state = find_first_extent_bit_state(io_tree,
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002441 failrec->start,
2442 EXTENT_LOCKED);
Josef Bacik7870d082017-05-05 11:57:15 -04002443 spin_unlock(&io_tree->lock);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002444
Miao Xie883d0de2013-07-25 19:22:35 +08002445 if (state && state->start <= failrec->start &&
2446 state->end >= failrec->start + failrec->len - 1) {
Stefan Behrens3ec706c2012-11-05 15:46:42 +01002447 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2448 failrec->len);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002449 if (num_copies > 1) {
Josef Bacik7870d082017-05-05 11:57:15 -04002450 repair_io_failure(fs_info, ino, start, failrec->len,
2451 failrec->logical, page, pg_offset,
2452 failrec->failed_mirror);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002453 }
2454 }
2455
2456out:
Josef Bacik7870d082017-05-05 11:57:15 -04002457 free_io_failure(failure_tree, io_tree, failrec);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002458
Miao Xie454ff3d2014-09-12 18:43:58 +08002459 return 0;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002460}
2461
Miao Xief6124962014-09-12 18:44:04 +08002462/*
2463 * Can be called when
2464 * - hold extent lock
2465 * - under ordered extent
2466 * - the inode is freeing
2467 */
Nikolay Borisov7ab79562017-02-20 13:50:57 +02002468void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
Miao Xief6124962014-09-12 18:44:04 +08002469{
Nikolay Borisov7ab79562017-02-20 13:50:57 +02002470 struct extent_io_tree *failure_tree = &inode->io_failure_tree;
Miao Xief6124962014-09-12 18:44:04 +08002471 struct io_failure_record *failrec;
2472 struct extent_state *state, *next;
2473
2474 if (RB_EMPTY_ROOT(&failure_tree->state))
2475 return;
2476
2477 spin_lock(&failure_tree->lock);
2478 state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
2479 while (state) {
2480 if (state->start > end)
2481 break;
2482
2483 ASSERT(state->end <= end);
2484
2485 next = next_state(state);
2486
David Sterba47dc1962016-02-11 13:24:13 +01002487 failrec = state->failrec;
Miao Xief6124962014-09-12 18:44:04 +08002488 free_extent_state(state);
2489 kfree(failrec);
2490
2491 state = next;
2492 }
2493 spin_unlock(&failure_tree->lock);
2494}
2495
Nikolay Borisov35263022020-07-02 15:23:29 +03002496static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
Qu Wenruo150e4b02021-05-03 10:08:55 +08002497 u64 start)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002498{
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002499 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Miao Xie2fe63032014-09-12 18:43:59 +08002500 struct io_failure_record *failrec;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002501 struct extent_map *em;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002502 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
2503 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2504 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
Qu Wenruo150e4b02021-05-03 10:08:55 +08002505 const u32 sectorsize = fs_info->sectorsize;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002506 int ret;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002507 u64 logical;
2508
Nikolay Borisov2279a272020-07-02 15:23:28 +03002509 failrec = get_state_failrec(failure_tree, start);
Nikolay Borisov35263022020-07-02 15:23:29 +03002510 if (!IS_ERR(failrec)) {
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002511 btrfs_debug(fs_info,
Qu Wenruo12458352021-05-03 10:08:56 +08002512 "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
2513 failrec->logical, failrec->start, failrec->len);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002514 /*
2515 * When data can be on disk in more than two copies, add to failrec here
2516 * (e.g. with a list for failed_mirror) to make
2517 * clean_io_failure() clean all those errors at once.
2518 */
Nikolay Borisov35263022020-07-02 15:23:29 +03002519
2520 return failrec;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002521 }
Miao Xie2fe63032014-09-12 18:43:59 +08002522
Nikolay Borisov35263022020-07-02 15:23:29 +03002523 failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
2524 if (!failrec)
2525 return ERR_PTR(-ENOMEM);
Miao Xie2fe63032014-09-12 18:43:59 +08002526
Nikolay Borisov35263022020-07-02 15:23:29 +03002527 failrec->start = start;
Qu Wenruo150e4b02021-05-03 10:08:55 +08002528 failrec->len = sectorsize;
Nikolay Borisov35263022020-07-02 15:23:29 +03002529 failrec->this_mirror = 0;
2530 failrec->bio_flags = 0;
Nikolay Borisov35263022020-07-02 15:23:29 +03002531
2532 read_lock(&em_tree->lock);
2533 em = lookup_extent_mapping(em_tree, start, failrec->len);
2534 if (!em) {
2535 read_unlock(&em_tree->lock);
2536 kfree(failrec);
2537 return ERR_PTR(-EIO);
2538 }
2539
2540 if (em->start > start || em->start + em->len <= start) {
2541 free_extent_map(em);
2542 em = NULL;
2543 }
2544 read_unlock(&em_tree->lock);
2545 if (!em) {
2546 kfree(failrec);
2547 return ERR_PTR(-EIO);
2548 }
2549
2550 logical = start - em->start;
2551 logical = em->block_start + logical;
2552 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2553 logical = em->block_start;
2554 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
2555 extent_set_compress_type(&failrec->bio_flags, em->compress_type);
2556 }
2557
2558 btrfs_debug(fs_info,
2559 "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
2560 logical, start, failrec->len);
2561
2562 failrec->logical = logical;
2563 free_extent_map(em);
2564
2565 /* Set the bits in the private failure tree */
Qu Wenruo150e4b02021-05-03 10:08:55 +08002566 ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
Nikolay Borisov35263022020-07-02 15:23:29 +03002567 EXTENT_LOCKED | EXTENT_DIRTY);
2568 if (ret >= 0) {
2569 ret = set_state_failrec(failure_tree, start, failrec);
2570 /* Set the bits in the inode's tree */
Qu Wenruo150e4b02021-05-03 10:08:55 +08002571 ret = set_extent_bits(tree, start, start + sectorsize - 1,
2572 EXTENT_DAMAGED);
Nikolay Borisov35263022020-07-02 15:23:29 +03002573 } else if (ret < 0) {
2574 kfree(failrec);
2575 return ERR_PTR(ret);
2576 }
2577
2578 return failrec;
Miao Xie2fe63032014-09-12 18:43:59 +08002579}
2580
Qu Wenruo12458352021-05-03 10:08:56 +08002581static bool btrfs_check_repairable(struct inode *inode,
Omar Sandovalce06d3e2020-04-16 14:46:18 -07002582 struct io_failure_record *failrec,
2583 int failed_mirror)
Miao Xie2fe63032014-09-12 18:43:59 +08002584{
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002585 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Miao Xie2fe63032014-09-12 18:43:59 +08002586 int num_copies;
2587
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002588 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002589 if (num_copies == 1) {
2590 /*
2591 * we only have a single copy of the data, so don't bother with
2592 * all the retry and error correction code that follows. no
2593 * matter what the error is, it is very likely to persist.
2594 */
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002595 btrfs_debug(fs_info,
2596 "Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2597 num_copies, failrec->this_mirror, failed_mirror);
Liu Boc3cfb652017-07-13 15:00:50 -07002598 return false;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002599 }
2600
Qu Wenruo12458352021-05-03 10:08:56 +08002601 /* The failure record should only contain one sector */
2602 ASSERT(failrec->len == fs_info->sectorsize);
2603
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002604 /*
Qu Wenruo12458352021-05-03 10:08:56 +08002605 * There are two premises:
2606 * a) deliver good data to the caller
2607 * b) correct the bad sectors on disk
2608 *
2609 * Since we're only doing repair for one sector, we only need to get
2610 * a good copy of the failed sector and if we succeed, we have setup
2611 * everything for repair_io_failure to do the rest for us.
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002612 */
Qu Wenruo12458352021-05-03 10:08:56 +08002613 failrec->failed_mirror = failed_mirror;
2614 failrec->this_mirror++;
2615 if (failrec->this_mirror == failed_mirror)
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002616 failrec->this_mirror++;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002617
Miao Xiefacc8a222013-07-25 19:22:34 +08002618 if (failrec->this_mirror > num_copies) {
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002619 btrfs_debug(fs_info,
2620 "Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2621 num_copies, failrec->this_mirror, failed_mirror);
Liu Boc3cfb652017-07-13 15:00:50 -07002622 return false;
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002623 }
2624
Liu Boc3cfb652017-07-13 15:00:50 -07002625 return true;
Miao Xie2fe63032014-09-12 18:43:59 +08002626}
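
/*
 * Illustrative walk-through (hypothetical numbers, not from the original
 * source) of the mirror rotation above: assume num_copies == 3 and the
 * original read used mirror 1.
 *
 *	btrfs_check_repairable(inode, failrec, 1)  advances this_mirror 0 -> 2
 *						   (mirror 1 is skipped)
 *	btrfs_check_repairable(inode, failrec, 2)  advances this_mirror 2 -> 3
 *	btrfs_check_repairable(inode, failrec, 3)  this_mirror would exceed
 *						   num_copies, returns false
 *
 * So every remaining copy is tried exactly once before the error is finally
 * reported to the caller.
 */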
2627
Qu Wenruo150e4b02021-05-03 10:08:55 +08002628int btrfs_repair_one_sector(struct inode *inode,
2629 struct bio *failed_bio, u32 bio_offset,
2630 struct page *page, unsigned int pgoff,
2631 u64 start, int failed_mirror,
2632 submit_bio_hook_t *submit_bio_hook)
Miao Xie2fe63032014-09-12 18:43:59 +08002633{
2634 struct io_failure_record *failrec;
Omar Sandoval77d5d682020-04-16 14:46:25 -07002635 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Miao Xie2fe63032014-09-12 18:43:59 +08002636 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
Josef Bacik7870d082017-05-05 11:57:15 -04002637 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
Qu Wenruoc3a3b192021-09-15 15:17:18 +08002638 struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
Qu Wenruo7ffd27e2020-12-02 14:47:58 +08002639 const int icsum = bio_offset >> fs_info->sectorsize_bits;
Omar Sandoval77d5d682020-04-16 14:46:25 -07002640 struct bio *repair_bio;
Qu Wenruoc3a3b192021-09-15 15:17:18 +08002641 struct btrfs_bio *repair_bbio;
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002642 blk_status_t status;
Miao Xie2fe63032014-09-12 18:43:59 +08002643
Omar Sandoval77d5d682020-04-16 14:46:25 -07002644 btrfs_debug(fs_info,
2645 "repair read error: read error at %llu", start);
Miao Xie2fe63032014-09-12 18:43:59 +08002646
Mike Christie1f7ad752016-06-05 14:31:51 -05002647 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
Miao Xie2fe63032014-09-12 18:43:59 +08002648
Qu Wenruo150e4b02021-05-03 10:08:55 +08002649 failrec = btrfs_get_io_failure_record(inode, start);
Nikolay Borisov35263022020-07-02 15:23:29 +03002650 if (IS_ERR(failrec))
Qu Wenruo150e4b02021-05-03 10:08:55 +08002651 return PTR_ERR(failrec);
Miao Xie2fe63032014-09-12 18:43:59 +08002652
2654 if (!btrfs_check_repairable(inode, failrec, failed_mirror)) {
Josef Bacik7870d082017-05-05 11:57:15 -04002655 free_io_failure(failure_tree, tree, failrec);
Qu Wenruo150e4b02021-05-03 10:08:55 +08002656 return -EIO;
Miao Xie2fe63032014-09-12 18:43:59 +08002657 }
2658
Qu Wenruoc3a3b192021-09-15 15:17:18 +08002659 repair_bio = btrfs_bio_alloc(1);
2660 repair_bbio = btrfs_bio(repair_bio);
Omar Sandoval77d5d682020-04-16 14:46:25 -07002661 repair_bio->bi_opf = REQ_OP_READ;
Omar Sandoval77d5d682020-04-16 14:46:25 -07002662 repair_bio->bi_end_io = failed_bio->bi_end_io;
2663 repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
2664 repair_bio->bi_private = failed_bio->bi_private;
Miao Xie2fe63032014-09-12 18:43:59 +08002665
Qu Wenruoc3a3b192021-09-15 15:17:18 +08002666 if (failed_bbio->csum) {
David Sterba223486c2020-07-02 11:27:30 +02002667 const u32 csum_size = fs_info->csum_size;
Omar Sandoval77d5d682020-04-16 14:46:25 -07002668
Qu Wenruoc3a3b192021-09-15 15:17:18 +08002669 repair_bbio->csum = repair_bbio->csum_inline;
2670 memcpy(repair_bbio->csum,
2671 failed_bbio->csum + csum_size * icsum, csum_size);
Omar Sandoval77d5d682020-04-16 14:46:25 -07002672 }
2673
2674 bio_add_page(repair_bio, page, failrec->len, pgoff);
Qu Wenruoc3a3b192021-09-15 15:17:18 +08002675 repair_bbio->iter = repair_bio->bi_iter;
Miao Xie2fe63032014-09-12 18:43:59 +08002676
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002677 btrfs_debug(btrfs_sb(inode->i_sb),
Qu Wenruo12458352021-05-03 10:08:56 +08002678 "repair read error: submitting new read to mirror %d",
2679 failrec->this_mirror);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002680
Omar Sandoval77d5d682020-04-16 14:46:25 -07002681 status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
2682 failrec->bio_flags);
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002683 if (status) {
Josef Bacik7870d082017-05-05 11:57:15 -04002684 free_io_failure(failure_tree, tree, failrec);
Omar Sandoval77d5d682020-04-16 14:46:25 -07002685 bio_put(repair_bio);
Miao Xie6c387ab2014-09-12 18:43:57 +08002686 }
Qu Wenruo150e4b02021-05-03 10:08:55 +08002687 return blk_status_to_errno(status);
2688}
2689
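/*
 * Finish the read of a [start, start + len) range of @page: on success the
 * range is marked uptodate (subject to the fsverity check for verity-enabled
 * inodes), otherwise the error bit is set.  The page is then unlocked for
 * regular sectorsize, or the subpage reader count is dropped for subpage
 * filesystems.
 */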
2690static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
2691{
2692 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
2693
2694 ASSERT(page_offset(page) <= start &&
2695 start + len <= page_offset(page) + PAGE_SIZE);
2696
Qu Wenruo150e4b02021-05-03 10:08:55 +08002697 if (uptodate) {
Boris Burkov14605402021-06-30 13:01:49 -07002698 if (fsverity_active(page->mapping->host) &&
2699 !PageError(page) &&
2700 !PageUptodate(page) &&
2701 start < i_size_read(page->mapping->host) &&
2702 !fsverity_verify_page(page)) {
2703 btrfs_page_set_error(fs_info, page, start, len);
2704 } else {
2705 btrfs_page_set_uptodate(fs_info, page, start, len);
2706 }
Qu Wenruo150e4b02021-05-03 10:08:55 +08002707 } else {
2708 btrfs_page_clear_uptodate(fs_info, page, start, len);
2709 btrfs_page_set_error(fs_info, page, start, len);
2710 }
2711
2712 if (fs_info->sectorsize == PAGE_SIZE)
2713 unlock_page(page);
Qu Wenruo3d078ef2021-06-07 17:02:58 +08002714 else
Qu Wenruo150e4b02021-05-03 10:08:55 +08002715 btrfs_subpage_end_reader(fs_info, page, start, len);
2716}
2717
2718static blk_status_t submit_read_repair(struct inode *inode,
2719 struct bio *failed_bio, u32 bio_offset,
2720 struct page *page, unsigned int pgoff,
2721 u64 start, u64 end, int failed_mirror,
2722 unsigned int error_bitmap,
2723 submit_bio_hook_t *submit_bio_hook)
2724{
2725 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2726 const u32 sectorsize = fs_info->sectorsize;
2727 const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
2728 int error = 0;
2729 int i;
2730
2731 BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
2732
2733 /* We're here because we had some read errors or csum mismatch */
2734 ASSERT(error_bitmap);
2735
2736 /*
2737 * We only get called on buffered IO, thus page must be mapped and bio
2738 * must not be cloned.
2739 */
2740 ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
2741
2742 /* Iterate through all the sectors in the range */
2743 for (i = 0; i < nr_bits; i++) {
2744 const unsigned int offset = i * sectorsize;
2745 struct extent_state *cached = NULL;
2746 bool uptodate = false;
2747 int ret;
2748
2749 if (!(error_bitmap & (1U << i))) {
2750 /*
2751 * This sector has no error, just end the page read
2752 * and unlock the range.
2753 */
2754 uptodate = true;
2755 goto next;
2756 }
2757
2758 ret = btrfs_repair_one_sector(inode, failed_bio,
2759 bio_offset + offset,
2760 page, pgoff + offset, start + offset,
2761 failed_mirror, submit_bio_hook);
2762 if (!ret) {
2763 /*
2764 * We have submitted the read repair, the page release
2765 * will be handled by the endio function of the
2766 * submitted repair bio.
2767 * Thus we don't need to do any thing here.
2768 */
2769 continue;
2770 }
2771 /*
2772 * Repair failed, just record the error but still continue,
2773 * otherwise the remaining sectors will not be properly unlocked.
2774 */
2775 if (!error)
2776 error = ret;
2777next:
2778 end_page_read(page, uptodate, start + offset, sectorsize);
2779 if (uptodate)
2780 set_extent_uptodate(&BTRFS_I(inode)->io_tree,
2781 start + offset,
2782 start + offset + sectorsize - 1,
2783 &cached, GFP_ATOMIC);
2784 unlock_extent_cached_atomic(&BTRFS_I(inode)->io_tree,
2785 start + offset,
2786 start + offset + sectorsize - 1,
2787 &cached);
2788 }
2789 return errno_to_blk_status(error);
Jan Schmidt4a54c8c2011-07-22 15:41:52 +02002790}
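
/*
 * Hypothetical example (not from the original source) of the bitmap handling
 * above: with a 16K range (four 4K sectors) and error_bitmap == 0b0010, only
 * the second sector (bytes 4K-8K of the range) gets a repair bio submitted
 * and is finished by that bio's endio; the other three sectors take the
 * "next:" path and are ended and unlocked directly.
 */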
2791
Chris Masond1310b22008-01-24 16:13:08 -05002792/* lots and lots of room for performance fixes in the end_bio funcs */
2793
David Sterbab5227c02015-12-03 13:08:59 +01002794void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
Jeff Mahoney87826df2012-02-15 16:23:57 +01002795{
Qu Wenruo38a39ac72021-04-08 20:32:27 +08002796 struct btrfs_inode *inode;
David Sterba25c12522021-07-26 14:15:08 +02002797 const bool uptodate = (err == 0);
Eric Sandeen3e2426b2014-06-12 00:39:58 -05002798 int ret = 0;
Jeff Mahoney87826df2012-02-15 16:23:57 +01002799
Qu Wenruo38a39ac72021-04-08 20:32:27 +08002800 ASSERT(page && page->mapping);
2801 inode = BTRFS_I(page->mapping->host);
2802 btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
Jeff Mahoney87826df2012-02-15 16:23:57 +01002803
Jeff Mahoney87826df2012-02-15 16:23:57 +01002804 if (!uptodate) {
Qu Wenruo963e4db2021-07-26 14:35:07 +08002805 const struct btrfs_fs_info *fs_info = inode->root->fs_info;
2806 u32 len;
2807
2808 ASSERT(end + 1 - start <= U32_MAX);
2809 len = end + 1 - start;
2810
2811 btrfs_page_clear_uptodate(fs_info, page, start, len);
2812 btrfs_page_set_error(fs_info, page, start, len);
Colin Ian Kingbff5baf2017-05-09 18:14:01 +01002813 ret = err < 0 ? err : -EIO;
Liu Bo5dca6ee2014-05-12 12:47:36 +08002814 mapping_set_error(page->mapping, ret);
Jeff Mahoney87826df2012-02-15 16:23:57 +01002815 }
Jeff Mahoney87826df2012-02-15 16:23:57 +01002816}
2817
Chris Masond1310b22008-01-24 16:13:08 -05002818/*
2819 * after a writepage IO is done, we need to:
2820 * clear the uptodate bits on error
2821 * clear the writeback bits in the extent tree for this IO
2822 * end_page_writeback if the page has no more pending IO
2823 *
2824 * Scheduling is not allowed, so the extent state tree is expected
2825 * to have one and only one object corresponding to this IO.
2826 */
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02002827static void end_bio_extent_writepage(struct bio *bio)
Chris Masond1310b22008-01-24 16:13:08 -05002828{
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002829 int error = blk_status_to_errno(bio->bi_status);
Kent Overstreet2c30c712013-11-07 12:20:26 -08002830 struct bio_vec *bvec;
Chris Masond1310b22008-01-24 16:13:08 -05002831 u64 start;
2832 u64 end;
Ming Lei6dc4f102019-02-15 19:13:19 +08002833 struct bvec_iter_all iter_all;
Naohiro Aotad8e3fb12021-02-04 19:22:05 +09002834 bool first_bvec = true;
Chris Masond1310b22008-01-24 16:13:08 -05002835
David Sterbac09abff2017-07-13 18:10:07 +02002836 ASSERT(!bio_flagged(bio, BIO_CLONED));
Christoph Hellwig2b070cf2019-04-25 09:03:00 +02002837 bio_for_each_segment_all(bvec, bio, iter_all) {
Chris Masond1310b22008-01-24 16:13:08 -05002838 struct page *page = bvec->bv_page;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002839 struct inode *inode = page->mapping->host;
2840 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Qu Wenruo321a02d2021-05-31 16:50:40 +08002841 const u32 sectorsize = fs_info->sectorsize;
David Woodhouse902b22f2008-08-20 08:51:49 -04002842
Qu Wenruo321a02d2021-05-31 16:50:40 +08002843 /* Our read/write should always be sector aligned. */
2844 if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
2845 btrfs_err(fs_info,
2846 "partial page write in btrfs with offset %u and length %u",
2847 bvec->bv_offset, bvec->bv_len);
2848 else if (!IS_ALIGNED(bvec->bv_len, sectorsize))
2849 btrfs_info(fs_info,
2850 "incomplete page write with offset %u and length %u",
2851 bvec->bv_offset, bvec->bv_len);
Chris Masond1310b22008-01-24 16:13:08 -05002852
Qu Wenruo321a02d2021-05-31 16:50:40 +08002853 start = page_offset(page) + bvec->bv_offset;
2854 end = start + bvec->bv_len - 1;
Chris Masond1310b22008-01-24 16:13:08 -05002855
Naohiro Aotad8e3fb12021-02-04 19:22:05 +09002856 if (first_bvec) {
2857 btrfs_record_physical_zoned(inode, start, bio);
2858 first_bvec = false;
2859 }
2860
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02002861 end_extent_writepage(page, error, start, end);
Qu Wenruo9047e312021-05-31 16:50:43 +08002862
2863 btrfs_page_clear_writeback(fs_info, page, start, bvec->bv_len);
Kent Overstreet2c30c712013-11-07 12:20:26 -08002864 }
Chris Mason2b1f55b2008-09-24 11:48:04 -04002865
Chris Masond1310b22008-01-24 16:13:08 -05002866 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05002867}
2868
Qu Wenruo94e8c952020-11-13 20:51:28 +08002869/*
2870 * Record previously processed extent range
2871 *
2872 * This lets endio_readpage_release_extent() handle a full extent range at
2873 * once, reducing the number of extent io tree operations.
2874 */
2875struct processed_extent {
2876 struct btrfs_inode *inode;
2877 /* Start of the range in @inode */
2878 u64 start;
Nigel Christian2e626e52021-01-24 20:41:41 -05002879 /* End of the range in @inode */
Qu Wenruo94e8c952020-11-13 20:51:28 +08002880 u64 end;
2881 bool uptodate;
2882};
2883
2884/*
2885 * Try to release processed extent range
2886 *
2887 * May not release the extent range right now if the current range is
2888 * contiguous to processed extent.
2889 *
2890 * Will release the processed extent when @inode or @uptodate changes, or when
2891 * the new range is no longer contiguous to the processed range.
2892 *
2893 * Passing @inode == NULL will force processed extent to be released.
2894 */
2895static void endio_readpage_release_extent(struct processed_extent *processed,
2896 struct btrfs_inode *inode, u64 start, u64 end,
2897 bool uptodate)
Miao Xie883d0de2013-07-25 19:22:35 +08002898{
2899 struct extent_state *cached = NULL;
Qu Wenruo94e8c952020-11-13 20:51:28 +08002900 struct extent_io_tree *tree;
Miao Xie883d0de2013-07-25 19:22:35 +08002901
Qu Wenruo94e8c952020-11-13 20:51:28 +08002902 /* The first extent, initialize @processed */
2903 if (!processed->inode)
2904 goto update;
2905
2906 /*
2907 * Contiguous to the processed extent, just update the end.
2908 *
2909 * Several things to notice:
2910 *
2911 * - bio can be merged as long as on-disk bytenr is contiguous
2912 * This means we can have pages belonging to other inodes, thus we need to
2913 * check if the inode still matches.
2914 * - bvec can contain range beyond current page for multi-page bvec
2915 * Thus we need to do processed->end + 1 >= start check
2916 */
2917 if (processed->inode == inode && processed->uptodate == uptodate &&
2918 processed->end + 1 >= start && end >= processed->end) {
2919 processed->end = end;
2920 return;
2921 }
2922
2923 tree = &processed->inode->io_tree;
2924 /*
2925 * Now we don't have range contiguous to the processed range, release
2926 * the processed range now.
2927 */
2928 if (processed->uptodate && tree->track_uptodate)
2929 set_extent_uptodate(tree, processed->start, processed->end,
2930 &cached, GFP_ATOMIC);
2931 unlock_extent_cached_atomic(tree, processed->start, processed->end,
2932 &cached);
2933
2934update:
2935 /* Update processed to current range */
2936 processed->inode = inode;
2937 processed->start = start;
2938 processed->end = end;
2939 processed->uptodate = uptodate;
Miao Xie883d0de2013-07-25 19:22:35 +08002940}
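
/*
 * Worked example with made-up offsets: after a call for inode A covering
 * [0, 16K) and another for [16K, 32K) with the same uptodate value,
 * @processed simply grows to [0, 32K).  A later call for [64K, 80K) is not
 * contiguous, so [0, 32K) is unlocked (and marked uptodate if applicable) in
 * one go and @processed restarts at [64K, 80K).
 */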
2941
Qu Wenruo92082d42021-02-02 10:28:36 +08002942static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
2943{
2944 ASSERT(PageLocked(page));
2945 if (fs_info->sectorsize == PAGE_SIZE)
2946 return;
2947
2948 ASSERT(PagePrivate(page));
2949 btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE);
2950}
2951
Chris Masond1310b22008-01-24 16:13:08 -05002952/*
Qu Wenruod9bb77d2021-03-15 13:39:14 +08002953 * Find extent buffer for a given bytenr.
2954 *
2955 * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
2956 * in endio context.
2957 */
2958static struct extent_buffer *find_extent_buffer_readpage(
2959 struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
2960{
2961 struct extent_buffer *eb;
2962
2963 /*
2964 * For regular sectorsize, we can use page->private to grab extent
2965 * buffer
2966 */
2967 if (fs_info->sectorsize == PAGE_SIZE) {
2968 ASSERT(PagePrivate(page) && page->private);
2969 return (struct extent_buffer *)page->private;
2970 }
2971
2972 /* For subpage case, we need to lookup buffer radix tree */
2973 rcu_read_lock();
2974 eb = radix_tree_lookup(&fs_info->buffer_radix,
2975 bytenr >> fs_info->sectorsize_bits);
2976 rcu_read_unlock();
2977 ASSERT(eb);
2978 return eb;
2979}
2980
2981/*
Chris Masond1310b22008-01-24 16:13:08 -05002982 * after a readpage IO is done, we need to:
2983 * clear the uptodate bits on error
2984 * set the uptodate bits if things worked
2985 * set the page up to date if all extents in the tree are uptodate
2986 * clear the lock bit in the extent tree
2987 * unlock the page if there are no other extents locked for it
2988 *
2989 * Scheduling is not allowed, so the extent state tree is expected
2990 * to have one and only one object corresponding to this IO.
2991 */
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02002992static void end_bio_extent_readpage(struct bio *bio)
Chris Masond1310b22008-01-24 16:13:08 -05002993{
Kent Overstreet2c30c712013-11-07 12:20:26 -08002994 struct bio_vec *bvec;
Qu Wenruoc3a3b192021-09-15 15:17:18 +08002995 struct btrfs_bio *bbio = btrfs_bio(bio);
Josef Bacik7870d082017-05-05 11:57:15 -04002996 struct extent_io_tree *tree, *failure_tree;
Qu Wenruo94e8c952020-11-13 20:51:28 +08002997 struct processed_extent processed = { 0 };
Qu Wenruo7ffd27e2020-12-02 14:47:58 +08002998 /*
2999 * The offset from the beginning of the bio. Since one bio can never be
3000 * larger than UINT_MAX, u32 is enough here.
3001 */
3002 u32 bio_offset = 0;
Josef Bacik5cf1ab52012-04-16 09:42:26 -04003003 int mirror;
Chris Masond1310b22008-01-24 16:13:08 -05003004 int ret;
Ming Lei6dc4f102019-02-15 19:13:19 +08003005 struct bvec_iter_all iter_all;
Chris Masond1310b22008-01-24 16:13:08 -05003006
David Sterbac09abff2017-07-13 18:10:07 +02003007 ASSERT(!bio_flagged(bio, BIO_CLONED));
Christoph Hellwig2b070cf2019-04-25 09:03:00 +02003008 bio_for_each_segment_all(bvec, bio, iter_all) {
Qu Wenruo150e4b02021-05-03 10:08:55 +08003009 bool uptodate = !bio->bi_status;
Chris Masond1310b22008-01-24 16:13:08 -05003010 struct page *page = bvec->bv_page;
Josef Bacika71754f2013-06-17 17:14:39 -04003011 struct inode *inode = page->mapping->host;
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04003012 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Qu Wenruo7ffd27e2020-12-02 14:47:58 +08003013 const u32 sectorsize = fs_info->sectorsize;
Qu Wenruo150e4b02021-05-03 10:08:55 +08003014 unsigned int error_bitmap = (unsigned int)-1;
Qu Wenruo7ffd27e2020-12-02 14:47:58 +08003015 u64 start;
3016 u64 end;
3017 u32 len;
Arne Jansen507903b2011-04-06 10:02:20 +00003018
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04003019 btrfs_debug(fs_info,
3020 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
David Sterba1201b582020-11-26 15:41:27 +01003021 bio->bi_iter.bi_sector, bio->bi_status,
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003022 bbio->mirror_num);
Josef Bacika71754f2013-06-17 17:14:39 -04003023 tree = &BTRFS_I(inode)->io_tree;
Josef Bacik7870d082017-05-05 11:57:15 -04003024 failure_tree = &BTRFS_I(inode)->io_failure_tree;
David Woodhouse902b22f2008-08-20 08:51:49 -04003025
Qu Wenruo8b8bbd42020-10-21 14:24:58 +08003026 /*
3027 * We always issue full-sector reads, but if some block in a
3028 * page fails to read, blk_update_request() will advance
3029 * bv_offset and adjust bv_len to compensate. Print a warning
3030 * for unaligned offsets, and an error if they don't add up to
3031 * a full sector.
3032 */
3033 if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
3034 btrfs_err(fs_info,
3035 "partial page read in btrfs with offset %u and length %u",
3036 bvec->bv_offset, bvec->bv_len);
3037 else if (!IS_ALIGNED(bvec->bv_offset + bvec->bv_len,
3038 sectorsize))
3039 btrfs_info(fs_info,
3040 "incomplete page read with offset %u and length %u",
3041 bvec->bv_offset, bvec->bv_len);
Chris Masond1310b22008-01-24 16:13:08 -05003042
Qu Wenruo8b8bbd42020-10-21 14:24:58 +08003043 start = page_offset(page) + bvec->bv_offset;
3044 end = start + bvec->bv_len - 1;
Miao Xiefacc8a222013-07-25 19:22:34 +08003045 len = bvec->bv_len;
Chris Masond1310b22008-01-24 16:13:08 -05003046
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003047 mirror = bbio->mirror_num;
Nikolay Borisov78e62c02018-11-22 10:17:49 +02003048 if (likely(uptodate)) {
Qu Wenruo150e4b02021-05-03 10:08:55 +08003049 if (is_data_inode(inode)) {
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003050 error_bitmap = btrfs_verify_data_csum(bbio,
Goldwyn Rodrigues5e295762021-03-03 06:55:37 -06003051 bio_offset, page, start, end);
Qu Wenruo150e4b02021-05-03 10:08:55 +08003052 ret = error_bitmap;
3053 } else {
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003054 ret = btrfs_validate_metadata_buffer(bbio,
Qu Wenruo8e1dc982020-11-12 16:47:57 +08003055 page, start, end, mirror);
Qu Wenruo150e4b02021-05-03 10:08:55 +08003056 }
Stefan Behrens5ee08442012-08-27 08:30:03 -06003057 if (ret)
Qu Wenruo150e4b02021-05-03 10:08:55 +08003058 uptodate = false;
Stefan Behrens5ee08442012-08-27 08:30:03 -06003059 else
Josef Bacik7870d082017-05-05 11:57:15 -04003060 clean_io_failure(BTRFS_I(inode)->root->fs_info,
3061 failure_tree, tree, start,
3062 page,
3063 btrfs_ino(BTRFS_I(inode)), 0);
Chris Masond1310b22008-01-24 16:13:08 -05003064 }
Josef Bacikea466792012-03-26 21:57:36 -04003065
Miao Xief2a09da2013-07-25 19:22:33 +08003066 if (likely(uptodate))
3067 goto readpage_ok;
3068
Nikolay Borisovbe17b3a2020-09-18 16:34:36 +03003069 if (is_data_inode(inode)) {
Liu Bo9d0d1c82017-03-24 15:04:50 -07003070 /*
Qu Wenruo150e4b02021-05-03 10:08:55 +08003071 * btrfs_submit_read_repair() will handle all the good
3072 * and bad sectors, we just continue to the next bvec.
Liu Bo9d0d1c82017-03-24 15:04:50 -07003073 */
Qu Wenruo150e4b02021-05-03 10:08:55 +08003074 submit_read_repair(inode, bio, bio_offset, page,
3075 start - page_offset(page), start,
3076 end, mirror, error_bitmap,
3077 btrfs_submit_data_bio);
3078
3079 ASSERT(bio_offset + len > bio_offset);
3080 bio_offset += len;
3081 continue;
Nikolay Borisov78e62c02018-11-22 10:17:49 +02003082 } else {
3083 struct extent_buffer *eb;
3084
Qu Wenruod9bb77d2021-03-15 13:39:14 +08003085 eb = find_extent_buffer_readpage(fs_info, page, start);
Nikolay Borisov78e62c02018-11-22 10:17:49 +02003086 set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
3087 eb->read_mirror = mirror;
3088 atomic_dec(&eb->io_pages);
Chris Mason7e383262008-04-09 16:28:12 -04003089 }
Miao Xief2a09da2013-07-25 19:22:33 +08003090readpage_ok:
Miao Xie883d0de2013-07-25 19:22:35 +08003091 if (likely(uptodate)) {
Josef Bacika71754f2013-06-17 17:14:39 -04003092 loff_t i_size = i_size_read(inode);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003093 pgoff_t end_index = i_size >> PAGE_SHIFT;
Josef Bacika71754f2013-06-17 17:14:39 -04003094
Qu Wenruoc28ea612021-03-01 16:44:22 +08003095 /*
3096 * Zero out the remaining part if this range straddles
3097 * i_size.
3098 *
3099 * Here we should only zero the range inside the bvec,
3100 * not touch anything else.
3101 *
3102 * NOTE: i_size is exclusive while end is inclusive.
3103 */
3104 if (page->index == end_index && i_size <= end) {
3105 u32 zero_start = max(offset_in_page(i_size),
Qu Wenruod2dcc8e2021-03-08 17:20:17 +08003106 offset_in_page(start));
Qu Wenruoc28ea612021-03-01 16:44:22 +08003107
3108 zero_user_segment(page, zero_start,
3109 offset_in_page(end) + 1);
3110 }
Chris Mason70dec802008-01-29 09:59:12 -05003111 }
Qu Wenruo7ffd27e2020-12-02 14:47:58 +08003112 ASSERT(bio_offset + len > bio_offset);
3113 bio_offset += len;
Miao Xie883d0de2013-07-25 19:22:35 +08003114
Qu Wenruoe09caaf2020-11-13 20:51:29 +08003115 /* Update page status and unlock */
Qu Wenruo92082d42021-02-02 10:28:36 +08003116 end_page_read(page, uptodate, start, len);
Qu Wenruo94e8c952020-11-13 20:51:28 +08003117 endio_readpage_release_extent(&processed, BTRFS_I(inode),
Boris Burkov14605402021-06-30 13:01:49 -07003118 start, end, PageUptodate(page));
Kent Overstreet2c30c712013-11-07 12:20:26 -08003119 }
Qu Wenruo94e8c952020-11-13 20:51:28 +08003120 /* Release the last extent */
3121 endio_readpage_release_extent(&processed, NULL, 0, 0, false);
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003122 btrfs_bio_free_csum(bbio);
Chris Masond1310b22008-01-24 16:13:08 -05003123 bio_put(bio);
Chris Masond1310b22008-01-24 16:13:08 -05003124}
3125
Chris Mason9be33952013-05-17 18:30:14 -04003126/*
David Sterba184f9992017-06-12 17:29:39 +02003127 * Initialize the members up to but not including 'bio'. Use after allocating a
3128 * new bio with bio_alloc_bioset, which does not zero the bytes outside of
3129 * 'bio' because use of __GFP_ZERO is not supported.
Chris Mason9be33952013-05-17 18:30:14 -04003130 */
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003131static inline void btrfs_bio_init(struct btrfs_bio *bbio)
Chris Masond1310b22008-01-24 16:13:08 -05003132{
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003133 memset(bbio, 0, offsetof(struct btrfs_bio, bio));
David Sterba184f9992017-06-12 17:29:39 +02003134}
3135
3136/*
Qu Wenruocd8e0cc2021-09-15 15:17:17 +08003137 * Allocate a bio with a btrfs_bio container, with @nr_iovecs as the maximum number of iovecs.
3138 *
3139 * The bio allocation is backed by bioset and does not fail.
Chris Masond1310b22008-01-24 16:13:08 -05003140 */
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003141struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
Chris Masond1310b22008-01-24 16:13:08 -05003142{
3143 struct bio *bio;
3144
Qu Wenruocd8e0cc2021-09-15 15:17:17 +08003145 ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
3146 bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003147 btrfs_bio_init(btrfs_bio(bio));
Chris Masond1310b22008-01-24 16:13:08 -05003148 return bio;
3149}
3150
David Sterba8b6c1d52017-06-02 17:48:13 +02003151struct bio *btrfs_bio_clone(struct bio *bio)
Chris Mason9be33952013-05-17 18:30:14 -04003152{
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003153 struct btrfs_bio *bbio;
Miao Xie23ea8e52014-09-12 18:43:54 +08003154 struct bio *new;
Chris Mason9be33952013-05-17 18:30:14 -04003155
David Sterba6e707bc2017-06-02 17:26:26 +02003156 /* Bio allocation backed by a bioset does not fail */
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -04003157 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003158 bbio = btrfs_bio(new);
3159 btrfs_bio_init(bbio);
3160 bbio->iter = bio->bi_iter;
Miao Xie23ea8e52014-09-12 18:43:54 +08003161 return new;
3162}
Chris Mason9be33952013-05-17 18:30:14 -04003163
Chaitanya Kulkarni21dda652021-07-21 21:43:33 +09003164struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
Liu Bo2f8e9142017-05-15 17:43:31 -07003165{
3166 struct bio *bio;
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003167 struct btrfs_bio *bbio;
Liu Bo2f8e9142017-05-15 17:43:31 -07003168
Chaitanya Kulkarni21dda652021-07-21 21:43:33 +09003169 ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
3170
Liu Bo2f8e9142017-05-15 17:43:31 -07003171 /* this will never fail when it's backed by a bioset */
Kent Overstreet8ac9f7c2018-05-20 18:25:56 -04003172 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
Liu Bo2f8e9142017-05-15 17:43:31 -07003173 ASSERT(bio);
3174
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003175 bbio = btrfs_bio(bio);
3176 btrfs_bio_init(bbio);
Liu Bo2f8e9142017-05-15 17:43:31 -07003177
3178 bio_trim(bio, offset >> 9, size >> 9);
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003179 bbio->iter = bio->bi_iter;
Liu Bo2f8e9142017-05-15 17:43:31 -07003180 return bio;
3181}
Chris Mason9be33952013-05-17 18:30:14 -04003182
Naohiro Aota953651e2021-02-04 19:21:57 +09003183/**
3184 * Attempt to add a page to bio
3185 *
Yang Libe8d1a22021-12-20 15:23:06 +08003186 * @bio_ctrl: record both the bio, and its bio_flags
Naohiro Aota953651e2021-02-04 19:21:57 +09003187 * @page: page to add to the bio
3188 * @disk_bytenr: logical bytenr of the range, also used to check whether we
3189 * are adding a range contiguous to the previous one in the bio
Naohiro Aota953651e2021-02-04 19:21:57 +09003190 * @size: portion of the page that we want to add to the bio
Yang Libe8d1a22021-12-20 15:23:06 +08003191 * @pg_offset: starting offset in the page
Naohiro Aota953651e2021-02-04 19:21:57 +09003192 * @bio_flags: flags of the current bio to see if we can merge them
Naohiro Aota953651e2021-02-04 19:21:57 +09003193 *
3194 * Attempt to add a page to bio considering stripe alignment etc.
3195 *
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003196 * Return >= 0 for the number of bytes added to the bio.
3197 * Can return 0 if the current bio is already at stripe/zone boundary.
3198 * Return <0 for error.
Naohiro Aota953651e2021-02-04 19:21:57 +09003199 */
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003200static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
3201 struct page *page,
3202 u64 disk_bytenr, unsigned int size,
3203 unsigned int pg_offset,
3204 unsigned long bio_flags)
Naohiro Aota953651e2021-02-04 19:21:57 +09003205{
Qu Wenruo390ed292021-04-14 16:42:15 +08003206 struct bio *bio = bio_ctrl->bio;
3207 u32 bio_size = bio->bi_iter.bi_size;
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003208 u32 real_size;
Naohiro Aota953651e2021-02-04 19:21:57 +09003209 const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
3210 bool contig;
Naohiro Aotae1326f02021-02-04 19:21:58 +09003211 int ret;
Naohiro Aota953651e2021-02-04 19:21:57 +09003212
Qu Wenruo390ed292021-04-14 16:42:15 +08003213 ASSERT(bio);
3214 /* The limit should be calculated when bio_ctrl->bio is allocated */
3215 ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
3216 if (bio_ctrl->bio_flags != bio_flags)
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003217 return 0;
Naohiro Aota953651e2021-02-04 19:21:57 +09003218
Qu Wenruo390ed292021-04-14 16:42:15 +08003219 if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED)
Naohiro Aota953651e2021-02-04 19:21:57 +09003220 contig = bio->bi_iter.bi_sector == sector;
3221 else
3222 contig = bio_end_sector(bio) == sector;
3223 if (!contig)
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003224 return 0;
Naohiro Aota953651e2021-02-04 19:21:57 +09003225
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003226 real_size = min(bio_ctrl->len_to_oe_boundary,
3227 bio_ctrl->len_to_stripe_boundary) - bio_size;
3228 real_size = min(real_size, size);
3229
3230 /*
3231 * If real_size is 0, never call bio_add_*_page(), as even size is 0,
3232 * bio will still execute its endio function on the page!
3233 */
3234 if (real_size == 0)
3235 return 0;
Naohiro Aota953651e2021-02-04 19:21:57 +09003236
Qu Wenruo390ed292021-04-14 16:42:15 +08003237 if (bio_op(bio) == REQ_OP_ZONE_APPEND)
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003238 ret = bio_add_zone_append_page(bio, page, real_size, pg_offset);
Qu Wenruo390ed292021-04-14 16:42:15 +08003239 else
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003240 ret = bio_add_page(bio, page, real_size, pg_offset);
Naohiro Aotae1326f02021-02-04 19:21:58 +09003241
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003242 return ret;
Naohiro Aota953651e2021-02-04 19:21:57 +09003243}
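
/*
 * Hypothetical example of the return value semantics: if the stripe/ordered
 * extent boundaries leave only 8K of room in the current bio and the caller
 * asks to add 16K, only 8K is added and 8192 is returned; the caller submits
 * the bio, allocates a new one and retries with the remaining 8K.  A return
 * of 0 means nothing could be added, e.g. the bio already ends exactly at a
 * boundary or the new range is not contiguous with it.
 */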
3244
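/*
 * Fill bio_ctrl->len_to_stripe_boundary and bio_ctrl->len_to_oe_boundary for
 * a freshly allocated bio: the former comes from the chunk stripe geometry,
 * the latter from the ordered extent that a zone append write must not cross.
 * Compressed bios have no such limits and get U32_MAX for both.
 */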
Qu Wenruo390ed292021-04-14 16:42:15 +08003245static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
Naohiro Aota939c7fe2021-08-11 15:37:08 +09003246 struct btrfs_inode *inode, u64 file_offset)
Qu Wenruo390ed292021-04-14 16:42:15 +08003247{
3248 struct btrfs_fs_info *fs_info = inode->root->fs_info;
3249 struct btrfs_io_geometry geom;
3250 struct btrfs_ordered_extent *ordered;
3251 struct extent_map *em;
3252 u64 logical = (bio_ctrl->bio->bi_iter.bi_sector << SECTOR_SHIFT);
3253 int ret;
3254
3255 /*
3256 * Pages for compressed extents are never submitted to disk directly, thus
3257 * they have no real boundary; just set both boundaries to U32_MAX.
3258 *
3259 * The split happens for the real compressed bio, which is done in
3260 * btrfs_submit_compressed_read/write().
3261 */
3262 if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED) {
3263 bio_ctrl->len_to_oe_boundary = U32_MAX;
3264 bio_ctrl->len_to_stripe_boundary = U32_MAX;
3265 return 0;
3266 }
3267 em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize);
3268 if (IS_ERR(em))
3269 return PTR_ERR(em);
3270 ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio_ctrl->bio),
3271 logical, &geom);
3272 free_extent_map(em);
3273 if (ret < 0)
3274 return ret;
3276 if (geom.len > U32_MAX)
3277 bio_ctrl->len_to_stripe_boundary = U32_MAX;
3278 else
3279 bio_ctrl->len_to_stripe_boundary = (u32)geom.len;
3280
Johannes Thumshirn73672712021-12-07 06:28:37 -08003281 if (bio_op(bio_ctrl->bio) != REQ_OP_ZONE_APPEND) {
Qu Wenruo390ed292021-04-14 16:42:15 +08003282 bio_ctrl->len_to_oe_boundary = U32_MAX;
3283 return 0;
3284 }
3285
Qu Wenruo390ed292021-04-14 16:42:15 +08003286 /* Ordered extent not yet created, so we're good */
Naohiro Aota939c7fe2021-08-11 15:37:08 +09003287 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
Qu Wenruo390ed292021-04-14 16:42:15 +08003288 if (!ordered) {
3289 bio_ctrl->len_to_oe_boundary = U32_MAX;
3290 return 0;
3291 }
3292
3293 bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
3294 ordered->disk_bytenr + ordered->disk_num_bytes - logical);
3295 btrfs_put_ordered_extent(ordered);
3296 return 0;
3297}
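
/*
 * Worked example with made-up numbers: for a zone append bio starting at
 * logical 1M, inside an ordered extent with disk_bytenr == 1M - 192K and
 * disk_num_bytes == 256K, len_to_oe_boundary becomes
 * (1M - 192K) + 256K - 1M == 64K, i.e. the bio may grow by at most 64K
 * before it would spill into the next ordered extent.
 */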
3298
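/*
 * Allocate and set up a new bio for @bio_ctrl: pick the starting sector,
 * attach the end_io callback, compute the stripe/ordered extent boundaries
 * and, for zone append, resolve the target device.  On failure the bio is
 * ended with the error and bio_ctrl->bio is left NULL.
 */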
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003299static int alloc_new_bio(struct btrfs_inode *inode,
3300 struct btrfs_bio_ctrl *bio_ctrl,
3301 struct writeback_control *wbc,
3302 unsigned int opf,
3303 bio_end_io_t end_io_func,
Naohiro Aota939c7fe2021-08-11 15:37:08 +09003304 u64 disk_bytenr, u32 offset, u64 file_offset,
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003305 unsigned long bio_flags)
3306{
3307 struct btrfs_fs_info *fs_info = inode->root->fs_info;
3308 struct bio *bio;
3309 int ret;
3310
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003311 bio = btrfs_bio_alloc(BIO_MAX_VECS);
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003312 /*
3313 * For a compressed page range, its disk_bytenr is always the @disk_bytenr
3314 * passed in, no matter whether we have added any range to a previous bio.
3315 */
3316 if (bio_flags & EXTENT_BIO_COMPRESSED)
Qu Wenruocd8e0cc2021-09-15 15:17:17 +08003317 bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003318 else
Qu Wenruocd8e0cc2021-09-15 15:17:17 +08003319 bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003320 bio_ctrl->bio = bio;
3321 bio_ctrl->bio_flags = bio_flags;
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003322 bio->bi_end_io = end_io_func;
3323 bio->bi_private = &inode->io_tree;
3324 bio->bi_write_hint = inode->vfs_inode.i_write_hint;
3325 bio->bi_opf = opf;
Naohiro Aota939c7fe2021-08-11 15:37:08 +09003326 ret = calc_bio_boundaries(bio_ctrl, inode, file_offset);
3327 if (ret < 0)
3328 goto error;
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003329 if (wbc) {
3330 struct block_device *bdev;
3331
Anand Jaind24fa5c2021-08-24 13:05:19 +08003332 bdev = fs_info->fs_devices->latest_dev->bdev;
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003333 bio_set_dev(bio, bdev);
3334 wbc_init_bio(wbc, bio);
3335 }
Johannes Thumshirn73672712021-12-07 06:28:37 -08003336 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003337 struct btrfs_device *device;
3338
3339 device = btrfs_zoned_get_device(fs_info, disk_bytenr,
3340 fs_info->sectorsize);
3341 if (IS_ERR(device)) {
3342 ret = PTR_ERR(device);
3343 goto error;
3344 }
3345
Qu Wenruoc3a3b192021-09-15 15:17:18 +08003346 btrfs_bio(bio)->device = device;
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003347 }
3348 return 0;
3349error:
3350 bio_ctrl->bio = NULL;
3351 bio->bi_status = errno_to_blk_status(ret);
3352 bio_endio(bio);
3353 return ret;
3354}
3355
David Sterba4b81ba42017-06-06 19:14:26 +02003356/*
3357 * @opf: bio REQ_OP_* and REQ_* flags as one value
David Sterbab8b3d622017-06-12 19:50:41 +02003358 * @wbc: optional writeback control for io accounting
3359 * @page: page to add to the bio
Qu Wenruo0c64c332021-01-06 09:01:40 +08003360 * @disk_bytenr: logical bytenr where the IO will be
3361 * @size: portion of the page that we want to add to the bio
David Sterbab8b3d622017-06-12 19:50:41 +02003362 * @pg_offset: starting offset within the page
3363 * @bio_ctrl: holds the bio being built and the flags/boundaries used to
David Sterba5c2b1fd2017-06-06 19:22:55 +02003364 * decide when it must be submitted
David Sterbab8b3d622017-06-12 19:50:41 +02003365 * @end_io_func: end_io callback for the new bio
3366 * @mirror_num: desired mirror to read/write
3367 * @bio_flags: flags of the current range, to check if it can be merged
3368 * @force_bio_submit: submit any bio held in @bio_ctrl before adding this range
David Sterba4b81ba42017-06-06 19:14:26 +02003369 */
David Sterba0ceb34b2020-02-05 19:09:28 +01003370static int submit_extent_page(unsigned int opf,
Chris Masonda2f0f72015-07-02 13:57:22 -07003371 struct writeback_control *wbc,
Qu Wenruo390ed292021-04-14 16:42:15 +08003372 struct btrfs_bio_ctrl *bio_ctrl,
Qu Wenruo0c64c332021-01-06 09:01:40 +08003373 struct page *page, u64 disk_bytenr,
David Sterba6c5a4e22017-10-04 17:10:34 +02003374 size_t size, unsigned long pg_offset,
Chris Masonf1885912008-04-09 16:28:12 -04003375 bio_end_io_t end_io_func,
Chris Masonc8b97812008-10-29 14:49:59 -04003376 int mirror_num,
Filipe Manana005efed2015-09-14 09:09:31 +01003377 unsigned long bio_flags,
3378 bool force_bio_submit)
Chris Masond1310b22008-01-24 16:13:08 -05003379{
3380 int ret = 0;
Naohiro Aotae1326f02021-02-04 19:21:58 +09003381 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003382 unsigned int cur = pg_offset;
Chris Masond1310b22008-01-24 16:13:08 -05003383
Qu Wenruo390ed292021-04-14 16:42:15 +08003384 ASSERT(bio_ctrl);
David Sterba5c2b1fd2017-06-06 19:22:55 +02003385
Qu Wenruo390ed292021-04-14 16:42:15 +08003386 ASSERT(pg_offset < PAGE_SIZE && size <= PAGE_SIZE &&
3387 pg_offset + size <= PAGE_SIZE);
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003388 if (force_bio_submit && bio_ctrl->bio) {
3389 ret = submit_one_bio(bio_ctrl->bio, mirror_num, bio_ctrl->bio_flags);
3390 bio_ctrl->bio = NULL;
3391 if (ret < 0)
3392 return ret;
3393 }
3394
3395 while (cur < pg_offset + size) {
3396 u32 offset = cur - pg_offset;
3397 int added;
3398
3399 /* Allocate new bio if needed */
3400 if (!bio_ctrl->bio) {
3401 ret = alloc_new_bio(inode, bio_ctrl, wbc, opf,
3402 end_io_func, disk_bytenr, offset,
Naohiro Aota939c7fe2021-08-11 15:37:08 +09003403 page_offset(page) + cur,
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003404 bio_flags);
3405 if (ret < 0)
3406 return ret;
3407 }
3408 /*
3409 * We must go through btrfs_bio_add_page() to ensure each
3410 * page range won't cross various boundaries.
3411 */
3412 if (bio_flags & EXTENT_BIO_COMPRESSED)
3413 added = btrfs_bio_add_page(bio_ctrl, page, disk_bytenr,
3414 size - offset, pg_offset + offset,
3415 bio_flags);
3416 else
3417 added = btrfs_bio_add_page(bio_ctrl, page,
3418 disk_bytenr + offset, size - offset,
3419 pg_offset + offset, bio_flags);
3420
3421 /* Metadata page range should never be split */
3422 if (!is_data_inode(&inode->vfs_inode))
3423 ASSERT(added == 0 || added == size - offset);
3424
3425 /* At least we added some page, update the account */
3426 if (wbc && added)
3427 wbc_account_cgroup_owner(wbc, page, added);
3428
3429 /* We have reached boundary, submit right now */
3430 if (added < size - offset) {
3431 /* The bio should contain some page(s) */
3432 ASSERT(bio_ctrl->bio->bi_iter.bi_size);
3433 ret = submit_one_bio(bio_ctrl->bio, mirror_num,
3434 bio_ctrl->bio_flags);
Qu Wenruo390ed292021-04-14 16:42:15 +08003435 bio_ctrl->bio = NULL;
3436 if (ret < 0)
Jeff Mahoney79787ea2012-03-12 16:03:00 +01003437 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05003438 }
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003439 cur += added;
Chris Masond1310b22008-01-24 16:13:08 -05003440 }
Qu Wenruoe0eefe02021-07-26 14:35:00 +08003441 return 0;
Chris Masond1310b22008-01-24 16:13:08 -05003442}
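
/*
 * Hypothetical caller sketch (simplified from the read path): the bio built
 * up in @bio_ctrl may stay open across calls, so whoever owns the bio_ctrl
 * must submit the leftover bio once it is done adding ranges:
 *
 *	struct btrfs_bio_ctrl bio_ctrl = { 0 };
 *
 *	ret = submit_extent_page(REQ_OP_READ, NULL, &bio_ctrl, page,
 *				 disk_bytenr, PAGE_SIZE, 0,
 *				 end_bio_extent_readpage, 0, 0, false);
 *	if (bio_ctrl.bio)
 *		submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags);
 */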
3443
Qu Wenruo760f9912021-01-26 16:33:48 +08003444static int attach_extent_buffer_page(struct extent_buffer *eb,
3445 struct page *page,
3446 struct btrfs_subpage *prealloc)
Josef Bacik4f2de97a2012-03-07 16:20:05 -05003447{
Qu Wenruo760f9912021-01-26 16:33:48 +08003448 struct btrfs_fs_info *fs_info = eb->fs_info;
3449 int ret = 0;
3450
Qu Wenruo0d01e242020-10-21 14:25:02 +08003451 /*
3452 * If the page is mapped to btree inode, we should hold the private
3453 * lock to prevent race.
3454 * For cloned or dummy extent buffers, their pages are not mapped and
3455 * will not race with any other ebs.
3456 */
3457 if (page->mapping)
3458 lockdep_assert_held(&page->mapping->private_lock);
3459
Qu Wenruo760f9912021-01-26 16:33:48 +08003460 if (fs_info->sectorsize == PAGE_SIZE) {
3461 if (!PagePrivate(page))
3462 attach_page_private(page, eb);
3463 else
3464 WARN_ON(page->private != (unsigned long)eb);
3465 return 0;
3466 }
3467
3468 /* Already mapped, just free prealloc */
3469 if (PagePrivate(page)) {
3470 btrfs_free_subpage(prealloc);
3471 return 0;
3472 }
3473
3474 if (prealloc)
3475 /* Has preallocated memory for subpage */
3476 attach_page_private(page, prealloc);
Guoqing Jiangd1b89bc2020-06-01 21:47:45 -07003477 else
Qu Wenruo760f9912021-01-26 16:33:48 +08003478 /* Do new allocation to attach subpage */
3479 ret = btrfs_attach_subpage(fs_info, page,
3480 BTRFS_SUBPAGE_METADATA);
3481 return ret;
Josef Bacik4f2de97a2012-03-07 16:20:05 -05003482}
3483
Qu Wenruo32443de2021-01-26 16:34:00 +08003484int set_page_extent_mapped(struct page *page)
Chris Masond1310b22008-01-24 16:13:08 -05003485{
Qu Wenruo32443de2021-01-26 16:34:00 +08003486 struct btrfs_fs_info *fs_info;
3487
3488 ASSERT(page->mapping);
3489
3490 if (PagePrivate(page))
3491 return 0;
3492
3493 fs_info = btrfs_sb(page->mapping->host->i_sb);
3494
3495 if (fs_info->sectorsize < PAGE_SIZE)
3496 return btrfs_attach_subpage(fs_info, page, BTRFS_SUBPAGE_DATA);
3497
3498 attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
3499 return 0;
3500}
3501
3502void clear_page_extent_mapped(struct page *page)
3503{
3504 struct btrfs_fs_info *fs_info;
3505
3506 ASSERT(page->mapping);
3507
Guoqing Jiangd1b89bc2020-06-01 21:47:45 -07003508 if (!PagePrivate(page))
Qu Wenruo32443de2021-01-26 16:34:00 +08003509 return;
3510
3511 fs_info = btrfs_sb(page->mapping->host->i_sb);
3512 if (fs_info->sectorsize < PAGE_SIZE)
3513 return btrfs_detach_subpage(fs_info, page);
3514
3515 detach_page_private(page);
Chris Masond1310b22008-01-24 16:13:08 -05003516}
3517
Miao Xie125bac012013-07-25 19:22:37 +08003518static struct extent_map *
3519__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
Nikolay Borisov1a5ee1e2020-09-14 12:37:06 +03003520 u64 start, u64 len, struct extent_map **em_cached)
Miao Xie125bac012013-07-25 19:22:37 +08003521{
3522 struct extent_map *em;
3523
3524 if (em_cached && *em_cached) {
3525 em = *em_cached;
Filipe Mananacbc0e922014-02-25 14:15:12 +00003526 if (extent_map_in_tree(em) && start >= em->start &&
Miao Xie125bac012013-07-25 19:22:37 +08003527 start < extent_map_end(em)) {
Elena Reshetova490b54d2017-03-03 10:55:12 +02003528 refcount_inc(&em->refs);
Miao Xie125bac012013-07-25 19:22:37 +08003529 return em;
3530 }
3531
3532 free_extent_map(em);
3533 *em_cached = NULL;
3534 }
3535
Nikolay Borisov1a5ee1e2020-09-14 12:37:06 +03003536 em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
Miao Xie125bac012013-07-25 19:22:37 +08003537 if (em_cached && !IS_ERR_OR_NULL(em)) {
3538 BUG_ON(*em_cached);
Elena Reshetova490b54d2017-03-03 10:55:12 +02003539 refcount_inc(&em->refs);
Miao Xie125bac012013-07-25 19:22:37 +08003540 *em_cached = em;
3541 }
3542 return em;
3543}
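
/*
 * Usage sketch (assumed caller, mirroring what btrfs_do_readpage() below
 * does): the cached entry lets consecutive lookups inside one large extent
 * skip the extent map tree:
 *
 *	struct extent_map *em_cached = NULL;
 *
 *	em = __get_extent_map(inode, page, 0, cur, len, &em_cached);
 *	...
 *	free_extent_map(em);		drop the per-lookup reference
 *	...
 *	free_extent_map(em_cached);	drop the cached reference when done
 */
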
Chris Masond1310b22008-01-24 16:13:08 -05003544/*
3545 * Basic readpage implementation. Locked extent state structs are inserted into
3546 * the io tree and removed again when the IO is done (by the end_io handlers).
3547 *
Jeff Mahoney79787ea2012-03-12 16:03:00 +01003548 * XXX JDM: This needs looking at to ensure proper page locking.
Liu Bobaf863b2016-07-11 10:39:07 -07003549 * Return 0 on success, otherwise return an error.
Chris Masond1310b22008-01-24 16:13:08 -05003550 */
Nikolay Borisov0f208812020-09-14 14:39:16 +03003551int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
Qu Wenruo390ed292021-04-14 16:42:15 +08003552 struct btrfs_bio_ctrl *bio_ctrl,
Nikolay Borisov0f208812020-09-14 14:39:16 +03003553 unsigned int read_flags, u64 *prev_em_start)
Chris Masond1310b22008-01-24 16:13:08 -05003554{
3555 struct inode *inode = page->mapping->host;
Qu Wenruo92082d42021-02-02 10:28:36 +08003556 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Miao Xie4eee4fa2012-12-21 09:17:45 +00003557 u64 start = page_offset(page);
David Sterba8eec8292017-06-06 19:50:13 +02003558 const u64 end = start + PAGE_SIZE - 1;
Chris Masond1310b22008-01-24 16:13:08 -05003559 u64 cur = start;
3560 u64 extent_offset;
3561 u64 last_byte = i_size_read(inode);
3562 u64 block_start;
3563 u64 cur_end;
Chris Masond1310b22008-01-24 16:13:08 -05003564 struct extent_map *em;
Liu Bobaf863b2016-07-11 10:39:07 -07003565 int ret = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003566 int nr = 0;
David Sterba306e16c2011-04-19 14:29:38 +02003567 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003568 size_t iosize;
3569 size_t blocksize = inode->i_sb->s_blocksize;
David Sterbaf657a312020-02-05 19:09:42 +01003570 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
David Sterbaae6957e2020-02-05 19:09:30 +01003571
Qu Wenruo32443de2021-01-26 16:34:00 +08003572 ret = set_page_extent_mapped(page);
3573 if (ret < 0) {
3574 unlock_extent(tree, start, end);
Qu Wenruo92082d42021-02-02 10:28:36 +08003575 btrfs_page_set_error(fs_info, page, start, PAGE_SIZE);
3576 unlock_page(page);
Qu Wenruo32443de2021-01-26 16:34:00 +08003577 goto out;
3578 }
Chris Masond1310b22008-01-24 16:13:08 -05003579
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003580 if (page->index == last_byte >> PAGE_SHIFT) {
Johannes Thumshirn70730172018-12-05 15:23:03 +01003581 size_t zero_offset = offset_in_page(last_byte);
Chris Masonc8b97812008-10-29 14:49:59 -04003582
3583 if (zero_offset) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003584 iosize = PAGE_SIZE - zero_offset;
Ira Weinyd048b9c2021-05-04 18:40:07 -07003585 memzero_page(page, zero_offset, iosize);
Chris Masonc8b97812008-10-29 14:49:59 -04003586 flush_dcache_page(page);
Chris Masonc8b97812008-10-29 14:49:59 -04003587 }
3588 }
Qu Wenruo92082d42021-02-02 10:28:36 +08003589 begin_page_read(fs_info, page);
Chris Masond1310b22008-01-24 16:13:08 -05003590 while (cur <= end) {
Qu Wenruo4c37a792021-07-26 14:34:50 +08003591 unsigned long this_bio_flag = 0;
Filipe Manana005efed2015-09-14 09:09:31 +01003592 bool force_bio_submit = false;
Qu Wenruo0c64c332021-01-06 09:01:40 +08003593 u64 disk_bytenr;
Josef Bacikc8f2f242013-02-11 11:33:00 -05003594
Qu Wenruo6a404912021-09-27 15:21:47 +08003595 ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
Chris Masond1310b22008-01-24 16:13:08 -05003596 if (cur >= last_byte) {
Arne Jansen507903b2011-04-06 10:02:20 +00003597 struct extent_state *cached = NULL;
3598
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003599 iosize = PAGE_SIZE - pg_offset;
Ira Weinyd048b9c2021-05-04 18:40:07 -07003600 memzero_page(page, pg_offset, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05003601 flush_dcache_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003602 set_extent_uptodate(tree, cur, cur + iosize - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00003603 &cached, GFP_NOFS);
Filipe Manana7f042a82016-01-27 19:17:20 +00003604 unlock_extent_cached(tree, cur,
David Sterbae43bbe52017-12-12 21:43:52 +01003605 cur + iosize - 1, &cached);
Qu Wenruo92082d42021-02-02 10:28:36 +08003606 end_page_read(page, true, cur, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05003607 break;
3608 }
Miao Xie125bac012013-07-25 19:22:37 +08003609 em = __get_extent_map(inode, page, pg_offset, cur,
Nikolay Borisov1a5ee1e2020-09-14 12:37:06 +03003610 end - cur + 1, em_cached);
David Sterbac7040052011-04-19 18:00:01 +02003611 if (IS_ERR_OR_NULL(em)) {
Filipe Manana7f042a82016-01-27 19:17:20 +00003612 unlock_extent(tree, cur, end);
Qu Wenruo92082d42021-02-02 10:28:36 +08003613 end_page_read(page, false, cur, end + 1 - cur);
Chris Masond1310b22008-01-24 16:13:08 -05003614 break;
3615 }
Chris Masond1310b22008-01-24 16:13:08 -05003616 extent_offset = cur - em->start;
3617 BUG_ON(extent_map_end(em) <= cur);
3618 BUG_ON(end < cur);
3619
Li Zefan261507a02010-12-17 14:21:50 +08003620 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
Mark Fasheh4b384312013-08-06 11:42:50 -07003621 this_bio_flag |= EXTENT_BIO_COMPRESSED;
Li Zefan261507a02010-12-17 14:21:50 +08003622 extent_set_compress_type(&this_bio_flag,
3623 em->compress_type);
3624 }
Chris Masonc8b97812008-10-29 14:49:59 -04003625
Chris Masond1310b22008-01-24 16:13:08 -05003626 iosize = min(extent_map_end(em) - cur, end - cur + 1);
3627 cur_end = min(extent_map_end(em) - 1, end);
Qu Wenruofda28322013-02-26 08:10:22 +00003628 iosize = ALIGN(iosize, blocksize);
Goldwyn Rodrigues949b3272020-09-15 10:41:40 -05003629 if (this_bio_flag & EXTENT_BIO_COMPRESSED)
Qu Wenruo0c64c332021-01-06 09:01:40 +08003630 disk_bytenr = em->block_start;
Goldwyn Rodrigues949b3272020-09-15 10:41:40 -05003631 else
Qu Wenruo0c64c332021-01-06 09:01:40 +08003632 disk_bytenr = em->block_start + extent_offset;
Chris Masond1310b22008-01-24 16:13:08 -05003633 block_start = em->block_start;
Yan Zhengd899e052008-10-30 14:25:28 -04003634 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3635 block_start = EXTENT_MAP_HOLE;
Filipe Manana005efed2015-09-14 09:09:31 +01003636
3637 /*
3638 * If we have a file range that points to a compressed extent
Randy Dunlap260db432020-08-04 19:48:34 -07003639 * and it's followed by a consecutive file range that points
Filipe Manana005efed2015-09-14 09:09:31 +01003640 * to the same compressed extent (possibly with a different
3641 * offset and/or length, so it either points to the whole extent
3642 * or only part of it), we must make sure we do not submit a
3643 * single bio to populate the pages for the 2 ranges because
3644 * this makes the compressed extent read zero out the pages
3645 * belonging to the 2nd range. Imagine the following scenario:
3646 *
3647 * File layout
 3648		 *  [0 - 8K]                     [8K - 24K]
 3649		 *    |                               |
 3650		 *    |                               |
 3651		 *  points to extent X,           points to extent X,
 3652		 *  offset 4K, length of 8K       offset 0, length 16K
3653 *
3654 * [extent X, compressed length = 4K uncompressed length = 16K]
3655 *
3656 * If the bio to read the compressed extent covers both ranges,
3657 * it will decompress extent X into the pages belonging to the
3658 * first range and then it will stop, zeroing out the remaining
3659 * pages that belong to the other range that points to extent X.
3660 * So here we make sure we submit 2 bios, one for the first
 3661		 * range and another one for the second range. Both will target
3662 * the same physical extent from disk, but we can't currently
3663 * make the compressed bio endio callback populate the pages
3664 * for both ranges because each compressed bio is tightly
3665 * coupled with a single extent map, and each range can have
3666 * an extent map with a different offset value relative to the
3667 * uncompressed data of our extent and different lengths. This
3668 * is a corner case so we prioritize correctness over
3669 * non-optimal behavior (submitting 2 bios for the same extent).
3670 */
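		/*
		 * In short: force a separate bio whenever the current range is
		 * compressed and its extent map differs from the one used by
		 * the previous range, which is tracked via *prev_em_start.
		 */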
3671 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
3672 prev_em_start && *prev_em_start != (u64)-1 &&
Filipe Manana8e928212019-02-14 15:17:20 +00003673 *prev_em_start != em->start)
Filipe Manana005efed2015-09-14 09:09:31 +01003674 force_bio_submit = true;
3675
3676 if (prev_em_start)
Filipe Manana8e928212019-02-14 15:17:20 +00003677 *prev_em_start = em->start;
Filipe Manana005efed2015-09-14 09:09:31 +01003678
Chris Masond1310b22008-01-24 16:13:08 -05003679 free_extent_map(em);
3680 em = NULL;
3681
3682 /* we've found a hole, just zero and go on */
3683 if (block_start == EXTENT_MAP_HOLE) {
Arne Jansen507903b2011-04-06 10:02:20 +00003684 struct extent_state *cached = NULL;
3685
Ira Weinyd048b9c2021-05-04 18:40:07 -07003686 memzero_page(page, pg_offset, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05003687 flush_dcache_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05003688
3689 set_extent_uptodate(tree, cur, cur + iosize - 1,
Arne Jansen507903b2011-04-06 10:02:20 +00003690 &cached, GFP_NOFS);
Filipe Manana7f042a82016-01-27 19:17:20 +00003691 unlock_extent_cached(tree, cur,
David Sterbae43bbe52017-12-12 21:43:52 +01003692 cur + iosize - 1, &cached);
Qu Wenruo92082d42021-02-02 10:28:36 +08003693 end_page_read(page, true, cur, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05003694 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02003695 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003696 continue;
3697 }
3698 /* the get_extent function already copied into the page */
Chris Mason9655d292009-09-02 15:22:30 -04003699 if (test_range_bit(tree, cur, cur_end,
3700 EXTENT_UPTODATE, 1, NULL)) {
Filipe Manana7f042a82016-01-27 19:17:20 +00003701 unlock_extent(tree, cur, cur + iosize - 1);
Qu Wenruo92082d42021-02-02 10:28:36 +08003702 end_page_read(page, true, cur, iosize);
Chris Masond1310b22008-01-24 16:13:08 -05003703 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02003704 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003705 continue;
3706 }
Chris Mason70dec802008-01-29 09:59:12 -05003707		/* we have an inline extent but it didn't get marked
 3708		 * uptodate. Error out.
3709 */
3710 if (block_start == EXTENT_MAP_INLINE) {
Filipe Manana7f042a82016-01-27 19:17:20 +00003711 unlock_extent(tree, cur, cur + iosize - 1);
Qu Wenruo92082d42021-02-02 10:28:36 +08003712 end_page_read(page, false, cur, iosize);
Chris Mason70dec802008-01-29 09:59:12 -05003713 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02003714 pg_offset += iosize;
Chris Mason70dec802008-01-29 09:59:12 -05003715 continue;
3716 }
Chris Masond1310b22008-01-24 16:13:08 -05003717
David Sterba0ceb34b2020-02-05 19:09:28 +01003718 ret = submit_extent_page(REQ_OP_READ | read_flags, NULL,
Qu Wenruo390ed292021-04-14 16:42:15 +08003719 bio_ctrl, page, disk_bytenr, iosize,
3720 pg_offset,
Nikolay Borisovfd513002020-09-14 12:37:11 +03003721 end_bio_extent_readpage, 0,
Filipe Manana005efed2015-09-14 09:09:31 +01003722 this_bio_flag,
3723 force_bio_submit);
Josef Bacikc8f2f242013-02-11 11:33:00 -05003724 if (!ret) {
3725 nr++;
Josef Bacikc8f2f242013-02-11 11:33:00 -05003726 } else {
Filipe Manana7f042a82016-01-27 19:17:20 +00003727 unlock_extent(tree, cur, cur + iosize - 1);
Qu Wenruo92082d42021-02-02 10:28:36 +08003728 end_page_read(page, false, cur, iosize);
Liu Bobaf863b2016-07-11 10:39:07 -07003729 goto out;
Josef Bacikedd33c92012-10-05 16:40:32 -04003730 }
Chris Masond1310b22008-01-24 16:13:08 -05003731 cur = cur + iosize;
David Sterba306e16c2011-04-19 14:29:38 +02003732 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003733 }
Dan Magenheimer90a887c2011-05-26 10:01:56 -06003734out:
Liu Bobaf863b2016-07-11 10:39:07 -07003735 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05003736}
3737
David Sterbab6660e82020-02-05 19:09:40 +01003738static inline void contiguous_readpages(struct page *pages[], int nr_pages,
Qu Wenruo390ed292021-04-14 16:42:15 +08003739 u64 start, u64 end,
3740 struct extent_map **em_cached,
3741 struct btrfs_bio_ctrl *bio_ctrl,
3742 u64 *prev_em_start)
Miao Xie99740902013-07-25 19:22:36 +08003743{
Nikolay Borisov23d31bd2019-05-07 10:19:23 +03003744 struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
Miao Xie99740902013-07-25 19:22:36 +08003745 int index;
3746
David Sterbab272ae22020-02-05 19:09:33 +01003747 btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
Miao Xie99740902013-07-25 19:22:36 +08003748
3749 for (index = 0; index < nr_pages; index++) {
Qu Wenruo390ed292021-04-14 16:42:15 +08003750 btrfs_do_readpage(pages[index], em_cached, bio_ctrl,
Nikolay Borisov0f208812020-09-14 14:39:16 +03003751 REQ_RAHEAD, prev_em_start);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03003752 put_page(pages[index]);
Miao Xie99740902013-07-25 19:22:36 +08003753 }
3754}
3755
David Sterba3d4b9492017-02-10 19:33:41 +01003756static void update_nr_written(struct writeback_control *wbc,
Liu Boa91326672016-03-07 16:56:21 -08003757 unsigned long nr_written)
Chris Mason11c83492009-04-20 15:50:09 -04003758{
3759 wbc->nr_to_write -= nr_written;
Chris Mason11c83492009-04-20 15:50:09 -04003760}
3761
Chris Masond1310b22008-01-24 16:13:08 -05003762/*
Chris Mason40f76582014-05-21 13:35:51 -07003763 * helper for __extent_writepage, doing all of the delayed allocation setup.
3764 *
Nikolay Borisov5eaad972018-11-01 14:09:46 +02003765 * This returns 1 if the btrfs_run_delalloc_range function did all the work required
Chris Mason40f76582014-05-21 13:35:51 -07003766 * to write the page (copy into inline extent). In this case the IO has
3767 * been started and the page is already unlocked.
3768 *
3769 * This returns 0 if all went well (page still locked)
3770 * This returns < 0 if there were errors (page still locked)
Chris Masond1310b22008-01-24 16:13:08 -05003771 */
Nikolay Borisovcd4c0bf942020-06-05 10:42:10 +03003772static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
Qu Wenruo83f1b682021-11-12 13:33:14 +08003773 struct page *page, struct writeback_control *wbc)
Chris Masond1310b22008-01-24 16:13:08 -05003774{
Qu Wenruo2749f7e2021-09-27 15:22:07 +08003775 const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
Qu Wenruocf3075f2021-09-27 15:21:44 +08003776 u64 delalloc_start = page_offset(page);
Chris Mason40f76582014-05-21 13:35:51 -07003777 u64 delalloc_to_write = 0;
Qu Wenruo83f1b682021-11-12 13:33:14 +08003778 /* How many pages are started by btrfs_run_delalloc_range() */
3779 unsigned long nr_written = 0;
Chris Mason40f76582014-05-21 13:35:51 -07003780 int ret;
3781 int page_started = 0;
3782
Qu Wenruo2749f7e2021-09-27 15:22:07 +08003783 while (delalloc_start < page_end) {
3784 u64 delalloc_end = page_end;
3785 bool found;
Chris Mason40f76582014-05-21 13:35:51 -07003786
Nikolay Borisovcd4c0bf942020-06-05 10:42:10 +03003787 found = find_lock_delalloc_range(&inode->vfs_inode, page,
Chris Mason40f76582014-05-21 13:35:51 -07003788 &delalloc_start,
Nikolay Borisov917aace2018-10-26 14:43:20 +03003789 &delalloc_end);
Lu Fengqi3522e902018-11-29 11:33:38 +08003790 if (!found) {
Chris Mason40f76582014-05-21 13:35:51 -07003791 delalloc_start = delalloc_end + 1;
3792 continue;
3793 }
Nikolay Borisovcd4c0bf942020-06-05 10:42:10 +03003794 ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
Qu Wenruo83f1b682021-11-12 13:33:14 +08003795 delalloc_end, &page_started, &nr_written, wbc);
Chris Mason40f76582014-05-21 13:35:51 -07003796 if (ret) {
Qu Wenruo963e4db2021-07-26 14:35:07 +08003797 btrfs_page_set_error(inode->root->fs_info, page,
3798 page_offset(page), PAGE_SIZE);
Qu Wenruo7361b4a2021-07-28 14:05:05 +08003799 return ret;
Chris Mason40f76582014-05-21 13:35:51 -07003800 }
3801 /*
Kirill A. Shutemovea1754a2016-04-01 15:29:48 +03003802 * delalloc_end is already one less than the total length, so
3803 * we don't subtract one from PAGE_SIZE
Chris Mason40f76582014-05-21 13:35:51 -07003804 */
3805 delalloc_to_write += (delalloc_end - delalloc_start +
Kirill A. Shutemovea1754a2016-04-01 15:29:48 +03003806 PAGE_SIZE) >> PAGE_SHIFT;
Chris Mason40f76582014-05-21 13:35:51 -07003807 delalloc_start = delalloc_end + 1;
3808 }
3809 if (wbc->nr_to_write < delalloc_to_write) {
3810 int thresh = 8192;
3811
3812 if (delalloc_to_write < thresh * 2)
3813 thresh = delalloc_to_write;
3814 wbc->nr_to_write = min_t(u64, delalloc_to_write,
3815 thresh);
3816 }
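	/*
	 * Worked example of the clamp above: if delalloc_to_write is
	 * 100000 pages and nr_to_write was 1024, nr_to_write becomes 8192;
	 * if delalloc_to_write is 10000 (less than 2 * 8192), nr_to_write
	 * becomes 10000 so the whole range can be written in one pass.
	 */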
3817
Qu Wenruo83f1b682021-11-12 13:33:14 +08003818	/* Did btrfs_run_delalloc_range() already unlock and start the IO? */
Chris Mason40f76582014-05-21 13:35:51 -07003819 if (page_started) {
3820 /*
Qu Wenruo83f1b682021-11-12 13:33:14 +08003821 * We've unlocked the page, so we can't update the mapping's
3822 * writeback index, just update nr_to_write.
Chris Mason40f76582014-05-21 13:35:51 -07003823 */
Qu Wenruo83f1b682021-11-12 13:33:14 +08003824 wbc->nr_to_write -= nr_written;
Chris Mason40f76582014-05-21 13:35:51 -07003825 return 1;
3826 }
3827
Nikolay Borisovb69d1ee2020-07-16 18:17:19 +03003828 return 0;
Chris Mason40f76582014-05-21 13:35:51 -07003829}
3830
3831/*
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003832 * Find the first byte we need to write.
3833 *
3834 * For subpage, one page can contain several sectors, and
3835 * __extent_writepage_io() will just grab all extent maps in the page
3836 * range and try to submit all non-inline/non-compressed extents.
3837 *
 3838 * This is a big problem for subpage, as we shouldn't re-submit already
 3839 * written data at all.
 3840 * This function will look up the subpage dirty bits to find which range we
 3841 * really need to submit.
3842 *
3843 * Return the next dirty range in [@start, @end).
3844 * If no dirty range is found, @start will be page_offset(page) + PAGE_SIZE.
3845 */
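/*
 * Illustrative example, assuming 64K pages and 4K sectorsize: the page covers
 * 16 sectors, so 16 dirty bits starting at spi->dirty_offset.  If only
 * sectors 3-5 are dirty and @start points into sector 0, the returned range
 * is [page_offset(page) + 12K, page_offset(page) + 24K).
 */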
3846static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
3847 struct page *page, u64 *start, u64 *end)
3848{
3849 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
Qu Wenruo72a69cd2021-08-17 17:38:52 +08003850 struct btrfs_subpage_info *spi = fs_info->subpage_info;
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003851 u64 orig_start = *start;
3852 /* Declare as unsigned long so we can use bitmap ops */
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003853 unsigned long flags;
Qu Wenruo72a69cd2021-08-17 17:38:52 +08003854 int range_start_bit;
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003855 int range_end_bit;
3856
3857 /*
3858 * For regular sector size == page size case, since one page only
3859 * contains one sector, we return the page offset directly.
3860 */
3861 if (fs_info->sectorsize == PAGE_SIZE) {
3862 *start = page_offset(page);
3863 *end = page_offset(page) + PAGE_SIZE;
3864 return;
3865 }
3866
Qu Wenruo72a69cd2021-08-17 17:38:52 +08003867 range_start_bit = spi->dirty_offset +
3868 (offset_in_page(orig_start) >> fs_info->sectorsize_bits);
3869
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003870 /* We should have the page locked, but just in case */
3871 spin_lock_irqsave(&subpage->lock, flags);
Qu Wenruo72a69cd2021-08-17 17:38:52 +08003872 bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
3873 spi->dirty_offset + spi->bitmap_nr_bits);
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003874 spin_unlock_irqrestore(&subpage->lock, flags);
3875
Qu Wenruo72a69cd2021-08-17 17:38:52 +08003876 range_start_bit -= spi->dirty_offset;
3877 range_end_bit -= spi->dirty_offset;
3878
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003879 *start = page_offset(page) + range_start_bit * fs_info->sectorsize;
3880 *end = page_offset(page) + range_end_bit * fs_info->sectorsize;
3881}
3882
3883/*
Chris Mason40f76582014-05-21 13:35:51 -07003884 * helper for __extent_writepage. This calls the writepage start hooks,
3885 * and does the loop to map the page into extents and bios.
3886 *
3887 * We return 1 if the IO is started and the page is unlocked,
3888 * 0 if all went well (page still locked)
3889 * < 0 if there were errors (page still locked)
3890 */
Nikolay Borisovd4580fe2020-06-03 08:55:33 +03003891static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
Chris Mason40f76582014-05-21 13:35:51 -07003892 struct page *page,
3893 struct writeback_control *wbc,
3894 struct extent_page_data *epd,
3895 loff_t i_size,
David Sterba57e5ffe2019-10-29 18:28:55 +01003896 int *nr_ret)
Chris Mason40f76582014-05-21 13:35:51 -07003897{
Qu Wenruo6bc56362021-01-06 09:01:41 +08003898 struct btrfs_fs_info *fs_info = inode->root->fs_info;
Qu Wenruoa129ffb2021-07-27 13:41:32 +08003899 u64 cur = page_offset(page);
3900 u64 end = cur + PAGE_SIZE - 1;
Chris Masond1310b22008-01-24 16:13:08 -05003901 u64 extent_offset;
Chris Masond1310b22008-01-24 16:13:08 -05003902 u64 block_start;
Chris Masond1310b22008-01-24 16:13:08 -05003903 struct extent_map *em;
Chris Mason40f76582014-05-21 13:35:51 -07003904 int ret = 0;
3905 int nr = 0;
Naohiro Aotad8e3fb12021-02-04 19:22:05 +09003906 u32 opf = REQ_OP_WRITE;
David Sterba57e5ffe2019-10-29 18:28:55 +01003907 const unsigned int write_flags = wbc_to_write_flags(wbc);
Chris Mason40f76582014-05-21 13:35:51 -07003908 bool compressed;
Chris Masond1310b22008-01-24 16:13:08 -05003909
Qu Wenruoa129ffb2021-07-27 13:41:32 +08003910 ret = btrfs_writepage_cow_fixup(page);
Nikolay Borisovd75855b2018-11-01 14:09:47 +02003911 if (ret) {
3912 /* Fixup worker will requeue */
Josef Bacik5ab58052020-01-21 11:51:43 -05003913 redirty_page_for_writepage(wbc, page);
Nikolay Borisovd75855b2018-11-01 14:09:47 +02003914 unlock_page(page);
3915 return 1;
Chris Mason247e7432008-07-17 12:53:51 -04003916 }
3917
Chris Mason11c83492009-04-20 15:50:09 -04003918 /*
3919 * we don't want to touch the inode after unlocking the page,
3920 * so we update the mapping writeback index now
3921 */
Qu Wenruo83f1b682021-11-12 13:33:14 +08003922 update_nr_written(wbc, 1);
Chris Mason771ed682008-11-06 22:02:51 -05003923
Chris Masond1310b22008-01-24 16:13:08 -05003924 while (cur <= end) {
Qu Wenruo0c64c332021-01-06 09:01:40 +08003925 u64 disk_bytenr;
Chris Mason40f76582014-05-21 13:35:51 -07003926 u64 em_end;
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003927 u64 dirty_range_start = cur;
3928 u64 dirty_range_end;
Qu Wenruo6bc56362021-01-06 09:01:41 +08003929 u32 iosize;
David Sterba58409ed2016-05-04 11:46:10 +02003930
Chris Mason40f76582014-05-21 13:35:51 -07003931 if (cur >= i_size) {
Qu Wenruo38a39ac72021-04-08 20:32:27 +08003932 btrfs_writepage_endio_finish_ordered(inode, page, cur,
David Sterba25c12522021-07-26 14:15:08 +02003933 end, true);
Qu Wenruocc1d0d92021-07-26 14:34:58 +08003934 /*
3935 * This range is beyond i_size, thus we don't need to
3936 * bother writing back.
3937 * But we still need to clear the dirty subpage bit, or
3938 * the next time the page gets dirtied, we will try to
3939 * writeback the sectors with subpage dirty bits,
3940 * causing writeback without ordered extent.
3941 */
3942 btrfs_page_clear_dirty(fs_info, page, cur, end + 1 - cur);
Chris Masond1310b22008-01-24 16:13:08 -05003943 break;
3944 }
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003945
3946 find_next_dirty_byte(fs_info, page, &dirty_range_start,
3947 &dirty_range_end);
3948 if (cur < dirty_range_start) {
3949 cur = dirty_range_start;
3950 continue;
3951 }
3952
Nikolay Borisovd4580fe2020-06-03 08:55:33 +03003953 em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
David Sterbac7040052011-04-19 18:00:01 +02003954 if (IS_ERR_OR_NULL(em)) {
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003955 btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
Filipe Manana61391d52014-05-09 17:17:40 +01003956 ret = PTR_ERR_OR_ZERO(em);
Chris Masond1310b22008-01-24 16:13:08 -05003957 break;
3958 }
3959
3960 extent_offset = cur - em->start;
Chris Mason40f76582014-05-21 13:35:51 -07003961 em_end = extent_map_end(em);
Qu Wenruo6bc56362021-01-06 09:01:41 +08003962 ASSERT(cur <= em_end);
3963 ASSERT(cur < end);
3964 ASSERT(IS_ALIGNED(em->start, fs_info->sectorsize));
3965 ASSERT(IS_ALIGNED(em->len, fs_info->sectorsize));
Chris Masond1310b22008-01-24 16:13:08 -05003966 block_start = em->block_start;
Chris Masonc8b97812008-10-29 14:49:59 -04003967 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
Qu Wenruo6bc56362021-01-06 09:01:41 +08003968 disk_bytenr = em->block_start + extent_offset;
3969
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08003970 /*
3971 * Note that em_end from extent_map_end() and dirty_range_end from
3972 * find_next_dirty_byte() are all exclusive
3973 */
3974 iosize = min(min(em_end, end + 1), dirty_range_end) - cur;
Naohiro Aotad8e3fb12021-02-04 19:22:05 +09003975
Johannes Thumshirne380adf2021-05-19 00:40:27 +09003976 if (btrfs_use_zone_append(inode, em->block_start))
Naohiro Aotad8e3fb12021-02-04 19:22:05 +09003977 opf = REQ_OP_ZONE_APPEND;
3978
Chris Masond1310b22008-01-24 16:13:08 -05003979 free_extent_map(em);
3980 em = NULL;
3981
Chris Masonc8b97812008-10-29 14:49:59 -04003982 /*
3983 * compressed and inline extents are written through other
3984 * paths in the FS
3985 */
3986 if (compressed || block_start == EXTENT_MAP_HOLE ||
Chris Masond1310b22008-01-24 16:13:08 -05003987 block_start == EXTENT_MAP_INLINE) {
Omar Sandovalc8b04032019-12-02 17:34:24 -08003988 if (compressed)
Chris Masonc8b97812008-10-29 14:49:59 -04003989 nr++;
Omar Sandovalc8b04032019-12-02 17:34:24 -08003990 else
Qu Wenruo38a39ac72021-04-08 20:32:27 +08003991 btrfs_writepage_endio_finish_ordered(inode,
David Sterba25c12522021-07-26 14:15:08 +02003992 page, cur, cur + iosize - 1, true);
Qu Wenruocc1d0d92021-07-26 14:34:58 +08003993 btrfs_page_clear_dirty(fs_info, page, cur, iosize);
Chris Masonc8b97812008-10-29 14:49:59 -04003994 cur += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05003995 continue;
3996 }
Chris Masonc8b97812008-10-29 14:49:59 -04003997
Qu Wenruod2a91062021-05-31 16:50:49 +08003998 btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
David Sterba58409ed2016-05-04 11:46:10 +02003999 if (!PageWriteback(page)) {
Nikolay Borisovd4580fe2020-06-03 08:55:33 +03004000 btrfs_err(inode->root->fs_info,
David Sterba58409ed2016-05-04 11:46:10 +02004001 "page %lu not writeback, cur %llu end %llu",
4002 page->index, cur, end);
Chris Masond1310b22008-01-24 16:13:08 -05004003 }
David Sterba58409ed2016-05-04 11:46:10 +02004004
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08004005 /*
4006 * Although the PageDirty bit is cleared before entering this
4007 * function, subpage dirty bit is not cleared.
4008 * So clear subpage dirty bit here so next time we won't submit
4009 * page for range already written to disk.
4010 */
4011 btrfs_page_clear_dirty(fs_info, page, cur, iosize);
4012
Qu Wenruo390ed292021-04-14 16:42:15 +08004013 ret = submit_extent_page(opf | write_flags, wbc,
4014 &epd->bio_ctrl, page,
Naohiro Aotad8e3fb12021-02-04 19:22:05 +09004015 disk_bytenr, iosize,
Qu Wenruo390ed292021-04-14 16:42:15 +08004016 cur - page_offset(page),
David Sterba58409ed2016-05-04 11:46:10 +02004017 end_bio_extent_writepage,
Qu Wenruo390ed292021-04-14 16:42:15 +08004018 0, 0, false);
Takafumi Kubotafe01aa62017-02-09 17:24:33 +09004019 if (ret) {
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08004020 btrfs_page_set_error(fs_info, page, cur, iosize);
Takafumi Kubotafe01aa62017-02-09 17:24:33 +09004021 if (PageWriteback(page))
Qu Wenruoc5ef5c62021-05-31 16:50:50 +08004022 btrfs_page_clear_writeback(fs_info, page, cur,
4023 iosize);
Takafumi Kubotafe01aa62017-02-09 17:24:33 +09004024 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04004025
Qu Wenruo6bc56362021-01-06 09:01:41 +08004026 cur += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05004027 nr++;
4028 }
Qu Wenruocc1d0d92021-07-26 14:34:58 +08004029 /*
4030 * If we finish without problem, we should not only clear page dirty,
4031 * but also empty subpage dirty bits
4032 */
4033 if (!ret)
4034 btrfs_page_assert_not_dirty(fs_info, page);
Chris Mason40f76582014-05-21 13:35:51 -07004035 *nr_ret = nr;
Chris Mason40f76582014-05-21 13:35:51 -07004036 return ret;
4037}
4038
4039/*
4040 * the writepage semantics are similar to regular writepage. extent
4041 * records are inserted to lock ranges in the tree, and as dirty areas
4042 * are found, they are marked writeback. Then the lock bits are removed
4043 * and the end_io handler clears the writeback ranges
Qu Wenruo30659762019-03-20 14:27:42 +08004044 *
4045 * Return 0 if everything goes well.
4046 * Return <0 for error.
Chris Mason40f76582014-05-21 13:35:51 -07004047 */
4048static int __extent_writepage(struct page *page, struct writeback_control *wbc,
David Sterbaaab6e9e2017-11-30 18:00:02 +01004049 struct extent_page_data *epd)
Chris Mason40f76582014-05-21 13:35:51 -07004050{
4051 struct inode *inode = page->mapping->host;
Qu Wenruoe55a0de2021-09-27 15:22:05 +08004052 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Qu Wenruocf3075f2021-09-27 15:21:44 +08004053 const u64 page_start = page_offset(page);
4054 const u64 page_end = page_start + PAGE_SIZE - 1;
Chris Mason40f76582014-05-21 13:35:51 -07004055 int ret;
4056 int nr = 0;
Omar Sandovaleb70d222019-12-02 17:34:20 -08004057 size_t pg_offset;
Chris Mason40f76582014-05-21 13:35:51 -07004058 loff_t i_size = i_size_read(inode);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004059 unsigned long end_index = i_size >> PAGE_SHIFT;
Chris Mason40f76582014-05-21 13:35:51 -07004060
Chris Mason40f76582014-05-21 13:35:51 -07004061 trace___extent_writepage(page, inode, wbc);
4062
4063 WARN_ON(!PageLocked(page));
4064
Qu Wenruo963e4db2021-07-26 14:35:07 +08004065 btrfs_page_clear_error(btrfs_sb(inode->i_sb), page,
4066 page_offset(page), PAGE_SIZE);
Chris Mason40f76582014-05-21 13:35:51 -07004067
Johannes Thumshirn70730172018-12-05 15:23:03 +01004068 pg_offset = offset_in_page(i_size);
Chris Mason40f76582014-05-21 13:35:51 -07004069 if (page->index > end_index ||
4070 (page->index == end_index && !pg_offset)) {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004071 page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
Chris Mason40f76582014-05-21 13:35:51 -07004072 unlock_page(page);
4073 return 0;
4074 }
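	/*
	 * Example for the check above, assuming 4K pages: with i_size == 8K,
	 * end_index is 2 and pg_offset is 0, so a page at index 2 lies
	 * entirely beyond EOF and is invalidated and skipped.
	 */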
4075
4076 if (page->index == end_index) {
Ira Weinyd048b9c2021-05-04 18:40:07 -07004077 memzero_page(page, pg_offset, PAGE_SIZE - pg_offset);
Chris Mason40f76582014-05-21 13:35:51 -07004078 flush_dcache_page(page);
4079 }
4080
Qu Wenruo32443de2021-01-26 16:34:00 +08004081 ret = set_page_extent_mapped(page);
4082 if (ret < 0) {
4083 SetPageError(page);
4084 goto done;
4085 }
Chris Mason40f76582014-05-21 13:35:51 -07004086
Nikolay Borisov7789a552018-11-08 10:18:06 +02004087 if (!epd->extent_locked) {
Qu Wenruo83f1b682021-11-12 13:33:14 +08004088 ret = writepage_delalloc(BTRFS_I(inode), page, wbc);
Nikolay Borisov7789a552018-11-08 10:18:06 +02004089 if (ret == 1)
Omar Sandoval169d2c82019-12-02 17:34:21 -08004090 return 0;
Nikolay Borisov7789a552018-11-08 10:18:06 +02004091 if (ret)
4092 goto done;
4093 }
Chris Mason40f76582014-05-21 13:35:51 -07004094
Nikolay Borisovd4580fe2020-06-03 08:55:33 +03004095 ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size,
Qu Wenruo83f1b682021-11-12 13:33:14 +08004096 &nr);
Chris Mason40f76582014-05-21 13:35:51 -07004097 if (ret == 1)
Omar Sandoval169d2c82019-12-02 17:34:21 -08004098 return 0;
Chris Mason40f76582014-05-21 13:35:51 -07004099
4100done:
Chris Masond1310b22008-01-24 16:13:08 -05004101 if (nr == 0) {
4102 /* make sure the mapping tag for page dirty gets cleared */
4103 set_page_writeback(page);
4104 end_page_writeback(page);
4105 }
Qu Wenruo963e4db2021-07-26 14:35:07 +08004106 /*
4107 * Here we used to have a check for PageError() and then set @ret and
4108 * call end_extent_writepage().
4109 *
4110 * But in fact setting @ret here will cause different error paths
4111 * between subpage and regular sectorsize.
4112 *
4113 * For regular page size, we never submit current page, but only add
4114 * current page to current bio.
4115 * The bio submission can only happen in next page.
4116 * Thus if we hit the PageError() branch, @ret is already set to
4117 * non-zero value and will not get updated for regular sectorsize.
4118 *
4119 * But for subpage case, it's possible we submit part of current page,
4120 * thus can get PageError() set by submitted bio of the same page,
4121 * while our @ret is still 0.
4122 *
4123 * So here we unify the behavior and don't set @ret.
4124 * Error can still be properly passed to higher layer as page will
4125 * be set error, here we just don't handle the IO failure.
4126 *
4127 * NOTE: This is just a hotfix for subpage.
4128 * The root fix will be properly ending ordered extent when we hit
4129 * an error during writeback.
4130 *
4131 * But that needs a bigger refactoring, as we not only need to grab the
4132 * submitted OE, but also need to know exactly at which bytenr we hit
4133 * the error.
4134 * Currently the full page based __extent_writepage_io() is not
4135 * capable of that.
4136 */
4137 if (PageError(page))
Qu Wenruocf3075f2021-09-27 15:21:44 +08004138 end_extent_writepage(page, ret, page_start, page_end);
Qu Wenruoe55a0de2021-09-27 15:22:05 +08004139 if (epd->extent_locked) {
4140 /*
4141 * If epd->extent_locked, it's from extent_write_locked_range(),
4142 * the page can either be locked by lock_page() or
4143 * process_one_page().
4144 * Let btrfs_page_unlock_writer() handle both cases.
4145 */
4146 ASSERT(wbc);
4147 btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
4148 wbc->range_end + 1 - wbc->range_start);
4149 } else {
4150 unlock_page(page);
4151 }
Qu Wenruo30659762019-03-20 14:27:42 +08004152 ASSERT(ret <= 0);
Chris Mason40f76582014-05-21 13:35:51 -07004153 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05004154}
4155
Josef Bacikfd8b2b62013-04-24 16:41:19 -04004156void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004157{
NeilBrown74316202014-07-07 15:16:04 +10004158 wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK,
4159 TASK_UNINTERRUPTIBLE);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004160}
4161
Filipe Manana18dfa712019-09-11 17:42:00 +01004162static void end_extent_buffer_writeback(struct extent_buffer *eb)
4163{
Naohiro Aotabe1a1d72021-08-19 21:19:23 +09004164 if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
4165 btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
4166
Filipe Manana18dfa712019-09-11 17:42:00 +01004167 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
4168 smp_mb__after_atomic();
4169 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
4170}
4171
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004172/*
Qu Wenruoa3efb2f2020-10-21 14:24:49 +08004173 * Lock extent buffer status and pages for writeback.
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004174 *
Qu Wenruoa3efb2f2020-10-21 14:24:49 +08004175 * May try to flush write bio if we can't get the lock.
4176 *
4177 * Return 0 if the extent buffer doesn't need to be submitted.
4178 * (E.g. the extent buffer is not dirty)
 4179 * Return >0 if the extent buffer is submitted to bio.
4180 * Return <0 if something went wrong, no page is locked.
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004181 */
David Sterba9df76fb2019-03-20 11:21:41 +01004182static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
Chris Mason0e378df2014-05-19 20:55:27 -07004183 struct extent_page_data *epd)
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004184{
David Sterba9df76fb2019-03-20 11:21:41 +01004185 struct btrfs_fs_info *fs_info = eb->fs_info;
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004186 int i, num_pages, failed_page_nr;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004187 int flush = 0;
4188 int ret = 0;
4189
4190 if (!btrfs_try_tree_write_lock(eb)) {
Qu Wenruof4340622019-03-20 14:27:41 +08004191 ret = flush_write_bio(epd);
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004192 if (ret < 0)
4193 return ret;
4194 flush = 1;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004195 btrfs_tree_lock(eb);
4196 }
4197
4198 if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
4199 btrfs_tree_unlock(eb);
4200 if (!epd->sync_io)
4201 return 0;
4202 if (!flush) {
Qu Wenruof4340622019-03-20 14:27:41 +08004203 ret = flush_write_bio(epd);
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004204 if (ret < 0)
4205 return ret;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004206 flush = 1;
4207 }
Chris Masona098d8e82012-03-21 12:09:56 -04004208 while (1) {
4209 wait_on_extent_buffer_writeback(eb);
4210 btrfs_tree_lock(eb);
4211 if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
4212 break;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004213 btrfs_tree_unlock(eb);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004214 }
4215 }
4216
Josef Bacik51561ff2012-07-20 16:25:24 -04004217 /*
4218 * We need to do this to prevent races in people who check if the eb is
4219 * under IO since we can end up having no IO bits set for a short period
4220 * of time.
4221 */
4222 spin_lock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004223 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
4224 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
Josef Bacik51561ff2012-07-20 16:25:24 -04004225 spin_unlock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004226 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
Nikolay Borisov104b4e52017-06-20 21:01:20 +03004227 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
4228 -eb->len,
4229 fs_info->dirty_metadata_batch);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004230 ret = 1;
Josef Bacik51561ff2012-07-20 16:25:24 -04004231 } else {
4232 spin_unlock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004233 }
4234
4235 btrfs_tree_unlock(eb);
4236
Qu Wenruof3156df2021-04-06 08:36:02 +08004237 /*
4238 * Either we don't need to submit any tree block, or we're submitting
4239 * subpage eb.
4240 * Subpage metadata doesn't use page locking at all, so we can skip
4241 * the page locking.
4242 */
4243 if (!ret || fs_info->sectorsize < PAGE_SIZE)
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004244 return ret;
4245
David Sterba65ad0102018-06-29 10:56:49 +02004246 num_pages = num_extent_pages(eb);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004247 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02004248 struct page *p = eb->pages[i];
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004249
4250 if (!trylock_page(p)) {
4251 if (!flush) {
Filipe Manana18dfa712019-09-11 17:42:00 +01004252 int err;
4253
4254 err = flush_write_bio(epd);
4255 if (err < 0) {
4256 ret = err;
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004257 failed_page_nr = i;
4258 goto err_unlock;
4259 }
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004260 flush = 1;
4261 }
4262 lock_page(p);
4263 }
4264 }
4265
4266 return ret;
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004267err_unlock:
4268 /* Unlock already locked pages */
4269 for (i = 0; i < failed_page_nr; i++)
4270 unlock_page(eb->pages[i]);
Filipe Manana18dfa712019-09-11 17:42:00 +01004271 /*
4272 * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it.
4273 * Also set back EXTENT_BUFFER_DIRTY so future attempts to this eb can
4274 * be made and undo everything done before.
4275 */
4276 btrfs_tree_lock(eb);
4277 spin_lock(&eb->refs_lock);
4278 set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
4279 end_extent_buffer_writeback(eb);
4280 spin_unlock(&eb->refs_lock);
4281 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
4282 fs_info->dirty_metadata_batch);
4283 btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
4284 btrfs_tree_unlock(eb);
Qu Wenruo2e3c2512019-03-20 14:27:46 +08004285 return ret;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004286}
4287
Qu Wenruo5a2c6072021-03-25 15:14:44 +08004288static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
Filipe Manana656f30d2014-09-26 12:25:56 +01004289{
Qu Wenruo5a2c6072021-03-25 15:14:44 +08004290 struct btrfs_fs_info *fs_info = eb->fs_info;
Filipe Manana656f30d2014-09-26 12:25:56 +01004291
Qu Wenruo5a2c6072021-03-25 15:14:44 +08004292 btrfs_page_set_error(fs_info, page, eb->start, eb->len);
Filipe Manana656f30d2014-09-26 12:25:56 +01004293 if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
4294 return;
4295
4296 /*
Josef Bacikc2e39302021-11-24 14:14:23 -05004297 * A read may stumble upon this buffer later, make sure that it gets an
4298 * error and knows there was an error.
4299 */
4300 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
4301
4302 /*
Josef Bacik68b85582021-11-24 14:14:25 -05004303 * We need to set the mapping with the io error as well because a write
4304 * error will flip the file system readonly, and then syncfs() will
4305 * return a 0 because we are readonly if we don't modify the err seq for
4306 * the superblock.
4307 */
4308 mapping_set_error(page->mapping, -EIO);
4309
4310 /*
Dennis Zhoueb5b64f2019-09-13 14:54:07 +01004311 * If we error out, we should add back the dirty_metadata_bytes
4312 * to make it consistent.
4313 */
Dennis Zhoueb5b64f2019-09-13 14:54:07 +01004314 percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
4315 eb->len, fs_info->dirty_metadata_batch);
4316
4317 /*
Filipe Manana656f30d2014-09-26 12:25:56 +01004318 * If writeback for a btree extent that doesn't belong to a log tree
4319 * failed, increment the counter transaction->eb_write_errors.
4320 * We do this because while the transaction is running and before it's
4321 * committing (when we call filemap_fdata[write|wait]_range against
4322 * the btree inode), we might have
4323 * btree_inode->i_mapping->a_ops->writepages() called by the VM - if it
4324 * returns an error or an error happens during writeback, when we're
4325 * committing the transaction we wouldn't know about it, since the pages
4326 * can be no longer dirty nor marked anymore for writeback (if a
4327 * subsequent modification to the extent buffer didn't happen before the
4328 * transaction commit), which makes filemap_fdata[write|wait]_range not
4329 * able to find the pages tagged with SetPageError at transaction
4330 * commit time. So if this happens we must abort the transaction,
4331 * otherwise we commit a super block with btree roots that point to
4332 * btree nodes/leafs whose content on disk is invalid - either garbage
4333 * or the content of some node/leaf from a past generation that got
4334 * cowed or deleted and is no longer valid.
4335 *
4336 * Note: setting AS_EIO/AS_ENOSPC in the btree inode's i_mapping would
4337 * not be enough - we need to distinguish between log tree extents vs
4338 * non-log tree extents, and the next filemap_fdatawait_range() call
4339 * will catch and clear such errors in the mapping - and that call might
4340 * be from a log sync and not from a transaction commit. Also, checking
4341 * for the eb flag EXTENT_BUFFER_WRITE_ERR at transaction commit time is
4342 * not done and would not be reliable - the eb might have been released
4343 * from memory and reading it back again means that flag would not be
4344 * set (since it's a runtime flag, not persisted on disk).
4345 *
4346 * Using the flags below in the btree inode also makes us achieve the
4347 * goal of AS_EIO/AS_ENOSPC when writepages() returns success, started
4348 * writeback for all dirty pages and before filemap_fdatawait_range()
4349 * is called, the writeback for all dirty pages had already finished
4350 * with errors - because we were not using AS_EIO/AS_ENOSPC,
4351 * filemap_fdatawait_range() would return success, as it could not know
4352 * that writeback errors happened (the pages were no longer tagged for
4353 * writeback).
4354 */
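	/*
	 * eb->log_index is -1 for a regular btree block and 0 or 1 for a
	 * block belonging to one of the two log tree roots; each case gets
	 * its own error flag below.
	 */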
4355 switch (eb->log_index) {
4356 case -1:
Qu Wenruo5a2c6072021-03-25 15:14:44 +08004357 set_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags);
Filipe Manana656f30d2014-09-26 12:25:56 +01004358 break;
4359 case 0:
Qu Wenruo5a2c6072021-03-25 15:14:44 +08004360 set_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
Filipe Manana656f30d2014-09-26 12:25:56 +01004361 break;
4362 case 1:
Qu Wenruo5a2c6072021-03-25 15:14:44 +08004363 set_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);
Filipe Manana656f30d2014-09-26 12:25:56 +01004364 break;
4365 default:
4366 BUG(); /* unexpected, logic error */
4367 }
4368}
4369
Qu Wenruo2f3186d2021-04-06 08:36:00 +08004370/*
4371 * The endio specific version which won't touch any unsafe spinlock in endio
4372 * context.
4373 */
4374static struct extent_buffer *find_extent_buffer_nolock(
4375 struct btrfs_fs_info *fs_info, u64 start)
4376{
4377 struct extent_buffer *eb;
4378
4379 rcu_read_lock();
4380 eb = radix_tree_lookup(&fs_info->buffer_radix,
4381 start >> fs_info->sectorsize_bits);
4382 if (eb && atomic_inc_not_zero(&eb->refs)) {
4383 rcu_read_unlock();
4384 return eb;
4385 }
4386 rcu_read_unlock();
4387 return NULL;
4388}
4389
4390/*
4391 * The endio function for subpage extent buffer write.
4392 *
4393 * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
4394 * after all extent buffers in the page has finished their writeback.
4395 */
Qu Wenruofa04c162021-04-27 12:53:35 +08004396static void end_bio_subpage_eb_writepage(struct bio *bio)
Qu Wenruo2f3186d2021-04-06 08:36:00 +08004397{
Qu Wenruofa04c162021-04-27 12:53:35 +08004398 struct btrfs_fs_info *fs_info;
Qu Wenruo2f3186d2021-04-06 08:36:00 +08004399 struct bio_vec *bvec;
4400 struct bvec_iter_all iter_all;
4401
Qu Wenruofa04c162021-04-27 12:53:35 +08004402 fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
4403 ASSERT(fs_info->sectorsize < PAGE_SIZE);
4404
Qu Wenruo2f3186d2021-04-06 08:36:00 +08004405 ASSERT(!bio_flagged(bio, BIO_CLONED));
4406 bio_for_each_segment_all(bvec, bio, iter_all) {
4407 struct page *page = bvec->bv_page;
4408 u64 bvec_start = page_offset(page) + bvec->bv_offset;
4409 u64 bvec_end = bvec_start + bvec->bv_len - 1;
4410 u64 cur_bytenr = bvec_start;
4411
4412 ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
4413
4414 /* Iterate through all extent buffers in the range */
4415 while (cur_bytenr <= bvec_end) {
4416 struct extent_buffer *eb;
4417 int done;
4418
4419 /*
4420 * Here we can't use find_extent_buffer(), as it may
4421 * try to lock eb->refs_lock, which is not safe in endio
4422 * context.
4423 */
4424 eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
4425 ASSERT(eb);
4426
4427 cur_bytenr = eb->start + eb->len;
4428
4429 ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
4430 done = atomic_dec_and_test(&eb->io_pages);
4431 ASSERT(done);
4432
4433 if (bio->bi_status ||
4434 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
4435 ClearPageUptodate(page);
4436 set_btree_ioerr(page, eb);
4437 }
4438
4439 btrfs_subpage_clear_writeback(fs_info, page, eb->start,
4440 eb->len);
4441 end_extent_buffer_writeback(eb);
4442 /*
4443 * free_extent_buffer() will grab spinlock which is not
4444 * safe in endio context. Thus here we manually dec
4445 * the ref.
4446 */
4447 atomic_dec(&eb->refs);
4448 }
4449 }
4450 bio_put(bio);
4451}
4452
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02004453static void end_bio_extent_buffer_writepage(struct bio *bio)
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004454{
Kent Overstreet2c30c712013-11-07 12:20:26 -08004455 struct bio_vec *bvec;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004456 struct extent_buffer *eb;
Christoph Hellwig2b070cf2019-04-25 09:03:00 +02004457 int done;
Ming Lei6dc4f102019-02-15 19:13:19 +08004458 struct bvec_iter_all iter_all;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004459
David Sterbac09abff2017-07-13 18:10:07 +02004460 ASSERT(!bio_flagged(bio, BIO_CLONED));
Christoph Hellwig2b070cf2019-04-25 09:03:00 +02004461 bio_for_each_segment_all(bvec, bio, iter_all) {
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004462 struct page *page = bvec->bv_page;
4463
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004464 eb = (struct extent_buffer *)page->private;
4465 BUG_ON(!eb);
4466 done = atomic_dec_and_test(&eb->io_pages);
4467
Christoph Hellwig4e4cbee2017-06-03 09:38:06 +02004468 if (bio->bi_status ||
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02004469 test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004470 ClearPageUptodate(page);
Qu Wenruo5a2c6072021-03-25 15:14:44 +08004471 set_btree_ioerr(page, eb);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004472 }
4473
4474 end_page_writeback(page);
4475
4476 if (!done)
4477 continue;
4478
4479 end_extent_buffer_writeback(eb);
Kent Overstreet2c30c712013-11-07 12:20:26 -08004480 }
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004481
4482 bio_put(bio);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004483}
4484
Qu Wenruofa04c162021-04-27 12:53:35 +08004485static void prepare_eb_write(struct extent_buffer *eb)
4486{
4487 u32 nritems;
4488 unsigned long start;
4489 unsigned long end;
4490
4491 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
4492 atomic_set(&eb->io_pages, num_extent_pages(eb));
4493
4494 /* Set btree blocks beyond nritems with 0 to avoid stale content */
4495 nritems = btrfs_header_nritems(eb);
4496 if (btrfs_header_level(eb) > 0) {
4497 end = btrfs_node_key_ptr_offset(nritems);
4498 memzero_extent_buffer(eb, end, eb->len - end);
4499 } else {
4500 /*
4501 * Leaf:
4502 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
4503 */
4504 start = btrfs_item_nr_offset(nritems);
4505 end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
4506 memzero_extent_buffer(eb, start, end - start);
4507 }
4508}
4509
Qu Wenruo35b6ddf2021-04-06 08:36:01 +08004510/*
4511 * Unlike the work in write_one_eb(), we rely completely on extent locking.
4512 * Page locking is only utilized at minimum to keep the VMM code happy.
Qu Wenruo35b6ddf2021-04-06 08:36:01 +08004513 */
4514static int write_one_subpage_eb(struct extent_buffer *eb,
4515 struct writeback_control *wbc,
4516 struct extent_page_data *epd)
4517{
4518 struct btrfs_fs_info *fs_info = eb->fs_info;
4519 struct page *page = eb->pages[0];
4520 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
4521 bool no_dirty_ebs = false;
4522 int ret;
4523
Qu Wenruofa04c162021-04-27 12:53:35 +08004524 prepare_eb_write(eb);
4525
Qu Wenruo35b6ddf2021-04-06 08:36:01 +08004526 /* clear_page_dirty_for_io() in subpage helper needs page locked */
4527 lock_page(page);
4528 btrfs_subpage_set_writeback(fs_info, page, eb->start, eb->len);
4529
4530 /* Check if this is the last dirty bit to update nr_written */
4531 no_dirty_ebs = btrfs_subpage_clear_and_test_dirty(fs_info, page,
4532 eb->start, eb->len);
4533 if (no_dirty_ebs)
4534 clear_page_dirty_for_io(page);
4535
Qu Wenruo390ed292021-04-14 16:42:15 +08004536 ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
4537 &epd->bio_ctrl, page, eb->start, eb->len,
4538 eb->start - page_offset(page),
Qu Wenruofa04c162021-04-27 12:53:35 +08004539 end_bio_subpage_eb_writepage, 0, 0, false);
Qu Wenruo35b6ddf2021-04-06 08:36:01 +08004540 if (ret) {
4541 btrfs_subpage_clear_writeback(fs_info, page, eb->start, eb->len);
4542 set_btree_ioerr(page, eb);
4543 unlock_page(page);
4544
4545 if (atomic_dec_and_test(&eb->io_pages))
4546 end_extent_buffer_writeback(eb);
4547 return -EIO;
4548 }
4549 unlock_page(page);
4550 /*
4551 * Submission finished without problem, if no range of the page is
4552 * dirty anymore, we have submitted a page. Update nr_written in wbc.
4553 */
4554 if (no_dirty_ebs)
4555 update_nr_written(wbc, 1);
4556 return ret;
4557}
4558
Chris Mason0e378df2014-05-19 20:55:27 -07004559static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004560 struct writeback_control *wbc,
4561 struct extent_page_data *epd)
4562{
Qu Wenruo0c64c332021-01-06 09:01:40 +08004563 u64 disk_bytenr = eb->start;
David Sterbacc5e31a2018-03-01 18:20:27 +01004564 int i, num_pages;
Liu Boff40adf2017-08-24 18:19:48 -06004565 unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
Josef Bacikd7dbe9e2012-04-23 14:00:51 -04004566 int ret = 0;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004567
Qu Wenruofa04c162021-04-27 12:53:35 +08004568 prepare_eb_write(eb);
4569
David Sterba65ad0102018-06-29 10:56:49 +02004570 num_pages = num_extent_pages(eb);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004571 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02004572 struct page *p = eb->pages[i];
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004573
4574 clear_page_dirty_for_io(p);
4575 set_page_writeback(p);
David Sterba0ceb34b2020-02-05 19:09:28 +01004576 ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
Qu Wenruo390ed292021-04-14 16:42:15 +08004577 &epd->bio_ctrl, p, disk_bytenr,
4578 PAGE_SIZE, 0,
Mike Christie1f7ad752016-06-05 14:31:51 -05004579 end_bio_extent_buffer_writepage,
Qu Wenruo390ed292021-04-14 16:42:15 +08004580 0, 0, false);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004581 if (ret) {
Qu Wenruo5a2c6072021-03-25 15:14:44 +08004582 set_btree_ioerr(p, eb);
Takafumi Kubotafe01aa62017-02-09 17:24:33 +09004583 if (PageWriteback(p))
4584 end_page_writeback(p);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004585 if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
4586 end_extent_buffer_writeback(eb);
4587 ret = -EIO;
4588 break;
4589 }
Qu Wenruo0c64c332021-01-06 09:01:40 +08004590 disk_bytenr += PAGE_SIZE;
David Sterba3d4b9492017-02-10 19:33:41 +01004591 update_nr_written(wbc, 1);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004592 unlock_page(p);
4593 }
4594
4595 if (unlikely(ret)) {
4596 for (; i < num_pages; i++) {
Chris Masonbbf65cf2014-10-04 09:56:45 -07004597 struct page *p = eb->pages[i];
Liu Bo81465022014-09-23 22:22:33 +08004598 clear_page_dirty_for_io(p);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004599 unlock_page(p);
4600 }
4601 }
4602
4603 return ret;
4604}
4605
Qu Wenruof91e0d02020-12-02 14:48:00 +08004606/*
Qu Wenruoc4aec292021-04-06 08:36:03 +08004607 * Submit one subpage btree page.
4608 *
4609 * The main difference to submit_eb_page() is:
4610 * - Page locking
4611 * For subpage, we don't rely on page locking at all.
4612 *
4613 * - Flush write bio
4614 * We only flush bio if we may be unable to fit current extent buffers into
4615 * current bio.
4616 *
4617 * Return >=0 for the number of submitted extent buffers.
4618 * Return <0 for fatal error.
4619 */
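/*
 * Illustrative example, assuming 4K sectorsize, 16K nodesize and 64K pages:
 * a page can hold up to four extent buffers and sectors_per_node is 4, so
 * the loop below scans the subpage dirty bitmap and, each time it finds a
 * dirty bit, submits the eb starting there and advances bit_start by 4.
 */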
4620static int submit_eb_subpage(struct page *page,
4621 struct writeback_control *wbc,
4622 struct extent_page_data *epd)
4623{
4624 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
4625 int submitted = 0;
4626 u64 page_start = page_offset(page);
4627 int bit_start = 0;
Qu Wenruoc4aec292021-04-06 08:36:03 +08004628 int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
4629 int ret;
4630
4631 /* Lock and write each dirty extent buffers in the range */
Qu Wenruo72a69cd2021-08-17 17:38:52 +08004632 while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
Qu Wenruoc4aec292021-04-06 08:36:03 +08004633 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
4634 struct extent_buffer *eb;
4635 unsigned long flags;
4636 u64 start;
4637
4638 /*
4639 * Take private lock to ensure the subpage won't be detached
4640 * in the meantime.
4641 */
4642 spin_lock(&page->mapping->private_lock);
4643 if (!PagePrivate(page)) {
4644 spin_unlock(&page->mapping->private_lock);
4645 break;
4646 }
4647 spin_lock_irqsave(&subpage->lock, flags);
Qu Wenruo72a69cd2021-08-17 17:38:52 +08004648 if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
4649 subpage->bitmaps)) {
Qu Wenruoc4aec292021-04-06 08:36:03 +08004650 spin_unlock_irqrestore(&subpage->lock, flags);
4651 spin_unlock(&page->mapping->private_lock);
4652 bit_start++;
4653 continue;
4654 }
4655
4656 start = page_start + bit_start * fs_info->sectorsize;
4657 bit_start += sectors_per_node;
4658
4659 /*
4660 * Here we just want to grab the eb without touching extra
4661 * spin locks, so call find_extent_buffer_nolock().
4662 */
4663 eb = find_extent_buffer_nolock(fs_info, start);
4664 spin_unlock_irqrestore(&subpage->lock, flags);
4665 spin_unlock(&page->mapping->private_lock);
4666
4667 /*
4668 * The eb has already reached 0 refs thus find_extent_buffer()
4669 * doesn't return it. We don't need to write back such eb
4670 * anyway.
4671 */
4672 if (!eb)
4673 continue;
4674
4675 ret = lock_extent_buffer_for_io(eb, epd);
4676 if (ret == 0) {
4677 free_extent_buffer(eb);
4678 continue;
4679 }
4680 if (ret < 0) {
4681 free_extent_buffer(eb);
4682 goto cleanup;
4683 }
Qu Wenruofa04c162021-04-27 12:53:35 +08004684 ret = write_one_subpage_eb(eb, wbc, epd);
Qu Wenruoc4aec292021-04-06 08:36:03 +08004685 free_extent_buffer(eb);
4686 if (ret < 0)
4687 goto cleanup;
4688 submitted++;
4689 }
4690 return submitted;
4691
4692cleanup:
4693 /* We hit error, end bio for the submitted extent buffers */
4694 end_write_bio(epd, ret);
4695 return ret;
4696}
4697
4698/*
Qu Wenruof91e0d02020-12-02 14:48:00 +08004699 * Submit all page(s) of one extent buffer.
4700 *
4701 * @page: the page of one extent buffer
4702 * @eb_context: to determine if we need to submit this page, if current page
4703 * belongs to this eb, we don't need to submit
4704 *
4705 * The caller should pass each page in their bytenr order, and here we use
4706 * @eb_context to determine if we have submitted pages of one extent buffer.
4707 *
4708 * If we have, we just skip until we hit a new page that doesn't belong to
4709 * current @eb_context.
4710 *
4711 * If not, we submit all the page(s) of the extent buffer.
4712 *
4713 * Return >0 if we have submitted the extent buffer successfully.
4714 * Return 0 if we don't need to submit the page, as it's already submitted by
4715 * previous call.
4716 * Return <0 for fatal error.
4717 */
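/*
 * For example, assuming 4K pages and 16K nodesize, an extent buffer spans
 * four pages: the first of those pages submits the whole eb and records it
 * in @eb_context, so the remaining three pages of the same eb return 0
 * without doing any work.
 */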
4718static int submit_eb_page(struct page *page, struct writeback_control *wbc,
4719 struct extent_page_data *epd,
4720 struct extent_buffer **eb_context)
4721{
4722 struct address_space *mapping = page->mapping;
Naohiro Aota0bc09ca2021-02-04 19:22:08 +09004723 struct btrfs_block_group *cache = NULL;
Qu Wenruof91e0d02020-12-02 14:48:00 +08004724 struct extent_buffer *eb;
4725 int ret;
4726
4727 if (!PagePrivate(page))
4728 return 0;
4729
Qu Wenruoc4aec292021-04-06 08:36:03 +08004730 if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
4731 return submit_eb_subpage(page, wbc, epd);
4732
Qu Wenruof91e0d02020-12-02 14:48:00 +08004733 spin_lock(&mapping->private_lock);
4734 if (!PagePrivate(page)) {
4735 spin_unlock(&mapping->private_lock);
4736 return 0;
4737 }
4738
4739 eb = (struct extent_buffer *)page->private;
4740
4741 /*
4742 * Shouldn't happen and normally this would be a BUG_ON but no point
4743 * crashing the machine for something we can survive anyway.
4744 */
4745 if (WARN_ON(!eb)) {
4746 spin_unlock(&mapping->private_lock);
4747 return 0;
4748 }
4749
4750 if (eb == *eb_context) {
4751 spin_unlock(&mapping->private_lock);
4752 return 0;
4753 }
4754 ret = atomic_inc_not_zero(&eb->refs);
4755 spin_unlock(&mapping->private_lock);
4756 if (!ret)
4757 return 0;
4758
Naohiro Aota0bc09ca2021-02-04 19:22:08 +09004759 if (!btrfs_check_meta_write_pointer(eb->fs_info, eb, &cache)) {
4760 /*
 4761		 * If for_sync, this hole will be filled by a
 4762		 * transaction commit.
4763 */
4764 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
4765 ret = -EAGAIN;
4766 else
4767 ret = 0;
4768 free_extent_buffer(eb);
4769 return ret;
4770 }
4771
Qu Wenruof91e0d02020-12-02 14:48:00 +08004772 *eb_context = eb;
4773
4774 ret = lock_extent_buffer_for_io(eb, epd);
4775 if (ret <= 0) {
Naohiro Aota0bc09ca2021-02-04 19:22:08 +09004776 btrfs_revert_meta_write_pointer(cache, eb);
4777 if (cache)
4778 btrfs_put_block_group(cache);
Qu Wenruof91e0d02020-12-02 14:48:00 +08004779 free_extent_buffer(eb);
4780 return ret;
4781 }
Naohiro Aotabe1a1d72021-08-19 21:19:23 +09004782 if (cache) {
 4783		/* Implies write in zoned mode */
Naohiro Aota0bc09ca2021-02-04 19:22:08 +09004784 btrfs_put_block_group(cache);
Naohiro Aotabe1a1d72021-08-19 21:19:23 +09004785 /* Mark the last eb in a block group */
4786 if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
4787 set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
4788 }
Qu Wenruof91e0d02020-12-02 14:48:00 +08004789 ret = write_one_eb(eb, wbc, epd);
4790 free_extent_buffer(eb);
4791 if (ret < 0)
4792 return ret;
4793 return 1;
4794}
4795
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004796int btree_write_cache_pages(struct address_space *mapping,
4797 struct writeback_control *wbc)
4798{
Qu Wenruof91e0d02020-12-02 14:48:00 +08004799 struct extent_buffer *eb_context = NULL;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004800 struct extent_page_data epd = {
Qu Wenruo390ed292021-04-14 16:42:15 +08004801 .bio_ctrl = { 0 },
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004802 .extent_locked = 0,
4803 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
4804 };
Qu Wenruob3ff8f12020-02-12 14:12:44 +08004805 struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004806 int ret = 0;
4807 int done = 0;
4808 int nr_to_write_done = 0;
4809 struct pagevec pvec;
4810 int nr_pages;
4811 pgoff_t index;
4812 pgoff_t end; /* Inclusive */
4813 int scanned = 0;
Matthew Wilcox10bbd232017-12-05 17:30:38 -05004814 xa_mark_t tag;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004815
Mel Gorman86679822017-11-15 17:37:52 -08004816 pagevec_init(&pvec);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004817 if (wbc->range_cyclic) {
4818 index = mapping->writeback_index; /* Start from prev offset */
4819 end = -1;
Josef Bacik556755a2020-01-03 10:38:44 -05004820 /*
4821 * Start from the beginning does not need to cycle over the
4822 * range, mark it as scanned.
4823 */
4824 scanned = (index == 0);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004825 } else {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004826 index = wbc->range_start >> PAGE_SHIFT;
4827 end = wbc->range_end >> PAGE_SHIFT;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004828 scanned = 1;
4829 }
4830 if (wbc->sync_mode == WB_SYNC_ALL)
4831 tag = PAGECACHE_TAG_TOWRITE;
4832 else
4833 tag = PAGECACHE_TAG_DIRTY;
Naohiro Aota0bc09ca2021-02-04 19:22:08 +09004834 btrfs_zoned_meta_io_lock(fs_info);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004835retry:
4836 if (wbc->sync_mode == WB_SYNC_ALL)
4837 tag_pages_for_writeback(mapping, index, end);
4838 while (!done && !nr_to_write_done && (index <= end) &&
Jan Kara4006f432017-11-15 17:34:37 -08004839 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
Jan Kara67fd7072017-11-15 17:35:19 -08004840 tag))) {
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004841 unsigned i;
4842
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004843 for (i = 0; i < nr_pages; i++) {
4844 struct page *page = pvec.pages[i];
4845
Qu Wenruof91e0d02020-12-02 14:48:00 +08004846 ret = submit_eb_page(page, wbc, &epd, &eb_context);
4847 if (ret == 0)
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004848 continue;
Qu Wenruof91e0d02020-12-02 14:48:00 +08004849 if (ret < 0) {
Filipe Manana0607eb1d2019-09-11 17:42:28 +01004850 done = 1;
Filipe Manana0607eb1d2019-09-11 17:42:28 +01004851 break;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004852 }
4853
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004854 /*
4855 * the filesystem may choose to bump up nr_to_write.
4856 * We have to make sure to honor the new nr_to_write
4857 * at any time
4858 */
4859 nr_to_write_done = wbc->nr_to_write <= 0;
4860 }
4861 pagevec_release(&pvec);
4862 cond_resched();
4863 }
4864 if (!scanned && !done) {
4865 /*
4866 * We hit the last page and there is more work to be done: wrap
4867 * back to the start of the file
4868 */
4869 scanned = 1;
4870 index = 0;
4871 goto retry;
4872 }
Qu Wenruo2b952ee2019-03-20 14:27:43 +08004873 if (ret < 0) {
4874 end_write_bio(&epd, ret);
Naohiro Aota0bc09ca2021-02-04 19:22:08 +09004875 goto out;
Qu Wenruo2b952ee2019-03-20 14:27:43 +08004876 }
Qu Wenruob3ff8f12020-02-12 14:12:44 +08004877 /*
4878 * If something went wrong, don't allow any metadata write bio to be
4879 * submitted.
4880 *
4881 * This would prevent use-after-free if we had dirty pages not
 4882	 * cleaned up, which can still happen with fuzzed images.
4883 *
4884 * - Bad extent tree
 4885	 *   Allows an existing tree block to be allocated for other trees.
 4886	 *
 4887	 * - Log tree operations
 4888	 *   An existing tree block gets allocated to the log tree, bumps its
 4889	 *   generation, then gets cleaned in tree re-balance.
4890 * Such tree block will not be written back, since it's clean,
4891 * thus no WRITTEN flag set.
 4892	 *   After the log is written back, this tree block is no longer
 4893	 *   tracked by any dirty extent_io_tree.
 4894	 *
 4895	 * - Offending tree block gets re-dirtied from its original owner
 4896	 *   Since it has a bumped generation and no WRITTEN flag, it can be
 4897	 *   reused without COWing. This tree block will not be tracked
 4898	 *   by btrfs_transaction::dirty_pages.
4899 *
4900 * Now such dirty tree block will not be cleaned by any dirty
4901 * extent io tree. Thus we don't want to submit such wild eb
4902 * if the fs already has error.
4903 */
Josef Bacik84961532021-10-05 16:35:25 -04004904 if (!BTRFS_FS_ERROR(fs_info)) {
Qu Wenruob3ff8f12020-02-12 14:12:44 +08004905 ret = flush_write_bio(&epd);
4906 } else {
Josef Bacikfbabd4a2020-07-21 10:38:37 -04004907 ret = -EROFS;
Qu Wenruob3ff8f12020-02-12 14:12:44 +08004908 end_write_bio(&epd, ret);
4909 }
Naohiro Aota0bc09ca2021-02-04 19:22:08 +09004910out:
4911 btrfs_zoned_meta_io_unlock(fs_info);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04004912 return ret;
4913}
4914
Chris Masond1310b22008-01-24 16:13:08 -05004915/**
Nikolay Borisov3bed2da2021-01-22 11:58:03 +02004916 * Walk the list of dirty pages of the given address space and write all of them.
4917 *
Chris Masond1310b22008-01-24 16:13:08 -05004918 * @mapping: address space structure to write
Nikolay Borisov3bed2da2021-01-22 11:58:03 +02004919 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
4920 * @epd: holds context for the write, namely the bio
Chris Masond1310b22008-01-24 16:13:08 -05004921 *
4922 * If a page is already under I/O, write_cache_pages() skips it, even
4923 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
4924 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
4925 * and msync() need to guarantee that all the data which was dirty at the time
4926 * the call was made get new I/O started against them. If wbc->sync_mode is
4927 * WB_SYNC_ALL then we were called for data integrity and we must wait for
4928 * existing IO to complete.
4929 */
David Sterba4242b642017-02-10 19:38:24 +01004930static int extent_write_cache_pages(struct address_space *mapping,
Chris Mason4bef0842008-09-08 11:18:08 -04004931 struct writeback_control *wbc,
David Sterbaaab6e9e2017-11-30 18:00:02 +01004932 struct extent_page_data *epd)
Chris Masond1310b22008-01-24 16:13:08 -05004933{
Josef Bacik7fd1a3f2012-06-27 17:18:41 -04004934 struct inode *inode = mapping->host;
Chris Masond1310b22008-01-24 16:13:08 -05004935 int ret = 0;
4936 int done = 0;
Chris Masonf85d7d6c2009-09-18 16:03:16 -04004937 int nr_to_write_done = 0;
Chris Masond1310b22008-01-24 16:13:08 -05004938 struct pagevec pvec;
4939 int nr_pages;
4940 pgoff_t index;
4941 pgoff_t end; /* Inclusive */
Liu Boa91326672016-03-07 16:56:21 -08004942 pgoff_t done_index;
4943 int range_whole = 0;
Chris Masond1310b22008-01-24 16:13:08 -05004944 int scanned = 0;
Matthew Wilcox10bbd232017-12-05 17:30:38 -05004945 xa_mark_t tag;
Chris Masond1310b22008-01-24 16:13:08 -05004946
Josef Bacik7fd1a3f2012-06-27 17:18:41 -04004947 /*
4948 * We have to hold onto the inode so that ordered extents can do their
4949 * work when the IO finishes. The alternative to this is failing to add
4950 * an ordered extent if the igrab() fails there and that is a huge pain
4951 * to deal with, so instead just hold onto the inode throughout the
4952 * writepages operation. If it fails here we are freeing up the inode
4953 * anyway and we'd rather not waste our time writing out stuff that is
4954 * going to be truncated anyway.
4955 */
4956 if (!igrab(inode))
4957 return 0;
4958
Mel Gorman86679822017-11-15 17:37:52 -08004959 pagevec_init(&pvec);
Chris Masond1310b22008-01-24 16:13:08 -05004960 if (wbc->range_cyclic) {
4961 index = mapping->writeback_index; /* Start from prev offset */
4962 end = -1;
Josef Bacik556755a2020-01-03 10:38:44 -05004963 /*
4964 * Start from the beginning does not need to cycle over the
4965 * range, mark it as scanned.
4966 */
4967 scanned = (index == 0);
Chris Masond1310b22008-01-24 16:13:08 -05004968 } else {
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03004969 index = wbc->range_start >> PAGE_SHIFT;
4970 end = wbc->range_end >> PAGE_SHIFT;
Liu Boa91326672016-03-07 16:56:21 -08004971 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
4972 range_whole = 1;
Chris Masond1310b22008-01-24 16:13:08 -05004973 scanned = 1;
4974 }
Ethan Lien3cd24c62018-11-01 14:49:03 +08004975
4976 /*
4977 * We do the tagged writepage as long as the snapshot flush bit is set
4978 * and we are the first one who do the filemap_flush() on this inode.
4979 *
4980 * The nr_to_write == LONG_MAX is needed to make sure other flushers do
4981 * not race in and drop the bit.
4982 */
4983 if (range_whole && wbc->nr_to_write == LONG_MAX &&
4984 test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
4985 &BTRFS_I(inode)->runtime_flags))
4986 wbc->tagged_writepages = 1;
4987
4988 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
Josef Bacikf7aaa062011-07-15 21:26:38 +00004989 tag = PAGECACHE_TAG_TOWRITE;
4990 else
4991 tag = PAGECACHE_TAG_DIRTY;
Chris Masond1310b22008-01-24 16:13:08 -05004992retry:
Ethan Lien3cd24c62018-11-01 14:49:03 +08004993 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
Josef Bacikf7aaa062011-07-15 21:26:38 +00004994 tag_pages_for_writeback(mapping, index, end);
Liu Boa91326672016-03-07 16:56:21 -08004995 done_index = index;
Chris Masonf85d7d6c2009-09-18 16:03:16 -04004996 while (!done && !nr_to_write_done && (index <= end) &&
Jan Kara67fd7072017-11-15 17:35:19 -08004997 (nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
4998 &index, end, tag))) {
Chris Masond1310b22008-01-24 16:13:08 -05004999 unsigned i;
5000
Chris Masond1310b22008-01-24 16:13:08 -05005001 for (i = 0; i < nr_pages; i++) {
5002 struct page *page = pvec.pages[i];
5003
Tejun Heof7bddf12019-10-03 07:27:13 -07005004 done_index = page->index + 1;
Chris Masond1310b22008-01-24 16:13:08 -05005005 /*
Matthew Wilcoxb93b0162018-04-10 16:36:56 -07005006 * At this point we hold neither the i_pages lock nor
5007 * the page lock: the page may be truncated or
5008 * invalidated (changing page->mapping to NULL),
5009 * or even swizzled back from swapper_space to
5010 * tmpfs file mapping
Chris Masond1310b22008-01-24 16:13:08 -05005011 */
Josef Bacikc8f2f242013-02-11 11:33:00 -05005012 if (!trylock_page(page)) {
Qu Wenruof4340622019-03-20 14:27:41 +08005013 ret = flush_write_bio(epd);
5014 BUG_ON(ret < 0);
Josef Bacikc8f2f242013-02-11 11:33:00 -05005015 lock_page(page);
Chris Mason01d658f2011-11-01 10:08:06 -04005016 }
Chris Masond1310b22008-01-24 16:13:08 -05005017
5018 if (unlikely(page->mapping != mapping)) {
5019 unlock_page(page);
5020 continue;
5021 }
5022
Chris Masond2c3f4f2008-11-19 12:44:22 -05005023 if (wbc->sync_mode != WB_SYNC_NONE) {
Qu Wenruof4340622019-03-20 14:27:41 +08005024 if (PageWriteback(page)) {
5025 ret = flush_write_bio(epd);
5026 BUG_ON(ret < 0);
5027 }
Chris Masond1310b22008-01-24 16:13:08 -05005028 wait_on_page_writeback(page);
Chris Masond2c3f4f2008-11-19 12:44:22 -05005029 }
Chris Masond1310b22008-01-24 16:13:08 -05005030
5031 if (PageWriteback(page) ||
5032 !clear_page_dirty_for_io(page)) {
5033 unlock_page(page);
5034 continue;
5035 }
5036
David Sterbaaab6e9e2017-11-30 18:00:02 +01005037 ret = __extent_writepage(page, wbc, epd);
Liu Boa91326672016-03-07 16:56:21 -08005038 if (ret < 0) {
Liu Boa91326672016-03-07 16:56:21 -08005039 done = 1;
5040 break;
5041 }
Chris Masonf85d7d6c2009-09-18 16:03:16 -04005042
5043 /*
5044 * the filesystem may choose to bump up nr_to_write.
5045 * We have to make sure to honor the new nr_to_write
5046 * at any time
5047 */
5048 nr_to_write_done = wbc->nr_to_write <= 0;
Chris Masond1310b22008-01-24 16:13:08 -05005049 }
5050 pagevec_release(&pvec);
5051 cond_resched();
5052 }
Liu Bo894b36e2016-03-07 16:56:22 -08005053 if (!scanned && !done) {
Chris Masond1310b22008-01-24 16:13:08 -05005054 /*
5055 * We hit the last page and there is more work to be done: wrap
5056 * back to the start of the file
5057 */
5058 scanned = 1;
5059 index = 0;
Josef Bacik42ffb0b2020-01-23 15:33:02 -05005060
5061 /*
5062 * If we're looping we could run into a page that is locked by a
5063 * writer and that writer could be waiting on writeback for a
5064 * page in our current bio, and thus deadlock, so flush the
5065 * write bio here.
5066 */
5067 ret = flush_write_bio(epd);
5068 if (!ret)
5069 goto retry;
Chris Masond1310b22008-01-24 16:13:08 -05005070 }
Liu Boa91326672016-03-07 16:56:21 -08005071
5072 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
5073 mapping->writeback_index = done_index;
5074
Josef Bacik7fd1a3f2012-06-27 17:18:41 -04005075 btrfs_add_delayed_iput(inode);
Liu Bo894b36e2016-03-07 16:56:22 -08005076 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05005077}
Chris Masond1310b22008-01-24 16:13:08 -05005078
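/*
 * Write back a single page through __extent_writepage() and flush the
 * resulting bio, returning an error if either step fails.
 */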
Nikolay Borisov0a9b0e52017-12-08 15:55:59 +02005079int extent_write_full_page(struct page *page, struct writeback_control *wbc)
Chris Masond1310b22008-01-24 16:13:08 -05005080{
5081 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05005082 struct extent_page_data epd = {
Qu Wenruo390ed292021-04-14 16:42:15 +08005083 .bio_ctrl = { 0 },
Chris Mason771ed682008-11-06 22:02:51 -05005084 .extent_locked = 0,
Chris Masonffbd5172009-04-20 15:50:09 -04005085 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
Chris Masond1310b22008-01-24 16:13:08 -05005086 };
Chris Masond1310b22008-01-24 16:13:08 -05005087
Chris Masond1310b22008-01-24 16:13:08 -05005088 ret = __extent_writepage(page, wbc, &epd);
Qu Wenruo30659762019-03-20 14:27:42 +08005089 ASSERT(ret <= 0);
5090 if (ret < 0) {
5091 end_write_bio(&epd, ret);
5092 return ret;
5093 }
Chris Masond1310b22008-01-24 16:13:08 -05005094
Qu Wenruo30659762019-03-20 14:27:42 +08005095 ret = flush_write_bio(&epd);
5096 ASSERT(ret <= 0);
Chris Masond1310b22008-01-24 16:13:08 -05005097 return ret;
5098}
Chris Masond1310b22008-01-24 16:13:08 -05005099
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005100/*
 5101 * Submit the pages in the range to bio for call sites whose delalloc range
 5102 * has already been run (i.e. the ordered extent is inserted) and whose pages
 5103 * are still locked.
5104 */
5105int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
Chris Mason771ed682008-11-06 22:02:51 -05005106{
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005107 bool found_error = false;
5108 int first_error = 0;
Chris Mason771ed682008-11-06 22:02:51 -05005109 int ret = 0;
5110 struct address_space *mapping = inode->i_mapping;
5111 struct page *page;
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005112 u64 cur = start;
Qu Wenruo66448b92021-09-27 15:22:02 +08005113 unsigned long nr_pages;
5114 const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
Chris Mason771ed682008-11-06 22:02:51 -05005115 struct extent_page_data epd = {
Qu Wenruo390ed292021-04-14 16:42:15 +08005116 .bio_ctrl = { 0 },
Chris Mason771ed682008-11-06 22:02:51 -05005117 .extent_locked = 1,
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005118 .sync_io = 1,
Chris Mason771ed682008-11-06 22:02:51 -05005119 };
5120 struct writeback_control wbc_writepages = {
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005121 .sync_mode = WB_SYNC_ALL,
Chris Mason771ed682008-11-06 22:02:51 -05005122 .range_start = start,
5123 .range_end = end + 1,
Chris Masonec39f762019-07-10 12:28:17 -07005124 /* We're called from an async helper function */
5125 .punt_to_cgroup = 1,
5126 .no_cgroup_owner = 1,
Chris Mason771ed682008-11-06 22:02:51 -05005127 };
5128
Qu Wenruo66448b92021-09-27 15:22:02 +08005129 ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
5130 nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
5131 PAGE_SHIFT;
5132 wbc_writepages.nr_to_write = nr_pages * 2;
5133
Chris Masondbb70be2019-07-10 12:28:18 -07005134 wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005135 while (cur <= end) {
Qu Wenruo66448b92021-09-27 15:22:02 +08005136 u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
5137
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005138 page = find_get_page(mapping, cur >> PAGE_SHIFT);
5139 /*
5140 * All pages in the range are locked since
5141 * btrfs_run_delalloc_range(), thus there is no way to clear
5142 * the page dirty flag.
5143 */
Qu Wenruo66448b92021-09-27 15:22:02 +08005144 ASSERT(PageLocked(page));
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005145 ASSERT(PageDirty(page));
5146 clear_page_dirty_for_io(page);
5147 ret = __extent_writepage(page, &wbc_writepages, &epd);
5148 ASSERT(ret <= 0);
5149 if (ret < 0) {
5150 found_error = true;
5151 first_error = ret;
Chris Mason771ed682008-11-06 22:02:51 -05005152 }
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005153 put_page(page);
Qu Wenruo66448b92021-09-27 15:22:02 +08005154 cur = cur_end + 1;
Chris Mason771ed682008-11-06 22:02:51 -05005155 }
5156
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005157 if (!found_error)
Chris Masondbb70be2019-07-10 12:28:18 -07005158 ret = flush_write_bio(&epd);
5159 else
Qu Wenruo02c6db42019-03-20 14:27:45 +08005160 end_write_bio(&epd, ret);
Chris Masondbb70be2019-07-10 12:28:18 -07005161
5162 wbc_detach_inode(&wbc_writepages);
Qu Wenruo2bd0fc92021-09-27 15:21:58 +08005163 if (found_error)
5164 return first_error;
Chris Mason771ed682008-11-06 22:02:51 -05005165 return ret;
5166}
Chris Masond1310b22008-01-24 16:13:08 -05005167
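/*
 * Writepages entry point for data inodes: flush the dirty pages of the given
 * address space, holding the zoned data relocation lock around the walk (see
 * the comment below) and submitting any bio that is still pending at the end.
 */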
Nikolay Borisov8ae225a2018-04-19 10:46:38 +03005168int extent_writepages(struct address_space *mapping,
Chris Masond1310b22008-01-24 16:13:08 -05005169 struct writeback_control *wbc)
5170{
Johannes Thumshirn35156d82021-09-09 01:19:27 +09005171 struct inode *inode = mapping->host;
Chris Masond1310b22008-01-24 16:13:08 -05005172 int ret = 0;
5173 struct extent_page_data epd = {
Qu Wenruo390ed292021-04-14 16:42:15 +08005174 .bio_ctrl = { 0 },
Chris Mason771ed682008-11-06 22:02:51 -05005175 .extent_locked = 0,
Chris Masonffbd5172009-04-20 15:50:09 -04005176 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
Chris Masond1310b22008-01-24 16:13:08 -05005177 };
5178
Johannes Thumshirn35156d82021-09-09 01:19:27 +09005179 /*
5180 * Allow only a single thread to do the reloc work in zoned mode to
5181 * protect the write pointer updates.
5182 */
Johannes Thumshirn869f4cd2021-12-07 06:28:34 -08005183 btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
David Sterba935db852017-06-23 04:30:28 +02005184 ret = extent_write_cache_pages(mapping, wbc, &epd);
Johannes Thumshirn869f4cd2021-12-07 06:28:34 -08005185 btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
Qu Wenruoa2a72fb2019-03-20 14:27:48 +08005186 ASSERT(ret <= 0);
5187 if (ret < 0) {
5188 end_write_bio(&epd, ret);
5189 return ret;
5190 }
5191 ret = flush_write_bio(&epd);
Chris Masond1310b22008-01-24 16:13:08 -05005192 return ret;
5193}
Chris Masond1310b22008-01-24 16:13:08 -05005194
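/*
 * Readahead entry point: take the readahead pages in batches, issue reads for
 * each contiguous batch and submit whatever bio is still pending once all
 * batches have been processed.
 */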
Matthew Wilcox (Oracle)ba206a02020-06-01 21:47:05 -07005195void extent_readahead(struct readahead_control *rac)
Chris Masond1310b22008-01-24 16:13:08 -05005196{
Qu Wenruo390ed292021-04-14 16:42:15 +08005197 struct btrfs_bio_ctrl bio_ctrl = { 0 };
Liu Bo67c96842012-07-20 21:43:09 -06005198 struct page *pagepool[16];
Miao Xie125bac012013-07-25 19:22:37 +08005199 struct extent_map *em_cached = NULL;
Filipe Manana808f80b2015-09-28 09:56:26 +01005200 u64 prev_em_start = (u64)-1;
Matthew Wilcox (Oracle)ba206a02020-06-01 21:47:05 -07005201 int nr;
Chris Masond1310b22008-01-24 16:13:08 -05005202
Matthew Wilcox (Oracle)ba206a02020-06-01 21:47:05 -07005203 while ((nr = readahead_page_batch(rac, pagepool))) {
Matthew Wilcox (Oracle)32c0a6b2021-03-21 21:03:11 +00005204 u64 contig_start = readahead_pos(rac);
5205 u64 contig_end = contig_start + readahead_batch_length(rac) - 1;
Chris Masond1310b22008-01-24 16:13:08 -05005206
Matthew Wilcox (Oracle)ba206a02020-06-01 21:47:05 -07005207 contiguous_readpages(pagepool, nr, contig_start, contig_end,
Qu Wenruo390ed292021-04-14 16:42:15 +08005208 &em_cached, &bio_ctrl, &prev_em_start);
Chris Masond1310b22008-01-24 16:13:08 -05005209 }
Liu Bo67c96842012-07-20 21:43:09 -06005210
Miao Xie125bac012013-07-25 19:22:37 +08005211 if (em_cached)
5212 free_extent_map(em_cached);
5213
Qu Wenruo390ed292021-04-14 16:42:15 +08005214 if (bio_ctrl.bio) {
5215 if (submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags))
Matthew Wilcox (Oracle)ba206a02020-06-01 21:47:05 -07005216 return;
5217 }
Chris Masond1310b22008-01-24 16:13:08 -05005218}
Chris Masond1310b22008-01-24 16:13:08 -05005219
5220/*
5221 * basic invalidatepage code, this waits on any locked or writeback
5222 * ranges corresponding to the page, and then deletes any extent state
5223 * records from the tree
5224 */
5225int extent_invalidatepage(struct extent_io_tree *tree,
5226 struct page *page, unsigned long offset)
5227{
Josef Bacik2ac55d42010-02-03 19:33:23 +00005228 struct extent_state *cached_state = NULL;
Miao Xie4eee4fa2012-12-21 09:17:45 +00005229 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005230 u64 end = start + PAGE_SIZE - 1;
Chris Masond1310b22008-01-24 16:13:08 -05005231 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
5232
Qu Wenruo829ddec2020-11-13 20:51:39 +08005233 /* This function is only called for the btree inode */
5234 ASSERT(tree->owner == IO_TREE_BTREE_INODE_IO);
5235
Qu Wenruofda28322013-02-26 08:10:22 +00005236 start += ALIGN(offset, blocksize);
Chris Masond1310b22008-01-24 16:13:08 -05005237 if (start > end)
5238 return 0;
5239
David Sterbaff13db42015-12-03 14:30:40 +01005240 lock_extent_bits(tree, start, end, &cached_state);
Chris Mason1edbb732009-09-02 13:24:36 -04005241 wait_on_page_writeback(page);
Qu Wenruo829ddec2020-11-13 20:51:39 +08005242
5243 /*
5244 * Currently for btree io tree, only EXTENT_LOCKED is utilized,
5245 * so here we only need to unlock the extent range to free any
5246 * existing extent state.
5247 */
5248 unlock_extent_cached(tree, start, end, &cached_state);
Chris Masond1310b22008-01-24 16:13:08 -05005249 return 0;
5250}
Chris Masond1310b22008-01-24 16:13:08 -05005251
5252/*
Chris Mason7b13b7b2008-04-18 10:29:50 -04005253 * a helper for releasepage, this tests for areas of the page that
5254 * are locked or under IO and drops the related state bits if it is safe
5255 * to drop the page.
5256 */
Nikolay Borisov29c68b2d2018-04-19 10:46:35 +03005257static int try_release_extent_state(struct extent_io_tree *tree,
Eric Sandeen48a3b632013-04-25 20:41:01 +00005258 struct page *page, gfp_t mask)
Chris Mason7b13b7b2008-04-18 10:29:50 -04005259{
Miao Xie4eee4fa2012-12-21 09:17:45 +00005260 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005261 u64 end = start + PAGE_SIZE - 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04005262 int ret = 1;
5263
Nikolay Borisov88826792019-03-14 15:28:31 +02005264 if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
Chris Mason7b13b7b2008-04-18 10:29:50 -04005265 ret = 0;
Nikolay Borisov88826792019-03-14 15:28:31 +02005266 } else {
Chris Mason11ef1602009-09-23 20:28:46 -04005267 /*
Filipe Manana2766ff62020-11-04 11:07:34 +00005268 * At this point we can safely clear everything except the
5269 * locked bit, the nodatasum bit and the delalloc new bit.
5270 * The delalloc new bit will be cleared by ordered extent
5271 * completion.
Chris Mason11ef1602009-09-23 20:28:46 -04005272 */
David Sterba66b0c882017-10-31 16:30:47 +01005273 ret = __clear_extent_bit(tree, start, end,
Filipe Manana2766ff62020-11-04 11:07:34 +00005274 ~(EXTENT_LOCKED | EXTENT_NODATASUM | EXTENT_DELALLOC_NEW),
5275 0, 0, NULL, mask, NULL);
Chris Masone3f24cc2011-02-14 12:52:08 -05005276
 5277		/* If clear_extent_bit() failed for ENOMEM reasons,
 5278		 * we can't allow the release to continue.
 5279		 */
5280 if (ret < 0)
5281 ret = 0;
5282 else
5283 ret = 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04005284 }
5285 return ret;
5286}
Chris Mason7b13b7b2008-04-18 10:29:50 -04005287
5288/*
Chris Masond1310b22008-01-24 16:13:08 -05005289 * a helper for releasepage. As long as there are no locked extents
5290 * in the range corresponding to the page, both state records and extent
5291 * map records are removed
5292 */
Nikolay Borisov477a30b2018-04-19 10:46:34 +03005293int try_release_extent_mapping(struct page *page, gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -05005294{
5295 struct extent_map *em;
Miao Xie4eee4fa2012-12-21 09:17:45 +00005296 u64 start = page_offset(page);
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005297 u64 end = start + PAGE_SIZE - 1;
Filipe Mananabd3599a2018-07-12 01:36:43 +01005298 struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
5299 struct extent_io_tree *tree = &btrfs_inode->io_tree;
5300 struct extent_map_tree *map = &btrfs_inode->extent_tree;
Chris Mason7b13b7b2008-04-18 10:29:50 -04005301
Mel Gormand0164ad2015-11-06 16:28:21 -08005302 if (gfpflags_allow_blocking(mask) &&
Byongho Leeee221842015-12-15 01:42:10 +09005303 page->mapping->host->i_size > SZ_16M) {
Yan39b56372008-02-15 10:40:50 -05005304 u64 len;
Chris Mason70dec802008-01-29 09:59:12 -05005305 while (start <= end) {
Filipe Mananafbc2bd72020-07-22 12:28:52 +01005306 struct btrfs_fs_info *fs_info;
5307 u64 cur_gen;
5308
Yan39b56372008-02-15 10:40:50 -05005309 len = end - start + 1;
Chris Mason890871b2009-09-02 16:24:52 -04005310 write_lock(&map->lock);
Yan39b56372008-02-15 10:40:50 -05005311 em = lookup_extent_mapping(map, start, len);
Tsutomu Itoh285190d2012-02-16 16:23:58 +09005312 if (!em) {
Chris Mason890871b2009-09-02 16:24:52 -04005313 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05005314 break;
5315 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04005316 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
5317 em->start != start) {
Chris Mason890871b2009-09-02 16:24:52 -04005318 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05005319 free_extent_map(em);
5320 break;
5321 }
Filipe Manana3d6448e2020-07-22 12:28:37 +01005322 if (test_range_bit(tree, em->start,
5323 extent_map_end(em) - 1,
5324 EXTENT_LOCKED, 0, NULL))
5325 goto next;
5326 /*
5327 * If it's not in the list of modified extents, used
5328 * by a fast fsync, we can remove it. If it's being
5329 * logged we can safely remove it since fsync took an
5330 * extra reference on the em.
5331 */
5332 if (list_empty(&em->list) ||
Filipe Mananafbc2bd72020-07-22 12:28:52 +01005333 test_bit(EXTENT_FLAG_LOGGING, &em->flags))
5334 goto remove_em;
5335 /*
5336 * If it's in the list of modified extents, remove it
 5337			 * only if its generation is older than the current one,
5338 * in which case we don't need it for a fast fsync.
5339 * Otherwise don't remove it, we could be racing with an
5340 * ongoing fast fsync that could miss the new extent.
5341 */
5342 fs_info = btrfs_inode->root->fs_info;
5343 spin_lock(&fs_info->trans_lock);
5344 cur_gen = fs_info->generation;
5345 spin_unlock(&fs_info->trans_lock);
5346 if (em->generation >= cur_gen)
5347 goto next;
5348remove_em:
Filipe Manana5e548b32020-07-22 12:29:01 +01005349 /*
5350 * We only remove extent maps that are not in the list of
5351 * modified extents or that are in the list but with a
 5352			 * generation lower than the current generation, so there
5353 * is no need to set the full fsync flag on the inode (it
5354 * hurts the fsync performance for workloads with a data
5355 * size that exceeds or is close to the system's memory).
5356 */
Filipe Mananafbc2bd72020-07-22 12:28:52 +01005357 remove_extent_mapping(map, em);
5358 /* once for the rb tree */
5359 free_extent_map(em);
Filipe Manana3d6448e2020-07-22 12:28:37 +01005360next:
Chris Mason70dec802008-01-29 09:59:12 -05005361 start = extent_map_end(em);
Chris Mason890871b2009-09-02 16:24:52 -04005362 write_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05005363
5364 /* once for us */
Chris Masond1310b22008-01-24 16:13:08 -05005365 free_extent_map(em);
Paul E. McKenney9f47eb52020-05-08 14:15:37 -07005366
5367 cond_resched(); /* Allow large-extent preemption. */
Chris Masond1310b22008-01-24 16:13:08 -05005368 }
Chris Masond1310b22008-01-24 16:13:08 -05005369 }
Nikolay Borisov29c68b2d2018-04-19 10:46:35 +03005370 return try_release_extent_state(tree, page, mask);
Chris Masond1310b22008-01-24 16:13:08 -05005371}
Chris Masond1310b22008-01-24 16:13:08 -05005372
Chris Masonec29ed52011-02-23 16:23:20 -05005373/*
5374 * helper function for fiemap, which doesn't want to see any holes.
5375 * This maps until we find something past 'last'
5376 */
Nikolay Borisovf1bbde82020-08-31 14:42:45 +03005377static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
David Sterbae3350e12017-06-23 04:09:57 +02005378 u64 offset, u64 last)
Chris Masonec29ed52011-02-23 16:23:20 -05005379{
Nikolay Borisovf1bbde82020-08-31 14:42:45 +03005380 u64 sectorsize = btrfs_inode_sectorsize(inode);
Chris Masonec29ed52011-02-23 16:23:20 -05005381 struct extent_map *em;
5382 u64 len;
5383
5384 if (offset >= last)
5385 return NULL;
5386
Dulshani Gunawardhana67871252013-10-31 10:33:04 +05305387 while (1) {
Chris Masonec29ed52011-02-23 16:23:20 -05005388 len = last - offset;
5389 if (len == 0)
5390 break;
Qu Wenruofda28322013-02-26 08:10:22 +00005391 len = ALIGN(len, sectorsize);
Nikolay Borisovf1bbde82020-08-31 14:42:45 +03005392 em = btrfs_get_extent_fiemap(inode, offset, len);
David Sterbac7040052011-04-19 18:00:01 +02005393 if (IS_ERR_OR_NULL(em))
Chris Masonec29ed52011-02-23 16:23:20 -05005394 return em;
5395
5396 /* if this isn't a hole return it */
Nikolay Borisov4a2d25c2017-11-23 10:51:43 +02005397 if (em->block_start != EXTENT_MAP_HOLE)
Chris Masonec29ed52011-02-23 16:23:20 -05005398 return em;
Chris Masonec29ed52011-02-23 16:23:20 -05005399
5400 /* this is a hole, advance to the next extent */
5401 offset = extent_map_end(em);
5402 free_extent_map(em);
5403 if (offset >= last)
5404 break;
5405 }
5406 return NULL;
5407}
5408
Qu Wenruo47518322017-04-07 10:43:15 +08005409/*
5410 * To cache previous fiemap extent
5411 *
 5412 * Will be used for merging fiemap extents
5413 */
5414struct fiemap_cache {
5415 u64 offset;
5416 u64 phys;
5417 u64 len;
5418 u32 flags;
5419 bool cached;
5420};
5421
5422/*
5423 * Helper to submit fiemap extent.
5424 *
5425 * Will try to merge current fiemap extent specified by @offset, @phys,
5426 * @len and @flags with cached one.
 5427 * Only when we fail to merge will the cached one be submitted as a
 5428 * fiemap extent.
5429 *
5430 * Return value is the same as fiemap_fill_next_extent().
5431 */
5432static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
5433 struct fiemap_cache *cache,
5434 u64 offset, u64 phys, u64 len, u32 flags)
5435{
5436 int ret = 0;
5437
5438 if (!cache->cached)
5439 goto assign;
5440
5441 /*
5442 * Sanity check, extent_fiemap() should have ensured that new
Andrea Gelmini52042d82018-11-28 12:05:13 +01005443 * fiemap extent won't overlap with cached one.
Qu Wenruo47518322017-04-07 10:43:15 +08005444 * Not recoverable.
5445 *
5446 * NOTE: Physical address can overlap, due to compression
5447 */
5448 if (cache->offset + cache->len > offset) {
5449 WARN_ON(1);
5450 return -EINVAL;
5451 }
5452
5453 /*
5454 * Only merges fiemap extents if
5455 * 1) Their logical addresses are continuous
5456 *
5457 * 2) Their physical addresses are continuous
5458 * So truly compressed (physical size smaller than logical size)
5459 * extents won't get merged with each other
5460 *
5461 * 3) Share same flags except FIEMAP_EXTENT_LAST
5462 * So regular extent won't get merged with prealloc extent
5463 */
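	/*
	 * Illustrative example: a cached entry for file range [0, 64K) at
	 * physical 1M and a new entry for [64K, 128K) at physical 1M + 64K
	 * with the same flags pass all three checks and are merged into one
	 * [0, 128K) fiemap extent.  Two adjacent compressed extents fail
	 * check 2, since their physical addresses are not continuous with
	 * the logical length, and are therefore emitted separately.
	 */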
5464 if (cache->offset + cache->len == offset &&
5465 cache->phys + cache->len == phys &&
5466 (cache->flags & ~FIEMAP_EXTENT_LAST) ==
5467 (flags & ~FIEMAP_EXTENT_LAST)) {
5468 cache->len += len;
5469 cache->flags |= flags;
5470 goto try_submit_last;
5471 }
5472
5473 /* Not mergeable, need to submit cached one */
5474 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
5475 cache->len, cache->flags);
5476 cache->cached = false;
5477 if (ret)
5478 return ret;
5479assign:
5480 cache->cached = true;
5481 cache->offset = offset;
5482 cache->phys = phys;
5483 cache->len = len;
5484 cache->flags = flags;
5485try_submit_last:
5486 if (cache->flags & FIEMAP_EXTENT_LAST) {
5487 ret = fiemap_fill_next_extent(fieinfo, cache->offset,
5488 cache->phys, cache->len, cache->flags);
5489 cache->cached = false;
5490 }
5491 return ret;
5492}
5493
5494/*
Qu Wenruo848c23b2017-06-22 10:01:21 +08005495 * Emit last fiemap cache
Qu Wenruo47518322017-04-07 10:43:15 +08005496 *
Qu Wenruo848c23b2017-06-22 10:01:21 +08005497 * The last fiemap cache may still be cached in the following case:
5498 * 0 4k 8k
5499 * |<- Fiemap range ->|
5500 * |<------------ First extent ----------->|
5501 *
5502 * In this case, the first extent range will be cached but not emitted.
5503 * So we must emit it before ending extent_fiemap().
Qu Wenruo47518322017-04-07 10:43:15 +08005504 */
David Sterba5c5aff92019-03-20 11:29:46 +01005505static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
Qu Wenruo848c23b2017-06-22 10:01:21 +08005506 struct fiemap_cache *cache)
Qu Wenruo47518322017-04-07 10:43:15 +08005507{
5508 int ret;
5509
5510 if (!cache->cached)
5511 return 0;
5512
Qu Wenruo47518322017-04-07 10:43:15 +08005513 ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
5514 cache->len, cache->flags);
5515 cache->cached = false;
5516 if (ret > 0)
5517 ret = 0;
5518 return ret;
5519}
5520
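/*
 * Fill in fiemap data for the given file range: look up the last file extent
 * to decide how far to scan, walk the extent maps while skipping holes,
 * translate each map into a fiemap extent (checking for shared extents when
 * fi_extents_max is non-zero) and merge adjacent entries through the
 * fiemap_cache before handing them to fiemap_fill_next_extent().
 */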
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005521int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
David Sterbabab16e22020-06-23 20:56:12 +02005522 u64 start, u64 len)
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005523{
Josef Bacik975f84f2010-11-23 19:36:57 +00005524 int ret = 0;
Boris Burkov15c77452021-04-06 15:31:18 -07005525 u64 off;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005526 u64 max = start + len;
5527 u32 flags = 0;
Josef Bacik975f84f2010-11-23 19:36:57 +00005528 u32 found_type;
5529 u64 last;
Chris Masonec29ed52011-02-23 16:23:20 -05005530 u64 last_for_get_extent = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005531 u64 disko = 0;
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005532 u64 isize = i_size_read(&inode->vfs_inode);
Josef Bacik975f84f2010-11-23 19:36:57 +00005533 struct btrfs_key found_key;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005534 struct extent_map *em = NULL;
Josef Bacik2ac55d42010-02-03 19:33:23 +00005535 struct extent_state *cached_state = NULL;
Josef Bacik975f84f2010-11-23 19:36:57 +00005536 struct btrfs_path *path;
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005537 struct btrfs_root *root = inode->root;
Qu Wenruo47518322017-04-07 10:43:15 +08005538 struct fiemap_cache cache = { 0 };
David Sterba5911c8f2019-05-15 15:31:04 +02005539 struct ulist *roots;
5540 struct ulist *tmp_ulist;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005541 int end = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05005542 u64 em_start = 0;
5543 u64 em_len = 0;
5544 u64 em_end = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005545
5546 if (len == 0)
5547 return -EINVAL;
5548
Josef Bacik975f84f2010-11-23 19:36:57 +00005549 path = btrfs_alloc_path();
5550 if (!path)
5551 return -ENOMEM;
Josef Bacik975f84f2010-11-23 19:36:57 +00005552
David Sterba5911c8f2019-05-15 15:31:04 +02005553 roots = ulist_alloc(GFP_KERNEL);
5554 tmp_ulist = ulist_alloc(GFP_KERNEL);
5555 if (!roots || !tmp_ulist) {
5556 ret = -ENOMEM;
5557 goto out_free_ulist;
5558 }
5559
Boris Burkov15c77452021-04-06 15:31:18 -07005560 /*
5561 * We can't initialize that to 'start' as this could miss extents due
5562 * to extent item merging
5563 */
5564 off = 0;
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005565 start = round_down(start, btrfs_inode_sectorsize(inode));
5566 len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
Josef Bacik4d479cf2011-11-17 11:34:31 -05005567
Chris Masonec29ed52011-02-23 16:23:20 -05005568 /*
5569 * lookup the last file extent. We're not using i_size here
5570 * because there might be preallocation past i_size
5571 */
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005572 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
5573 0);
Josef Bacik975f84f2010-11-23 19:36:57 +00005574 if (ret < 0) {
David Sterba5911c8f2019-05-15 15:31:04 +02005575 goto out_free_ulist;
Liu Bo2d324f52016-05-17 17:21:48 -07005576 } else {
5577 WARN_ON(!ret);
5578 if (ret == 1)
5579 ret = 0;
Josef Bacik975f84f2010-11-23 19:36:57 +00005580 }
Liu Bo2d324f52016-05-17 17:21:48 -07005581
Josef Bacik975f84f2010-11-23 19:36:57 +00005582 path->slots[0]--;
Josef Bacik975f84f2010-11-23 19:36:57 +00005583 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
David Sterba962a2982014-06-04 18:41:45 +02005584 found_type = found_key.type;
Josef Bacik975f84f2010-11-23 19:36:57 +00005585
Chris Masonec29ed52011-02-23 16:23:20 -05005586 /* No extents, but there might be delalloc bits */
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005587 if (found_key.objectid != btrfs_ino(inode) ||
Josef Bacik975f84f2010-11-23 19:36:57 +00005588 found_type != BTRFS_EXTENT_DATA_KEY) {
Chris Masonec29ed52011-02-23 16:23:20 -05005589 /* have to trust i_size as the end */
5590 last = (u64)-1;
5591 last_for_get_extent = isize;
5592 } else {
5593 /*
5594 * remember the start of the last extent. There are a
5595 * bunch of different factors that go into the length of the
 5596		 * extent, so it's much less complex to remember where it started
5597 */
5598 last = found_key.offset;
5599 last_for_get_extent = last + 1;
Josef Bacik975f84f2010-11-23 19:36:57 +00005600 }
Liu Bofe09e162013-09-22 12:54:23 +08005601 btrfs_release_path(path);
Josef Bacik975f84f2010-11-23 19:36:57 +00005602
Chris Masonec29ed52011-02-23 16:23:20 -05005603 /*
5604 * we might have some extents allocated but more delalloc past those
5605 * extents. so, we trust isize unless the start of the last extent is
5606 * beyond isize
5607 */
5608 if (last < isize) {
5609 last = (u64)-1;
5610 last_for_get_extent = isize;
5611 }
5612
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005613 lock_extent_bits(&inode->io_tree, start, start + len - 1,
Jeff Mahoneyd0082372012-03-01 14:57:19 +01005614 &cached_state);
Chris Masonec29ed52011-02-23 16:23:20 -05005615
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005616 em = get_extent_skip_holes(inode, start, last_for_get_extent);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005617 if (!em)
5618 goto out;
5619 if (IS_ERR(em)) {
5620 ret = PTR_ERR(em);
5621 goto out;
5622 }
Josef Bacik975f84f2010-11-23 19:36:57 +00005623
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005624 while (!end) {
Josef Bacikb76bb702013-07-05 13:52:51 -04005625 u64 offset_in_extent = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005626
Chris Masonea8efc72011-03-08 11:54:40 -05005627 /* break if the extent we found is outside the range */
5628 if (em->start >= max || extent_map_end(em) < off)
5629 break;
5630
5631 /*
5632 * get_extent may return an extent that starts before our
5633 * requested range. We have to make sure the ranges
5634 * we return to fiemap always move forward and don't
5635 * overlap, so adjust the offsets here
5636 */
5637 em_start = max(em->start, off);
5638
5639 /*
5640 * record the offset from the start of the extent
Josef Bacikb76bb702013-07-05 13:52:51 -04005641 * for adjusting the disk offset below. Only do this if the
5642 * extent isn't compressed since our in ram offset may be past
5643 * what we have actually allocated on disk.
Chris Masonea8efc72011-03-08 11:54:40 -05005644 */
Josef Bacikb76bb702013-07-05 13:52:51 -04005645 if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
5646 offset_in_extent = em_start - em->start;
Chris Masonec29ed52011-02-23 16:23:20 -05005647 em_end = extent_map_end(em);
Chris Masonea8efc72011-03-08 11:54:40 -05005648 em_len = em_end - em_start;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005649 flags = 0;
Filipe Mananaf0986312018-06-20 10:02:30 +01005650 if (em->block_start < EXTENT_MAP_LAST_BYTE)
5651 disko = em->block_start + offset_in_extent;
5652 else
5653 disko = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005654
Chris Masonea8efc72011-03-08 11:54:40 -05005655 /*
5656 * bump off for our next call to get_extent
5657 */
5658 off = extent_map_end(em);
5659 if (off >= max)
5660 end = 1;
5661
Heiko Carstens93dbfad2009-04-03 10:33:45 -04005662 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005663 end = 1;
5664 flags |= FIEMAP_EXTENT_LAST;
Heiko Carstens93dbfad2009-04-03 10:33:45 -04005665 } else if (em->block_start == EXTENT_MAP_INLINE) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005666 flags |= (FIEMAP_EXTENT_DATA_INLINE |
5667 FIEMAP_EXTENT_NOT_ALIGNED);
Heiko Carstens93dbfad2009-04-03 10:33:45 -04005668 } else if (em->block_start == EXTENT_MAP_DELALLOC) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005669 flags |= (FIEMAP_EXTENT_DELALLOC |
5670 FIEMAP_EXTENT_UNKNOWN);
Josef Bacikdc046b12014-09-10 16:20:45 -04005671 } else if (fieinfo->fi_extents_max) {
5672 u64 bytenr = em->block_start -
5673 (em->start - em->orig_start);
Liu Bofe09e162013-09-22 12:54:23 +08005674
Liu Bofe09e162013-09-22 12:54:23 +08005675 /*
5676 * As btrfs supports shared space, this information
5677 * can be exported to userspace tools via
Josef Bacikdc046b12014-09-10 16:20:45 -04005678 * flag FIEMAP_EXTENT_SHARED. If fi_extents_max == 0
5679 * then we're just getting a count and we can skip the
5680 * lookup stuff.
Liu Bofe09e162013-09-22 12:54:23 +08005681 */
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005682 ret = btrfs_check_shared(root, btrfs_ino(inode),
David Sterba5911c8f2019-05-15 15:31:04 +02005683 bytenr, roots, tmp_ulist);
Josef Bacikdc046b12014-09-10 16:20:45 -04005684 if (ret < 0)
Liu Bofe09e162013-09-22 12:54:23 +08005685 goto out_free;
Josef Bacikdc046b12014-09-10 16:20:45 -04005686 if (ret)
Liu Bofe09e162013-09-22 12:54:23 +08005687 flags |= FIEMAP_EXTENT_SHARED;
Josef Bacikdc046b12014-09-10 16:20:45 -04005688 ret = 0;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005689 }
5690 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
5691 flags |= FIEMAP_EXTENT_ENCODED;
Josef Bacik0d2b2372015-05-19 10:44:04 -04005692 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5693 flags |= FIEMAP_EXTENT_UNWRITTEN;
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005694
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005695 free_extent_map(em);
5696 em = NULL;
Chris Masonec29ed52011-02-23 16:23:20 -05005697 if ((em_start >= last) || em_len == (u64)-1 ||
5698 (last == (u64)-1 && isize <= em_end)) {
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005699 flags |= FIEMAP_EXTENT_LAST;
5700 end = 1;
5701 }
5702
Chris Masonec29ed52011-02-23 16:23:20 -05005703 /* now scan forward to see if this is really the last extent. */
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005704 em = get_extent_skip_holes(inode, off, last_for_get_extent);
Chris Masonec29ed52011-02-23 16:23:20 -05005705 if (IS_ERR(em)) {
5706 ret = PTR_ERR(em);
5707 goto out;
5708 }
5709 if (!em) {
Josef Bacik975f84f2010-11-23 19:36:57 +00005710 flags |= FIEMAP_EXTENT_LAST;
5711 end = 1;
5712 }
Qu Wenruo47518322017-04-07 10:43:15 +08005713 ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
5714 em_len, flags);
Chengyu Song26e726a2015-03-24 18:12:56 -04005715 if (ret) {
5716 if (ret == 1)
5717 ret = 0;
Chris Masonec29ed52011-02-23 16:23:20 -05005718 goto out_free;
Chengyu Song26e726a2015-03-24 18:12:56 -04005719 }
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005720 }
5721out_free:
Qu Wenruo47518322017-04-07 10:43:15 +08005722 if (!ret)
David Sterba5c5aff92019-03-20 11:29:46 +01005723 ret = emit_last_fiemap_cache(fieinfo, &cache);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005724 free_extent_map(em);
5725out:
Nikolay Borisovfacee0a2020-08-31 14:42:49 +03005726 unlock_extent_cached(&inode->io_tree, start, start + len - 1,
David Sterbae43bbe52017-12-12 21:43:52 +01005727 &cached_state);
David Sterba5911c8f2019-05-15 15:31:04 +02005728
5729out_free_ulist:
Colin Ian Kinge02d48e2019-07-05 08:26:24 +01005730 btrfs_free_path(path);
David Sterba5911c8f2019-05-15 15:31:04 +02005731 ulist_free(roots);
5732 ulist_free(tmp_ulist);
Yehuda Sadeh1506fcc2009-01-21 14:39:14 -05005733 return ret;
5734}
5735
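/*
 * Free only the extent_buffer structure itself; any pages attached to it must
 * already have been detached and released by the caller.
 */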
Chris Mason727011e2010-08-06 13:21:20 -04005736static void __free_extent_buffer(struct extent_buffer *eb)
5737{
Chris Mason727011e2010-08-06 13:21:20 -04005738 kmem_cache_free(extent_buffer_cache, eb);
5739}
5740
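/*
 * Return true if the extent buffer still has pending page I/O or is dirty or
 * under writeback, i.e. it cannot be released yet.
 */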
David Sterba2b489662020-04-29 03:04:10 +02005741int extent_buffer_under_io(const struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05005742{
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005743 return (atomic_read(&eb->io_pages) ||
5744 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
5745 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
Chris Masond1310b22008-01-24 16:13:08 -05005746}
5747
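/*
 * Return true if any extent buffer or unfinished subpage read still
 * references the given page; must be called with the mapping's private_lock
 * held.
 */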
Qu Wenruo8ff84662021-01-26 16:33:50 +08005748static bool page_range_has_eb(struct btrfs_fs_info *fs_info, struct page *page)
Miao Xie897ca6e92010-10-26 20:57:29 -04005749{
Qu Wenruo8ff84662021-01-26 16:33:50 +08005750 struct btrfs_subpage *subpage;
Miao Xie897ca6e92010-10-26 20:57:29 -04005751
Qu Wenruo8ff84662021-01-26 16:33:50 +08005752 lockdep_assert_held(&page->mapping->private_lock);
Miao Xie897ca6e92010-10-26 20:57:29 -04005753
Qu Wenruo8ff84662021-01-26 16:33:50 +08005754 if (PagePrivate(page)) {
5755 subpage = (struct btrfs_subpage *)page->private;
5756 if (atomic_read(&subpage->eb_refs))
5757 return true;
Qu Wenruo3d078ef2021-06-07 17:02:58 +08005758 /*
 5759		 * Even if there are no eb refs here, we may still have an
 5760		 * end_page_read() call relying on page::private.
5761 */
5762 if (atomic_read(&subpage->readers))
5763 return true;
Qu Wenruo8ff84662021-01-26 16:33:50 +08005764 }
5765 return false;
5766}
Miao Xie897ca6e92010-10-26 20:57:29 -04005767
Qu Wenruo8ff84662021-01-26 16:33:50 +08005768static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
5769{
5770 struct btrfs_fs_info *fs_info = eb->fs_info;
5771 const bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
5772
5773 /*
5774 * For mapped eb, we're going to change the page private, which should
5775 * be done under the private_lock.
5776 */
5777 if (mapped)
5778 spin_lock(&page->mapping->private_lock);
5779
5780 if (!PagePrivate(page)) {
Forrest Liu5d2361d2015-02-09 17:31:45 +08005781 if (mapped)
Qu Wenruo8ff84662021-01-26 16:33:50 +08005782 spin_unlock(&page->mapping->private_lock);
5783 return;
5784 }
5785
5786 if (fs_info->sectorsize == PAGE_SIZE) {
Forrest Liu5d2361d2015-02-09 17:31:45 +08005787 /*
5788 * We do this since we'll remove the pages after we've
5789 * removed the eb from the radix tree, so we could race
5790 * and have this page now attached to the new eb. So
5791 * only clear page_private if it's still connected to
5792 * this eb.
5793 */
5794 if (PagePrivate(page) &&
5795 page->private == (unsigned long)eb) {
5796 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
5797 BUG_ON(PageDirty(page));
5798 BUG_ON(PageWriteback(page));
Josef Bacik4f2de97a2012-03-07 16:20:05 -05005799 /*
Forrest Liu5d2361d2015-02-09 17:31:45 +08005800			 * We need to make sure we haven't been attached
5801 * to a new eb.
Josef Bacik4f2de97a2012-03-07 16:20:05 -05005802 */
Guoqing Jiangd1b89bc2020-06-01 21:47:45 -07005803 detach_page_private(page);
Josef Bacik4f2de97a2012-03-07 16:20:05 -05005804 }
Forrest Liu5d2361d2015-02-09 17:31:45 +08005805 if (mapped)
5806 spin_unlock(&page->mapping->private_lock);
Qu Wenruo8ff84662021-01-26 16:33:50 +08005807 return;
5808 }
5809
5810 /*
5811 * For subpage, we can have dummy eb with page private. In this case,
 5812	 * we can directly detach the private as such a page is only attached to
 5813	 * one dummy eb, with no sharing.
5814 */
5815 if (!mapped) {
5816 btrfs_detach_subpage(fs_info, page);
5817 return;
5818 }
5819
5820 btrfs_page_dec_eb_refs(fs_info, page);
5821
5822 /*
5823 * We can only detach the page private if there are no other ebs in the
Qu Wenruo3d078ef2021-06-07 17:02:58 +08005824 * page range and no unfinished IO.
Qu Wenruo8ff84662021-01-26 16:33:50 +08005825 */
5826 if (!page_range_has_eb(fs_info, page))
5827 btrfs_detach_subpage(fs_info, page);
5828
5829 spin_unlock(&page->mapping->private_lock);
5830}
5831
5832/* Release all pages attached to the extent buffer */
5833static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
5834{
5835 int i;
5836 int num_pages;
5837
5838 ASSERT(!extent_buffer_under_io(eb));
5839
5840 num_pages = num_extent_pages(eb);
5841 for (i = 0; i < num_pages; i++) {
5842 struct page *page = eb->pages[i];
5843
5844 if (!page)
5845 continue;
5846
5847 detach_extent_buffer_page(eb, page);
Forrest Liu5d2361d2015-02-09 17:31:45 +08005848
Nicholas D Steeves01327612016-05-19 21:18:45 -04005849 /* One for when we allocated the page */
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03005850 put_page(page);
Nikolay Borisovd64766f2018-06-27 16:38:22 +03005851 }
Miao Xie897ca6e92010-10-26 20:57:29 -04005852}
5853
5854/*
5855 * Helper for releasing the extent buffer.
5856 */
5857static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
5858{
David Sterba55ac0132018-07-19 17:24:32 +02005859 btrfs_release_extent_buffer_pages(eb);
Josef Bacik8c389382020-02-14 16:11:42 -05005860 btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
Miao Xie897ca6e92010-10-26 20:57:29 -04005861 __free_extent_buffer(eb);
5862}
5863
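/*
 * Allocate and initialize the in-memory extent_buffer structure for the given
 * logical start and length; the backing pages are attached by the callers.
 */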
Josef Bacikf28491e2013-12-16 13:24:27 -05005864static struct extent_buffer *
5865__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
David Sterba23d79d82014-06-15 02:55:29 +02005866 unsigned long len)
Josef Bacikdb7f3432013-08-07 14:54:37 -04005867{
5868 struct extent_buffer *eb = NULL;
5869
Michal Hockod1b5c562015-08-19 14:17:40 +02005870 eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005871 eb->start = start;
5872 eb->len = len;
Josef Bacikf28491e2013-12-16 13:24:27 -05005873 eb->fs_info = fs_info;
Josef Bacikdb7f3432013-08-07 14:54:37 -04005874 eb->bflags = 0;
Josef Bacik196d59a2020-08-20 11:46:09 -04005875 init_rwsem(&eb->lock);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005876
Josef Bacik3fd63722020-02-14 16:11:40 -05005877 btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
5878 &fs_info->allocated_ebs);
Naohiro Aotad35751562021-02-04 19:21:54 +09005879 INIT_LIST_HEAD(&eb->release_list);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005880
5881 spin_lock_init(&eb->refs_lock);
5882 atomic_set(&eb->refs, 1);
5883 atomic_set(&eb->io_pages, 0);
5884
Qu Wenruodeb67892020-12-02 14:48:01 +08005885 ASSERT(len <= BTRFS_MAX_METADATA_BLOCKSIZE);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005886
5887 return eb;
5888}
5889
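/*
 * Create an unmapped (EXTENT_BUFFER_UNMAPPED) private copy of @src backed by
 * freshly allocated pages holding the same contents, marked uptodate.
 */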
David Sterba2b489662020-04-29 03:04:10 +02005890struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
Josef Bacikdb7f3432013-08-07 14:54:37 -04005891{
David Sterbacc5e31a2018-03-01 18:20:27 +01005892 int i;
Josef Bacikdb7f3432013-08-07 14:54:37 -04005893 struct page *p;
5894 struct extent_buffer *new;
David Sterbacc5e31a2018-03-01 18:20:27 +01005895 int num_pages = num_extent_pages(src);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005896
David Sterba3f556f72014-06-15 03:20:26 +02005897 new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005898 if (new == NULL)
5899 return NULL;
5900
Qu Wenruo62c053f2021-01-26 16:33:46 +08005901 /*
5902 * Set UNMAPPED before calling btrfs_release_extent_buffer(), as
5903 * btrfs_release_extent_buffer() have different behavior for
5904 * UNMAPPED subpage extent buffer.
5905 */
5906 set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
5907
Josef Bacikdb7f3432013-08-07 14:54:37 -04005908 for (i = 0; i < num_pages; i++) {
Qu Wenruo760f9912021-01-26 16:33:48 +08005909 int ret;
5910
Josef Bacik9ec72672013-08-07 16:57:23 -04005911 p = alloc_page(GFP_NOFS);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005912 if (!p) {
5913 btrfs_release_extent_buffer(new);
5914 return NULL;
5915 }
Qu Wenruo760f9912021-01-26 16:33:48 +08005916 ret = attach_extent_buffer_page(new, p, NULL);
5917 if (ret < 0) {
5918 put_page(p);
5919 btrfs_release_extent_buffer(new);
5920 return NULL;
5921 }
Josef Bacikdb7f3432013-08-07 14:54:37 -04005922 WARN_ON(PageDirty(p));
Josef Bacikdb7f3432013-08-07 14:54:37 -04005923 new->pages[i] = p;
David Sterbafba1acf2016-11-08 17:56:24 +01005924 copy_page(page_address(p), page_address(src->pages[i]));
Josef Bacikdb7f3432013-08-07 14:54:37 -04005925 }
Qu Wenruo92d83e92021-01-26 16:33:55 +08005926 set_extent_buffer_uptodate(new);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005927
5928 return new;
5929}
5930
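/*
 * Allocate an extent buffer that is not linked into the buffer radix tree
 * (EXTENT_BUFFER_UNMAPPED), backed by freshly allocated pages and marked
 * uptodate with zero nritems.
 */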
Omar Sandoval0f331222015-09-29 20:50:31 -07005931struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
5932 u64 start, unsigned long len)
Josef Bacikdb7f3432013-08-07 14:54:37 -04005933{
5934 struct extent_buffer *eb;
David Sterbacc5e31a2018-03-01 18:20:27 +01005935 int num_pages;
5936 int i;
Josef Bacikdb7f3432013-08-07 14:54:37 -04005937
David Sterba3f556f72014-06-15 03:20:26 +02005938 eb = __alloc_extent_buffer(fs_info, start, len);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005939 if (!eb)
5940 return NULL;
5941
David Sterba65ad0102018-06-29 10:56:49 +02005942 num_pages = num_extent_pages(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005943 for (i = 0; i < num_pages; i++) {
Qu Wenruo09bc1f02021-01-26 16:33:51 +08005944 int ret;
5945
Josef Bacik9ec72672013-08-07 16:57:23 -04005946 eb->pages[i] = alloc_page(GFP_NOFS);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005947 if (!eb->pages[i])
5948 goto err;
Qu Wenruo09bc1f02021-01-26 16:33:51 +08005949 ret = attach_extent_buffer_page(eb, eb->pages[i], NULL);
5950 if (ret < 0)
5951 goto err;
Josef Bacikdb7f3432013-08-07 14:54:37 -04005952 }
5953 set_extent_buffer_uptodate(eb);
5954 btrfs_set_header_nritems(eb, 0);
Nikolay Borisovb0132a32018-06-27 16:38:24 +03005955 set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005956
5957 return eb;
5958err:
Qu Wenruo09bc1f02021-01-26 16:33:51 +08005959 for (; i > 0; i--) {
5960 detach_extent_buffer_page(eb, eb->pages[i - 1]);
Josef Bacikdb7f3432013-08-07 14:54:37 -04005961 __free_page(eb->pages[i - 1]);
Qu Wenruo09bc1f02021-01-26 16:33:51 +08005962 }
Josef Bacikdb7f3432013-08-07 14:54:37 -04005963 __free_extent_buffer(eb);
5964 return NULL;
5965}
5966
Omar Sandoval0f331222015-09-29 20:50:31 -07005967struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
Jeff Mahoneyda170662016-06-15 09:22:56 -04005968 u64 start)
Omar Sandoval0f331222015-09-29 20:50:31 -07005969{
Jeff Mahoneyda170662016-06-15 09:22:56 -04005970 return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
Omar Sandoval0f331222015-09-29 20:50:31 -07005971}
5972
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005973static void check_buffer_tree_ref(struct extent_buffer *eb)
5974{
Chris Mason242e18c2013-01-29 17:49:37 -05005975 int refs;
Boris Burkov6bf9cd22020-06-17 11:35:19 -07005976 /*
5977 * The TREE_REF bit is first set when the extent_buffer is added
5978 * to the radix tree. It is also reset, if unset, when a new reference
5979 * is created by find_extent_buffer.
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005980 *
Boris Burkov6bf9cd22020-06-17 11:35:19 -07005981 * It is only cleared in two cases: freeing the last non-tree
5982 * reference to the extent_buffer when its STALE bit is set or
5983 * calling releasepage when the tree reference is the only reference.
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005984 *
Boris Burkov6bf9cd22020-06-17 11:35:19 -07005985 * In both cases, care is taken to ensure that the extent_buffer's
5986 * pages are not under io. However, releasepage can be concurrently
5987 * called with creating new references, which is prone to race
5988 * conditions between the calls to check_buffer_tree_ref in those
5989 * codepaths and clearing TREE_REF in try_release_extent_buffer.
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005990 *
Boris Burkov6bf9cd22020-06-17 11:35:19 -07005991 * The actual lifetime of the extent_buffer in the radix tree is
5992 * adequately protected by the refcount, but the TREE_REF bit and
5993 * its corresponding reference are not. To protect against this
5994 * class of races, we call check_buffer_tree_ref from the codepaths
5995 * which trigger io after they set eb->io_pages. Note that once io is
5996 * initiated, TREE_REF can no longer be cleared, so that is the
5997 * moment at which any such race is best fixed.
Josef Bacik0b32f4b2012-03-13 09:38:00 -04005998 */
Chris Mason242e18c2013-01-29 17:49:37 -05005999 refs = atomic_read(&eb->refs);
6000 if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6001 return;
6002
Josef Bacik594831c2012-07-20 16:11:08 -04006003 spin_lock(&eb->refs_lock);
6004 if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006005 atomic_inc(&eb->refs);
Josef Bacik594831c2012-07-20 16:11:08 -04006006 spin_unlock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006007}
6008
Mel Gorman2457aec2014-06-04 16:10:31 -07006009static void mark_extent_buffer_accessed(struct extent_buffer *eb,
6010 struct page *accessed)
Josef Bacik5df42352012-03-15 18:24:42 -04006011{
David Sterbacc5e31a2018-03-01 18:20:27 +01006012 int num_pages, i;
Josef Bacik5df42352012-03-15 18:24:42 -04006013
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006014 check_buffer_tree_ref(eb);
6015
David Sterba65ad0102018-06-29 10:56:49 +02006016 num_pages = num_extent_pages(eb);
Josef Bacik5df42352012-03-15 18:24:42 -04006017 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02006018 struct page *p = eb->pages[i];
6019
Mel Gorman2457aec2014-06-04 16:10:31 -07006020 if (p != accessed)
6021 mark_page_accessed(p);
Josef Bacik5df42352012-03-15 18:24:42 -04006022 }
6023}
6024
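/*
 * Look up an extent buffer in the buffer radix tree and return it holding an
 * extra reference, serializing against a racing free_extent_buffer() when the
 * buffer is marked stale (see the comment below).
 */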
Josef Bacikf28491e2013-12-16 13:24:27 -05006025struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
6026 u64 start)
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05006027{
6028 struct extent_buffer *eb;
6029
Qu Wenruo2f3186d2021-04-06 08:36:00 +08006030 eb = find_extent_buffer_nolock(fs_info, start);
6031 if (!eb)
6032 return NULL;
6033 /*
6034 * Lock our eb's refs_lock to avoid races with free_extent_buffer().
6035 * When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
6036 * another task running free_extent_buffer() might have seen that flag
6037 * set, eb->refs == 2, that the buffer isn't under IO (dirty and
6038 * writeback flags not set) and it's still in the tree (flag
6039 * EXTENT_BUFFER_TREE_REF set), therefore being in the process of
6040 * decrementing the extent buffer's reference count twice. So here we
6041 * could race and increment the eb's reference count, clear its stale
6042 * flag, mark it as dirty and drop our reference before the other task
6043 * finishes executing free_extent_buffer, which would later result in
6044 * an attempt to free an extent buffer that is dirty.
6045 */
6046 if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
6047 spin_lock(&eb->refs_lock);
6048 spin_unlock(&eb->refs_lock);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05006049 }
Qu Wenruo2f3186d2021-04-06 08:36:00 +08006050 mark_extent_buffer_accessed(eb, NULL);
6051 return eb;
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05006052}
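
/*
 * Illustrative sketch (hypothetical caller): find_extent_buffer() hands
 * back the buffer with an extra reference taken, which the caller must
 * drop with free_extent_buffer() when done.
 */
static void __maybe_unused example_lookup_eb(struct btrfs_fs_info *fs_info,
					     u64 start)
{
	struct extent_buffer *eb;

	eb = find_extent_buffer(fs_info, start);
	if (!eb)
		return;			/* not cached */

	/* ... read or modify the buffer here ... */

	free_extent_buffer(eb);		/* drop the lookup reference */
}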
6053
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006054#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
6055struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
Jeff Mahoneyda170662016-06-15 09:22:56 -04006056 u64 start)
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006057{
6058 struct extent_buffer *eb, *exists = NULL;
6059 int ret;
6060
6061 eb = find_extent_buffer(fs_info, start);
6062 if (eb)
6063 return eb;
Jeff Mahoneyda170662016-06-15 09:22:56 -04006064 eb = alloc_dummy_extent_buffer(fs_info, start);
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006065 if (!eb)
Dan Carpenterb6293c82019-12-03 14:24:58 +03006066 return ERR_PTR(-ENOMEM);
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006067 eb->fs_info = fs_info;
6068again:
David Sterbae1860a72016-05-09 14:11:38 +02006069 ret = radix_tree_preload(GFP_NOFS);
Dan Carpenterb6293c82019-12-03 14:24:58 +03006070 if (ret) {
6071 exists = ERR_PTR(ret);
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006072 goto free_eb;
Dan Carpenterb6293c82019-12-03 14:24:58 +03006073 }
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006074 spin_lock(&fs_info->buffer_lock);
6075 ret = radix_tree_insert(&fs_info->buffer_radix,
Qu Wenruo478ef882020-10-21 14:25:05 +08006076 start >> fs_info->sectorsize_bits, eb);
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006077 spin_unlock(&fs_info->buffer_lock);
6078 radix_tree_preload_end();
6079 if (ret == -EEXIST) {
6080 exists = find_extent_buffer(fs_info, start);
6081 if (exists)
6082 goto free_eb;
6083 else
6084 goto again;
6085 }
6086 check_buffer_tree_ref(eb);
6087 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
6088
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04006089 return eb;
6090free_eb:
6091 btrfs_release_extent_buffer(eb);
6092 return exists;
6093}
6094#endif
6095
Qu Wenruo819822102021-01-26 16:33:49 +08006096static struct extent_buffer *grab_extent_buffer(
6097 struct btrfs_fs_info *fs_info, struct page *page)
Qu Wenruoc0f0a9e2021-01-06 09:01:45 +08006098{
6099 struct extent_buffer *exists;
6100
Qu Wenruo819822102021-01-26 16:33:49 +08006101 /*
 6102	 * For the subpage case, we completely rely on the radix tree to ensure
 6103	 * we don't try to insert two ebs for the same bytenr. So here we always
 6104	 * return NULL and just continue.
6105 */
6106 if (fs_info->sectorsize < PAGE_SIZE)
6107 return NULL;
6108
Qu Wenruoc0f0a9e2021-01-06 09:01:45 +08006109 /* Page not yet attached to an extent buffer */
6110 if (!PagePrivate(page))
6111 return NULL;
6112
6113 /*
 6114	 * We could have already allocated an eb for this page and attached it,
 6115	 * so let's see if we can get a ref on the existing eb. If we can, we
 6116	 * know it's good and we can just return that one; otherwise we know we
 6117	 * can safely overwrite page->private.
6118 */
6119 exists = (struct extent_buffer *)page->private;
6120 if (atomic_inc_not_zero(&exists->refs))
6121 return exists;
6122
6123 WARN_ON(PageDirty(page));
6124 detach_page_private(page);
6125 return NULL;
6126}
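
/*
 * Illustrative sketch of the locking contract: grab_extent_buffer() may
 * only inspect and replace page->private under the btree inode's
 * mapping->private_lock, as alloc_extent_buffer() below does.  The helper
 * name is made up for the example.
 */
static void __maybe_unused example_peek_page_eb(struct btrfs_fs_info *fs_info,
						struct page *page)
{
	struct extent_buffer *exists;

	spin_lock(&page->mapping->private_lock);
	exists = grab_extent_buffer(fs_info, page);
	spin_unlock(&page->mapping->private_lock);
	if (exists)
		free_extent_buffer(exists);	/* we only wanted a peek */
}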
6127
Josef Bacikf28491e2013-12-16 13:24:27 -05006128struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
Josef Bacik3fbaf252020-11-05 10:45:20 -05006129 u64 start, u64 owner_root, int level)
Chris Masond1310b22008-01-24 16:13:08 -05006130{
Jeff Mahoneyda170662016-06-15 09:22:56 -04006131 unsigned long len = fs_info->nodesize;
David Sterbacc5e31a2018-03-01 18:20:27 +01006132 int num_pages;
6133 int i;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03006134 unsigned long index = start >> PAGE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05006135 struct extent_buffer *eb;
Chris Mason6af118ce2008-07-22 11:18:07 -04006136 struct extent_buffer *exists = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05006137 struct page *p;
Josef Bacikf28491e2013-12-16 13:24:27 -05006138 struct address_space *mapping = fs_info->btree_inode->i_mapping;
Chris Masond1310b22008-01-24 16:13:08 -05006139 int uptodate = 1;
Miao Xie19fe0a82010-10-26 20:57:29 -04006140 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05006141
Jeff Mahoneyda170662016-06-15 09:22:56 -04006142 if (!IS_ALIGNED(start, fs_info->sectorsize)) {
Liu Boc871b0f2016-06-06 12:01:23 -07006143 btrfs_err(fs_info, "bad tree block start %llu", start);
6144 return ERR_PTR(-EINVAL);
6145 }
6146
Qu Wenruoe9306ad2021-02-25 09:18:14 +08006147#if BITS_PER_LONG == 32
6148 if (start >= MAX_LFS_FILESIZE) {
6149 btrfs_err_rl(fs_info,
6150 "extent buffer %llu is beyond 32bit page cache limit", start);
6151 btrfs_err_32bit_limit(fs_info);
6152 return ERR_PTR(-EOVERFLOW);
6153 }
6154 if (start >= BTRFS_32BIT_EARLY_WARN_THRESHOLD)
6155 btrfs_warn_32bit_limit(fs_info);
6156#endif
6157
Qu Wenruo1aaac382020-12-02 14:48:02 +08006158 if (fs_info->sectorsize < PAGE_SIZE &&
6159 offset_in_page(start) + len > PAGE_SIZE) {
6160 btrfs_err(fs_info,
6161 "tree block crosses page boundary, start %llu nodesize %lu",
6162 start, len);
6163 return ERR_PTR(-EINVAL);
6164 }
6165
Josef Bacikf28491e2013-12-16 13:24:27 -05006166 eb = find_extent_buffer(fs_info, start);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05006167 if (eb)
Chris Mason6af118ce2008-07-22 11:18:07 -04006168 return eb;
Chris Mason6af118ce2008-07-22 11:18:07 -04006169
David Sterba23d79d82014-06-15 02:55:29 +02006170 eb = __alloc_extent_buffer(fs_info, start, len);
Peter2b114d12008-04-01 11:21:40 -04006171 if (!eb)
Liu Boc871b0f2016-06-06 12:01:23 -07006172 return ERR_PTR(-ENOMEM);
Josef Bacike114c542020-11-05 10:45:21 -05006173 btrfs_set_buffer_lockdep_class(owner_root, eb, level);
Chris Masond1310b22008-01-24 16:13:08 -05006174
David Sterba65ad0102018-06-29 10:56:49 +02006175 num_pages = num_extent_pages(eb);
Chris Mason727011e2010-08-06 13:21:20 -04006176 for (i = 0; i < num_pages; i++, index++) {
Qu Wenruo760f9912021-01-26 16:33:48 +08006177 struct btrfs_subpage *prealloc = NULL;
6178
Michal Hockod1b5c562015-08-19 14:17:40 +02006179 p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
Liu Boc871b0f2016-06-06 12:01:23 -07006180 if (!p) {
6181 exists = ERR_PTR(-ENOMEM);
Chris Mason6af118ce2008-07-22 11:18:07 -04006182 goto free_eb;
Liu Boc871b0f2016-06-06 12:01:23 -07006183 }
Josef Bacik4f2de97a2012-03-07 16:20:05 -05006184
Qu Wenruo760f9912021-01-26 16:33:48 +08006185 /*
 6186	 * Preallocate page->private for the subpage case, so that we won't
 6187	 * allocate memory with private_lock held. The memory will be
6188 * freed by attach_extent_buffer_page() or freed manually if
6189 * we exit earlier.
6190 *
 6191	 * Although we have ensured one subpage eb can only have one
 6192	 * page, this may change in the future for 16K page size
 6193	 * support, so we still preallocate the memory in the loop.
6194 */
Qu Wenruofdf250d2021-08-17 17:38:49 +08006195 if (fs_info->sectorsize < PAGE_SIZE) {
Qu Wenruo651fb412021-08-17 17:38:50 +08006196 prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
6197 if (IS_ERR(prealloc)) {
6198 ret = PTR_ERR(prealloc);
Qu Wenruofdf250d2021-08-17 17:38:49 +08006199 unlock_page(p);
6200 put_page(p);
6201 exists = ERR_PTR(ret);
6202 goto free_eb;
6203 }
Qu Wenruo760f9912021-01-26 16:33:48 +08006204 }
6205
Josef Bacik4f2de97a2012-03-07 16:20:05 -05006206 spin_lock(&mapping->private_lock);
Qu Wenruo819822102021-01-26 16:33:49 +08006207 exists = grab_extent_buffer(fs_info, p);
Qu Wenruoc0f0a9e2021-01-06 09:01:45 +08006208 if (exists) {
6209 spin_unlock(&mapping->private_lock);
6210 unlock_page(p);
6211 put_page(p);
6212 mark_extent_buffer_accessed(exists, p);
Qu Wenruo760f9912021-01-26 16:33:48 +08006213 btrfs_free_subpage(prealloc);
Qu Wenruoc0f0a9e2021-01-06 09:01:45 +08006214 goto free_eb;
Chris Masond1310b22008-01-24 16:13:08 -05006215 }
Qu Wenruo760f9912021-01-26 16:33:48 +08006216 /* Should not fail, as we have preallocated the memory */
6217 ret = attach_extent_buffer_page(eb, p, prealloc);
6218 ASSERT(!ret);
Qu Wenruo8ff84662021-01-26 16:33:50 +08006219 /*
 6220	 * To inform that we have an extra eb under allocation, so that
 6221	 * detach_extent_buffer_page() won't release the page private
 6222	 * when the eb hasn't yet been inserted into the radix tree.
 6223	 *
 6224	 * The ref will be decreased when the eb releases the page, in
 6225	 * detach_extent_buffer_page().
 6226	 * Thus it needs no special handling in the error path.
6227 */
6228 btrfs_page_inc_eb_refs(fs_info, p);
Josef Bacik4f2de97a2012-03-07 16:20:05 -05006229 spin_unlock(&mapping->private_lock);
Qu Wenruo760f9912021-01-26 16:33:48 +08006230
Qu Wenruo1e5eb3d2021-03-25 15:14:41 +08006231 WARN_ON(btrfs_page_test_dirty(fs_info, p, eb->start, eb->len));
Chris Mason727011e2010-08-06 13:21:20 -04006232 eb->pages[i] = p;
Chris Masond1310b22008-01-24 16:13:08 -05006233 if (!PageUptodate(p))
6234 uptodate = 0;
Chris Masoneb14ab82011-02-10 12:35:00 -05006235
6236 /*
Nikolay Borisovb16d0112018-07-04 10:24:52 +03006237 * We can't unlock the pages just yet since the extent buffer
6238 * hasn't been properly inserted in the radix tree, this
6239 * opens a race with btree_releasepage which can free a page
6240 * while we are still filling in all pages for the buffer and
6241 * we could crash.
Chris Masoneb14ab82011-02-10 12:35:00 -05006242 */
Chris Masond1310b22008-01-24 16:13:08 -05006243 }
6244 if (uptodate)
Chris Masonb4ce94d2009-02-04 09:25:08 -05006245 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Josef Bacik115391d2012-03-09 09:51:43 -05006246again:
David Sterbae1860a72016-05-09 14:11:38 +02006247 ret = radix_tree_preload(GFP_NOFS);
Liu Boc871b0f2016-06-06 12:01:23 -07006248 if (ret) {
6249 exists = ERR_PTR(ret);
Miao Xie19fe0a82010-10-26 20:57:29 -04006250 goto free_eb;
Liu Boc871b0f2016-06-06 12:01:23 -07006251 }
Miao Xie19fe0a82010-10-26 20:57:29 -04006252
Josef Bacikf28491e2013-12-16 13:24:27 -05006253 spin_lock(&fs_info->buffer_lock);
6254 ret = radix_tree_insert(&fs_info->buffer_radix,
Qu Wenruo478ef882020-10-21 14:25:05 +08006255 start >> fs_info->sectorsize_bits, eb);
Josef Bacikf28491e2013-12-16 13:24:27 -05006256 spin_unlock(&fs_info->buffer_lock);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05006257 radix_tree_preload_end();
Miao Xie19fe0a82010-10-26 20:57:29 -04006258 if (ret == -EEXIST) {
Josef Bacikf28491e2013-12-16 13:24:27 -05006259 exists = find_extent_buffer(fs_info, start);
Chandra Seetharaman452c75c2013-10-07 10:45:25 -05006260 if (exists)
6261 goto free_eb;
6262 else
Josef Bacik115391d2012-03-09 09:51:43 -05006263 goto again;
Chris Mason6af118ce2008-07-22 11:18:07 -04006264 }
Chris Mason6af118ce2008-07-22 11:18:07 -04006265 /* add one reference for the tree */
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006266 check_buffer_tree_ref(eb);
Josef Bacik34b41ac2013-12-13 10:41:51 -05006267 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
Chris Masoneb14ab82011-02-10 12:35:00 -05006268
6269 /*
Nikolay Borisovb16d0112018-07-04 10:24:52 +03006270 * Now it's safe to unlock the pages because any calls to
6271 * btree_releasepage will correctly detect that a page belongs to a
6272 * live buffer and won't free them prematurely.
Chris Masoneb14ab82011-02-10 12:35:00 -05006273 */
Nikolay Borisov28187ae2018-07-04 10:24:51 +03006274 for (i = 0; i < num_pages; i++)
6275 unlock_page(eb->pages[i]);
Chris Masond1310b22008-01-24 16:13:08 -05006276 return eb;
6277
Chris Mason6af118ce2008-07-22 11:18:07 -04006278free_eb:
Omar Sandoval5ca64f42015-02-24 02:47:05 -08006279 WARN_ON(!atomic_dec_and_test(&eb->refs));
Chris Mason727011e2010-08-06 13:21:20 -04006280 for (i = 0; i < num_pages; i++) {
6281 if (eb->pages[i])
6282 unlock_page(eb->pages[i]);
6283 }
Chris Masoneb14ab82011-02-10 12:35:00 -05006284
Miao Xie897ca6e92010-10-26 20:57:29 -04006285 btrfs_release_extent_buffer(eb);
Chris Mason6af118ce2008-07-22 11:18:07 -04006286 return exists;
Chris Masond1310b22008-01-24 16:13:08 -05006287}
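
/*
 * Illustrative sketch (hypothetical helper): allocating an extent buffer
 * for a tree block and reading it in.  The real path goes through
 * read_tree_block() in disk-io.c, which additionally checks generation
 * and other sanity bits; this is only the extent_io-level skeleton.
 */
static __maybe_unused struct extent_buffer *example_get_tree_block(
		struct btrfs_fs_info *fs_info, u64 bytenr, u64 owner_root,
		int level)
{
	struct extent_buffer *eb;
	int ret;

	eb = alloc_extent_buffer(fs_info, bytenr, owner_root, level);
	if (IS_ERR(eb))
		return eb;

	ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, 0);
	if (ret) {
		free_extent_buffer(eb);
		return ERR_PTR(ret);
	}
	return eb;
}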
Chris Masond1310b22008-01-24 16:13:08 -05006288
Josef Bacik3083ee22012-03-09 16:01:49 -05006289static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
6290{
6291 struct extent_buffer *eb =
6292 container_of(head, struct extent_buffer, rcu_head);
6293
6294 __free_extent_buffer(eb);
6295}
6296
David Sterbaf7a52a42013-04-26 14:56:29 +00006297static int release_extent_buffer(struct extent_buffer *eb)
Jules Irenge5ce48d02020-02-23 23:16:42 +00006298 __releases(&eb->refs_lock)
Josef Bacik3083ee22012-03-09 16:01:49 -05006299{
Nikolay Borisov07e21c42018-06-27 16:38:23 +03006300 lockdep_assert_held(&eb->refs_lock);
6301
Josef Bacik3083ee22012-03-09 16:01:49 -05006302 WARN_ON(atomic_read(&eb->refs) == 0);
6303 if (atomic_dec_and_test(&eb->refs)) {
Josef Bacik34b41ac2013-12-13 10:41:51 -05006304 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
Josef Bacikf28491e2013-12-16 13:24:27 -05006305 struct btrfs_fs_info *fs_info = eb->fs_info;
Josef Bacik3083ee22012-03-09 16:01:49 -05006306
Jan Schmidt815a51c2012-05-16 17:00:02 +02006307 spin_unlock(&eb->refs_lock);
Josef Bacik3083ee22012-03-09 16:01:49 -05006308
Josef Bacikf28491e2013-12-16 13:24:27 -05006309 spin_lock(&fs_info->buffer_lock);
6310 radix_tree_delete(&fs_info->buffer_radix,
Qu Wenruo478ef882020-10-21 14:25:05 +08006311 eb->start >> fs_info->sectorsize_bits);
Josef Bacikf28491e2013-12-16 13:24:27 -05006312 spin_unlock(&fs_info->buffer_lock);
Josef Bacik34b41ac2013-12-13 10:41:51 -05006313 } else {
6314 spin_unlock(&eb->refs_lock);
Jan Schmidt815a51c2012-05-16 17:00:02 +02006315 }
Josef Bacik3083ee22012-03-09 16:01:49 -05006316
Josef Bacik8c389382020-02-14 16:11:42 -05006317 btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
Josef Bacik3083ee22012-03-09 16:01:49 -05006318 /* Should be safe to release our pages at this point */
David Sterba55ac0132018-07-19 17:24:32 +02006319 btrfs_release_extent_buffer_pages(eb);
Josef Bacikbcb7e442015-03-16 17:38:02 -04006320#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
Nikolay Borisovb0132a32018-06-27 16:38:24 +03006321 if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
Josef Bacikbcb7e442015-03-16 17:38:02 -04006322 __free_extent_buffer(eb);
6323 return 1;
6324 }
6325#endif
Josef Bacik3083ee22012-03-09 16:01:49 -05006326 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
Josef Bacike64860a2012-07-20 16:05:36 -04006327 return 1;
Josef Bacik3083ee22012-03-09 16:01:49 -05006328 }
6329 spin_unlock(&eb->refs_lock);
Josef Bacike64860a2012-07-20 16:05:36 -04006330
6331 return 0;
Josef Bacik3083ee22012-03-09 16:01:49 -05006332}
6333
Chris Masond1310b22008-01-24 16:13:08 -05006334void free_extent_buffer(struct extent_buffer *eb)
6335{
Chris Mason242e18c2013-01-29 17:49:37 -05006336 int refs;
6337 int old;
Chris Masond1310b22008-01-24 16:13:08 -05006338 if (!eb)
6339 return;
6340
Chris Mason242e18c2013-01-29 17:49:37 -05006341 while (1) {
6342 refs = atomic_read(&eb->refs);
Nikolay Borisov46cc7752018-10-15 17:04:01 +03006343 if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
6344 || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
6345 refs == 1))
Chris Mason242e18c2013-01-29 17:49:37 -05006346 break;
6347 old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
6348 if (old == refs)
6349 return;
6350 }
6351
Josef Bacik3083ee22012-03-09 16:01:49 -05006352 spin_lock(&eb->refs_lock);
6353 if (atomic_read(&eb->refs) == 2 &&
6354 test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006355 !extent_buffer_under_io(eb) &&
Josef Bacik3083ee22012-03-09 16:01:49 -05006356 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6357 atomic_dec(&eb->refs);
Chris Masond1310b22008-01-24 16:13:08 -05006358
Josef Bacik3083ee22012-03-09 16:01:49 -05006359 /*
6360 * I know this is terrible, but it's temporary until we stop tracking
6361 * the uptodate bits and such for the extent buffers.
6362 */
David Sterbaf7a52a42013-04-26 14:56:29 +00006363 release_extent_buffer(eb);
Chris Masond1310b22008-01-24 16:13:08 -05006364}
Chris Masond1310b22008-01-24 16:13:08 -05006365
Josef Bacik3083ee22012-03-09 16:01:49 -05006366void free_extent_buffer_stale(struct extent_buffer *eb)
6367{
6368 if (!eb)
Chris Masond1310b22008-01-24 16:13:08 -05006369 return;
6370
Josef Bacik3083ee22012-03-09 16:01:49 -05006371 spin_lock(&eb->refs_lock);
6372 set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
6373
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006374 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
Josef Bacik3083ee22012-03-09 16:01:49 -05006375 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
6376 atomic_dec(&eb->refs);
David Sterbaf7a52a42013-04-26 14:56:29 +00006377 release_extent_buffer(eb);
Chris Masond1310b22008-01-24 16:13:08 -05006378}
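
/*
 * Illustrative sketch: once a tree block has been freed (e.g. after COW),
 * callers drop their reference with free_extent_buffer_stale() rather than
 * free_extent_buffer(), so the buffer leaves the radix tree as soon as the
 * last user goes away.  The surrounding cleanup (clearing dirty state,
 * accounting) is trimmed and the helper name is made up.
 */
static void __maybe_unused example_drop_freed_block(struct extent_buffer *buf)
{
	free_extent_buffer_stale(buf);
}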
6379
Qu Wenruo0d277972021-03-25 15:14:43 +08006380static void btree_clear_page_dirty(struct page *page)
6381{
6382 ASSERT(PageDirty(page));
6383 ASSERT(PageLocked(page));
6384 clear_page_dirty_for_io(page);
6385 xa_lock_irq(&page->mapping->i_pages);
6386 if (!PageDirty(page))
6387 __xa_clear_mark(&page->mapping->i_pages,
6388 page_index(page), PAGECACHE_TAG_DIRTY);
6389 xa_unlock_irq(&page->mapping->i_pages);
6390}
6391
6392static void clear_subpage_extent_buffer_dirty(const struct extent_buffer *eb)
6393{
6394 struct btrfs_fs_info *fs_info = eb->fs_info;
6395 struct page *page = eb->pages[0];
6396 bool last;
6397
6398 /* btree_clear_page_dirty() needs page locked */
6399 lock_page(page);
6400 last = btrfs_subpage_clear_and_test_dirty(fs_info, page, eb->start,
6401 eb->len);
6402 if (last)
6403 btree_clear_page_dirty(page);
6404 unlock_page(page);
6405 WARN_ON(atomic_read(&eb->refs) == 0);
6406}
6407
David Sterba2b489662020-04-29 03:04:10 +02006408void clear_extent_buffer_dirty(const struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05006409{
David Sterbacc5e31a2018-03-01 18:20:27 +01006410 int i;
6411 int num_pages;
Chris Masond1310b22008-01-24 16:13:08 -05006412 struct page *page;
6413
Qu Wenruo0d277972021-03-25 15:14:43 +08006414 if (eb->fs_info->sectorsize < PAGE_SIZE)
6415 return clear_subpage_extent_buffer_dirty(eb);
6416
David Sterba65ad0102018-06-29 10:56:49 +02006417 num_pages = num_extent_pages(eb);
Chris Masond1310b22008-01-24 16:13:08 -05006418
6419 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02006420 page = eb->pages[i];
Chris Masonb9473432009-03-13 11:00:37 -04006421 if (!PageDirty(page))
Chris Masond2c3f4f2008-11-19 12:44:22 -05006422 continue;
Chris Masona61e6f22008-07-22 11:18:08 -04006423 lock_page(page);
Qu Wenruo0d277972021-03-25 15:14:43 +08006424 btree_clear_page_dirty(page);
Chris Masonbf0da8c2011-11-04 12:29:37 -04006425 ClearPageError(page);
Chris Masona61e6f22008-07-22 11:18:08 -04006426 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05006427 }
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006428 WARN_ON(atomic_read(&eb->refs) == 0);
Chris Masond1310b22008-01-24 16:13:08 -05006429}
Chris Masond1310b22008-01-24 16:13:08 -05006430
Liu Boabb57ef2018-09-14 01:44:42 +08006431bool set_extent_buffer_dirty(struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05006432{
David Sterbacc5e31a2018-03-01 18:20:27 +01006433 int i;
6434 int num_pages;
Liu Boabb57ef2018-09-14 01:44:42 +08006435 bool was_dirty;
Chris Masond1310b22008-01-24 16:13:08 -05006436
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006437 check_buffer_tree_ref(eb);
6438
Chris Masonb9473432009-03-13 11:00:37 -04006439 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006440
David Sterba65ad0102018-06-29 10:56:49 +02006441 num_pages = num_extent_pages(eb);
Josef Bacik3083ee22012-03-09 16:01:49 -05006442 WARN_ON(atomic_read(&eb->refs) == 0);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006443 WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
6444
Qu Wenruo0d277972021-03-25 15:14:43 +08006445 if (!was_dirty) {
6446 bool subpage = eb->fs_info->sectorsize < PAGE_SIZE;
Liu Bo51995c32018-09-14 01:46:08 +08006447
Qu Wenruo0d277972021-03-25 15:14:43 +08006448 /*
6449 * For subpage case, we can have other extent buffers in the
6450 * same page, and in clear_subpage_extent_buffer_dirty() we
6451 * have to clear page dirty without subpage lock held.
6452 * This can cause race where our page gets dirty cleared after
6453 * we just set it.
6454 *
 6455		 * Thankfully, clear_subpage_extent_buffer_dirty() has locked
 6456		 * its page for other reasons, so we can use the page lock to
 6457		 * prevent the above race.
6458 */
6459 if (subpage)
6460 lock_page(eb->pages[0]);
6461 for (i = 0; i < num_pages; i++)
6462 btrfs_page_set_dirty(eb->fs_info, eb->pages[i],
6463 eb->start, eb->len);
6464 if (subpage)
6465 unlock_page(eb->pages[0]);
6466 }
Liu Bo51995c32018-09-14 01:46:08 +08006467#ifdef CONFIG_BTRFS_DEBUG
6468 for (i = 0; i < num_pages; i++)
6469 ASSERT(PageDirty(eb->pages[i]));
6470#endif
6471
Chris Masonb9473432009-03-13 11:00:37 -04006472 return was_dirty;
Chris Masond1310b22008-01-24 16:13:08 -05006473}
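
/*
 * Illustrative sketch: how a modified tree block is typically dirtied,
 * loosely mirroring btrfs_mark_buffer_dirty() in disk-io.c.  The real
 * function also accounts dirty metadata bytes for newly dirtied buffers,
 * which is reduced to a debug message here.
 */
static void __maybe_unused example_mark_eb_dirty(struct extent_buffer *eb)
{
	if (!set_extent_buffer_dirty(eb))
		pr_debug("eb %llu newly dirtied, %u bytes\n",
			 eb->start, eb->len);
}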
Chris Masond1310b22008-01-24 16:13:08 -05006474
David Sterba69ba3922015-12-03 13:08:59 +01006475void clear_extent_buffer_uptodate(struct extent_buffer *eb)
Chris Mason1259ab72008-05-12 13:39:03 -04006476{
Qu Wenruo251f2ac2021-01-26 16:33:54 +08006477 struct btrfs_fs_info *fs_info = eb->fs_info;
Chris Mason1259ab72008-05-12 13:39:03 -04006478 struct page *page;
David Sterbacc5e31a2018-03-01 18:20:27 +01006479 int num_pages;
Qu Wenruo251f2ac2021-01-26 16:33:54 +08006480 int i;
Chris Mason1259ab72008-05-12 13:39:03 -04006481
Chris Masonb4ce94d2009-02-04 09:25:08 -05006482 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
David Sterba65ad0102018-06-29 10:56:49 +02006483 num_pages = num_extent_pages(eb);
Chris Mason1259ab72008-05-12 13:39:03 -04006484 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02006485 page = eb->pages[i];
Chris Mason33958dc2008-07-30 10:29:12 -04006486 if (page)
Qu Wenruo251f2ac2021-01-26 16:33:54 +08006487 btrfs_page_clear_uptodate(fs_info, page,
6488 eb->start, eb->len);
Chris Mason1259ab72008-05-12 13:39:03 -04006489 }
Chris Mason1259ab72008-05-12 13:39:03 -04006490}
6491
David Sterba09c25a82015-12-03 13:08:59 +01006492void set_extent_buffer_uptodate(struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05006493{
Qu Wenruo251f2ac2021-01-26 16:33:54 +08006494 struct btrfs_fs_info *fs_info = eb->fs_info;
Chris Masond1310b22008-01-24 16:13:08 -05006495 struct page *page;
David Sterbacc5e31a2018-03-01 18:20:27 +01006496 int num_pages;
Qu Wenruo251f2ac2021-01-26 16:33:54 +08006497 int i;
Chris Masond1310b22008-01-24 16:13:08 -05006498
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006499 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
David Sterba65ad0102018-06-29 10:56:49 +02006500 num_pages = num_extent_pages(eb);
Chris Masond1310b22008-01-24 16:13:08 -05006501 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02006502 page = eb->pages[i];
Qu Wenruo251f2ac2021-01-26 16:33:54 +08006503 btrfs_page_set_uptodate(fs_info, page, eb->start, eb->len);
Chris Masond1310b22008-01-24 16:13:08 -05006504 }
Chris Masond1310b22008-01-24 16:13:08 -05006505}
Chris Masond1310b22008-01-24 16:13:08 -05006506
Qu Wenruo4012daf2021-01-26 16:33:57 +08006507static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
6508 int mirror_num)
6509{
6510 struct btrfs_fs_info *fs_info = eb->fs_info;
6511 struct extent_io_tree *io_tree;
6512 struct page *page = eb->pages[0];
Qu Wenruo390ed292021-04-14 16:42:15 +08006513 struct btrfs_bio_ctrl bio_ctrl = { 0 };
Qu Wenruo4012daf2021-01-26 16:33:57 +08006514 int ret = 0;
6515
6516 ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
6517 ASSERT(PagePrivate(page));
6518 io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
6519
6520 if (wait == WAIT_NONE) {
Goldwyn Rodriguesdc562192021-04-08 07:40:25 -05006521 if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1))
6522 return -EAGAIN;
Qu Wenruo4012daf2021-01-26 16:33:57 +08006523 } else {
6524 ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1);
6525 if (ret < 0)
6526 return ret;
6527 }
6528
6529 ret = 0;
6530 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags) ||
6531 PageUptodate(page) ||
6532 btrfs_subpage_test_uptodate(fs_info, page, eb->start, eb->len)) {
6533 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
6534 unlock_extent(io_tree, eb->start, eb->start + eb->len - 1);
6535 return ret;
6536 }
6537
6538 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
6539 eb->read_mirror = 0;
6540 atomic_set(&eb->io_pages, 1);
6541 check_buffer_tree_ref(eb);
6542 btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
6543
Qu Wenruo3d078ef2021-06-07 17:02:58 +08006544 btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
Qu Wenruo390ed292021-04-14 16:42:15 +08006545 ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
6546 page, eb->start, eb->len,
6547 eb->start - page_offset(page),
6548 end_bio_extent_readpage, mirror_num, 0,
Qu Wenruo4012daf2021-01-26 16:33:57 +08006549 true);
6550 if (ret) {
6551 /*
 6552		 * In the endio function, if we hit something wrong we will
 6553		 * increase io_pages, so here we need to decrease it in the
 6554		 * error path.
6555 */
6556 atomic_dec(&eb->io_pages);
6557 }
Qu Wenruo390ed292021-04-14 16:42:15 +08006558 if (bio_ctrl.bio) {
Qu Wenruo4012daf2021-01-26 16:33:57 +08006559 int tmp;
6560
Qu Wenruo390ed292021-04-14 16:42:15 +08006561 tmp = submit_one_bio(bio_ctrl.bio, mirror_num, 0);
6562 bio_ctrl.bio = NULL;
Qu Wenruo4012daf2021-01-26 16:33:57 +08006563 if (tmp < 0)
6564 return tmp;
6565 }
6566 if (ret || wait != WAIT_COMPLETE)
6567 return ret;
6568
6569 wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1, EXTENT_LOCKED);
6570 if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
6571 ret = -EIO;
6572 return ret;
6573}
6574
Nikolay Borisovc2ccfbc2019-04-10 17:24:40 +03006575int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05006576{
David Sterbacc5e31a2018-03-01 18:20:27 +01006577 int i;
Chris Masond1310b22008-01-24 16:13:08 -05006578 struct page *page;
6579 int err;
6580 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04006581 int locked_pages = 0;
6582 int all_uptodate = 1;
David Sterbacc5e31a2018-03-01 18:20:27 +01006583 int num_pages;
Chris Mason727011e2010-08-06 13:21:20 -04006584 unsigned long num_reads = 0;
Qu Wenruo390ed292021-04-14 16:42:15 +08006585 struct btrfs_bio_ctrl bio_ctrl = { 0 };
Chris Masona86c12c2008-02-07 10:50:54 -05006586
Chris Masonb4ce94d2009-02-04 09:25:08 -05006587 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
Chris Masond1310b22008-01-24 16:13:08 -05006588 return 0;
6589
Josef Bacik651740a2021-12-13 14:22:33 -05006590 /*
6591 * We could have had EXTENT_BUFFER_UPTODATE cleared by the write
6592 * operation, which could potentially still be in flight. In this case
6593 * we simply want to return an error.
6594 */
6595 if (unlikely(test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)))
6596 return -EIO;
6597
Qu Wenruo4012daf2021-01-26 16:33:57 +08006598 if (eb->fs_info->sectorsize < PAGE_SIZE)
6599 return read_extent_buffer_subpage(eb, wait, mirror_num);
6600
David Sterba65ad0102018-06-29 10:56:49 +02006601 num_pages = num_extent_pages(eb);
Josef Bacik8436ea912016-09-02 15:40:03 -04006602 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02006603 page = eb->pages[i];
Arne Jansenbb82ab82011-06-10 14:06:53 +02006604 if (wait == WAIT_NONE) {
Qu Wenruo2c4d8cb2021-01-28 19:25:08 +08006605 /*
6606 * WAIT_NONE is only utilized by readahead. If we can't
6607 * acquire the lock atomically it means either the eb
6608 * is being read out or under modification.
 6609			 * Either way the eb will be or has been cached, so
 6610			 * readahead can exit safely.
6611 */
David Woodhouse2db04962008-08-07 11:19:43 -04006612 if (!trylock_page(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04006613 goto unlock_exit;
Chris Masond1310b22008-01-24 16:13:08 -05006614 } else {
6615 lock_page(page);
6616 }
Chris Masonce9adaa2008-04-09 16:28:12 -04006617 locked_pages++;
Liu Bo2571e732016-08-03 12:33:01 -07006618 }
6619 /*
 6620	 * We first need to lock all pages to make sure that
6621 * the uptodate bit of our pages won't be affected by
6622 * clear_extent_buffer_uptodate().
6623 */
Josef Bacik8436ea912016-09-02 15:40:03 -04006624 for (i = 0; i < num_pages; i++) {
Liu Bo2571e732016-08-03 12:33:01 -07006625 page = eb->pages[i];
Chris Mason727011e2010-08-06 13:21:20 -04006626 if (!PageUptodate(page)) {
6627 num_reads++;
Chris Masonce9adaa2008-04-09 16:28:12 -04006628 all_uptodate = 0;
Chris Mason727011e2010-08-06 13:21:20 -04006629 }
Chris Masonce9adaa2008-04-09 16:28:12 -04006630 }
Liu Bo2571e732016-08-03 12:33:01 -07006631
Chris Masonce9adaa2008-04-09 16:28:12 -04006632 if (all_uptodate) {
Josef Bacik8436ea912016-09-02 15:40:03 -04006633 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
Chris Masonce9adaa2008-04-09 16:28:12 -04006634 goto unlock_exit;
6635 }
6636
Filipe Manana656f30d2014-09-26 12:25:56 +01006637 clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
Josef Bacik5cf1ab52012-04-16 09:42:26 -04006638 eb->read_mirror = 0;
Josef Bacik0b32f4b2012-03-13 09:38:00 -04006639 atomic_set(&eb->io_pages, num_reads);
Boris Burkov6bf9cd22020-06-17 11:35:19 -07006640 /*
6641 * It is possible for releasepage to clear the TREE_REF bit before we
6642 * set io_pages. See check_buffer_tree_ref for a more detailed comment.
6643 */
6644 check_buffer_tree_ref(eb);
Josef Bacik8436ea912016-09-02 15:40:03 -04006645 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02006646 page = eb->pages[i];
Liu Bobaf863b2016-07-11 10:39:07 -07006647
Chris Masonce9adaa2008-04-09 16:28:12 -04006648 if (!PageUptodate(page)) {
Liu Bobaf863b2016-07-11 10:39:07 -07006649 if (ret) {
6650 atomic_dec(&eb->io_pages);
6651 unlock_page(page);
6652 continue;
6653 }
6654
Chris Masonf1885912008-04-09 16:28:12 -04006655 ClearPageError(page);
Nikolay Borisov04201772020-09-14 12:37:04 +03006656 err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
Qu Wenruo390ed292021-04-14 16:42:15 +08006657 &bio_ctrl, page, page_offset(page),
6658 PAGE_SIZE, 0, end_bio_extent_readpage,
6659 mirror_num, 0, false);
Liu Bobaf863b2016-07-11 10:39:07 -07006660 if (err) {
Liu Bobaf863b2016-07-11 10:39:07 -07006661 /*
Nikolay Borisov04201772020-09-14 12:37:04 +03006662 * We failed to submit the bio so it's the
6663 * caller's responsibility to perform cleanup
 6664				 * i.e. unlock the page and set the error bit.
Liu Bobaf863b2016-07-11 10:39:07 -07006665 */
Nikolay Borisov04201772020-09-14 12:37:04 +03006666 ret = err;
6667 SetPageError(page);
6668 unlock_page(page);
Liu Bobaf863b2016-07-11 10:39:07 -07006669 atomic_dec(&eb->io_pages);
6670 }
Chris Masond1310b22008-01-24 16:13:08 -05006671 } else {
6672 unlock_page(page);
6673 }
6674 }
6675
Qu Wenruo390ed292021-04-14 16:42:15 +08006676 if (bio_ctrl.bio) {
6677 err = submit_one_bio(bio_ctrl.bio, mirror_num, bio_ctrl.bio_flags);
6678 bio_ctrl.bio = NULL;
Jeff Mahoney79787ea2012-03-12 16:03:00 +01006679 if (err)
6680 return err;
Jeff Mahoney355808c2011-10-03 23:23:14 -04006681 }
Chris Masona86c12c2008-02-07 10:50:54 -05006682
Arne Jansenbb82ab82011-06-10 14:06:53 +02006683 if (ret || wait != WAIT_COMPLETE)
Chris Masond1310b22008-01-24 16:13:08 -05006684 return ret;
Chris Masond3977122009-01-05 21:25:51 -05006685
Josef Bacik8436ea912016-09-02 15:40:03 -04006686 for (i = 0; i < num_pages; i++) {
David Sterbafb85fc92014-07-31 01:03:53 +02006687 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05006688 wait_on_page_locked(page);
Chris Masond3977122009-01-05 21:25:51 -05006689 if (!PageUptodate(page))
Chris Masond1310b22008-01-24 16:13:08 -05006690 ret = -EIO;
Chris Masond1310b22008-01-24 16:13:08 -05006691 }
Chris Masond3977122009-01-05 21:25:51 -05006692
Chris Masond1310b22008-01-24 16:13:08 -05006693 return ret;
Chris Masonce9adaa2008-04-09 16:28:12 -04006694
6695unlock_exit:
Chris Masond3977122009-01-05 21:25:51 -05006696 while (locked_pages > 0) {
Chris Masonce9adaa2008-04-09 16:28:12 -04006697 locked_pages--;
Josef Bacik8436ea912016-09-02 15:40:03 -04006698 page = eb->pages[locked_pages];
6699 unlock_page(page);
Chris Masonce9adaa2008-04-09 16:28:12 -04006700 }
6701 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05006702}
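
/*
 * Illustrative sketch: readahead submits the read with WAIT_NONE and then
 * drops its own reference straight away, roughly as
 * btrfs_readahead_tree_block() in disk-io.c does (error handling trimmed).
 * The in-flight io and the tree ref keep the buffer alive.
 */
static void __maybe_unused example_readahead_eb(struct extent_buffer *eb)
{
	read_extent_buffer_pages(eb, WAIT_NONE, 0);
	free_extent_buffer(eb);
}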
Chris Masond1310b22008-01-24 16:13:08 -05006703
Qu Wenruof98b6212020-08-19 14:35:47 +08006704static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
6705 unsigned long len)
6706{
6707 btrfs_warn(eb->fs_info,
6708 "access to eb bytenr %llu len %lu out of range start %lu len %lu",
6709 eb->start, eb->len, start, len);
6710 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
6711
6712 return true;
6713}
6714
6715/*
6716 * Check if the [start, start + len) range is valid before reading/writing
6717 * the eb.
 6718 * NOTE: @start and @len are offsets inside the eb, not logical addresses.
6719 *
6720 * Caller should not touch the dst/src memory if this function returns error.
6721 */
6722static inline int check_eb_range(const struct extent_buffer *eb,
6723 unsigned long start, unsigned long len)
6724{
6725 unsigned long offset;
6726
6727 /* start, start + len should not go beyond eb->len nor overflow */
6728 if (unlikely(check_add_overflow(start, len, &offset) || offset > eb->len))
6729 return report_eb_range(eb, start, len);
6730
6731 return false;
6732}
6733
Jeff Mahoney1cbb1f42017-06-28 21:56:53 -06006734void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
6735 unsigned long start, unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05006736{
6737 size_t cur;
6738 size_t offset;
6739 struct page *page;
6740 char *kaddr;
6741 char *dst = (char *)dstv;
Qu Wenruo884b07d2020-12-02 14:48:04 +08006742 unsigned long i = get_eb_page_index(start);
Chris Masond1310b22008-01-24 16:13:08 -05006743
Qu Wenruof98b6212020-08-19 14:35:47 +08006744 if (check_eb_range(eb, start, len))
Liu Bof716abd2017-08-09 11:10:16 -06006745 return;
Chris Masond1310b22008-01-24 16:13:08 -05006746
Qu Wenruo884b07d2020-12-02 14:48:04 +08006747 offset = get_eb_offset_in_page(eb, start);
Chris Masond1310b22008-01-24 16:13:08 -05006748
Chris Masond3977122009-01-05 21:25:51 -05006749 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02006750 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05006751
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03006752 cur = min(len, (PAGE_SIZE - offset));
Chris Masona6591712011-07-19 12:04:14 -04006753 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05006754 memcpy(dst, kaddr + offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05006755
6756 dst += cur;
6757 len -= cur;
6758 offset = 0;
6759 i++;
6760 }
6761}
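
/*
 * Illustrative sketch: pulling a fixed-size on-disk field out of an extent
 * buffer.  The real accessors are generated by BTRFS_SETGET_HEADER_FUNCS()
 * in ctree.h; this open-coded version only shows how they map onto
 * read_extent_buffer().
 */
static u64 __maybe_unused example_header_bytenr(const struct extent_buffer *eb)
{
	__le64 disk_bytenr;

	read_extent_buffer(eb, &disk_bytenr,
			   offsetof(struct btrfs_header, bytenr),
			   sizeof(disk_bytenr));
	return le64_to_cpu(disk_bytenr);
}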
Chris Masond1310b22008-01-24 16:13:08 -05006762
Josef Bacika48b73e2020-08-10 11:42:27 -04006763int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
6764 void __user *dstv,
6765 unsigned long start, unsigned long len)
Gerhard Heift550ac1d2014-01-30 16:24:01 +01006766{
6767 size_t cur;
6768 size_t offset;
6769 struct page *page;
6770 char *kaddr;
6771 char __user *dst = (char __user *)dstv;
Qu Wenruo884b07d2020-12-02 14:48:04 +08006772 unsigned long i = get_eb_page_index(start);
Gerhard Heift550ac1d2014-01-30 16:24:01 +01006773 int ret = 0;
6774
6775 WARN_ON(start > eb->len);
6776 WARN_ON(start + len > eb->start + eb->len);
6777
Qu Wenruo884b07d2020-12-02 14:48:04 +08006778 offset = get_eb_offset_in_page(eb, start);
Gerhard Heift550ac1d2014-01-30 16:24:01 +01006779
6780 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02006781 page = eb->pages[i];
Gerhard Heift550ac1d2014-01-30 16:24:01 +01006782
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03006783 cur = min(len, (PAGE_SIZE - offset));
Gerhard Heift550ac1d2014-01-30 16:24:01 +01006784 kaddr = page_address(page);
Josef Bacika48b73e2020-08-10 11:42:27 -04006785 if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
Gerhard Heift550ac1d2014-01-30 16:24:01 +01006786 ret = -EFAULT;
6787 break;
6788 }
6789
6790 dst += cur;
6791 len -= cur;
6792 offset = 0;
6793 i++;
6794 }
6795
6796 return ret;
6797}
6798
Jeff Mahoney1cbb1f42017-06-28 21:56:53 -06006799int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
6800 unsigned long start, unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05006801{
6802 size_t cur;
6803 size_t offset;
6804 struct page *page;
6805 char *kaddr;
6806 char *ptr = (char *)ptrv;
Qu Wenruo884b07d2020-12-02 14:48:04 +08006807 unsigned long i = get_eb_page_index(start);
Chris Masond1310b22008-01-24 16:13:08 -05006808 int ret = 0;
6809
Qu Wenruof98b6212020-08-19 14:35:47 +08006810 if (check_eb_range(eb, start, len))
6811 return -EINVAL;
Chris Masond1310b22008-01-24 16:13:08 -05006812
Qu Wenruo884b07d2020-12-02 14:48:04 +08006813 offset = get_eb_offset_in_page(eb, start);
Chris Masond1310b22008-01-24 16:13:08 -05006814
Chris Masond3977122009-01-05 21:25:51 -05006815 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02006816 page = eb->pages[i];
Chris Masond1310b22008-01-24 16:13:08 -05006817
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03006818 cur = min(len, (PAGE_SIZE - offset));
Chris Masond1310b22008-01-24 16:13:08 -05006819
Chris Masona6591712011-07-19 12:04:14 -04006820 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05006821 ret = memcmp(ptr, kaddr + offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05006822 if (ret)
6823 break;
6824
6825 ptr += cur;
6826 len -= cur;
6827 offset = 0;
6828 i++;
6829 }
6830 return ret;
6831}
Chris Masond1310b22008-01-24 16:13:08 -05006832
Qu Wenruob8f95772021-03-25 15:14:42 +08006833/*
6834 * Check that the extent buffer is uptodate.
6835 *
6836 * For regular sector size == PAGE_SIZE case, check if @page is uptodate.
6837 * For subpage case, check if the range covered by the eb has EXTENT_UPTODATE.
6838 */
6839static void assert_eb_page_uptodate(const struct extent_buffer *eb,
6840 struct page *page)
6841{
6842 struct btrfs_fs_info *fs_info = eb->fs_info;
6843
6844 if (fs_info->sectorsize < PAGE_SIZE) {
6845 bool uptodate;
6846
6847 uptodate = btrfs_subpage_test_uptodate(fs_info, page,
6848 eb->start, eb->len);
6849 WARN_ON(!uptodate);
6850 } else {
6851 WARN_ON(!PageUptodate(page));
6852 }
6853}
6854
David Sterba2b489662020-04-29 03:04:10 +02006855void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
David Sterbaf157bf72016-11-09 17:43:38 +01006856 const void *srcv)
6857{
6858 char *kaddr;
6859
Qu Wenruob8f95772021-03-25 15:14:42 +08006860 assert_eb_page_uptodate(eb, eb->pages[0]);
David Sterba24880be52020-09-21 22:07:14 +02006861 kaddr = page_address(eb->pages[0]) +
6862 get_eb_offset_in_page(eb, offsetof(struct btrfs_header,
6863 chunk_tree_uuid));
6864 memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
David Sterbaf157bf72016-11-09 17:43:38 +01006865}
6866
David Sterba2b489662020-04-29 03:04:10 +02006867void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
David Sterbaf157bf72016-11-09 17:43:38 +01006868{
6869 char *kaddr;
6870
Qu Wenruob8f95772021-03-25 15:14:42 +08006871 assert_eb_page_uptodate(eb, eb->pages[0]);
David Sterba24880be52020-09-21 22:07:14 +02006872 kaddr = page_address(eb->pages[0]) +
6873 get_eb_offset_in_page(eb, offsetof(struct btrfs_header, fsid));
6874 memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
David Sterbaf157bf72016-11-09 17:43:38 +01006875}
6876
David Sterba2b489662020-04-29 03:04:10 +02006877void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
Chris Masond1310b22008-01-24 16:13:08 -05006878 unsigned long start, unsigned long len)
6879{
6880 size_t cur;
6881 size_t offset;
6882 struct page *page;
6883 char *kaddr;
6884 char *src = (char *)srcv;
Qu Wenruo884b07d2020-12-02 14:48:04 +08006885 unsigned long i = get_eb_page_index(start);
Chris Masond1310b22008-01-24 16:13:08 -05006886
Naohiro Aotad35751562021-02-04 19:21:54 +09006887 WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
6888
Qu Wenruof98b6212020-08-19 14:35:47 +08006889 if (check_eb_range(eb, start, len))
6890 return;
Chris Masond1310b22008-01-24 16:13:08 -05006891
Qu Wenruo884b07d2020-12-02 14:48:04 +08006892 offset = get_eb_offset_in_page(eb, start);
Chris Masond1310b22008-01-24 16:13:08 -05006893
Chris Masond3977122009-01-05 21:25:51 -05006894 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02006895 page = eb->pages[i];
Qu Wenruob8f95772021-03-25 15:14:42 +08006896 assert_eb_page_uptodate(eb, page);
Chris Masond1310b22008-01-24 16:13:08 -05006897
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03006898 cur = min(len, PAGE_SIZE - offset);
Chris Masona6591712011-07-19 12:04:14 -04006899 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05006900 memcpy(kaddr + offset, src, cur);
Chris Masond1310b22008-01-24 16:13:08 -05006901
6902 src += cur;
6903 len -= cur;
6904 offset = 0;
6905 i++;
6906 }
6907}
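
/*
 * Illustrative sketch: the write-side counterpart, storing a header field
 * through write_extent_buffer().  In practice this is done via the
 * generated btrfs_set_header_generation() helper; the open-coded form is
 * only for illustration.
 */
static void __maybe_unused example_set_header_generation(
		const struct extent_buffer *eb, u64 gen)
{
	__le64 disk_gen = cpu_to_le64(gen);

	write_extent_buffer(eb, &disk_gen,
			    offsetof(struct btrfs_header, generation),
			    sizeof(disk_gen));
}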
Chris Masond1310b22008-01-24 16:13:08 -05006908
David Sterba2b489662020-04-29 03:04:10 +02006909void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
David Sterbab159fa22016-11-08 18:09:03 +01006910 unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05006911{
6912 size_t cur;
6913 size_t offset;
6914 struct page *page;
6915 char *kaddr;
Qu Wenruo884b07d2020-12-02 14:48:04 +08006916 unsigned long i = get_eb_page_index(start);
Chris Masond1310b22008-01-24 16:13:08 -05006917
Qu Wenruof98b6212020-08-19 14:35:47 +08006918 if (check_eb_range(eb, start, len))
6919 return;
Chris Masond1310b22008-01-24 16:13:08 -05006920
Qu Wenruo884b07d2020-12-02 14:48:04 +08006921 offset = get_eb_offset_in_page(eb, start);
Chris Masond1310b22008-01-24 16:13:08 -05006922
Chris Masond3977122009-01-05 21:25:51 -05006923 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02006924 page = eb->pages[i];
Qu Wenruob8f95772021-03-25 15:14:42 +08006925 assert_eb_page_uptodate(eb, page);
Chris Masond1310b22008-01-24 16:13:08 -05006926
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03006927 cur = min(len, PAGE_SIZE - offset);
Chris Masona6591712011-07-19 12:04:14 -04006928 kaddr = page_address(page);
David Sterbab159fa22016-11-08 18:09:03 +01006929 memset(kaddr + offset, 0, cur);
Chris Masond1310b22008-01-24 16:13:08 -05006930
6931 len -= cur;
6932 offset = 0;
6933 i++;
6934 }
6935}
Chris Masond1310b22008-01-24 16:13:08 -05006936
David Sterba2b489662020-04-29 03:04:10 +02006937void copy_extent_buffer_full(const struct extent_buffer *dst,
6938 const struct extent_buffer *src)
David Sterba58e80122016-11-08 18:30:31 +01006939{
6940 int i;
David Sterbacc5e31a2018-03-01 18:20:27 +01006941 int num_pages;
David Sterba58e80122016-11-08 18:30:31 +01006942
6943 ASSERT(dst->len == src->len);
6944
Qu Wenruo884b07d2020-12-02 14:48:04 +08006945 if (dst->fs_info->sectorsize == PAGE_SIZE) {
6946 num_pages = num_extent_pages(dst);
6947 for (i = 0; i < num_pages; i++)
6948 copy_page(page_address(dst->pages[i]),
6949 page_address(src->pages[i]));
6950 } else {
6951 size_t src_offset = get_eb_offset_in_page(src, 0);
6952 size_t dst_offset = get_eb_offset_in_page(dst, 0);
6953
6954 ASSERT(src->fs_info->sectorsize < PAGE_SIZE);
6955 memcpy(page_address(dst->pages[0]) + dst_offset,
6956 page_address(src->pages[0]) + src_offset,
6957 src->len);
6958 }
David Sterba58e80122016-11-08 18:30:31 +01006959}
6960
David Sterba2b489662020-04-29 03:04:10 +02006961void copy_extent_buffer(const struct extent_buffer *dst,
6962 const struct extent_buffer *src,
Chris Masond1310b22008-01-24 16:13:08 -05006963 unsigned long dst_offset, unsigned long src_offset,
6964 unsigned long len)
6965{
6966 u64 dst_len = dst->len;
6967 size_t cur;
6968 size_t offset;
6969 struct page *page;
6970 char *kaddr;
Qu Wenruo884b07d2020-12-02 14:48:04 +08006971 unsigned long i = get_eb_page_index(dst_offset);
Chris Masond1310b22008-01-24 16:13:08 -05006972
Qu Wenruof98b6212020-08-19 14:35:47 +08006973 if (check_eb_range(dst, dst_offset, len) ||
6974 check_eb_range(src, src_offset, len))
6975 return;
6976
Chris Masond1310b22008-01-24 16:13:08 -05006977 WARN_ON(src->len != dst_len);
6978
Qu Wenruo884b07d2020-12-02 14:48:04 +08006979 offset = get_eb_offset_in_page(dst, dst_offset);
Chris Masond1310b22008-01-24 16:13:08 -05006980
Chris Masond3977122009-01-05 21:25:51 -05006981 while (len > 0) {
David Sterbafb85fc92014-07-31 01:03:53 +02006982 page = dst->pages[i];
Qu Wenruob8f95772021-03-25 15:14:42 +08006983 assert_eb_page_uptodate(dst, page);
Chris Masond1310b22008-01-24 16:13:08 -05006984
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03006985 cur = min(len, (unsigned long)(PAGE_SIZE - offset));
Chris Masond1310b22008-01-24 16:13:08 -05006986
Chris Masona6591712011-07-19 12:04:14 -04006987 kaddr = page_address(page);
Chris Masond1310b22008-01-24 16:13:08 -05006988 read_extent_buffer(src, kaddr + offset, src_offset, cur);
Chris Masond1310b22008-01-24 16:13:08 -05006989
6990 src_offset += cur;
6991 len -= cur;
6992 offset = 0;
6993 i++;
6994 }
6995}
Chris Masond1310b22008-01-24 16:13:08 -05006996
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07006997/*
6998 * eb_bitmap_offset() - calculate the page and offset of the byte containing the
6999 * given bit number
7000 * @eb: the extent buffer
7001 * @start: offset of the bitmap item in the extent buffer
7002 * @nr: bit number
7003 * @page_index: return index of the page in the extent buffer that contains the
7004 * given bit number
7005 * @page_offset: return offset into the page given by page_index
7006 *
7007 * This helper hides the ugliness of finding the byte in an extent buffer which
7008 * contains a given bit.
7009 */
David Sterba2b489662020-04-29 03:04:10 +02007010static inline void eb_bitmap_offset(const struct extent_buffer *eb,
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007011 unsigned long start, unsigned long nr,
7012 unsigned long *page_index,
7013 size_t *page_offset)
7014{
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007015 size_t byte_offset = BIT_BYTE(nr);
7016 size_t offset;
7017
7018 /*
7019 * The byte we want is the offset of the extent buffer + the offset of
7020 * the bitmap item in the extent buffer + the offset of the byte in the
7021 * bitmap item.
7022 */
Qu Wenruo884b07d2020-12-02 14:48:04 +08007023 offset = start + offset_in_page(eb->start) + byte_offset;
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007024
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03007025 *page_index = offset >> PAGE_SHIFT;
Johannes Thumshirn70730172018-12-05 15:23:03 +01007026 *page_offset = offset_in_page(offset);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007027}
7028
7029/**
7030 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
7031 * @eb: the extent buffer
7032 * @start: offset of the bitmap item in the extent buffer
7033 * @nr: bit number to test
7034 */
David Sterba2b489662020-04-29 03:04:10 +02007035int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007036 unsigned long nr)
7037{
Omar Sandoval2fe1d552016-09-22 17:24:20 -07007038 u8 *kaddr;
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007039 struct page *page;
7040 unsigned long i;
7041 size_t offset;
7042
7043 eb_bitmap_offset(eb, start, nr, &i, &offset);
7044 page = eb->pages[i];
Qu Wenruob8f95772021-03-25 15:14:42 +08007045 assert_eb_page_uptodate(eb, page);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007046 kaddr = page_address(page);
7047 return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
7048}
7049
7050/**
7051 * extent_buffer_bitmap_set - set an area of a bitmap
7052 * @eb: the extent buffer
7053 * @start: offset of the bitmap item in the extent buffer
7054 * @pos: bit number of the first bit
7055 * @len: number of bits to set
7056 */
David Sterba2b489662020-04-29 03:04:10 +02007057void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007058 unsigned long pos, unsigned long len)
7059{
Omar Sandoval2fe1d552016-09-22 17:24:20 -07007060 u8 *kaddr;
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007061 struct page *page;
7062 unsigned long i;
7063 size_t offset;
7064 const unsigned int size = pos + len;
7065 int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
Omar Sandoval2fe1d552016-09-22 17:24:20 -07007066 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007067
7068 eb_bitmap_offset(eb, start, pos, &i, &offset);
7069 page = eb->pages[i];
Qu Wenruob8f95772021-03-25 15:14:42 +08007070 assert_eb_page_uptodate(eb, page);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007071 kaddr = page_address(page);
7072
7073 while (len >= bits_to_set) {
7074 kaddr[offset] |= mask_to_set;
7075 len -= bits_to_set;
7076 bits_to_set = BITS_PER_BYTE;
Dan Carpenter9c894692016-10-12 11:33:21 +03007077 mask_to_set = ~0;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03007078 if (++offset >= PAGE_SIZE && len > 0) {
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007079 offset = 0;
7080 page = eb->pages[++i];
Qu Wenruob8f95772021-03-25 15:14:42 +08007081 assert_eb_page_uptodate(eb, page);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007082 kaddr = page_address(page);
7083 }
7084 }
7085 if (len) {
7086 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
7087 kaddr[offset] |= mask_to_set;
7088 }
7089}
7090
7091
7092/**
7093 * extent_buffer_bitmap_clear - clear an area of a bitmap
7094 * @eb: the extent buffer
7095 * @start: offset of the bitmap item in the extent buffer
7096 * @pos: bit number of the first bit
7097 * @len: number of bits to clear
7098 */
David Sterba2b489662020-04-29 03:04:10 +02007099void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
7100 unsigned long start, unsigned long pos,
7101 unsigned long len)
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007102{
Omar Sandoval2fe1d552016-09-22 17:24:20 -07007103 u8 *kaddr;
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007104 struct page *page;
7105 unsigned long i;
7106 size_t offset;
7107 const unsigned int size = pos + len;
7108 int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
Omar Sandoval2fe1d552016-09-22 17:24:20 -07007109 u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007110
7111 eb_bitmap_offset(eb, start, pos, &i, &offset);
7112 page = eb->pages[i];
Qu Wenruob8f95772021-03-25 15:14:42 +08007113 assert_eb_page_uptodate(eb, page);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007114 kaddr = page_address(page);
7115
7116 while (len >= bits_to_clear) {
7117 kaddr[offset] &= ~mask_to_clear;
7118 len -= bits_to_clear;
7119 bits_to_clear = BITS_PER_BYTE;
Dan Carpenter9c894692016-10-12 11:33:21 +03007120 mask_to_clear = ~0;
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03007121 if (++offset >= PAGE_SIZE && len > 0) {
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007122 offset = 0;
7123 page = eb->pages[++i];
Qu Wenruob8f95772021-03-25 15:14:42 +08007124 assert_eb_page_uptodate(eb, page);
Omar Sandoval3e1e8bb2015-09-29 20:50:30 -07007125 kaddr = page_address(page);
7126 }
7127 }
7128 if (len) {
7129 mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
7130 kaddr[offset] &= ~mask_to_clear;
7131 }
7132}
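
/*
 * Illustrative sketch: the bitmap helpers above operate on a bitmap item
 * stored inside an extent buffer; @start is the byte offset of that item
 * in the eb and bit numbers are relative to the item (the free space tree
 * is the main user).  The offsets below are made up.
 */
static void __maybe_unused example_bitmap_ops(const struct extent_buffer *leaf,
					      unsigned long bitmap_start)
{
	/* Set bits [8, 24), then clear bits [12, 16) again. */
	extent_buffer_bitmap_set(leaf, bitmap_start, 8, 16);
	extent_buffer_bitmap_clear(leaf, bitmap_start, 12, 4);

	/* Bit 12 is clear again, bit 8 is still set. */
	WARN_ON(extent_buffer_test_bit(leaf, bitmap_start, 12));
	WARN_ON(!extent_buffer_test_bit(leaf, bitmap_start, 8));
}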
7133
Sergei Trofimovich33872062011-04-11 21:52:52 +00007134static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
7135{
7136 unsigned long distance = (src > dst) ? src - dst : dst - src;
7137 return distance < len;
7138}
7139
Chris Masond1310b22008-01-24 16:13:08 -05007140static void copy_pages(struct page *dst_page, struct page *src_page,
7141 unsigned long dst_off, unsigned long src_off,
7142 unsigned long len)
7143{
Chris Masona6591712011-07-19 12:04:14 -04007144 char *dst_kaddr = page_address(dst_page);
Chris Masond1310b22008-01-24 16:13:08 -05007145 char *src_kaddr;
Chris Mason727011e2010-08-06 13:21:20 -04007146 int must_memmove = 0;
Chris Masond1310b22008-01-24 16:13:08 -05007147
Sergei Trofimovich33872062011-04-11 21:52:52 +00007148 if (dst_page != src_page) {
Chris Masona6591712011-07-19 12:04:14 -04007149 src_kaddr = page_address(src_page);
Sergei Trofimovich33872062011-04-11 21:52:52 +00007150 } else {
Chris Masond1310b22008-01-24 16:13:08 -05007151 src_kaddr = dst_kaddr;
Chris Mason727011e2010-08-06 13:21:20 -04007152 if (areas_overlap(src_off, dst_off, len))
7153 must_memmove = 1;
Sergei Trofimovich33872062011-04-11 21:52:52 +00007154 }
Chris Masond1310b22008-01-24 16:13:08 -05007155
Chris Mason727011e2010-08-06 13:21:20 -04007156 if (must_memmove)
7157 memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
7158 else
7159 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
Chris Masond1310b22008-01-24 16:13:08 -05007160}
7161
David Sterba2b489662020-04-29 03:04:10 +02007162void memcpy_extent_buffer(const struct extent_buffer *dst,
7163 unsigned long dst_offset, unsigned long src_offset,
7164 unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05007165{
7166 size_t cur;
7167 size_t dst_off_in_page;
7168 size_t src_off_in_page;
Chris Masond1310b22008-01-24 16:13:08 -05007169 unsigned long dst_i;
7170 unsigned long src_i;
7171
Qu Wenruof98b6212020-08-19 14:35:47 +08007172 if (check_eb_range(dst, dst_offset, len) ||
7173 check_eb_range(dst, src_offset, len))
7174 return;
Chris Masond1310b22008-01-24 16:13:08 -05007175
Chris Masond3977122009-01-05 21:25:51 -05007176 while (len > 0) {
Qu Wenruo884b07d2020-12-02 14:48:04 +08007177 dst_off_in_page = get_eb_offset_in_page(dst, dst_offset);
7178 src_off_in_page = get_eb_offset_in_page(dst, src_offset);
Chris Masond1310b22008-01-24 16:13:08 -05007179
Qu Wenruo884b07d2020-12-02 14:48:04 +08007180 dst_i = get_eb_page_index(dst_offset);
7181 src_i = get_eb_page_index(src_offset);
Chris Masond1310b22008-01-24 16:13:08 -05007182
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03007183 cur = min(len, (unsigned long)(PAGE_SIZE -
Chris Masond1310b22008-01-24 16:13:08 -05007184 src_off_in_page));
7185 cur = min_t(unsigned long, cur,
Kirill A. Shutemov09cbfea2016-04-01 15:29:47 +03007186 (unsigned long)(PAGE_SIZE - dst_off_in_page));
Chris Masond1310b22008-01-24 16:13:08 -05007187
David Sterbafb85fc92014-07-31 01:03:53 +02007188 copy_pages(dst->pages[dst_i], dst->pages[src_i],
Chris Masond1310b22008-01-24 16:13:08 -05007189 dst_off_in_page, src_off_in_page, cur);
7190
7191 src_offset += cur;
7192 dst_offset += cur;
7193 len -= cur;
7194 }
7195}
Chris Masond1310b22008-01-24 16:13:08 -05007196
David Sterba2b489662020-04-29 03:04:10 +02007197void memmove_extent_buffer(const struct extent_buffer *dst,
7198 unsigned long dst_offset, unsigned long src_offset,
7199 unsigned long len)
Chris Masond1310b22008-01-24 16:13:08 -05007200{
7201 size_t cur;
7202 size_t dst_off_in_page;
7203 size_t src_off_in_page;
7204 unsigned long dst_end = dst_offset + len - 1;
7205 unsigned long src_end = src_offset + len - 1;
Chris Masond1310b22008-01-24 16:13:08 -05007206 unsigned long dst_i;
7207 unsigned long src_i;
7208
Qu Wenruof98b6212020-08-19 14:35:47 +08007209 if (check_eb_range(dst, dst_offset, len) ||
7210 check_eb_range(dst, src_offset, len))
7211 return;
Chris Mason727011e2010-08-06 13:21:20 -04007212 if (dst_offset < src_offset) {
Chris Masond1310b22008-01-24 16:13:08 -05007213 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
7214 return;
7215 }
Chris Masond3977122009-01-05 21:25:51 -05007216 while (len > 0) {
Qu Wenruo884b07d2020-12-02 14:48:04 +08007217 dst_i = get_eb_page_index(dst_end);
7218 src_i = get_eb_page_index(src_end);
Chris Masond1310b22008-01-24 16:13:08 -05007219
Qu Wenruo884b07d2020-12-02 14:48:04 +08007220 dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
7221 src_off_in_page = get_eb_offset_in_page(dst, src_end);
Chris Masond1310b22008-01-24 16:13:08 -05007222
7223 cur = min_t(unsigned long, len, src_off_in_page + 1);
7224 cur = min(cur, dst_off_in_page + 1);
David Sterbafb85fc92014-07-31 01:03:53 +02007225 copy_pages(dst->pages[dst_i], dst->pages[src_i],
Chris Masond1310b22008-01-24 16:13:08 -05007226 dst_off_in_page - cur + 1,
7227 src_off_in_page - cur + 1, cur);
7228
7229 dst_end -= cur;
7230 src_end -= cur;
7231 len -= cur;
7232 }
7233}
Chris Mason6af118ce2008-07-22 11:18:07 -04007234
Qu Wenruo72a69cd2021-08-17 17:38:52 +08007235#define GANG_LOOKUP_SIZE 16
Qu Wenruod1e86e32021-01-26 16:33:56 +08007236static struct extent_buffer *get_next_extent_buffer(
7237 struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
7238{
Qu Wenruo72a69cd2021-08-17 17:38:52 +08007239 struct extent_buffer *gang[GANG_LOOKUP_SIZE];
Qu Wenruod1e86e32021-01-26 16:33:56 +08007240 struct extent_buffer *found = NULL;
7241 u64 page_start = page_offset(page);
Qu Wenruo72a69cd2021-08-17 17:38:52 +08007242 u64 cur = page_start;
Qu Wenruod1e86e32021-01-26 16:33:56 +08007243
7244 ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
Qu Wenruod1e86e32021-01-26 16:33:56 +08007245 lockdep_assert_held(&fs_info->buffer_lock);
7246
Qu Wenruo72a69cd2021-08-17 17:38:52 +08007247 while (cur < page_start + PAGE_SIZE) {
7248 int ret;
7249 int i;
7250
7251 ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
7252 (void **)gang, cur >> fs_info->sectorsize_bits,
7253 min_t(unsigned int, GANG_LOOKUP_SIZE,
7254 PAGE_SIZE / fs_info->nodesize));
7255 if (ret == 0)
7256 goto out;
7257 for (i = 0; i < ret; i++) {
7258 /* Already beyond page end */
7259 if (gang[i]->start >= page_start + PAGE_SIZE)
7260 goto out;
7261 /* Found one */
7262 if (gang[i]->start >= bytenr) {
7263 found = gang[i];
7264 goto out;
7265 }
Qu Wenruod1e86e32021-01-26 16:33:56 +08007266 }
Qu Wenruo72a69cd2021-08-17 17:38:52 +08007267 cur = gang[ret - 1]->start + gang[ret - 1]->len;
Qu Wenruod1e86e32021-01-26 16:33:56 +08007268 }
Qu Wenruo72a69cd2021-08-17 17:38:52 +08007269out:
Qu Wenruod1e86e32021-01-26 16:33:56 +08007270 return found;
7271}
7272
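/*
 * try_release_subpage_extent_buffer - subpage variant of
 *				       try_release_extent_buffer()
 * @page: the page to release extent buffers from
 *
 * With sectorsize < PAGE_SIZE a page can host several extent buffers, so walk
 * every eb in the page range and try to release each one.  Returns 1 if page
 * private ended up cleared (all ebs released), 0 otherwise.
 */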
7273static int try_release_subpage_extent_buffer(struct page *page)
7274{
7275 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
7276 u64 cur = page_offset(page);
7277 const u64 end = page_offset(page) + PAGE_SIZE;
7278 int ret;
7279
7280 while (cur < end) {
7281 struct extent_buffer *eb = NULL;
7282
7283 /*
7284		 * Unlike try_release_extent_buffer(), which uses page->private
7285		 * to grab the buffer, in the subpage case we rely on the radix
7286		 * tree, thus we need to ensure radix tree consistency.
7287		 *
7288		 * We also want an atomic snapshot of the radix tree, thus we go
7289		 * with the spinlock rather than RCU.
7290 */
7291 spin_lock(&fs_info->buffer_lock);
7292 eb = get_next_extent_buffer(fs_info, page, cur);
7293 if (!eb) {
7294			/* No more ebs in the page range at or after cur */
7295 spin_unlock(&fs_info->buffer_lock);
7296 break;
7297 }
7298 cur = eb->start + eb->len;
7299
7300 /*
7301 * The same as try_release_extent_buffer(), to ensure the eb
7302 * won't disappear out from under us.
7303 */
7304 spin_lock(&eb->refs_lock);
7305 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
7306 spin_unlock(&eb->refs_lock);
7307 spin_unlock(&fs_info->buffer_lock);
7308 break;
7309 }
7310 spin_unlock(&fs_info->buffer_lock);
7311
7312 /*
7313		 * If tree ref isn't set then we know the ref on this eb is a
7314		 * real ref, so just break out; this eb will likely be freed
7315		 * soon anyway.
7316 */
7317 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
7318 spin_unlock(&eb->refs_lock);
7319 break;
7320 }
7321
7322 /*
7323		 * Here we don't care about the return value, as we will always
7324		 * check page private at the end, and release_extent_buffer()
7325		 * will release the refs_lock.
7326 */
7327 release_extent_buffer(eb);
7328 }
7329 /*
7330	 * Finally, check if we have cleared page private: if we have released
7331	 * all ebs in the page, page private should be cleared by now.
7332 */
7333 spin_lock(&page->mapping->private_lock);
7334 if (!PagePrivate(page))
7335 ret = 1;
7336 else
7337 ret = 0;
7338 spin_unlock(&page->mapping->private_lock);
7339 return ret;
7341}
7342
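/*
 * try_release_extent_buffer - try to release the extent buffer attached to a
 *			       page
 * @page: the page whose private extent buffer we try to release
 *
 * For subpage filesystems (sectorsize < PAGE_SIZE) this dispatches to
 * try_release_subpage_extent_buffer().  Returns 1 if the page's extent buffer
 * could be released (or the page had none attached), 0 if it is still in use.
 */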
David Sterbaf7a52a42013-04-26 14:56:29 +00007343int try_release_extent_buffer(struct page *page)
Miao Xie19fe0a82010-10-26 20:57:29 -04007344{
Chris Mason6af118ce2008-07-22 11:18:07 -04007345 struct extent_buffer *eb;
Miao Xie897ca6e92010-10-26 20:57:29 -04007346
Qu Wenruod1e86e32021-01-26 16:33:56 +08007347 if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
7348 return try_release_subpage_extent_buffer(page);
7349
Miao Xie19fe0a82010-10-26 20:57:29 -04007350 /*
Qu Wenruod1e86e32021-01-26 16:33:56 +08007351 * We need to make sure nobody is changing page->private, as we rely on
7352	 * page->private as the pointer to the extent buffer.
Miao Xie19fe0a82010-10-26 20:57:29 -04007353 */
Josef Bacik3083ee22012-03-09 16:01:49 -05007354 spin_lock(&page->mapping->private_lock);
7355 if (!PagePrivate(page)) {
7356 spin_unlock(&page->mapping->private_lock);
7357 return 1;
Miao Xie19fe0a82010-10-26 20:57:29 -04007358 }
7359
Josef Bacik3083ee22012-03-09 16:01:49 -05007360 eb = (struct extent_buffer *)page->private;
7361 BUG_ON(!eb);
Miao Xie19fe0a82010-10-26 20:57:29 -04007362
Josef Bacik0b32f4b2012-03-13 09:38:00 -04007363 /*
Josef Bacik3083ee22012-03-09 16:01:49 -05007364	 * This is a little awful but should be ok; we need to make sure that
7365 * the eb doesn't disappear out from under us while we're looking at
7366 * this page.
7367 */
7368 spin_lock(&eb->refs_lock);
Josef Bacik0b32f4b2012-03-13 09:38:00 -04007369 if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
Josef Bacik3083ee22012-03-09 16:01:49 -05007370 spin_unlock(&eb->refs_lock);
7371 spin_unlock(&page->mapping->private_lock);
7372 return 0;
7373 }
7374 spin_unlock(&page->mapping->private_lock);
7375
Josef Bacik3083ee22012-03-09 16:01:49 -05007376 /*
7377 * If tree ref isn't set then we know the ref on this eb is a real ref,
7378	 * so just return; this page will likely be freed soon anyway.
7379 */
7380 if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
7381 spin_unlock(&eb->refs_lock);
7382 return 0;
7383 }
Josef Bacik3083ee22012-03-09 16:01:49 -05007384
David Sterbaf7a52a42013-04-26 14:56:29 +00007385 return release_extent_buffer(eb);
Chris Mason6af118ce2008-07-22 11:18:07 -04007386}
Josef Bacikbfb484d2020-11-05 10:45:09 -05007387
7388/*
7389 * btrfs_readahead_tree_block - attempt to readahead a child block
7390 * @fs_info: the fs_info
7391 * @bytenr: bytenr to read
Josef Bacik3fbaf252020-11-05 10:45:20 -05007392 * @owner_root: objectid of the root that owns this eb
Josef Bacikbfb484d2020-11-05 10:45:09 -05007393 * @gen: generation for the uptodate check, can be 0
Josef Bacik3fbaf252020-11-05 10:45:20 -05007394 * @level: level for the eb
Josef Bacikbfb484d2020-11-05 10:45:09 -05007395 *
7396 * Attempt to readahead a tree block at @bytenr. If @gen is 0 then we do a
7397 * normal uptodate check of the eb, without checking the generation. If we have
7398 * to read the block we will not block on anything.
7399 */
7400void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
Josef Bacik3fbaf252020-11-05 10:45:20 -05007401 u64 bytenr, u64 owner_root, u64 gen, int level)
Josef Bacikbfb484d2020-11-05 10:45:09 -05007402{
7403 struct extent_buffer *eb;
7404 int ret;
7405
Josef Bacik3fbaf252020-11-05 10:45:20 -05007406 eb = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
Josef Bacikbfb484d2020-11-05 10:45:09 -05007407 if (IS_ERR(eb))
7408 return;
7409
7410 if (btrfs_buffer_uptodate(eb, gen, 1)) {
7411 free_extent_buffer(eb);
7412 return;
7413 }
7414
7415 ret = read_extent_buffer_pages(eb, WAIT_NONE, 0);
7416 if (ret < 0)
7417 free_extent_buffer_stale(eb);
7418 else
7419 free_extent_buffer(eb);
7420}
7421
7422/*
7423 * btrfs_readahead_node_child - readahead a node's child block
7424 * @node: parent node we're reading from
7425 * @slot: slot in the parent node for the child we want to read
7426 *
7427 * A helper for btrfs_readahead_tree_block(); we simply read the bytenr
7428 * pointed to by the given slot in the node provided.
7429 */
7430void btrfs_readahead_node_child(struct extent_buffer *node, int slot)
7431{
7432 btrfs_readahead_tree_block(node->fs_info,
7433 btrfs_node_blockptr(node, slot),
Josef Bacik3fbaf252020-11-05 10:45:20 -05007434 btrfs_header_owner(node),
7435 btrfs_node_ptr_generation(node, slot),
7436 btrfs_header_level(node) - 1);
Josef Bacikbfb484d2020-11-05 10:45:09 -05007437}
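
/*
 * Example usage (illustrative only, not part of this file): a tree walk can
 * readahead every child of an interior node before descending into them:
 *
 *	for (slot = 0; slot < btrfs_header_nritems(node); slot++)
 *		btrfs_readahead_node_child(node, slot);
 */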