Blame - fs/btrfs/block-group.c - SHIFTPHONES/mainline/linux

blob: 763a3671b7afcad8c9cf3bf4b545954d7ed844f4 [file] [log] [blame]

Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2
David Sterba	784352f	2019-08-21 18:54:28 +0200	[diff] [blame]	3	#include "misc.h"
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	4	#include "ctree.h"
				5	#include "block-group.h"
Josef Bacik	3eeb322	2019-06-20 15:37:47 -0400	[diff] [blame]	6	#include "space-info.h"
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	7	#include "disk-io.h"
				8	#include "free-space-cache.h"
				9	#include "free-space-tree.h"
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	10	#include "volumes.h"
				11	#include "transaction.h"
				12	#include "ref-verify.h"
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	13	#include "sysfs.h"
				14	#include "tree-log.h"
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	15	#include "delalloc-space.h"
Dennis Zhou	b0643e5	2019-12-13 16:22:14 -0800	[diff] [blame]	16	#include "discard.h"
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	17	#include "raid56.h"
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	18
Josef Bacik	878d7b6	2019-06-20 15:38:05 -0400	[diff] [blame]	19	/*
				20	* Return target flags in extended format or 0 if restripe for this chunk_type
				21	* is not in progress
				22	*
				23	* Should be called with balance_lock held
				24	*/
Josef Bacik	e11c040	2019-06-20 15:38:07 -0400	[diff] [blame]	25	static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
Josef Bacik	878d7b6	2019-06-20 15:38:05 -0400	[diff] [blame]	26	{
				27	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
				28	u64 target = 0;
				29
				30	if (!bctl)
				31	return 0;
				32
				33	if (flags & BTRFS_BLOCK_GROUP_DATA &&
				34	bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
				35	target = BTRFS_BLOCK_GROUP_DATA \| bctl->data.target;
				36	} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
				37	bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
				38	target = BTRFS_BLOCK_GROUP_SYSTEM \| bctl->sys.target;
				39	} else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
				40	bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
				41	target = BTRFS_BLOCK_GROUP_METADATA \| bctl->meta.target;
				42	}
				43
				44	return target;
				45	}
				46
				47	/*
				48	* @flags: available profiles in extended format (see ctree.h)
				49	*
				50	* Return reduced profile in chunk format. If profile changing is in progress
				51	* (either running or paused) picks the target profile (if it's already
				52	* available), otherwise falls back to plain reducing.
				53	*/
				54	static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
				55	{
				56	u64 num_devices = fs_info->fs_devices->rw_devices;
				57	u64 target;
				58	u64 raid_type;
				59	u64 allowed = 0;
				60
				61	/*
				62	* See if restripe for this chunk_type is in progress, if so try to
				63	* reduce to the target profile
				64	*/
				65	spin_lock(&fs_info->balance_lock);
Josef Bacik	e11c040	2019-06-20 15:38:07 -0400	[diff] [blame]	66	target = get_restripe_target(fs_info, flags);
Josef Bacik	878d7b6	2019-06-20 15:38:05 -0400	[diff] [blame]	67	if (target) {
Josef Bacik	162e0a1	2020-07-21 10:48:46 -0400	[diff] [blame]	68	spin_unlock(&fs_info->balance_lock);
				69	return extended_to_chunk(target);
Josef Bacik	878d7b6	2019-06-20 15:38:05 -0400	[diff] [blame]	70	}
				71	spin_unlock(&fs_info->balance_lock);
				72
				73	/* First, mask out the RAID levels which aren't possible */
				74	for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
				75	if (num_devices >= btrfs_raid_array[raid_type].devs_min)
				76	allowed \|= btrfs_raid_array[raid_type].bg_flag;
				77	}
				78	allowed &= flags;
				79
				80	if (allowed & BTRFS_BLOCK_GROUP_RAID6)
				81	allowed = BTRFS_BLOCK_GROUP_RAID6;
				82	else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
				83	allowed = BTRFS_BLOCK_GROUP_RAID5;
				84	else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
				85	allowed = BTRFS_BLOCK_GROUP_RAID10;
				86	else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
				87	allowed = BTRFS_BLOCK_GROUP_RAID1;
				88	else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
				89	allowed = BTRFS_BLOCK_GROUP_RAID0;
				90
				91	flags &= ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
				92
				93	return extended_to_chunk(flags \| allowed);
				94	}
				95
Johannes Thumshirn	ef0a82d	2020-01-02 17:14:57 +0100	[diff] [blame]	96	u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
Josef Bacik	878d7b6	2019-06-20 15:38:05 -0400	[diff] [blame]	97	{
				98	unsigned seq;
				99	u64 flags;
				100
				101	do {
				102	flags = orig_flags;
				103	seq = read_seqbegin(&fs_info->profiles_lock);
				104
				105	if (flags & BTRFS_BLOCK_GROUP_DATA)
				106	flags \|= fs_info->avail_data_alloc_bits;
				107	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
				108	flags \|= fs_info->avail_system_alloc_bits;
				109	else if (flags & BTRFS_BLOCK_GROUP_METADATA)
				110	flags \|= fs_info->avail_metadata_alloc_bits;
				111	} while (read_seqretry(&fs_info->profiles_lock, seq));
				112
				113	return btrfs_reduce_alloc_profile(fs_info, flags);
				114	}
				115
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	116	void btrfs_get_block_group(struct btrfs_block_group *cache)
Josef Bacik	3cad128	2019-06-20 15:37:46 -0400	[diff] [blame]	117	{
Josef Bacik	48aaeeb	2020-07-06 09:14:11 -0400	[diff] [blame]	118	refcount_inc(&cache->refs);
Josef Bacik	3cad128	2019-06-20 15:37:46 -0400	[diff] [blame]	119	}
				120
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	121	void btrfs_put_block_group(struct btrfs_block_group *cache)
Josef Bacik	3cad128	2019-06-20 15:37:46 -0400	[diff] [blame]	122	{
Josef Bacik	48aaeeb	2020-07-06 09:14:11 -0400	[diff] [blame]	123	if (refcount_dec_and_test(&cache->refs)) {
Josef Bacik	3cad128	2019-06-20 15:37:46 -0400	[diff] [blame]	124	WARN_ON(cache->pinned > 0);
				125	WARN_ON(cache->reserved > 0);
				126
				127	/*
Dennis Zhou	b0643e5	2019-12-13 16:22:14 -0800	[diff] [blame]	128	* A block_group shouldn't be on the discard_list anymore.
				129	* Remove the block_group from the discard_list to prevent us
				130	* from causing a panic due to NULL pointer dereference.
				131	*/
				132	if (WARN_ON(!list_empty(&cache->discard_list)))
				133	btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
				134	cache);
				135
				136	/*
Josef Bacik	3cad128	2019-06-20 15:37:46 -0400	[diff] [blame]	137	* If not empty, someone is still holding mutex of
				138	* full_stripe_lock, which can only be released by caller.
				139	* And it will definitely cause use-after-free when caller
				140	* tries to release full stripe lock.
				141	*
				142	* No better way to resolve, but only to warn.
				143	*/
				144	WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
				145	kfree(cache->free_space_ctl);
				146	kfree(cache);
				147	}
				148	}
				149
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	150	/*
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	151	* This adds the block group to the fs_info rb tree for the block group cache
				152	*/
				153	static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	154	struct btrfs_block_group *block_group)
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	155	{
				156	struct rb_node **p;
				157	struct rb_node *parent = NULL;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	158	struct btrfs_block_group *cache;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	159
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	160	ASSERT(block_group->length != 0);
				161
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	162	spin_lock(&info->block_group_cache_lock);
				163	p = &info->block_group_cache_tree.rb_node;
				164
				165	while (*p) {
				166	parent = *p;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	167	cache = rb_entry(parent, struct btrfs_block_group, cache_node);
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	168	if (block_group->start < cache->start) {
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	169	p = &(*p)->rb_left;
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	170	} else if (block_group->start > cache->start) {
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	171	p = &(*p)->rb_right;
				172	} else {
				173	spin_unlock(&info->block_group_cache_lock);
				174	return -EEXIST;
				175	}
				176	}
				177
				178	rb_link_node(&block_group->cache_node, parent, p);
				179	rb_insert_color(&block_group->cache_node,
				180	&info->block_group_cache_tree);
				181
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	182	if (info->first_logical_byte > block_group->start)
				183	info->first_logical_byte = block_group->start;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	184
				185	spin_unlock(&info->block_group_cache_lock);
				186
				187	return 0;
				188	}
				189
				190	/*
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	191	* This will return the block group at or after bytenr if contains is 0, else
				192	* it will return the block group that contains the bytenr
				193	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	194	static struct btrfs_block_group *block_group_cache_tree_search(
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	195	struct btrfs_fs_info *info, u64 bytenr, int contains)
				196	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	197	struct btrfs_block_group cache, ret = NULL;
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	198	struct rb_node *n;
				199	u64 end, start;
				200
				201	spin_lock(&info->block_group_cache_lock);
				202	n = info->block_group_cache_tree.rb_node;
				203
				204	while (n) {
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	205	cache = rb_entry(n, struct btrfs_block_group, cache_node);
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	206	end = cache->start + cache->length - 1;
				207	start = cache->start;
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	208
				209	if (bytenr < start) {
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	210	if (!contains && (!ret \|\| start < ret->start))
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	211	ret = cache;
				212	n = n->rb_left;
				213	} else if (bytenr > start) {
				214	if (contains && bytenr <= end) {
				215	ret = cache;
				216	break;
				217	}
				218	n = n->rb_right;
				219	} else {
				220	ret = cache;
				221	break;
				222	}
				223	}
				224	if (ret) {
				225	btrfs_get_block_group(ret);
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	226	if (bytenr == 0 && info->first_logical_byte > ret->start)
				227	info->first_logical_byte = ret->start;
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	228	}
				229	spin_unlock(&info->block_group_cache_lock);
				230
				231	return ret;
				232	}
				233
				234	/*
				235	* Return the block group that starts at or after bytenr
				236	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	237	struct btrfs_block_group *btrfs_lookup_first_block_group(
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	238	struct btrfs_fs_info *info, u64 bytenr)
				239	{
				240	return block_group_cache_tree_search(info, bytenr, 0);
				241	}
				242
				243	/*
				244	* Return the block group that contains the given bytenr
				245	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	246	struct btrfs_block_group *btrfs_lookup_block_group(
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	247	struct btrfs_fs_info *info, u64 bytenr)
				248	{
				249	return block_group_cache_tree_search(info, bytenr, 1);
				250	}
				251
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	252	struct btrfs_block_group *btrfs_next_block_group(
				253	struct btrfs_block_group *cache)
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	254	{
				255	struct btrfs_fs_info *fs_info = cache->fs_info;
				256	struct rb_node *node;
				257
				258	spin_lock(&fs_info->block_group_cache_lock);
				259
				260	/* If our block group was removed, we need a full search. */
				261	if (RB_EMPTY_NODE(&cache->cache_node)) {
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	262	const u64 next_bytenr = cache->start + cache->length;
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	263
				264	spin_unlock(&fs_info->block_group_cache_lock);
				265	btrfs_put_block_group(cache);
				266	cache = btrfs_lookup_first_block_group(fs_info, next_bytenr); return cache;
				267	}
				268	node = rb_next(&cache->cache_node);
				269	btrfs_put_block_group(cache);
				270	if (node) {
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	271	cache = rb_entry(node, struct btrfs_block_group, cache_node);
Josef Bacik	2e405ad	2019-06-20 15:37:45 -0400	[diff] [blame]	272	btrfs_get_block_group(cache);
				273	} else
				274	cache = NULL;
				275	spin_unlock(&fs_info->block_group_cache_lock);
				276	return cache;
				277	}
Josef Bacik	3eeb322	2019-06-20 15:37:47 -0400	[diff] [blame]	278
				279	bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
				280	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	281	struct btrfs_block_group *bg;
Josef Bacik	3eeb322	2019-06-20 15:37:47 -0400	[diff] [blame]	282	bool ret = true;
				283
				284	bg = btrfs_lookup_block_group(fs_info, bytenr);
				285	if (!bg)
				286	return false;
				287
				288	spin_lock(&bg->lock);
				289	if (bg->ro)
				290	ret = false;
				291	else
				292	atomic_inc(&bg->nocow_writers);
				293	spin_unlock(&bg->lock);
				294
				295	/* No put on block group, done by btrfs_dec_nocow_writers */
				296	if (!ret)
				297	btrfs_put_block_group(bg);
				298
				299	return ret;
				300	}
				301
				302	void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
				303	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	304	struct btrfs_block_group *bg;
Josef Bacik	3eeb322	2019-06-20 15:37:47 -0400	[diff] [blame]	305
				306	bg = btrfs_lookup_block_group(fs_info, bytenr);
				307	ASSERT(bg);
				308	if (atomic_dec_and_test(&bg->nocow_writers))
				309	wake_up_var(&bg->nocow_writers);
				310	/*
				311	* Once for our lookup and once for the lookup done by a previous call
				312	* to btrfs_inc_nocow_writers()
				313	*/
				314	btrfs_put_block_group(bg);
				315	btrfs_put_block_group(bg);
				316	}
				317
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	318	void btrfs_wait_nocow_writers(struct btrfs_block_group *bg)
Josef Bacik	3eeb322	2019-06-20 15:37:47 -0400	[diff] [blame]	319	{
				320	wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
				321	}
				322
				323	void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
				324	const u64 start)
				325	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	326	struct btrfs_block_group *bg;
Josef Bacik	3eeb322	2019-06-20 15:37:47 -0400	[diff] [blame]	327
				328	bg = btrfs_lookup_block_group(fs_info, start);
				329	ASSERT(bg);
				330	if (atomic_dec_and_test(&bg->reservations))
				331	wake_up_var(&bg->reservations);
				332	btrfs_put_block_group(bg);
				333	}
				334
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	335	void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg)
Josef Bacik	3eeb322	2019-06-20 15:37:47 -0400	[diff] [blame]	336	{
				337	struct btrfs_space_info *space_info = bg->space_info;
				338
				339	ASSERT(bg->ro);
				340
				341	if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
				342	return;
				343
				344	/*
				345	* Our block group is read only but before we set it to read only,
				346	* some task might have had allocated an extent from it already, but it
				347	* has not yet created a respective ordered extent (and added it to a
				348	* root's list of ordered extents).
				349	* Therefore wait for any task currently allocating extents, since the
				350	* block group's reservations counter is incremented while a read lock
				351	* on the groups' semaphore is held and decremented after releasing
				352	* the read access on that semaphore and creating the ordered extent.
				353	*/
				354	down_write(&space_info->groups_sem);
				355	up_write(&space_info->groups_sem);
				356
				357	wait_var_event(&bg->reservations, !atomic_read(&bg->reservations));
				358	}
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	359
				360	struct btrfs_caching_control *btrfs_get_caching_control(
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	361	struct btrfs_block_group *cache)
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	362	{
				363	struct btrfs_caching_control *ctl;
				364
				365	spin_lock(&cache->lock);
				366	if (!cache->caching_ctl) {
				367	spin_unlock(&cache->lock);
				368	return NULL;
				369	}
				370
				371	ctl = cache->caching_ctl;
				372	refcount_inc(&ctl->count);
				373	spin_unlock(&cache->lock);
				374	return ctl;
				375	}
				376
				377	void btrfs_put_caching_control(struct btrfs_caching_control *ctl)
				378	{
				379	if (refcount_dec_and_test(&ctl->count))
				380	kfree(ctl);
				381	}
				382
				383	/*
				384	* When we wait for progress in the block group caching, its because our
				385	* allocation attempt failed at least once. So, we must sleep and let some
				386	* progress happen before we try again.
				387	*
				388	* This function will sleep at least once waiting for new free space to show
				389	* up, and then it will check the block group free space numbers for our min
				390	* num_bytes. Another option is to have it go ahead and look in the rbtree for
				391	* a free extent of a given size, but this is a good start.
				392	*
				393	* Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
				394	* any of the information in this block group.
				395	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	396	void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	397	u64 num_bytes)
				398	{
				399	struct btrfs_caching_control *caching_ctl;
				400
				401	caching_ctl = btrfs_get_caching_control(cache);
				402	if (!caching_ctl)
				403	return;
				404
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	405	wait_event(caching_ctl->wait, btrfs_block_group_done(cache) \|\|
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	406	(cache->free_space_ctl->free_space >= num_bytes));
				407
				408	btrfs_put_caching_control(caching_ctl);
				409	}
				410
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	411	int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	412	{
				413	struct btrfs_caching_control *caching_ctl;
				414	int ret = 0;
				415
				416	caching_ctl = btrfs_get_caching_control(cache);
				417	if (!caching_ctl)
				418	return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
				419
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	420	wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	421	if (cache->cached == BTRFS_CACHE_ERROR)
				422	ret = -EIO;
				423	btrfs_put_caching_control(caching_ctl);
				424	return ret;
				425	}
				426
Josef Bacik	e747853	2020-10-23 09:58:10 -0400	[diff] [blame]	427	static bool space_cache_v1_done(struct btrfs_block_group *cache)
				428	{
				429	bool ret;
				430
				431	spin_lock(&cache->lock);
				432	ret = cache->cached != BTRFS_CACHE_FAST;
				433	spin_unlock(&cache->lock);
				434
				435	return ret;
				436	}
				437
				438	void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
				439	struct btrfs_caching_control *caching_ctl)
				440	{
				441	wait_event(caching_ctl->wait, space_cache_v1_done(cache));
				442	}
				443
#ifdef CONFIG_BTRFS_DEBUG
/*
 * Debug helper: punch holes into the free space of @block_group.
 *
 * Walking the block group range from its start, one unit of free space is
 * removed out of every two.  The unit is nodesize for metadata block groups
 * and sectorsize otherwise.
 */
static void fragment_free_space(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 offset = block_group->start;
	u64 remaining = block_group->length;
	u64 unit;
	u64 stride;

	if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA)
		unit = fs_info->nodesize;
	else
		unit = fs_info->sectorsize;
	stride = unit << 1;

	while (remaining > unit) {
		btrfs_remove_free_space(block_group, offset, unit);
		offset += stride;
		/* Clamp at zero rather than letting the u64 wrap. */
		remaining = (remaining < stride) ? 0 : remaining - stride;
	}
}
#endif
				464
				465	/*
				466	* This is only called by btrfs_cache_block_group, since we could have freed
				467	* extents we need to check the pinned_extents for any extents that can't be
				468	* used yet since their free space will be released as soon as the transaction
				469	* commits.
				470	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	471	u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	472	{
				473	struct btrfs_fs_info *info = block_group->fs_info;
				474	u64 extent_start, extent_end, size, total_added = 0;
				475	int ret;
				476
				477	while (start < end) {
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	478	ret = find_first_extent_bit(&info->excluded_extents, start,
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	479	&extent_start, &extent_end,
				480	EXTENT_DIRTY \| EXTENT_UPTODATE,
				481	NULL);
				482	if (ret)
				483	break;
				484
				485	if (extent_start <= start) {
				486	start = extent_end + 1;
				487	} else if (extent_start > start && extent_start < end) {
				488	size = extent_start - start;
				489	total_added += size;
Dennis Zhou	b0643e5	2019-12-13 16:22:14 -0800	[diff] [blame]	490	ret = btrfs_add_free_space_async_trimmed(block_group,
				491	start, size);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	492	BUG_ON(ret); /* -ENOMEM or logic error */
				493	start = extent_end + 1;
				494	} else {
				495	break;
				496	}
				497	}
				498
				499	if (start < end) {
				500	size = end - start;
				501	total_added += size;
Dennis Zhou	b0643e5	2019-12-13 16:22:14 -0800	[diff] [blame]	502	ret = btrfs_add_free_space_async_trimmed(block_group, start,
				503	size);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	504	BUG_ON(ret); /* -ENOMEM or logic error */
				505	}
				506
				507	return total_added;
				508	}
				509
				510	static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
				511	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	512	struct btrfs_block_group *block_group = caching_ctl->block_group;
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	513	struct btrfs_fs_info *fs_info = block_group->fs_info;
				514	struct btrfs_root *extent_root = fs_info->extent_root;
				515	struct btrfs_path *path;
				516	struct extent_buffer *leaf;
				517	struct btrfs_key key;
				518	u64 total_found = 0;
				519	u64 last = 0;
				520	u32 nritems;
				521	int ret;
				522	bool wakeup = true;
				523
				524	path = btrfs_alloc_path();
				525	if (!path)
				526	return -ENOMEM;
				527
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	528	last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	529
				530	#ifdef CONFIG_BTRFS_DEBUG
				531	/*
				532	* If we're fragmenting we don't want to make anybody think we can
				533	* allocate from this block group until we've had a chance to fragment
				534	* the free space.
				535	*/
				536	if (btrfs_should_fragment_free_space(block_group))
				537	wakeup = false;
				538	#endif
				539	/*
				540	* We don't want to deadlock with somebody trying to allocate a new
				541	* extent for the extent root while also trying to search the extent
				542	* root to add free space. So we skip locking and search the commit
				543	* root, since its read-only
				544	*/
				545	path->skip_locking = 1;
				546	path->search_commit_root = 1;
				547	path->reada = READA_FORWARD;
				548
				549	key.objectid = last;
				550	key.offset = 0;
				551	key.type = BTRFS_EXTENT_ITEM_KEY;
				552
				553	next:
				554	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
				555	if (ret < 0)
				556	goto out;
				557
				558	leaf = path->nodes[0];
				559	nritems = btrfs_header_nritems(leaf);
				560
				561	while (1) {
				562	if (btrfs_fs_closing(fs_info) > 1) {
				563	last = (u64)-1;
				564	break;
				565	}
				566
				567	if (path->slots[0] < nritems) {
				568	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
				569	} else {
				570	ret = btrfs_find_next_key(extent_root, path, &key, 0, 0);
				571	if (ret)
				572	break;
				573
				574	if (need_resched() \|\|
				575	rwsem_is_contended(&fs_info->commit_root_sem)) {
				576	if (wakeup)
				577	caching_ctl->progress = last;
				578	btrfs_release_path(path);
				579	up_read(&fs_info->commit_root_sem);
				580	mutex_unlock(&caching_ctl->mutex);
				581	cond_resched();
				582	mutex_lock(&caching_ctl->mutex);
				583	down_read(&fs_info->commit_root_sem);
				584	goto next;
				585	}
				586
				587	ret = btrfs_next_leaf(extent_root, path);
				588	if (ret < 0)
				589	goto out;
				590	if (ret)
				591	break;
				592	leaf = path->nodes[0];
				593	nritems = btrfs_header_nritems(leaf);
				594	continue;
				595	}
				596
				597	if (key.objectid < last) {
				598	key.objectid = last;
				599	key.offset = 0;
				600	key.type = BTRFS_EXTENT_ITEM_KEY;
				601
				602	if (wakeup)
				603	caching_ctl->progress = last;
				604	btrfs_release_path(path);
				605	goto next;
				606	}
				607
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	608	if (key.objectid < block_group->start) {
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	609	path->slots[0]++;
				610	continue;
				611	}
				612
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	613	if (key.objectid >= block_group->start + block_group->length)
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	614	break;
				615
				616	if (key.type == BTRFS_EXTENT_ITEM_KEY \|\|
				617	key.type == BTRFS_METADATA_ITEM_KEY) {
				618	total_found += add_new_free_space(block_group, last,
				619	key.objectid);
				620	if (key.type == BTRFS_METADATA_ITEM_KEY)
				621	last = key.objectid +
				622	fs_info->nodesize;
				623	else
				624	last = key.objectid + key.offset;
				625
				626	if (total_found > CACHING_CTL_WAKE_UP) {
				627	total_found = 0;
				628	if (wakeup)
				629	wake_up(&caching_ctl->wait);
				630	}
				631	}
				632	path->slots[0]++;
				633	}
				634	ret = 0;
				635
				636	total_found += add_new_free_space(block_group, last,
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	637	block_group->start + block_group->length);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	638	caching_ctl->progress = (u64)-1;
				639
				640	out:
				641	btrfs_free_path(path);
				642	return ret;
				643	}
				644
				645	static noinline void caching_thread(struct btrfs_work *work)
				646	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	647	struct btrfs_block_group *block_group;
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	648	struct btrfs_fs_info *fs_info;
				649	struct btrfs_caching_control *caching_ctl;
				650	int ret;
				651
				652	caching_ctl = container_of(work, struct btrfs_caching_control, work);
				653	block_group = caching_ctl->block_group;
				654	fs_info = block_group->fs_info;
				655
				656	mutex_lock(&caching_ctl->mutex);
				657	down_read(&fs_info->commit_root_sem);
				658
Josef Bacik	e747853	2020-10-23 09:58:10 -0400	[diff] [blame]	659	if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
				660	ret = load_free_space_cache(block_group);
				661	if (ret == 1) {
				662	ret = 0;
				663	goto done;
				664	}
				665
				666	/*
				667	* We failed to load the space cache, set ourselves to
				668	* CACHE_STARTED and carry on.
				669	*/
				670	spin_lock(&block_group->lock);
				671	block_group->cached = BTRFS_CACHE_STARTED;
				672	spin_unlock(&block_group->lock);
				673	wake_up(&caching_ctl->wait);
				674	}
				675
Josef Bacik	2f96e40	2021-01-15 16:26:17 -0500	[diff] [blame]	676	/*
				677	* If we are in the transaction that populated the free space tree we
				678	* can't actually cache from the free space tree as our commit root and
				679	* real root are the same, so we could change the contents of the blocks
				680	* while caching. Instead do the slow caching in this case, and after
				681	* the transaction has committed we will be safe.
				682	*/
				683	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
				684	!(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	685	ret = load_free_space_tree(caching_ctl);
				686	else
				687	ret = load_extent_tree_free(caching_ctl);
Josef Bacik	e747853	2020-10-23 09:58:10 -0400	[diff] [blame]	688	done:
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	689	spin_lock(&block_group->lock);
				690	block_group->caching_ctl = NULL;
				691	block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
				692	spin_unlock(&block_group->lock);
				693
				694	#ifdef CONFIG_BTRFS_DEBUG
				695	if (btrfs_should_fragment_free_space(block_group)) {
				696	u64 bytes_used;
				697
				698	spin_lock(&block_group->space_info->lock);
				699	spin_lock(&block_group->lock);
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	700	bytes_used = block_group->length - block_group->used;
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	701	block_group->space_info->bytes_used += bytes_used >> 1;
				702	spin_unlock(&block_group->lock);
				703	spin_unlock(&block_group->space_info->lock);
Josef Bacik	e11c040	2019-06-20 15:38:07 -0400	[diff] [blame]	704	fragment_free_space(block_group);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	705	}
				706	#endif
				707
				708	caching_ctl->progress = (u64)-1;
				709
				710	up_read(&fs_info->commit_root_sem);
				711	btrfs_free_excluded_extents(block_group);
				712	mutex_unlock(&caching_ctl->mutex);
				713
				714	wake_up(&caching_ctl->wait);
				715
				716	btrfs_put_caching_control(caching_ctl);
				717	btrfs_put_block_group(block_group);
				718	}
				719
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	720	int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	721	{
				722	DEFINE_WAIT(wait);
				723	struct btrfs_fs_info *fs_info = cache->fs_info;
Josef Bacik	e747853	2020-10-23 09:58:10 -0400	[diff] [blame]	724	struct btrfs_caching_control *caching_ctl = NULL;
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	725	int ret = 0;
				726
				727	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
				728	if (!caching_ctl)
				729	return -ENOMEM;
				730
				731	INIT_LIST_HEAD(&caching_ctl->list);
				732	mutex_init(&caching_ctl->mutex);
				733	init_waitqueue_head(&caching_ctl->wait);
				734	caching_ctl->block_group = cache;
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	735	caching_ctl->progress = cache->start;
Josef Bacik	e747853	2020-10-23 09:58:10 -0400	[diff] [blame]	736	refcount_set(&caching_ctl->count, 2);
Omar Sandoval	a0cac0e	2019-09-16 11:30:57 -0700	[diff] [blame]	737	btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	738
				739	spin_lock(&cache->lock);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	740	if (cache->cached != BTRFS_CACHE_NO) {
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	741	kfree(caching_ctl);
Josef Bacik	e747853	2020-10-23 09:58:10 -0400	[diff] [blame]	742
				743	caching_ctl = cache->caching_ctl;
				744	if (caching_ctl)
				745	refcount_inc(&caching_ctl->count);
				746	spin_unlock(&cache->lock);
				747	goto out;
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	748	}
				749	WARN_ON(cache->caching_ctl);
				750	cache->caching_ctl = caching_ctl;
Josef Bacik	e747853	2020-10-23 09:58:10 -0400	[diff] [blame]	751	if (btrfs_test_opt(fs_info, SPACE_CACHE))
				752	cache->cached = BTRFS_CACHE_FAST;
				753	else
				754	cache->cached = BTRFS_CACHE_STARTED;
				755	cache->has_caching_ctl = 1;
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	756	spin_unlock(&cache->lock);
				757
Josef Bacik	bbb86a3	2020-10-23 09:58:11 -0400	[diff] [blame]	758	spin_lock(&fs_info->block_group_cache_lock);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	759	refcount_inc(&caching_ctl->count);
				760	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
Josef Bacik	bbb86a3	2020-10-23 09:58:11 -0400	[diff] [blame]	761	spin_unlock(&fs_info->block_group_cache_lock);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	762
				763	btrfs_get_block_group(cache);
				764
				765	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
Josef Bacik	e747853	2020-10-23 09:58:10 -0400	[diff] [blame]	766	out:
				767	if (load_cache_only && caching_ctl)
				768	btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
				769	if (caching_ctl)
				770	btrfs_put_caching_control(caching_ctl);
Josef Bacik	9f21246	2019-08-06 16:43:19 +0200	[diff] [blame]	771
				772	return ret;
				773	}
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	774
				775	static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
				776	{
				777	u64 extra_flags = chunk_to_extended(flags) &
				778	BTRFS_EXTENDED_PROFILE_MASK;
				779
				780	write_seqlock(&fs_info->profiles_lock);
				781	if (flags & BTRFS_BLOCK_GROUP_DATA)
				782	fs_info->avail_data_alloc_bits &= ~extra_flags;
				783	if (flags & BTRFS_BLOCK_GROUP_METADATA)
				784	fs_info->avail_metadata_alloc_bits &= ~extra_flags;
				785	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
				786	fs_info->avail_system_alloc_bits &= ~extra_flags;
				787	write_sequnlock(&fs_info->profiles_lock);
				788	}
				789
				790	/*
				791	* Clear incompat bits for the following feature(s):
				792	*
				793	* - RAID56 - in case there's neither RAID5 nor RAID6 profile block group
				794	* in the whole filesystem
David Sterba	9c90744	2019-10-31 15:52:01 +0100	[diff] [blame]	795	*
				796	* - RAID1C34 - same as above for RAID1C3 and RAID1C4 block groups
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	797	*/
				798	static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
				799	{
David Sterba	9c90744	2019-10-31 15:52:01 +0100	[diff] [blame]	800	bool found_raid56 = false;
				801	bool found_raid1c34 = false;
				802
				803	if ((flags & BTRFS_BLOCK_GROUP_RAID56_MASK) \|\|
				804	(flags & BTRFS_BLOCK_GROUP_RAID1C3) \|\|
				805	(flags & BTRFS_BLOCK_GROUP_RAID1C4)) {
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	806	struct list_head *head = &fs_info->space_info;
				807	struct btrfs_space_info *sinfo;
				808
				809	list_for_each_entry_rcu(sinfo, head, list) {
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	810	down_read(&sinfo->groups_sem);
				811	if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
David Sterba	9c90744	2019-10-31 15:52:01 +0100	[diff] [blame]	812	found_raid56 = true;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	813	if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
David Sterba	9c90744	2019-10-31 15:52:01 +0100	[diff] [blame]	814	found_raid56 = true;
				815	if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C3]))
				816	found_raid1c34 = true;
				817	if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C4]))
				818	found_raid1c34 = true;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	819	up_read(&sinfo->groups_sem);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	820	}
Filipe Manana	d8e6fd5	2020-03-20 18:43:48 +0000	[diff] [blame]	821	if (!found_raid56)
David Sterba	9c90744	2019-10-31 15:52:01 +0100	[diff] [blame]	822	btrfs_clear_fs_incompat(fs_info, RAID56);
Filipe Manana	d8e6fd5	2020-03-20 18:43:48 +0000	[diff] [blame]	823	if (!found_raid1c34)
David Sterba	9c90744	2019-10-31 15:52:01 +0100	[diff] [blame]	824	btrfs_clear_fs_incompat(fs_info, RAID1C34);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	825	}
				826	}
				827
Qu Wenruo	7357623	2020-05-05 07:58:21 +0800	[diff] [blame]	828	static int remove_block_group_item(struct btrfs_trans_handle *trans,
				829	struct btrfs_path *path,
				830	struct btrfs_block_group *block_group)
				831	{
				832	struct btrfs_fs_info *fs_info = trans->fs_info;
				833	struct btrfs_root *root;
				834	struct btrfs_key key;
				835	int ret;
				836
				837	root = fs_info->extent_root;
				838	key.objectid = block_group->start;
				839	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
				840	key.offset = block_group->length;
				841
				842	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
				843	if (ret > 0)
				844	ret = -ENOENT;
				845	if (ret < 0)
				846	return ret;
				847
				848	ret = btrfs_del_item(trans, root, path);
				849	return ret;
				850	}
				851
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	852	int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
				853	u64 group_start, struct extent_map *em)
				854	{
				855	struct btrfs_fs_info *fs_info = trans->fs_info;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	856	struct btrfs_path *path;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	857	struct btrfs_block_group *block_group;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	858	struct btrfs_free_cluster *cluster;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	859	struct inode *inode;
				860	struct kobject *kobj = NULL;
				861	int ret;
				862	int index;
				863	int factor;
				864	struct btrfs_caching_control *caching_ctl = NULL;
				865	bool remove_em;
				866	bool remove_rsv = false;
				867
				868	block_group = btrfs_lookup_block_group(fs_info, group_start);
				869	BUG_ON(!block_group);
				870	BUG_ON(!block_group->ro);
				871
				872	trace_btrfs_remove_block_group(block_group);
				873	/*
				874	* Free the reserved super bytes from this block group before
				875	* removing it.
				876	*/
				877	btrfs_free_excluded_extents(block_group);
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	878	btrfs_free_ref_tree_range(fs_info, block_group->start,
				879	block_group->length);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	880
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	881	index = btrfs_bg_flags_to_raid_index(block_group->flags);
				882	factor = btrfs_bg_type_to_factor(block_group->flags);
				883
				884	/* make sure this block group isn't part of an allocation cluster */
				885	cluster = &fs_info->data_alloc_cluster;
				886	spin_lock(&cluster->refill_lock);
				887	btrfs_return_cluster_to_free_space(block_group, cluster);
				888	spin_unlock(&cluster->refill_lock);
				889
				890	/*
				891	* make sure this block group isn't part of a metadata
				892	* allocation cluster
				893	*/
				894	cluster = &fs_info->meta_alloc_cluster;
				895	spin_lock(&cluster->refill_lock);
				896	btrfs_return_cluster_to_free_space(block_group, cluster);
				897	spin_unlock(&cluster->refill_lock);
				898
				899	path = btrfs_alloc_path();
				900	if (!path) {
				901	ret = -ENOMEM;
Filipe Manana	9fecd13	2020-06-01 19:12:06 +0100	[diff] [blame]	902	goto out;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	903	}
				904
				905	/*
				906	* get the inode first so any iput calls done for the io_list
				907	* aren't the final iput (no unlinks allowed now)
				908	*/
				909	inode = lookup_free_space_inode(block_group, path);
				910
				911	mutex_lock(&trans->transaction->cache_write_mutex);
				912	/*
				913	* Make sure our free space cache IO is done before removing the
				914	* free space inode
				915	*/
				916	spin_lock(&trans->transaction->dirty_bgs_lock);
				917	if (!list_empty(&block_group->io_list)) {
				918	list_del_init(&block_group->io_list);
				919
				920	WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
				921
				922	spin_unlock(&trans->transaction->dirty_bgs_lock);
				923	btrfs_wait_cache_io(trans, block_group, path);
				924	btrfs_put_block_group(block_group);
				925	spin_lock(&trans->transaction->dirty_bgs_lock);
				926	}
				927
				928	if (!list_empty(&block_group->dirty_list)) {
				929	list_del_init(&block_group->dirty_list);
				930	remove_rsv = true;
				931	btrfs_put_block_group(block_group);
				932	}
				933	spin_unlock(&trans->transaction->dirty_bgs_lock);
				934	mutex_unlock(&trans->transaction->cache_write_mutex);
				935
Boris Burkov	36b216c	2020-11-18 15:06:25 -0800	[diff] [blame]	936	ret = btrfs_remove_free_space_inode(trans, inode, block_group);
				937	if (ret)
Filipe Manana	9fecd13	2020-06-01 19:12:06 +0100	[diff] [blame]	938	goto out;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	939
				940	spin_lock(&fs_info->block_group_cache_lock);
				941	rb_erase(&block_group->cache_node,
				942	&fs_info->block_group_cache_tree);
				943	RB_CLEAR_NODE(&block_group->cache_node);
				944
Filipe Manana	9fecd13	2020-06-01 19:12:06 +0100	[diff] [blame]	945	/* Once for the block groups rbtree */
				946	btrfs_put_block_group(block_group);
				947
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	948	if (fs_info->first_logical_byte == block_group->start)
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	949	fs_info->first_logical_byte = (u64)-1;
				950	spin_unlock(&fs_info->block_group_cache_lock);
				951
				952	down_write(&block_group->space_info->groups_sem);
				953	/*
				954	* we must use list_del_init so people can check to see if they
				955	* are still on the list after taking the semaphore
				956	*/
				957	list_del_init(&block_group->list);
				958	if (list_empty(&block_group->space_info->block_groups[index])) {
				959	kobj = block_group->space_info->block_group_kobjs[index];
				960	block_group->space_info->block_group_kobjs[index] = NULL;
				961	clear_avail_alloc_bits(fs_info, block_group->flags);
				962	}
				963	up_write(&block_group->space_info->groups_sem);
				964	clear_incompat_bg_bits(fs_info, block_group->flags);
				965	if (kobj) {
				966	kobject_del(kobj);
				967	kobject_put(kobj);
				968	}
				969
				970	if (block_group->has_caching_ctl)
				971	caching_ctl = btrfs_get_caching_control(block_group);
				972	if (block_group->cached == BTRFS_CACHE_STARTED)
				973	btrfs_wait_block_group_cache_done(block_group);
				974	if (block_group->has_caching_ctl) {
Josef Bacik	bbb86a3	2020-10-23 09:58:11 -0400	[diff] [blame]	975	spin_lock(&fs_info->block_group_cache_lock);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	976	if (!caching_ctl) {
				977	struct btrfs_caching_control *ctl;
				978
				979	list_for_each_entry(ctl,
				980	&fs_info->caching_block_groups, list)
				981	if (ctl->block_group == block_group) {
				982	caching_ctl = ctl;
				983	refcount_inc(&caching_ctl->count);
				984	break;
				985	}
				986	}
				987	if (caching_ctl)
				988	list_del_init(&caching_ctl->list);
Josef Bacik	bbb86a3	2020-10-23 09:58:11 -0400	[diff] [blame]	989	spin_unlock(&fs_info->block_group_cache_lock);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	990	if (caching_ctl) {
				991	/* Once for the caching bgs list and once for us. */
				992	btrfs_put_caching_control(caching_ctl);
				993	btrfs_put_caching_control(caching_ctl);
				994	}
				995	}
				996
				997	spin_lock(&trans->transaction->dirty_bgs_lock);
				998	WARN_ON(!list_empty(&block_group->dirty_list));
				999	WARN_ON(!list_empty(&block_group->io_list));
				1000	spin_unlock(&trans->transaction->dirty_bgs_lock);
				1001
				1002	btrfs_remove_free_space_cache(block_group);
				1003
				1004	spin_lock(&block_group->space_info->lock);
				1005	list_del_init(&block_group->ro_list);
				1006
				1007	if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
				1008	WARN_ON(block_group->space_info->total_bytes
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1009	< block_group->length);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1010	WARN_ON(block_group->space_info->bytes_readonly
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1011	< block_group->length);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1012	WARN_ON(block_group->space_info->disk_total
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1013	< block_group->length * factor);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1014	}
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1015	block_group->space_info->total_bytes -= block_group->length;
				1016	block_group->space_info->bytes_readonly -= block_group->length;
				1017	block_group->space_info->disk_total -= block_group->length * factor;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1018
				1019	spin_unlock(&block_group->space_info->lock);
				1020
Filipe Manana	ffcb9d4	2020-06-01 19:12:19 +0100	[diff] [blame]	1021	/*
				1022	* Remove the free space for the block group from the free space tree
				1023	* and the block group's item from the extent tree before marking the
				1024	* block group as removed. This is to prevent races with tasks that
				1025	* freeze and unfreeze a block group, this task and another task
				1026	* allocating a new block group - the unfreeze task ends up removing
				1027	* the block group's extent map before the task calling this function
				1028	* deletes the block group item from the extent tree, allowing for
				1029	* another task to attempt to create another block group with the same
				1030	* item key (and failing with -EEXIST and a transaction abort).
				1031	*/
				1032	ret = remove_block_group_free_space(trans, block_group);
				1033	if (ret)
				1034	goto out;
				1035
				1036	ret = remove_block_group_item(trans, path, block_group);
				1037	if (ret < 0)
				1038	goto out;
				1039
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1040	spin_lock(&block_group->lock);
				1041	block_group->removed = 1;
				1042	/*
Filipe Manana	6b7304a	2020-05-08 11:01:47 +0100	[diff] [blame]	1043	* At this point trimming or scrub can't start on this block group,
				1044	* because we removed the block group from the rbtree
				1045	* fs_info->block_group_cache_tree so no one can find it anymore and
				1046	* even if someone already got this block group before we removed it
				1047	* from the rbtree, they have already incremented block_group->frozen -
				1048	* if they didn't, for the trimming case they won't find any free space
				1049	* entries because we already removed them all when we called
				1050	* btrfs_remove_free_space_cache().
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1051	*
				1052	* And we must not remove the extent map from the fs_info->mapping_tree
				1053	* to prevent the same logical address range and physical device space
Filipe Manana	6b7304a	2020-05-08 11:01:47 +0100	[diff] [blame]	1054	* ranges from being reused for a new block group. This is needed to
				1055	* avoid races with trimming and scrub.
				1056	*
				1057	* An fs trim operation (btrfs_trim_fs() / btrfs_ioctl_fitrim()) is
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1058	* completely transactionless, so while it is trimming a range the
				1059	* currently running transaction might finish and a new one start,
				1060	* allowing for new block groups to be created that can reuse the same
				1061	* physical device locations unless we take this special care.
				1062	*
				1063	* There may also be an implicit trim operation if the file system
				1064	* is mounted with -odiscard. The same protections must remain
				1065	* in place until the extents have been discarded completely when
				1066	* the transaction commit has completed.
				1067	*/
Filipe Manana	6b7304a	2020-05-08 11:01:47 +0100	[diff] [blame]	1068	remove_em = (atomic_read(&block_group->frozen) == 0);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1069	spin_unlock(&block_group->lock);
				1070
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1071	if (remove_em) {
				1072	struct extent_map_tree *em_tree;
				1073
				1074	em_tree = &fs_info->mapping_tree;
				1075	write_lock(&em_tree->lock);
				1076	remove_extent_mapping(em_tree, em);
				1077	write_unlock(&em_tree->lock);
				1078	/* once for the tree */
				1079	free_extent_map(em);
				1080	}
Xiyu Yang	f6033c5	2020-04-21 10:54:11 +0800	[diff] [blame]	1081
Filipe Manana	9fecd13	2020-06-01 19:12:06 +0100	[diff] [blame]	1082	out:
Xiyu Yang	f6033c5	2020-04-21 10:54:11 +0800	[diff] [blame]	1083	/* Once for the lookup reference */
				1084	btrfs_put_block_group(block_group);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1085	if (remove_rsv)
				1086	btrfs_delayed_refs_rsv_release(fs_info, 1);
				1087	btrfs_free_path(path);
				1088	return ret;
				1089	}
				1090
				1091	struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
				1092	struct btrfs_fs_info *fs_info, const u64 chunk_offset)
				1093	{
				1094	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
				1095	struct extent_map *em;
				1096	struct map_lookup *map;
				1097	unsigned int num_items;
				1098
				1099	read_lock(&em_tree->lock);
				1100	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
				1101	read_unlock(&em_tree->lock);
				1102	ASSERT(em && em->start == chunk_offset);
				1103
				1104	/*
				1105	* We need to reserve 3 + N units from the metadata space info in order
				1106	* to remove a block group (done at btrfs_remove_chunk() and at
				1107	* btrfs_remove_block_group()), which are used for:
				1108	*
				1109	* 1 unit for adding the free space inode's orphan (located in the tree
				1110	* of tree roots).
				1111	* 1 unit for deleting the block group item (located in the extent
				1112	* tree).
				1113	* 1 unit for deleting the free space item (located in tree of tree
				1114	* roots).
				1115	* N units for deleting N device extent items corresponding to each
				1116	* stripe (located in the device tree).
				1117	*
				1118	* In order to remove a block group we also need to reserve units in the
				1119	* system space info in order to update the chunk tree (update one or
				1120	* more device items and remove one chunk item), but this is done at
				1121	* btrfs_remove_chunk() through a call to check_system_chunk().
				1122	*/
				1123	map = em->map_lookup;
				1124	num_items = 3 + map->num_stripes;
				1125	free_extent_map(em);
				1126
				1127	return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root,
Josef Bacik	7f9fe61	2020-03-13 15:58:05 -0400	[diff] [blame]	1128	num_items);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1129	}
				1130
				1131	/*
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1132	* Mark block group @cache read-only, so later write won't happen to block
				1133	* group @cache.
				1134	*
				1135	* If @force is not set, this function will only mark the block group readonly
				1136	* if we have enough free space (1M) in other metadata/system block groups.
				1137	* If @force is set, this function will mark the block group readonly
				1138	* without checking free space.
				1139	*
				1140	* NOTE: This function doesn't care if other block groups can contain all the
				1141	* data in this block group. That check should be done by relocation routine,
				1142	* not this function.
				1143	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1144	static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1145	{
				1146	struct btrfs_space_info *sinfo = cache->space_info;
				1147	u64 num_bytes;
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1148	int ret = -ENOSPC;
				1149
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1150	spin_lock(&sinfo->lock);
				1151	spin_lock(&cache->lock);
				1152
				1153	if (cache->ro) {
				1154	cache->ro++;
				1155	ret = 0;
				1156	goto out;
				1157	}
				1158
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1159	num_bytes = cache->length - cache->reserved - cache->pinned -
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	1160	cache->bytes_super - cache->used;
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1161
				1162	/*
Josef Bacik	a30a3d2	2020-01-17 09:07:39 -0500	[diff] [blame]	1163	* Data never overcommits, even in mixed mode, so do just the straight
				1164	* check of left over space in how much we have allocated.
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1165	*/
Josef Bacik	a30a3d2	2020-01-17 09:07:39 -0500	[diff] [blame]	1166	if (force) {
				1167	ret = 0;
				1168	} else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) {
				1169	u64 sinfo_used = btrfs_space_info_used(sinfo, true);
				1170
				1171	/*
				1172	* Here we make sure if we mark this bg RO, we still have enough
				1173	* free space as buffer.
				1174	*/
				1175	if (sinfo_used + num_bytes <= sinfo->total_bytes)
				1176	ret = 0;
				1177	} else {
				1178	/*
				1179	* We overcommit metadata, so we need to do the
				1180	* btrfs_can_overcommit check here, and we need to pass in
				1181	* BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of
				1182	* leeway to allow us to mark this block group as read only.
				1183	*/
				1184	if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
				1185	BTRFS_RESERVE_NO_FLUSH))
				1186	ret = 0;
				1187	}
				1188
				1189	if (!ret) {
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1190	sinfo->bytes_readonly += num_bytes;
				1191	cache->ro++;
				1192	list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1193	}
				1194	out:
				1195	spin_unlock(&cache->lock);
				1196	spin_unlock(&sinfo->lock);
				1197	if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
				1198	btrfs_info(cache->fs_info,
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1199	"unable to make block group %llu ro", cache->start);
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1200	btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
				1201	}
				1202	return ret;
				1203	}
				1204
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	1205	static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
				1206	struct btrfs_block_group *bg)
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1207	{
				1208	struct btrfs_fs_info *fs_info = bg->fs_info;
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	1209	struct btrfs_transaction *prev_trans = NULL;
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1210	const u64 start = bg->start;
				1211	const u64 end = start + bg->length - 1;
				1212	int ret;
				1213
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	1214	spin_lock(&fs_info->trans_lock);
				1215	if (trans->transaction->list.prev != &fs_info->trans_list) {
				1216	prev_trans = list_last_entry(&trans->transaction->list,
				1217	struct btrfs_transaction, list);
				1218	refcount_inc(&prev_trans->use_count);
				1219	}
				1220	spin_unlock(&fs_info->trans_lock);
				1221
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1222	/*
				1223	* Hold the unused_bg_unpin_mutex lock to avoid racing with
				1224	* btrfs_finish_extent_commit(). If we are at transaction N, another
				1225	* task might be running finish_extent_commit() for the previous
				1226	* transaction N - 1, and have seen a range belonging to the block
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	1227	* group in pinned_extents before we were able to clear the whole block
				1228	* group range from pinned_extents. This means that task can look up
				1229	* the block group after we unpinned it from pinned_extents and removed
				1230	* it, leading to a BUG_ON() at unpin_extent_range().
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1231	*/
				1232	mutex_lock(&fs_info->unused_bg_unpin_mutex);
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	1233	if (prev_trans) {
				1234	ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
				1235	EXTENT_DIRTY);
				1236	if (ret)
Filipe Manana	534cf53	2020-04-17 16:36:50 +0100	[diff] [blame]	1237	goto out;
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	1238	}
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1239
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	1240	ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1241	EXTENT_DIRTY);
Filipe Manana	534cf53	2020-04-17 16:36:50 +0100	[diff] [blame]	1242	out:
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1243	mutex_unlock(&fs_info->unused_bg_unpin_mutex);
Filipe Manana	5150bf1	2020-04-17 16:36:15 +0100	[diff] [blame]	1244	if (prev_trans)
				1245	btrfs_put_transaction(prev_trans);
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1246
Filipe Manana	534cf53	2020-04-17 16:36:50 +0100	[diff] [blame]	1247	return ret == 0;
Nikolay Borisov	45bb5d6	2020-01-20 16:09:17 +0200	[diff] [blame]	1248	}
				1249
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	1250	/*
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1251	* Process the unused_bgs list and remove any that don't have any allocated
				1252	* space inside of them.
				1253	*/
				1254	void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
				1255	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1256	struct btrfs_block_group *block_group;
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1257	struct btrfs_space_info *space_info;
				1258	struct btrfs_trans_handle *trans;
Dennis Zhou	6e80d4f	2019-12-13 16:22:15 -0800	[diff] [blame]	1259	const bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1260	int ret = 0;
				1261
				1262	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
				1263	return;
				1264
				1265	spin_lock(&fs_info->unused_bgs_lock);
				1266	while (!list_empty(&fs_info->unused_bgs)) {
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1267	int trimming;
				1268
				1269	block_group = list_first_entry(&fs_info->unused_bgs,
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1270	struct btrfs_block_group,
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1271	bg_list);
				1272	list_del_init(&block_group->bg_list);
				1273
				1274	space_info = block_group->space_info;
				1275
				1276	if (ret \|\| btrfs_mixed_space_info(space_info)) {
				1277	btrfs_put_block_group(block_group);
				1278	continue;
				1279	}
				1280	spin_unlock(&fs_info->unused_bgs_lock);
				1281
Dennis Zhou	b0643e5	2019-12-13 16:22:14 -0800	[diff] [blame]	1282	btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
				1283
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1284	mutex_lock(&fs_info->delete_unused_bgs_mutex);
				1285
				1286	/* Don't want to race with allocators so take the groups_sem */
				1287	down_write(&space_info->groups_sem);
Dennis Zhou	6e80d4f	2019-12-13 16:22:15 -0800	[diff] [blame]	1288
				1289	/*
				1290	* Async discard moves the final block group discard to be prior
				1291	* to the unused_bgs code path. Therefore, if it's not fully
				1292	* trimmed, punt it back to the async discard lists.
				1293	*/
				1294	if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
				1295	!btrfs_is_free_space_trimmed(block_group)) {
				1296	trace_btrfs_skip_unused_block_group(block_group);
				1297	up_write(&space_info->groups_sem);
				1298	/* Requeue if we failed because of async discard */
				1299	btrfs_discard_queue_work(&fs_info->discard_ctl,
				1300	block_group);
				1301	goto next;
				1302	}
				1303
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1304	spin_lock(&block_group->lock);
				1305	if (block_group->reserved \|\| block_group->pinned \|\|
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	1306	block_group->used \|\| block_group->ro \|\|
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1307	list_is_singular(&block_group->list)) {
				1308	/*
				1309	* We want to bail if we made new allocations or have
				1310	* outstanding allocations in this block group. We do
				1311	* the ro check in case balance is currently acting on
				1312	* this block group.
				1313	*/
				1314	trace_btrfs_skip_unused_block_group(block_group);
				1315	spin_unlock(&block_group->lock);
				1316	up_write(&space_info->groups_sem);
				1317	goto next;
				1318	}
				1319	spin_unlock(&block_group->lock);
				1320
				1321	/* We don't want to force the issue, only flip if it's ok. */
Josef Bacik	e11c040	2019-06-20 15:38:07 -0400	[diff] [blame]	1322	ret = inc_block_group_ro(block_group, 0);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1323	up_write(&space_info->groups_sem);
				1324	if (ret < 0) {
				1325	ret = 0;
				1326	goto next;
				1327	}
				1328
				1329	/*
				1330	* Want to do this before we do anything else so we can recover
				1331	* properly if we fail to join the transaction.
				1332	*/
				1333	trans = btrfs_start_trans_remove_block_group(fs_info,
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1334	block_group->start);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1335	if (IS_ERR(trans)) {
				1336	btrfs_dec_block_group_ro(block_group);
				1337	ret = PTR_ERR(trans);
				1338	goto next;
				1339	}
				1340
				1341	/*
				1342	* We could have pending pinned extents for this block group,
				1343	* just delete them, we don't care about them anymore.
				1344	*/
Filipe Manana	534cf53	2020-04-17 16:36:50 +0100	[diff] [blame]	1345	if (!clean_pinned_extents(trans, block_group)) {
				1346	btrfs_dec_block_group_ro(block_group);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1347	goto end_trans;
Filipe Manana	534cf53	2020-04-17 16:36:50 +0100	[diff] [blame]	1348	}
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1349
Dennis Zhou	b0643e5	2019-12-13 16:22:14 -0800	[diff] [blame]	1350	/*
				1351	* At this point, the block_group is read only and should fail
				1352	* new allocations. However, btrfs_finish_extent_commit() can
				1353	* cause this block_group to be placed back on the discard
				1354	* lists because now the block_group isn't fully discarded.
				1355	* Bail here and try again later after discarding everything.
				1356	*/
				1357	spin_lock(&fs_info->discard_ctl.lock);
				1358	if (!list_empty(&block_group->discard_list)) {
				1359	spin_unlock(&fs_info->discard_ctl.lock);
				1360	btrfs_dec_block_group_ro(block_group);
				1361	btrfs_discard_queue_work(&fs_info->discard_ctl,
				1362	block_group);
				1363	goto end_trans;
				1364	}
				1365	spin_unlock(&fs_info->discard_ctl.lock);
				1366
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1367	/* Reset pinned so btrfs_put_block_group doesn't complain */
				1368	spin_lock(&space_info->lock);
				1369	spin_lock(&block_group->lock);
				1370
				1371	btrfs_space_info_update_bytes_pinned(fs_info, space_info,
				1372	-block_group->pinned);
				1373	space_info->bytes_readonly += block_group->pinned;
Josef Bacik	2187374	2021-01-15 16:48:55 -0500	[diff] [blame^]	1374	__btrfs_mod_total_bytes_pinned(space_info, -block_group->pinned);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1375	block_group->pinned = 0;
				1376
				1377	spin_unlock(&block_group->lock);
				1378	spin_unlock(&space_info->lock);
				1379
Dennis Zhou	6e80d4f	2019-12-13 16:22:15 -0800	[diff] [blame]	1380	/*
				1381	* The normal path here is an unused block group is passed here,
				1382	* then trimming is handled in the transaction commit path.
				1383	* Async discard interposes before this to do the trimming
				1384	* before coming down the unused block group path as trimming
				1385	* will no longer be done later in the transaction commit path.
				1386	*/
				1387	if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
				1388	goto flip_async;
				1389
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1390	/* DISCARD can flip during remount */
Dennis Zhou	46b27f5	2019-12-13 16:22:11 -0800	[diff] [blame]	1391	trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1392
				1393	/* Implicit trim during transaction commit. */
				1394	if (trimming)
Filipe Manana	6b7304a	2020-05-08 11:01:47 +0100	[diff] [blame]	1395	btrfs_freeze_block_group(block_group);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1396
				1397	/*
				1398	* Btrfs_remove_chunk will abort the transaction if things go
				1399	* horribly wrong.
				1400	*/
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1401	ret = btrfs_remove_chunk(trans, block_group->start);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1402
				1403	if (ret) {
				1404	if (trimming)
Filipe Manana	6b7304a	2020-05-08 11:01:47 +0100	[diff] [blame]	1405	btrfs_unfreeze_block_group(block_group);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1406	goto end_trans;
				1407	}
				1408
				1409	/*
				1410	* If we're not mounted with -odiscard, we can just forget
				1411	* about this block group. Otherwise we'll need to wait
				1412	* until transaction commit to do the actual discard.
				1413	*/
				1414	if (trimming) {
				1415	spin_lock(&fs_info->unused_bgs_lock);
				1416	/*
				1417	* A concurrent scrub might have added us to the list
				1418	* fs_info->unused_bgs, so use a list_move operation
				1419	* to add the block group to the deleted_bgs list.
				1420	*/
				1421	list_move(&block_group->bg_list,
				1422	&trans->transaction->deleted_bgs);
				1423	spin_unlock(&fs_info->unused_bgs_lock);
				1424	btrfs_get_block_group(block_group);
				1425	}
				1426	end_trans:
				1427	btrfs_end_transaction(trans);
				1428	next:
				1429	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
				1430	btrfs_put_block_group(block_group);
				1431	spin_lock(&fs_info->unused_bgs_lock);
				1432	}
				1433	spin_unlock(&fs_info->unused_bgs_lock);
Dennis Zhou	6e80d4f	2019-12-13 16:22:15 -0800	[diff] [blame]	1434	return;
				1435
				1436	flip_async:
				1437	btrfs_end_transaction(trans);
				1438	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
				1439	btrfs_put_block_group(block_group);
				1440	btrfs_discard_punt_unused_bgs_list(fs_info);
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1441	}
				1442
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1443	void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
Josef Bacik	e3e0520	2019-06-20 15:37:55 -0400	[diff] [blame]	1444	{
				1445	struct btrfs_fs_info *fs_info = bg->fs_info;
				1446
				1447	spin_lock(&fs_info->unused_bgs_lock);
				1448	if (list_empty(&bg->bg_list)) {
				1449	btrfs_get_block_group(bg);
				1450	trace_btrfs_add_unused_block_group(bg);
				1451	list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
				1452	}
				1453	spin_unlock(&fs_info->unused_bgs_lock);
				1454	}
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1455
Johannes Thumshirn	e3ba67a	2020-06-02 19:05:57 +0900	[diff] [blame]	1456	static int read_bg_from_eb(struct btrfs_fs_info fs_info, struct btrfs_key key,
				1457	struct btrfs_path *path)
				1458	{
				1459	struct extent_map_tree *em_tree;
				1460	struct extent_map *em;
				1461	struct btrfs_block_group_item bg;
				1462	struct extent_buffer *leaf;
				1463	int slot;
				1464	u64 flags;
				1465	int ret = 0;
				1466
				1467	slot = path->slots[0];
				1468	leaf = path->nodes[0];
				1469
				1470	em_tree = &fs_info->mapping_tree;
				1471	read_lock(&em_tree->lock);
				1472	em = lookup_extent_mapping(em_tree, key->objectid, key->offset);
				1473	read_unlock(&em_tree->lock);
				1474	if (!em) {
				1475	btrfs_err(fs_info,
				1476	"logical %llu len %llu found bg but no related chunk",
				1477	key->objectid, key->offset);
				1478	return -ENOENT;
				1479	}
				1480
				1481	if (em->start != key->objectid \|\| em->len != key->offset) {
				1482	btrfs_err(fs_info,
				1483	"block group %llu len %llu mismatch with chunk %llu len %llu",
				1484	key->objectid, key->offset, em->start, em->len);
				1485	ret = -EUCLEAN;
				1486	goto out_free_em;
				1487	}
				1488
				1489	read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
				1490	sizeof(bg));
				1491	flags = btrfs_stack_block_group_flags(&bg) &
				1492	BTRFS_BLOCK_GROUP_TYPE_MASK;
				1493
				1494	if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
				1495	btrfs_err(fs_info,
				1496	"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
				1497	key->objectid, key->offset, flags,
				1498	(BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type));
				1499	ret = -EUCLEAN;
				1500	}
				1501
				1502	out_free_em:
				1503	free_extent_map(em);
				1504	return ret;
				1505	}
				1506
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1507	static int find_first_block_group(struct btrfs_fs_info *fs_info,
				1508	struct btrfs_path *path,
				1509	struct btrfs_key *key)
				1510	{
				1511	struct btrfs_root *root = fs_info->extent_root;
Johannes Thumshirn	e3ba67a	2020-06-02 19:05:57 +0900	[diff] [blame]	1512	int ret;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1513	struct btrfs_key found_key;
				1514	struct extent_buffer *leaf;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1515	int slot;
				1516
				1517	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
				1518	if (ret < 0)
Johannes Thumshirn	e3ba67a	2020-06-02 19:05:57 +0900	[diff] [blame]	1519	return ret;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1520
				1521	while (1) {
				1522	slot = path->slots[0];
				1523	leaf = path->nodes[0];
				1524	if (slot >= btrfs_header_nritems(leaf)) {
				1525	ret = btrfs_next_leaf(root, path);
				1526	if (ret == 0)
				1527	continue;
				1528	if (ret < 0)
				1529	goto out;
				1530	break;
				1531	}
				1532	btrfs_item_key_to_cpu(leaf, &found_key, slot);
				1533
				1534	if (found_key.objectid >= key->objectid &&
				1535	found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
Johannes Thumshirn	e3ba67a	2020-06-02 19:05:57 +0900	[diff] [blame]	1536	ret = read_bg_from_eb(fs_info, &found_key, path);
				1537	break;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1538	}
Johannes Thumshirn	e3ba67a	2020-06-02 19:05:57 +0900	[diff] [blame]	1539
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1540	path->slots[0]++;
				1541	}
				1542	out:
				1543	return ret;
				1544	}
				1545
				1546	static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
				1547	{
				1548	u64 extra_flags = chunk_to_extended(flags) &
				1549	BTRFS_EXTENDED_PROFILE_MASK;
				1550
				1551	write_seqlock(&fs_info->profiles_lock);
				1552	if (flags & BTRFS_BLOCK_GROUP_DATA)
				1553	fs_info->avail_data_alloc_bits \|= extra_flags;
				1554	if (flags & BTRFS_BLOCK_GROUP_METADATA)
				1555	fs_info->avail_metadata_alloc_bits \|= extra_flags;
				1556	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
				1557	fs_info->avail_system_alloc_bits \|= extra_flags;
				1558	write_sequnlock(&fs_info->profiles_lock);
				1559	}
				1560
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1561	/**
Nikolay Borisov	9ee9b97	2021-01-22 11:57:58 +0200	[diff] [blame]	1562	* Map a physical disk address to a list of logical addresses
				1563	*
				1564	* @fs_info: the filesystem
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1565	* @chunk_start: logical address of block group
				1566	* @physical: physical address to map to logical addresses
				1567	* @logical: return array of logical addresses which map to @physical
				1568	* @naddrs: length of @logical
				1569	* @stripe_len: size of IO stripe for the given block group
				1570	*
				1571	* Maps a particular @physical disk address to a list of @logical addresses.
				1572	* Used primarily to exclude those portions of a block group that contain super
				1573	* block copies.
				1574	*/
				1575	EXPORT_FOR_TESTS
				1576	int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
				1577	u64 physical, u64 *logical, int naddrs, int *stripe_len)
				1578	{
				1579	struct extent_map *em;
				1580	struct map_lookup *map;
				1581	u64 *buf;
				1582	u64 bytenr;
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1583	u64 data_stripe_length;
				1584	u64 io_stripe_size;
				1585	int i, nr = 0;
				1586	int ret = 0;
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1587
				1588	em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
				1589	if (IS_ERR(em))
				1590	return -EIO;
				1591
				1592	map = em->map_lookup;
Nikolay Borisov	9e22b92	2020-04-03 16:40:34 +0300	[diff] [blame]	1593	data_stripe_length = em->orig_block_len;
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1594	io_stripe_size = map->stripe_len;
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1595
Nikolay Borisov	9e22b92	2020-04-03 16:40:34 +0300	[diff] [blame]	1596	/* For RAID5/6 adjust to a full IO stripe length */
				1597	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1598	io_stripe_size = map->stripe_len * nr_data_stripes(map);
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1599
				1600	buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1601	if (!buf) {
				1602	ret = -ENOMEM;
				1603	goto out;
				1604	}
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1605
				1606	for (i = 0; i < map->num_stripes; i++) {
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1607	bool already_inserted = false;
				1608	u64 stripe_nr;
				1609	int j;
				1610
				1611	if (!in_range(physical, map->stripes[i].physical,
				1612	data_stripe_length))
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1613	continue;
				1614
				1615	stripe_nr = physical - map->stripes[i].physical;
				1616	stripe_nr = div64_u64(stripe_nr, map->stripe_len);
				1617
				1618	if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
				1619	stripe_nr = stripe_nr * map->num_stripes + i;
				1620	stripe_nr = div_u64(stripe_nr, map->sub_stripes);
				1621	} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
				1622	stripe_nr = stripe_nr * map->num_stripes + i;
				1623	}
				1624	/*
				1625	* The remaining case would be for RAID56, multiply by
				1626	* nr_data_stripes(). Alternatively, just use rmap_len below
				1627	* instead of map->stripe_len
				1628	*/
				1629
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1630	bytenr = chunk_start + stripe_nr * io_stripe_size;
				1631
				1632	/* Ensure we don't add duplicate addresses */
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1633	for (j = 0; j < nr; j++) {
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1634	if (buf[j] == bytenr) {
				1635	already_inserted = true;
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1636	break;
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1637	}
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1638	}
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1639
				1640	if (!already_inserted)
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1641	buf[nr++] = bytenr;
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1642	}
				1643
				1644	*logical = buf;
				1645	*naddrs = nr;
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1646	*stripe_len = io_stripe_size;
				1647	out:
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1648	free_extent_map(em);
Nikolay Borisov	1776ad1	2019-11-19 14:05:53 +0200	[diff] [blame]	1649	return ret;
Nikolay Borisov	96a1433	2019-12-10 19:57:51 +0200	[diff] [blame]	1650	}
				1651
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1652	static int exclude_super_stripes(struct btrfs_block_group *cache)
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1653	{
				1654	struct btrfs_fs_info *fs_info = cache->fs_info;
Naohiro Aota	1265925	2020-11-10 20:26:14 +0900	[diff] [blame]	1655	const bool zoned = btrfs_is_zoned(fs_info);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1656	u64 bytenr;
				1657	u64 *logical;
				1658	int stripe_len;
				1659	int i, nr, ret;
				1660
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1661	if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
				1662	stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1663	cache->bytes_super += stripe_len;
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1664	ret = btrfs_add_excluded_extent(fs_info, cache->start,
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1665	stripe_len);
				1666	if (ret)
				1667	return ret;
				1668	}
				1669
				1670	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
				1671	bytenr = btrfs_sb_offset(i);
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1672	ret = btrfs_rmap_block(fs_info, cache->start,
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1673	bytenr, &logical, &nr, &stripe_len);
				1674	if (ret)
				1675	return ret;
				1676
Naohiro Aota	1265925	2020-11-10 20:26:14 +0900	[diff] [blame]	1677	/* Shouldn't have super stripes in sequential zones */
				1678	if (zoned && nr) {
				1679	btrfs_err(fs_info,
				1680	"zoned: block group %llu must not contain super block",
				1681	cache->start);
				1682	return -EUCLEAN;
				1683	}
				1684
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1685	while (nr--) {
Nikolay Borisov	96f9b0f	2020-04-03 16:40:35 +0300	[diff] [blame]	1686	u64 len = min_t(u64, stripe_len,
				1687	cache->start + cache->length - logical[nr]);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1688
				1689	cache->bytes_super += len;
Nikolay Borisov	96f9b0f	2020-04-03 16:40:35 +0300	[diff] [blame]	1690	ret = btrfs_add_excluded_extent(fs_info, logical[nr],
				1691	len);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1692	if (ret) {
				1693	kfree(logical);
				1694	return ret;
				1695	}
				1696	}
				1697
				1698	kfree(logical);
				1699	}
				1700	return 0;
				1701	}
				1702
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1703	static void link_block_group(struct btrfs_block_group *cache)
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1704	{
				1705	struct btrfs_space_info *space_info = cache->space_info;
				1706	int index = btrfs_bg_flags_to_raid_index(cache->flags);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1707
				1708	down_write(&space_info->groups_sem);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1709	list_add_tail(&cache->list, &space_info->block_groups[index]);
				1710	up_write(&space_info->groups_sem);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1711	}
				1712
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1713	static struct btrfs_block_group *btrfs_create_block_group_cache(
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	1714	struct btrfs_fs_info *fs_info, u64 start)
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1715	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1716	struct btrfs_block_group *cache;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1717
				1718	cache = kzalloc(sizeof(*cache), GFP_NOFS);
				1719	if (!cache)
				1720	return NULL;
				1721
				1722	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
				1723	GFP_NOFS);
				1724	if (!cache->free_space_ctl) {
				1725	kfree(cache);
				1726	return NULL;
				1727	}
				1728
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1729	cache->start = start;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1730
				1731	cache->fs_info = fs_info;
				1732	cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1733
Dennis Zhou	6e80d4f	2019-12-13 16:22:15 -0800	[diff] [blame]	1734	cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
				1735
Josef Bacik	48aaeeb	2020-07-06 09:14:11 -0400	[diff] [blame]	1736	refcount_set(&cache->refs, 1);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1737	spin_lock_init(&cache->lock);
				1738	init_rwsem(&cache->data_rwsem);
				1739	INIT_LIST_HEAD(&cache->list);
				1740	INIT_LIST_HEAD(&cache->cluster_list);
				1741	INIT_LIST_HEAD(&cache->bg_list);
				1742	INIT_LIST_HEAD(&cache->ro_list);
Dennis Zhou	b0643e5	2019-12-13 16:22:14 -0800	[diff] [blame]	1743	INIT_LIST_HEAD(&cache->discard_list);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1744	INIT_LIST_HEAD(&cache->dirty_list);
				1745	INIT_LIST_HEAD(&cache->io_list);
Josef Bacik	cd79909	2020-10-23 09:58:08 -0400	[diff] [blame]	1746	btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
Filipe Manana	6b7304a	2020-05-08 11:01:47 +0100	[diff] [blame]	1747	atomic_set(&cache->frozen, 0);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1748	mutex_init(&cache->free_space_lock);
				1749	btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
				1750
				1751	return cache;
				1752	}
				1753
				1754	/*
				1755	* Iterate all chunks and verify that each of them has the corresponding block
				1756	* group
				1757	*/
				1758	static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
				1759	{
				1760	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
				1761	struct extent_map *em;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1762	struct btrfs_block_group *bg;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1763	u64 start = 0;
				1764	int ret = 0;
				1765
				1766	while (1) {
				1767	read_lock(&map_tree->lock);
				1768	/*
				1769	* lookup_extent_mapping will return the first extent map
				1770	* intersecting the range, so setting @len to 1 is enough to
				1771	* get the first chunk.
				1772	*/
				1773	em = lookup_extent_mapping(map_tree, start, 1);
				1774	read_unlock(&map_tree->lock);
				1775	if (!em)
				1776	break;
				1777
				1778	bg = btrfs_lookup_block_group(fs_info, em->start);
				1779	if (!bg) {
				1780	btrfs_err(fs_info,
				1781	"chunk start=%llu len=%llu doesn't have corresponding block group",
				1782	em->start, em->len);
				1783	ret = -EUCLEAN;
				1784	free_extent_map(em);
				1785	break;
				1786	}
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1787	if (bg->start != em->start \|\| bg->length != em->len \|\|
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1788	(bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
				1789	(em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
				1790	btrfs_err(fs_info,
				1791	"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
				1792	em->start, em->len,
				1793	em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	1794	bg->start, bg->length,
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1795	bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
				1796	ret = -EUCLEAN;
				1797	free_extent_map(em);
				1798	btrfs_put_block_group(bg);
				1799	break;
				1800	}
				1801	start = em->start + em->len;
				1802	free_extent_map(em);
				1803	btrfs_put_block_group(bg);
				1804	}
				1805	return ret;
				1806	}
				1807
Marcos Paulo de Souza	4c448ce	2020-08-17 10:56:10 -0300	[diff] [blame]	1808	static void read_block_group_item(struct btrfs_block_group *cache,
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	1809	struct btrfs_path *path,
				1810	const struct btrfs_key *key)
				1811	{
				1812	struct extent_buffer *leaf = path->nodes[0];
				1813	struct btrfs_block_group_item bgi;
				1814	int slot = path->slots[0];
				1815
				1816	cache->length = key->offset;
				1817
				1818	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
				1819	sizeof(bgi));
				1820	cache->used = btrfs_stack_block_group_used(&bgi);
				1821	cache->flags = btrfs_stack_block_group_flags(&bgi);
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	1822	}
				1823
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1824	static int read_one_block_group(struct btrfs_fs_info *info,
				1825	struct btrfs_path *path,
Qu Wenruo	d49a2dd	2019-11-05 09:35:35 +0800	[diff] [blame]	1826	const struct btrfs_key *key,
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1827	int need_clear)
				1828	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1829	struct btrfs_block_group *cache;
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1830	struct btrfs_space_info *space_info;
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1831	const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1832	int ret;
				1833
Qu Wenruo	d49a2dd	2019-11-05 09:35:35 +0800	[diff] [blame]	1834	ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1835
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	1836	cache = btrfs_create_block_group_cache(info, key->objectid);
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1837	if (!cache)
				1838	return -ENOMEM;
				1839
Marcos Paulo de Souza	4c448ce	2020-08-17 10:56:10 -0300	[diff] [blame]	1840	read_block_group_item(cache, path, key);
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	1841
Marcos Paulo de Souza	e3e39c7	2020-08-21 11:54:44 -0300	[diff] [blame]	1842	set_free_space_tree_thresholds(cache);
				1843
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1844	if (need_clear) {
				1845	/*
				1846	* When we mount with old space cache, we need to
				1847	* set BTRFS_DC_CLEAR and set dirty flag.
				1848	*
				1849	* a) Setting 'BTRFS_DC_CLEAR' makes sure that we
				1850	* truncate the old free space cache inode and
				1851	* setup a new one.
				1852	* b) Setting 'dirty flag' makes sure that we flush
				1853	* the new space cache info onto disk.
				1854	*/
				1855	if (btrfs_test_opt(info, SPACE_CACHE))
				1856	cache->disk_cache_state = BTRFS_DC_CLEAR;
				1857	}
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1858	if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
				1859	(cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
				1860	btrfs_err(info,
				1861	"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
				1862	cache->start);
				1863	ret = -EINVAL;
				1864	goto error;
				1865	}
				1866
				1867	/*
				1868	* We need to exclude the super stripes now so that the space info has
				1869	* super bytes accounted for, otherwise we'll think we have more space
				1870	* than we actually do.
				1871	*/
				1872	ret = exclude_super_stripes(cache);
				1873	if (ret) {
				1874	/* We may have excluded something, so call this just in case. */
				1875	btrfs_free_excluded_extents(cache);
				1876	goto error;
				1877	}
				1878
				1879	/*
				1880	* Check for two cases, either we are full, and therefore don't need
				1881	* to bother with the caching work since we won't find any space, or we
				1882	* are empty, and we can just add all the space in and be done with it.
				1883	* This saves us _a_lot_ of time, particularly in the full case.
				1884	*/
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	1885	if (cache->length == cache->used) {
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1886	cache->last_byte_to_unpin = (u64)-1;
				1887	cache->cached = BTRFS_CACHE_FINISHED;
				1888	btrfs_free_excluded_extents(cache);
				1889	} else if (cache->used == 0) {
				1890	cache->last_byte_to_unpin = (u64)-1;
				1891	cache->cached = BTRFS_CACHE_FINISHED;
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	1892	add_new_free_space(cache, cache->start,
				1893	cache->start + cache->length);
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1894	btrfs_free_excluded_extents(cache);
				1895	}
				1896
				1897	ret = btrfs_add_block_group_cache(info, cache);
				1898	if (ret) {
				1899	btrfs_remove_free_space_cache(cache);
				1900	goto error;
				1901	}
				1902	trace_btrfs_add_block_group(info, cache, 0);
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	1903	btrfs_update_space_info(info, cache->flags, cache->length,
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1904	cache->used, cache->bytes_super, &space_info);
				1905
				1906	cache->space_info = space_info;
				1907
				1908	link_block_group(cache);
				1909
				1910	set_avail_alloc_bits(info, cache->flags);
				1911	if (btrfs_chunk_readonly(info, cache->start)) {
				1912	inc_block_group_ro(cache, 1);
				1913	} else if (cache->used == 0) {
				1914	ASSERT(list_empty(&cache->bg_list));
Dennis Zhou	6e80d4f	2019-12-13 16:22:15 -0800	[diff] [blame]	1915	if (btrfs_test_opt(info, DISCARD_ASYNC))
				1916	btrfs_discard_queue_work(&info->discard_ctl, cache);
				1917	else
				1918	btrfs_mark_bg_unused(cache);
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	1919	}
				1920	return 0;
				1921	error:
				1922	btrfs_put_block_group(cache);
				1923	return ret;
				1924	}
				1925
Josef Bacik	42437a6	2020-10-16 11:29:18 -0400	[diff] [blame]	1926	static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
				1927	{
				1928	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
				1929	struct btrfs_space_info *space_info;
				1930	struct rb_node *node;
				1931	int ret = 0;
				1932
				1933	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
				1934	struct extent_map *em;
				1935	struct map_lookup *map;
				1936	struct btrfs_block_group *bg;
				1937
				1938	em = rb_entry(node, struct extent_map, rb_node);
				1939	map = em->map_lookup;
				1940	bg = btrfs_create_block_group_cache(fs_info, em->start);
				1941	if (!bg) {
				1942	ret = -ENOMEM;
				1943	break;
				1944	}
				1945
				1946	/* Fill dummy cache as FULL */
				1947	bg->length = em->len;
				1948	bg->flags = map->type;
				1949	bg->last_byte_to_unpin = (u64)-1;
				1950	bg->cached = BTRFS_CACHE_FINISHED;
				1951	bg->used = em->len;
				1952	bg->flags = map->type;
				1953	ret = btrfs_add_block_group_cache(fs_info, bg);
				1954	if (ret) {
				1955	btrfs_remove_free_space_cache(bg);
				1956	btrfs_put_block_group(bg);
				1957	break;
				1958	}
				1959	btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
				1960	0, &space_info);
				1961	bg->space_info = space_info;
				1962	link_block_group(bg);
				1963
				1964	set_avail_alloc_bits(fs_info, bg->flags);
				1965	}
				1966	if (!ret)
				1967	btrfs_init_global_block_rsv(fs_info);
				1968	return ret;
				1969	}
				1970
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1971	int btrfs_read_block_groups(struct btrfs_fs_info *info)
				1972	{
				1973	struct btrfs_path *path;
				1974	int ret;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	1975	struct btrfs_block_group *cache;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1976	struct btrfs_space_info *space_info;
				1977	struct btrfs_key key;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1978	int need_clear = 0;
				1979	u64 cache_gen;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1980
Josef Bacik	42437a6	2020-10-16 11:29:18 -0400	[diff] [blame]	1981	if (!info->extent_root)
				1982	return fill_dummy_bgs(info);
				1983
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1984	key.objectid = 0;
				1985	key.offset = 0;
				1986	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
				1987	path = btrfs_alloc_path();
				1988	if (!path)
				1989	return -ENOMEM;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	1990
				1991	cache_gen = btrfs_super_cache_generation(info->super_copy);
				1992	if (btrfs_test_opt(info, SPACE_CACHE) &&
				1993	btrfs_super_generation(info->super_copy) != cache_gen)
				1994	need_clear = 1;
				1995	if (btrfs_test_opt(info, CLEAR_CACHE))
				1996	need_clear = 1;
				1997
				1998	while (1) {
				1999	ret = find_first_block_group(info, path, &key);
				2000	if (ret > 0)
				2001	break;
				2002	if (ret != 0)
				2003	goto error;
				2004
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	2005	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
Qu Wenruo	d49a2dd	2019-11-05 09:35:35 +0800	[diff] [blame]	2006	ret = read_one_block_group(info, path, &key, need_clear);
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	2007	if (ret < 0)
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2008	goto error;
Qu Wenruo	ffb9e0f	2019-10-10 10:39:27 +0800	[diff] [blame]	2009	key.objectid += key.offset;
				2010	key.offset = 0;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2011	btrfs_release_path(path);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2012	}
Josef Bacik	7837fa8	2020-10-14 17:00:51 -0400	[diff] [blame]	2013	btrfs_release_path(path);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2014
Josef Bacik	7280490	2020-09-01 17:40:37 -0400	[diff] [blame]	2015	list_for_each_entry(space_info, &info->space_info, list) {
Josef Bacik	49ea112	2020-09-01 17:40:38 -0400	[diff] [blame]	2016	int i;
				2017
				2018	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
				2019	if (list_empty(&space_info->block_groups[i]))
				2020	continue;
				2021	cache = list_first_entry(&space_info->block_groups[i],
				2022	struct btrfs_block_group,
				2023	list);
				2024	btrfs_sysfs_add_block_group_type(cache);
				2025	}
				2026
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2027	if (!(btrfs_get_alloc_profile(info, space_info->flags) &
				2028	(BTRFS_BLOCK_GROUP_RAID10 \|
				2029	BTRFS_BLOCK_GROUP_RAID1_MASK \|
				2030	BTRFS_BLOCK_GROUP_RAID56_MASK \|
				2031	BTRFS_BLOCK_GROUP_DUP)))
				2032	continue;
				2033	/*
				2034	* Avoid allocating from un-mirrored block group if there are
				2035	* mirrored block groups.
				2036	*/
				2037	list_for_each_entry(cache,
				2038	&space_info->block_groups[BTRFS_RAID_RAID0],
				2039	list)
Josef Bacik	e11c040	2019-06-20 15:38:07 -0400	[diff] [blame]	2040	inc_block_group_ro(cache, 1);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2041	list_for_each_entry(cache,
				2042	&space_info->block_groups[BTRFS_RAID_SINGLE],
				2043	list)
Josef Bacik	e11c040	2019-06-20 15:38:07 -0400	[diff] [blame]	2044	inc_block_group_ro(cache, 1);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2045	}
				2046
				2047	btrfs_init_global_block_rsv(info);
				2048	ret = check_chunk_block_group_mappings(info);
				2049	error:
				2050	btrfs_free_path(path);
				2051	return ret;
				2052	}
				2053
Qu Wenruo	97f4728	2020-05-05 07:58:22 +0800	[diff] [blame]	2054	static int insert_block_group_item(struct btrfs_trans_handle *trans,
				2055	struct btrfs_block_group *block_group)
				2056	{
				2057	struct btrfs_fs_info *fs_info = trans->fs_info;
				2058	struct btrfs_block_group_item bgi;
				2059	struct btrfs_root *root;
				2060	struct btrfs_key key;
				2061
				2062	spin_lock(&block_group->lock);
				2063	btrfs_set_stack_block_group_used(&bgi, block_group->used);
				2064	btrfs_set_stack_block_group_chunk_objectid(&bgi,
				2065	BTRFS_FIRST_CHUNK_TREE_OBJECTID);
				2066	btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
				2067	key.objectid = block_group->start;
				2068	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
				2069	key.offset = block_group->length;
				2070	spin_unlock(&block_group->lock);
				2071
				2072	root = fs_info->extent_root;
				2073	return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
				2074	}
				2075
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2076	void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
				2077	{
				2078	struct btrfs_fs_info *fs_info = trans->fs_info;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2079	struct btrfs_block_group *block_group;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2080	int ret = 0;
				2081
				2082	if (!trans->can_flush_pending_bgs)
				2083	return;
				2084
				2085	while (!list_empty(&trans->new_bgs)) {
Josef Bacik	49ea112	2020-09-01 17:40:38 -0400	[diff] [blame]	2086	int index;
				2087
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2088	block_group = list_first_entry(&trans->new_bgs,
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2089	struct btrfs_block_group,
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2090	bg_list);
				2091	if (ret)
				2092	goto next;
				2093
Josef Bacik	49ea112	2020-09-01 17:40:38 -0400	[diff] [blame]	2094	index = btrfs_bg_flags_to_raid_index(block_group->flags);
				2095
Qu Wenruo	97f4728	2020-05-05 07:58:22 +0800	[diff] [blame]	2096	ret = insert_block_group_item(trans, block_group);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2097	if (ret)
				2098	btrfs_abort_transaction(trans, ret);
Qu Wenruo	97f4728	2020-05-05 07:58:22 +0800	[diff] [blame]	2099	ret = btrfs_finish_chunk_alloc(trans, block_group->start,
				2100	block_group->length);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2101	if (ret)
				2102	btrfs_abort_transaction(trans, ret);
				2103	add_block_group_free_space(trans, block_group);
Josef Bacik	49ea112	2020-09-01 17:40:38 -0400	[diff] [blame]	2104
				2105	/*
				2106	* If we restriped during balance, we may have added a new raid
				2107	* type, so now add the sysfs entries when it is safe to do so.
				2108	* We don't have to worry about locking here as it's handled in
				2109	* btrfs_sysfs_add_block_group_type.
				2110	*/
				2111	if (block_group->space_info->block_group_kobjs[index] == NULL)
				2112	btrfs_sysfs_add_block_group_type(block_group);
				2113
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2114	/* Already aborted the transaction if it failed. */
				2115	next:
				2116	btrfs_delayed_refs_rsv_release(fs_info, 1);
				2117	list_del_init(&block_group->bg_list);
				2118	}
				2119	btrfs_trans_release_chunk_metadata(trans);
				2120	}
				2121
				2122	int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
				2123	u64 type, u64 chunk_offset, u64 size)
				2124	{
				2125	struct btrfs_fs_info *fs_info = trans->fs_info;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2126	struct btrfs_block_group *cache;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2127	int ret;
				2128
				2129	btrfs_set_log_full_commit(trans);
				2130
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	2131	cache = btrfs_create_block_group_cache(fs_info, chunk_offset);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2132	if (!cache)
				2133	return -ENOMEM;
				2134
Qu Wenruo	9afc664	2020-05-05 07:58:20 +0800	[diff] [blame]	2135	cache->length = size;
Marcos Paulo de Souza	e3e39c7	2020-08-21 11:54:44 -0300	[diff] [blame]	2136	set_free_space_tree_thresholds(cache);
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	2137	cache->used = bytes_used;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2138	cache->flags = type;
				2139	cache->last_byte_to_unpin = (u64)-1;
				2140	cache->cached = BTRFS_CACHE_FINISHED;
Boris Burkov	997e3e2	2020-11-18 15:06:18 -0800	[diff] [blame]	2141	if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
				2142	cache->needs_free_space = 1;
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2143	ret = exclude_super_stripes(cache);
				2144	if (ret) {
				2145	/* We may have excluded something, so call this just in case */
				2146	btrfs_free_excluded_extents(cache);
				2147	btrfs_put_block_group(cache);
				2148	return ret;
				2149	}
				2150
				2151	add_new_free_space(cache, chunk_offset, chunk_offset + size);
				2152
				2153	btrfs_free_excluded_extents(cache);
				2154
				2155	#ifdef CONFIG_BTRFS_DEBUG
				2156	if (btrfs_should_fragment_free_space(cache)) {
				2157	u64 new_bytes_used = size - bytes_used;
				2158
				2159	bytes_used += new_bytes_used >> 1;
Josef Bacik	e11c040	2019-06-20 15:38:07 -0400	[diff] [blame]	2160	fragment_free_space(cache);
Josef Bacik	4358d963	2019-06-20 15:37:57 -0400	[diff] [blame]	2161	}
				2162	#endif
				2163	/*
				2164	* Ensure the corresponding space_info object is created and
				2165	* assigned to our block group. We want our bg to be added to the rbtree
				2166	* with its ->space_info set.
				2167	*/
				2168	cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
				2169	ASSERT(cache->space_info);
				2170
				2171	ret = btrfs_add_block_group_cache(fs_info, cache);
				2172	if (ret) {
				2173	btrfs_remove_free_space_cache(cache);
				2174	btrfs_put_block_group(cache);
				2175	return ret;
				2176	}
				2177
				2178	/*
				2179	* Now that our block group has its ->space_info set and is inserted in
				2180	* the rbtree, update the space info's counters.
				2181	*/
				2182	trace_btrfs_add_block_group(fs_info, cache, 1);
				2183	btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
				2184	cache->bytes_super, &cache->space_info);
				2185	btrfs_update_global_block_rsv(fs_info);
				2186
				2187	link_block_group(cache);
				2188
				2189	list_add_tail(&cache->bg_list, &trans->new_bgs);
				2190	trans->delayed_ref_updates++;
				2191	btrfs_update_delayed_refs_rsv(trans);
				2192
				2193	set_avail_alloc_bits(fs_info, type);
				2194	return 0;
				2195	}
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2196
Qu Wenruo	b12de52	2019-11-15 10:09:00 +0800	[diff] [blame]	2197	/*
				2198	* Mark one block group RO, can be called several times for the same block
				2199	* group.
				2200	*
				2201	* @cache: the destination block group
				2202	* @do_chunk_alloc: whether need to do chunk pre-allocation, this is to
				2203	* ensure we still have some free space after marking this
				2204	* block group RO.
				2205	*/
				2206	int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
				2207	bool do_chunk_alloc)
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2208	{
				2209	struct btrfs_fs_info *fs_info = cache->fs_info;
				2210	struct btrfs_trans_handle *trans;
				2211	u64 alloc_flags;
				2212	int ret;
				2213
				2214	again:
				2215	trans = btrfs_join_transaction(fs_info->extent_root);
				2216	if (IS_ERR(trans))
				2217	return PTR_ERR(trans);
				2218
				2219	/*
				2220	* we're not allowed to set block groups readonly after the dirty
				2221	* block groups cache has started writing. If it already started,
				2222	* back off and let this transaction commit
				2223	*/
				2224	mutex_lock(&fs_info->ro_block_group_mutex);
				2225	if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
				2226	u64 transid = trans->transid;
				2227
				2228	mutex_unlock(&fs_info->ro_block_group_mutex);
				2229	btrfs_end_transaction(trans);
				2230
				2231	ret = btrfs_wait_for_commit(fs_info, transid);
				2232	if (ret)
				2233	return ret;
				2234	goto again;
				2235	}
				2236
Qu Wenruo	b12de52	2019-11-15 10:09:00 +0800	[diff] [blame]	2237	if (do_chunk_alloc) {
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2238	/*
Qu Wenruo	b12de52	2019-11-15 10:09:00 +0800	[diff] [blame]	2239	* If we are changing raid levels, try to allocate a
				2240	* corresponding block group with the new raid level.
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2241	*/
Josef Bacik	349e120	2020-07-21 10:48:45 -0400	[diff] [blame]	2242	alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
Qu Wenruo	b12de52	2019-11-15 10:09:00 +0800	[diff] [blame]	2243	if (alloc_flags != cache->flags) {
				2244	ret = btrfs_chunk_alloc(trans, alloc_flags,
				2245	CHUNK_ALLOC_FORCE);
				2246	/*
				2247	* ENOSPC is allowed here, we may have enough space
				2248	* already allocated at the new raid level to carry on
				2249	*/
				2250	if (ret == -ENOSPC)
				2251	ret = 0;
				2252	if (ret < 0)
				2253	goto out;
				2254	}
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2255	}
				2256
Josef Bacik	a7a63acc	2020-01-17 09:07:38 -0500	[diff] [blame]	2257	ret = inc_block_group_ro(cache, 0);
Qu Wenruo	b12de52	2019-11-15 10:09:00 +0800	[diff] [blame]	2258	if (!do_chunk_alloc)
				2259	goto unlock_out;
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2260	if (!ret)
				2261	goto out;
				2262	alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
				2263	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
				2264	if (ret < 0)
				2265	goto out;
Josef Bacik	e11c040	2019-06-20 15:38:07 -0400	[diff] [blame]	2266	ret = inc_block_group_ro(cache, 0);
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2267	out:
				2268	if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
Josef Bacik	349e120	2020-07-21 10:48:45 -0400	[diff] [blame]	2269	alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2270	mutex_lock(&fs_info->chunk_mutex);
				2271	check_system_chunk(trans, alloc_flags);
				2272	mutex_unlock(&fs_info->chunk_mutex);
				2273	}
Qu Wenruo	b12de52	2019-11-15 10:09:00 +0800	[diff] [blame]	2274	unlock_out:
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2275	mutex_unlock(&fs_info->ro_block_group_mutex);
				2276
				2277	btrfs_end_transaction(trans);
				2278	return ret;
				2279	}
				2280
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2281	void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2282	{
				2283	struct btrfs_space_info *sinfo = cache->space_info;
				2284	u64 num_bytes;
				2285
				2286	BUG_ON(!cache->ro);
				2287
				2288	spin_lock(&sinfo->lock);
				2289	spin_lock(&cache->lock);
				2290	if (!--cache->ro) {
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	2291	num_bytes = cache->length - cache->reserved -
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	2292	cache->pinned - cache->bytes_super - cache->used;
Josef Bacik	26ce209	2019-06-20 15:37:59 -0400	[diff] [blame]	2293	sinfo->bytes_readonly -= num_bytes;
				2294	list_del_init(&cache->ro_list);
				2295	}
				2296	spin_unlock(&cache->lock);
				2297	spin_unlock(&sinfo->lock);
				2298	}
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2299
Qu Wenruo	3be4d8e	2020-05-05 07:58:23 +0800	[diff] [blame]	2300	static int update_block_group_item(struct btrfs_trans_handle *trans,
				2301	struct btrfs_path *path,
				2302	struct btrfs_block_group *cache)
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2303	{
				2304	struct btrfs_fs_info *fs_info = trans->fs_info;
				2305	int ret;
Qu Wenruo	3be4d8e	2020-05-05 07:58:23 +0800	[diff] [blame]	2306	struct btrfs_root *root = fs_info->extent_root;
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2307	unsigned long bi;
				2308	struct extent_buffer *leaf;
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	2309	struct btrfs_block_group_item bgi;
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	2310	struct btrfs_key key;
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2311
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	2312	key.objectid = cache->start;
				2313	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
				2314	key.offset = cache->length;
				2315
Qu Wenruo	3be4d8e	2020-05-05 07:58:23 +0800	[diff] [blame]	2316	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2317	if (ret) {
				2318	if (ret > 0)
				2319	ret = -ENOENT;
				2320	goto fail;
				2321	}
				2322
				2323	leaf = path->nodes[0];
				2324	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
David Sterba	de0dc45	2019-10-23 18:48:18 +0200	[diff] [blame]	2325	btrfs_set_stack_block_group_used(&bgi, cache->used);
				2326	btrfs_set_stack_block_group_chunk_objectid(&bgi,
David Sterba	3d97638	2019-10-23 18:48:15 +0200	[diff] [blame]	2327	BTRFS_FIRST_CHUNK_TREE_OBJECTID);
David Sterba	de0dc45	2019-10-23 18:48:18 +0200	[diff] [blame]	2328	btrfs_set_stack_block_group_flags(&bgi, cache->flags);
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	2329	write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2330	btrfs_mark_buffer_dirty(leaf);
				2331	fail:
				2332	btrfs_release_path(path);
				2333	return ret;
				2334
				2335	}
				2336
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2337	static int cache_save_setup(struct btrfs_block_group *block_group,
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2338	struct btrfs_trans_handle *trans,
				2339	struct btrfs_path *path)
				2340	{
				2341	struct btrfs_fs_info *fs_info = block_group->fs_info;
				2342	struct btrfs_root *root = fs_info->tree_root;
				2343	struct inode *inode = NULL;
				2344	struct extent_changeset *data_reserved = NULL;
				2345	u64 alloc_hint = 0;
				2346	int dcs = BTRFS_DC_ERROR;
				2347	u64 num_pages = 0;
				2348	int retries = 0;
				2349	int ret = 0;
				2350
Boris Burkov	af456a2	2020-11-18 15:06:26 -0800	[diff] [blame]	2351	if (!btrfs_test_opt(fs_info, SPACE_CACHE))
				2352	return 0;
				2353
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2354	/*
				2355	* If this block group is smaller than 100 megs don't bother caching the
				2356	* block group.
				2357	*/
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	2358	if (block_group->length < (100 * SZ_1M)) {
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2359	spin_lock(&block_group->lock);
				2360	block_group->disk_cache_state = BTRFS_DC_WRITTEN;
				2361	spin_unlock(&block_group->lock);
				2362	return 0;
				2363	}
				2364
David Sterba	bf31f87	2020-02-05 17:34:34 +0100	[diff] [blame]	2365	if (TRANS_ABORTED(trans))
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2366	return 0;
				2367	again:
				2368	inode = lookup_free_space_inode(block_group, path);
				2369	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
				2370	ret = PTR_ERR(inode);
				2371	btrfs_release_path(path);
				2372	goto out;
				2373	}
				2374
				2375	if (IS_ERR(inode)) {
				2376	BUG_ON(retries);
				2377	retries++;
				2378
				2379	if (block_group->ro)
				2380	goto out_free;
				2381
				2382	ret = create_free_space_inode(trans, block_group, path);
				2383	if (ret)
				2384	goto out_free;
				2385	goto again;
				2386	}
				2387
				2388	/*
				2389	* We want to set the generation to 0, that way if anything goes wrong
				2390	* from here on out we know not to trust this cache when we load up next
				2391	* time.
				2392	*/
				2393	BTRFS_I(inode)->generation = 0;
Nikolay Borisov	9a56fcd	2020-11-02 16:48:59 +0200	[diff] [blame]	2394	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2395	if (ret) {
				2396	/*
				2397	* So theoretically we could recover from this, simply set the
				2398	* super cache generation to 0 so we know to invalidate the
				2399	* cache, but then we'd have to keep track of the block groups
				2400	* that fail this way so we know we _have_ to reset this cache
				2401	* before the next commit or risk reading stale cache. So to
				2402	* limit our exposure to horrible edge cases lets just abort the
				2403	* transaction, this only happens in really bad situations
				2404	* anyway.
				2405	*/
				2406	btrfs_abort_transaction(trans, ret);
				2407	goto out_put;
				2408	}
				2409	WARN_ON(ret);
				2410
				2411	/* We've already setup this transaction, go ahead and exit */
				2412	if (block_group->cache_generation == trans->transid &&
				2413	i_size_read(inode)) {
				2414	dcs = BTRFS_DC_SETUP;
				2415	goto out_put;
				2416	}
				2417
				2418	if (i_size_read(inode) > 0) {
				2419	ret = btrfs_check_trunc_cache_free_space(fs_info,
				2420	&fs_info->global_block_rsv);
				2421	if (ret)
				2422	goto out_put;
				2423
				2424	ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
				2425	if (ret)
				2426	goto out_put;
				2427	}
				2428
				2429	spin_lock(&block_group->lock);
				2430	if (block_group->cached != BTRFS_CACHE_FINISHED \|\|
				2431	!btrfs_test_opt(fs_info, SPACE_CACHE)) {
				2432	/*
				2433	* don't bother trying to write stuff out _if_
				2434	* a) we're not cached,
				2435	* b) we're with nospace_cache mount option,
				2436	* c) we're with v2 space_cache (FREE_SPACE_TREE).
				2437	*/
				2438	dcs = BTRFS_DC_WRITTEN;
				2439	spin_unlock(&block_group->lock);
				2440	goto out_put;
				2441	}
				2442	spin_unlock(&block_group->lock);
				2443
				2444	/*
				2445	* We hit an ENOSPC when setting up the cache in this transaction, just
				2446	* skip doing the setup, we've already cleared the cache so we're safe.
				2447	*/
				2448	if (test_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags)) {
				2449	ret = -ENOSPC;
				2450	goto out_put;
				2451	}
				2452
				2453	/*
				2454	* Try to preallocate enough space based on how big the block group is.
				2455	* Keep in mind this has to include any pinned space which could end up
				2456	* taking up quite a bit since it's not folded into the other space
				2457	* cache.
				2458	*/
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	2459	num_pages = div_u64(block_group->length, SZ_256M);
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2460	if (!num_pages)
				2461	num_pages = 1;
				2462
				2463	num_pages *= 16;
				2464	num_pages *= PAGE_SIZE;
				2465
Nikolay Borisov	36ea6f3	2020-06-03 08:55:41 +0300	[diff] [blame]	2466	ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
				2467	num_pages);
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2468	if (ret)
				2469	goto out_put;
				2470
				2471	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
				2472	num_pages, num_pages,
				2473	&alloc_hint);
				2474	/*
				2475	* Our cache requires contiguous chunks so that we don't modify a bunch
				2476	* of metadata or split extents when writing the cache out, which means
				2477	* we can enospc if we are heavily fragmented in addition to just normal
				2478	* out of space conditions. So if we hit this just skip setting up any
				2479	* other block groups for this transaction, maybe we'll unpin enough
				2480	* space the next time around.
				2481	*/
				2482	if (!ret)
				2483	dcs = BTRFS_DC_SETUP;
				2484	else if (ret == -ENOSPC)
				2485	set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
				2486
				2487	out_put:
				2488	iput(inode);
				2489	out_free:
				2490	btrfs_release_path(path);
				2491	out:
				2492	spin_lock(&block_group->lock);
				2493	if (!ret && dcs == BTRFS_DC_SETUP)
				2494	block_group->cache_generation = trans->transid;
				2495	block_group->disk_cache_state = dcs;
				2496	spin_unlock(&block_group->lock);
				2497
				2498	extent_changeset_free(data_reserved);
				2499	return ret;
				2500	}
				2501
				2502	int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
				2503	{
				2504	struct btrfs_fs_info *fs_info = trans->fs_info;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2505	struct btrfs_block_group cache, tmp;
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2506	struct btrfs_transaction *cur_trans = trans->transaction;
				2507	struct btrfs_path *path;
				2508
				2509	if (list_empty(&cur_trans->dirty_bgs) \|\|
				2510	!btrfs_test_opt(fs_info, SPACE_CACHE))
				2511	return 0;
				2512
				2513	path = btrfs_alloc_path();
				2514	if (!path)
				2515	return -ENOMEM;
				2516
				2517	/* Could add new block groups, use _safe just in case */
				2518	list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
				2519	dirty_list) {
				2520	if (cache->disk_cache_state == BTRFS_DC_CLEAR)
				2521	cache_save_setup(cache, trans, path);
				2522	}
				2523
				2524	btrfs_free_path(path);
				2525	return 0;
				2526	}
				2527
				2528	/*
				2529	* Transaction commit does final block group cache writeback during a critical
				2530	* section where nothing is allowed to change the FS. This is required in
				2531	* order for the cache to actually match the block group, but can introduce a
				2532	* lot of latency into the commit.
				2533	*
				2534	* So, btrfs_start_dirty_block_groups is here to kick off block group cache IO.
				2535	* There's a chance we'll have to redo some of it if the block group changes
				2536	* again during the commit, but it greatly reduces the commit latency by
				2537	* getting rid of the easy block groups while we're still allowing others to
				2538	* join the commit.
				2539	*/
				2540	int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
				2541	{
				2542	struct btrfs_fs_info *fs_info = trans->fs_info;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2543	struct btrfs_block_group *cache;
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2544	struct btrfs_transaction *cur_trans = trans->transaction;
				2545	int ret = 0;
				2546	int should_put;
				2547	struct btrfs_path *path = NULL;
				2548	LIST_HEAD(dirty);
				2549	struct list_head *io = &cur_trans->io_bgs;
				2550	int num_started = 0;
				2551	int loops = 0;
				2552
				2553	spin_lock(&cur_trans->dirty_bgs_lock);
				2554	if (list_empty(&cur_trans->dirty_bgs)) {
				2555	spin_unlock(&cur_trans->dirty_bgs_lock);
				2556	return 0;
				2557	}
				2558	list_splice_init(&cur_trans->dirty_bgs, &dirty);
				2559	spin_unlock(&cur_trans->dirty_bgs_lock);
				2560
				2561	again:
				2562	/* Make sure all the block groups on our dirty list actually exist */
				2563	btrfs_create_pending_block_groups(trans);
				2564
				2565	if (!path) {
				2566	path = btrfs_alloc_path();
				2567	if (!path)
				2568	return -ENOMEM;
				2569	}
				2570
				2571	/*
				2572	* cache_write_mutex is here only to save us from balance or automatic
				2573	* removal of empty block groups deleting this block group while we are
				2574	* writing out the cache
				2575	*/
				2576	mutex_lock(&trans->transaction->cache_write_mutex);
				2577	while (!list_empty(&dirty)) {
				2578	bool drop_reserve = true;
				2579
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2580	cache = list_first_entry(&dirty, struct btrfs_block_group,
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2581	dirty_list);
				2582	/*
				2583	* This can happen if something re-dirties a block group that
				2584	* is already under IO. Just wait for it to finish and then do
				2585	* it all again
				2586	*/
				2587	if (!list_empty(&cache->io_list)) {
				2588	list_del_init(&cache->io_list);
				2589	btrfs_wait_cache_io(trans, cache, path);
				2590	btrfs_put_block_group(cache);
				2591	}
				2592
				2593
				2594	/*
				2595	* btrfs_wait_cache_io uses the cache->dirty_list to decide if
				2596	* it should update the cache_state. Don't delete until after
				2597	* we wait.
				2598	*
				2599	* Since we're not running in the commit critical section
				2600	* we need the dirty_bgs_lock to protect from update_block_group
				2601	*/
				2602	spin_lock(&cur_trans->dirty_bgs_lock);
				2603	list_del_init(&cache->dirty_list);
				2604	spin_unlock(&cur_trans->dirty_bgs_lock);
				2605
				2606	should_put = 1;
				2607
				2608	cache_save_setup(cache, trans, path);
				2609
				2610	if (cache->disk_cache_state == BTRFS_DC_SETUP) {
				2611	cache->io_ctl.inode = NULL;
				2612	ret = btrfs_write_out_cache(trans, cache, path);
				2613	if (ret == 0 && cache->io_ctl.inode) {
				2614	num_started++;
				2615	should_put = 0;
				2616
				2617	/*
				2618	* The cache_write_mutex is protecting the
				2619	* io_list, also refer to the definition of
				2620	* btrfs_transaction::io_bgs for more details
				2621	*/
				2622	list_add_tail(&cache->io_list, io);
				2623	} else {
				2624	/*
				2625	* If we failed to write the cache, the
				2626	* generation will be bad and life goes on
				2627	*/
				2628	ret = 0;
				2629	}
				2630	}
				2631	if (!ret) {
Qu Wenruo	3be4d8e	2020-05-05 07:58:23 +0800	[diff] [blame]	2632	ret = update_block_group_item(trans, path, cache);
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2633	/*
				2634	* Our block group might still be attached to the list
				2635	* of new block groups in the transaction handle of some
				2636	* other task (struct btrfs_trans_handle->new_bgs). This
				2637	* means its block group item isn't yet in the extent
				2638	* tree. If this happens ignore the error, as we will
				2639	* try again later in the critical section of the
				2640	* transaction commit.
				2641	*/
				2642	if (ret == -ENOENT) {
				2643	ret = 0;
				2644	spin_lock(&cur_trans->dirty_bgs_lock);
				2645	if (list_empty(&cache->dirty_list)) {
				2646	list_add_tail(&cache->dirty_list,
				2647	&cur_trans->dirty_bgs);
				2648	btrfs_get_block_group(cache);
				2649	drop_reserve = false;
				2650	}
				2651	spin_unlock(&cur_trans->dirty_bgs_lock);
				2652	} else if (ret) {
				2653	btrfs_abort_transaction(trans, ret);
				2654	}
				2655	}
				2656
				2657	/* If it's not on the io list, we need to put the block group */
				2658	if (should_put)
				2659	btrfs_put_block_group(cache);
				2660	if (drop_reserve)
				2661	btrfs_delayed_refs_rsv_release(fs_info, 1);
				2662
				2663	if (ret)
				2664	break;
				2665
				2666	/*
				2667	* Avoid blocking other tasks for too long. It might even save
				2668	* us from writing caches for block groups that are going to be
				2669	* removed.
				2670	*/
				2671	mutex_unlock(&trans->transaction->cache_write_mutex);
				2672	mutex_lock(&trans->transaction->cache_write_mutex);
				2673	}
				2674	mutex_unlock(&trans->transaction->cache_write_mutex);
				2675
				2676	/*
				2677	* Go through delayed refs for all the stuff we've just kicked off
				2678	* and then loop back (just once)
				2679	*/
Josef Bacik	34d1eb0	2020-12-16 11:22:17 -0500	[diff] [blame]	2680	if (!ret)
				2681	ret = btrfs_run_delayed_refs(trans, 0);
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2682	if (!ret && loops == 0) {
				2683	loops++;
				2684	spin_lock(&cur_trans->dirty_bgs_lock);
				2685	list_splice_init(&cur_trans->dirty_bgs, &dirty);
				2686	/*
				2687	* dirty_bgs_lock protects us from concurrent block group
				2688	* deletes too (not just cache_write_mutex).
				2689	*/
				2690	if (!list_empty(&dirty)) {
				2691	spin_unlock(&cur_trans->dirty_bgs_lock);
				2692	goto again;
				2693	}
				2694	spin_unlock(&cur_trans->dirty_bgs_lock);
				2695	} else if (ret < 0) {
				2696	btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
				2697	}
				2698
				2699	btrfs_free_path(path);
				2700	return ret;
				2701	}
				2702
				2703	int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
				2704	{
				2705	struct btrfs_fs_info *fs_info = trans->fs_info;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2706	struct btrfs_block_group *cache;
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2707	struct btrfs_transaction *cur_trans = trans->transaction;
				2708	int ret = 0;
				2709	int should_put;
				2710	struct btrfs_path *path;
				2711	struct list_head *io = &cur_trans->io_bgs;
				2712	int num_started = 0;
				2713
				2714	path = btrfs_alloc_path();
				2715	if (!path)
				2716	return -ENOMEM;
				2717
				2718	/*
				2719	* Even though we are in the critical section of the transaction commit,
				2720	* we can still have concurrent tasks adding elements to this
				2721	* transaction's list of dirty block groups. These tasks correspond to
				2722	* endio free space workers started when writeback finishes for a
				2723	* space cache, which run inode.c:btrfs_finish_ordered_io(), and can
				2724	* allocate new block groups as a result of COWing nodes of the root
				2725	* tree when updating the free space inode. The writeback for the space
				2726	* caches is triggered by an earlier call to
				2727	* btrfs_start_dirty_block_groups() and iterations of the following
				2728	* loop.
				2729	* Also we want to do the cache_save_setup first and then run the
				2730	* delayed refs to make sure we have the best chance at doing this all
				2731	* in one shot.
				2732	*/
				2733	spin_lock(&cur_trans->dirty_bgs_lock);
				2734	while (!list_empty(&cur_trans->dirty_bgs)) {
				2735	cache = list_first_entry(&cur_trans->dirty_bgs,
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2736	struct btrfs_block_group,
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2737	dirty_list);
				2738
				2739	/*
				2740	* This can happen if cache_save_setup re-dirties a block group
				2741	* that is already under IO. Just wait for it to finish and
				2742	* then do it all again
				2743	*/
				2744	if (!list_empty(&cache->io_list)) {
				2745	spin_unlock(&cur_trans->dirty_bgs_lock);
				2746	list_del_init(&cache->io_list);
				2747	btrfs_wait_cache_io(trans, cache, path);
				2748	btrfs_put_block_group(cache);
				2749	spin_lock(&cur_trans->dirty_bgs_lock);
				2750	}
				2751
				2752	/*
				2753	* Don't remove from the dirty list until after we've waited on
				2754	* any pending IO
				2755	*/
				2756	list_del_init(&cache->dirty_list);
				2757	spin_unlock(&cur_trans->dirty_bgs_lock);
				2758	should_put = 1;
				2759
				2760	cache_save_setup(cache, trans, path);
				2761
				2762	if (!ret)
				2763	ret = btrfs_run_delayed_refs(trans,
				2764	(unsigned long) -1);
				2765
				2766	if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
				2767	cache->io_ctl.inode = NULL;
				2768	ret = btrfs_write_out_cache(trans, cache, path);
				2769	if (ret == 0 && cache->io_ctl.inode) {
				2770	num_started++;
				2771	should_put = 0;
				2772	list_add_tail(&cache->io_list, io);
				2773	} else {
				2774	/*
				2775	* If we failed to write the cache, the
				2776	* generation will be bad and life goes on
				2777	*/
				2778	ret = 0;
				2779	}
				2780	}
				2781	if (!ret) {
Qu Wenruo	3be4d8e	2020-05-05 07:58:23 +0800	[diff] [blame]	2782	ret = update_block_group_item(trans, path, cache);
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2783	/*
				2784	* One of the free space endio workers might have
				2785	* created a new block group while updating a free space
				2786	* cache's inode (at inode.c:btrfs_finish_ordered_io())
				2787	* and hasn't released its transaction handle yet, in
				2788	* which case the new block group is still attached to
				2789	* its transaction handle and its creation has not
				2790	* finished yet (no block group item in the extent tree
				2791	* yet, etc). If this is the case, wait for all free
				2792	* space endio workers to finish and retry. This is a
Randy Dunlap	260db43	2020-08-04 19:48:34 -0700	[diff] [blame]	2793	* very rare case so no need for a more efficient and
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2794	* complex approach.
				2795	*/
				2796	if (ret == -ENOENT) {
				2797	wait_event(cur_trans->writer_wait,
				2798	atomic_read(&cur_trans->num_writers) == 1);
Qu Wenruo	3be4d8e	2020-05-05 07:58:23 +0800	[diff] [blame]	2799	ret = update_block_group_item(trans, path, cache);
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2800	}
				2801	if (ret)
				2802	btrfs_abort_transaction(trans, ret);
				2803	}
				2804
				2805	/* If it's not on the io list, we need to put the block group */
				2806	if (should_put)
				2807	btrfs_put_block_group(cache);
				2808	btrfs_delayed_refs_rsv_release(fs_info, 1);
				2809	spin_lock(&cur_trans->dirty_bgs_lock);
				2810	}
				2811	spin_unlock(&cur_trans->dirty_bgs_lock);
				2812
				2813	/*
				2814	* Refer to the definition of io_bgs member for details why it's safe
				2815	* to use it without any locking
				2816	*/
				2817	while (!list_empty(io)) {
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2818	cache = list_first_entry(io, struct btrfs_block_group,
Josef Bacik	77745c0	2019-06-20 15:38:00 -0400	[diff] [blame]	2819	io_list);
				2820	list_del_init(&cache->io_list);
				2821	btrfs_wait_cache_io(trans, cache, path);
				2822	btrfs_put_block_group(cache);
				2823	}
				2824
				2825	btrfs_free_path(path);
				2826	return ret;
				2827	}
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2828
				2829	int btrfs_update_block_group(struct btrfs_trans_handle *trans,
				2830	u64 bytenr, u64 num_bytes, int alloc)
				2831	{
				2832	struct btrfs_fs_info *info = trans->fs_info;
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2833	struct btrfs_block_group *cache = NULL;
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2834	u64 total = num_bytes;
				2835	u64 old_val;
				2836	u64 byte_in_group;
				2837	int factor;
				2838	int ret = 0;
				2839
				2840	/* Block accounting for super block */
				2841	spin_lock(&info->delalloc_root_lock);
				2842	old_val = btrfs_super_bytes_used(info->super_copy);
				2843	if (alloc)
				2844	old_val += num_bytes;
				2845	else
				2846	old_val -= num_bytes;
				2847	btrfs_set_super_bytes_used(info->super_copy, old_val);
				2848	spin_unlock(&info->delalloc_root_lock);
				2849
				2850	while (total) {
				2851	cache = btrfs_lookup_block_group(info, bytenr);
				2852	if (!cache) {
				2853	ret = -ENOENT;
				2854	break;
				2855	}
				2856	factor = btrfs_bg_type_to_factor(cache->flags);
				2857
				2858	/*
				2859	* If this block group has free space cache written out, we
				2860	* need to make sure to load it if we are removing space. This
				2861	* is because we need the unpinning stage to actually add the
				2862	* space back to the block group, otherwise we will leak space.
				2863	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2864	if (!alloc && !btrfs_block_group_done(cache))
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2865	btrfs_cache_block_group(cache, 1);
				2866
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	2867	byte_in_group = bytenr - cache->start;
				2868	WARN_ON(byte_in_group > cache->length);
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2869
				2870	spin_lock(&cache->space_info->lock);
				2871	spin_lock(&cache->lock);
				2872
				2873	if (btrfs_test_opt(info, SPACE_CACHE) &&
				2874	cache->disk_cache_state < BTRFS_DC_CLEAR)
				2875	cache->disk_cache_state = BTRFS_DC_CLEAR;
				2876
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	2877	old_val = cache->used;
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	2878	num_bytes = min(total, cache->length - byte_in_group);
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2879	if (alloc) {
				2880	old_val += num_bytes;
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	2881	cache->used = old_val;
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2882	cache->reserved -= num_bytes;
				2883	cache->space_info->bytes_reserved -= num_bytes;
				2884	cache->space_info->bytes_used += num_bytes;
				2885	cache->space_info->disk_used += num_bytes * factor;
				2886	spin_unlock(&cache->lock);
				2887	spin_unlock(&cache->space_info->lock);
				2888	} else {
				2889	old_val -= num_bytes;
David Sterba	bf38be6	2019-10-23 18:48:11 +0200	[diff] [blame]	2890	cache->used = old_val;
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2891	cache->pinned += num_bytes;
				2892	btrfs_space_info_update_bytes_pinned(info,
				2893	cache->space_info, num_bytes);
				2894	cache->space_info->bytes_used -= num_bytes;
				2895	cache->space_info->disk_used -= num_bytes * factor;
				2896	spin_unlock(&cache->lock);
				2897	spin_unlock(&cache->space_info->lock);
				2898
Josef Bacik	2187374	2021-01-15 16:48:55 -0500	[diff] [blame^]	2899	__btrfs_mod_total_bytes_pinned(cache->space_info,
				2900	num_bytes);
Nikolay Borisov	fe119a6	2020-01-20 16:09:18 +0200	[diff] [blame]	2901	set_extent_dirty(&trans->transaction->pinned_extents,
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2902	bytenr, bytenr + num_bytes - 1,
				2903	GFP_NOFS \| __GFP_NOFAIL);
				2904	}
				2905
				2906	spin_lock(&trans->transaction->dirty_bgs_lock);
				2907	if (list_empty(&cache->dirty_list)) {
				2908	list_add_tail(&cache->dirty_list,
				2909	&trans->transaction->dirty_bgs);
				2910	trans->delayed_ref_updates++;
				2911	btrfs_get_block_group(cache);
				2912	}
				2913	spin_unlock(&trans->transaction->dirty_bgs_lock);
				2914
				2915	/*
				2916	* No longer have used bytes in this block group, queue it for
				2917	* deletion. We do this after adding the block group to the
				2918	* dirty list to avoid races between cleaner kthread and space
				2919	* cache writeout.
				2920	*/
Dennis Zhou	6e80d4f	2019-12-13 16:22:15 -0800	[diff] [blame]	2921	if (!alloc && old_val == 0) {
				2922	if (!btrfs_test_opt(info, DISCARD_ASYNC))
				2923	btrfs_mark_bg_unused(cache);
				2924	}
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2925
				2926	btrfs_put_block_group(cache);
				2927	total -= num_bytes;
				2928	bytenr += num_bytes;
				2929	}
				2930
				2931	/* Modified block groups are accounted for in the delayed_refs_rsv. */
				2932	btrfs_update_delayed_refs_rsv(trans);
				2933	return ret;
				2934	}
				2935
				2936	/**
				2937	* btrfs_add_reserved_bytes - update the block_group and space info counters
				2938	* @cache: The cache we are manipulating
				2939	* @ram_bytes: The number of bytes of file content, and will be same to
				2940	* @num_bytes except for the compress path.
				2941	* @num_bytes: The number of bytes in question
				2942	* @delalloc: The blocks are allocated for the delalloc write
				2943	*
				2944	* This is called by the allocator when it reserves space. If this is a
				2945	* reservation and the block group has become read only we cannot make the
				2946	* reservation and return -EAGAIN, otherwise this function always succeeds.
				2947	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2948	int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2949	u64 ram_bytes, u64 num_bytes, int delalloc)
				2950	{
				2951	struct btrfs_space_info *space_info = cache->space_info;
				2952	int ret = 0;
				2953
				2954	spin_lock(&space_info->lock);
				2955	spin_lock(&cache->lock);
				2956	if (cache->ro) {
				2957	ret = -EAGAIN;
				2958	} else {
				2959	cache->reserved += num_bytes;
				2960	space_info->bytes_reserved += num_bytes;
Josef Bacik	a43c383	2019-08-22 15:10:56 -0400	[diff] [blame]	2961	trace_btrfs_space_reservation(cache->fs_info, "space_info",
				2962	space_info->flags, num_bytes, 1);
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2963	btrfs_space_info_update_bytes_may_use(cache->fs_info,
				2964	space_info, -ram_bytes);
				2965	if (delalloc)
				2966	cache->delalloc_bytes += num_bytes;
Josef Bacik	99ffb43	2020-07-21 10:22:19 -0400	[diff] [blame]	2967
				2968	/*
				2969	* Compression can use less space than we reserved, so wake
				2970	* tickets if that happens
				2971	*/
				2972	if (num_bytes < ram_bytes)
				2973	btrfs_try_granting_tickets(cache->fs_info, space_info);
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2974	}
				2975	spin_unlock(&cache->lock);
				2976	spin_unlock(&space_info->lock);
				2977	return ret;
				2978	}
				2979
				2980	/**
				2981	* btrfs_free_reserved_bytes - update the block_group and space info counters
				2982	* @cache: The cache we are manipulating
				2983	* @num_bytes: The number of bytes in question
				2984	* @delalloc: The blocks are allocated for the delalloc write
				2985	*
				2986	* This is called by somebody who is freeing space that was never actually used
				2987	* on disk. For example if you reserve some space for a new leaf in transaction
				2988	* A and before transaction A commits you free that leaf, you call this with
				2989	* reserve set to 0 in order to clear the reservation.
				2990	*/
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	2991	void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	2992	u64 num_bytes, int delalloc)
				2993	{
				2994	struct btrfs_space_info *space_info = cache->space_info;
				2995
				2996	spin_lock(&space_info->lock);
				2997	spin_lock(&cache->lock);
				2998	if (cache->ro)
				2999	space_info->bytes_readonly += num_bytes;
				3000	cache->reserved -= num_bytes;
				3001	space_info->bytes_reserved -= num_bytes;
				3002	space_info->max_extent_size = 0;
				3003
				3004	if (delalloc)
				3005	cache->delalloc_bytes -= num_bytes;
				3006	spin_unlock(&cache->lock);
Josef Bacik	3308234	2020-07-21 10:22:17 -0400	[diff] [blame]	3007
				3008	btrfs_try_granting_tickets(cache->fs_info, space_info);
Josef Bacik	606d1bf	2019-06-20 15:38:02 -0400	[diff] [blame]	3009	spin_unlock(&space_info->lock);
				3010	}
Josef Bacik	07730d8	2019-06-20 15:38:04 -0400	[diff] [blame]	3011
				3012	static void force_metadata_allocation(struct btrfs_fs_info *info)
				3013	{
				3014	struct list_head *head = &info->space_info;
				3015	struct btrfs_space_info *found;
				3016
Josef Bacik	7280490	2020-09-01 17:40:37 -0400	[diff] [blame]	3017	list_for_each_entry(found, head, list) {
Josef Bacik	07730d8	2019-06-20 15:38:04 -0400	[diff] [blame]	3018	if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
				3019	found->force_alloc = CHUNK_ALLOC_FORCE;
				3020	}
Josef Bacik	07730d8	2019-06-20 15:38:04 -0400	[diff] [blame]	3021	}
				3022
				3023	static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
				3024	struct btrfs_space_info *sinfo, int force)
				3025	{
				3026	u64 bytes_used = btrfs_space_info_used(sinfo, false);
				3027	u64 thresh;
				3028
				3029	if (force == CHUNK_ALLOC_FORCE)
				3030	return 1;
				3031
				3032	/*
				3033	* in limited mode, we want to have some free space up to
				3034	* about 1% of the FS size.
				3035	*/
				3036	if (force == CHUNK_ALLOC_LIMITED) {
				3037	thresh = btrfs_super_total_bytes(fs_info->super_copy);
				3038	thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
				3039
				3040	if (sinfo->total_bytes - bytes_used < thresh)
				3041	return 1;
				3042	}
				3043
				3044	if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
				3045	return 0;
				3046	return 1;
				3047	}
				3048
				3049	int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
				3050	{
				3051	u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
				3052
				3053	return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
				3054	}
				3055
				3056	/*
				3057	* If force is CHUNK_ALLOC_FORCE:
				3058	* - return 1 if it successfully allocates a chunk,
				3059	* - return errors including -ENOSPC otherwise.
				3060	* If force is NOT CHUNK_ALLOC_FORCE:
				3061	* - return 0 if it doesn't need to allocate a new chunk,
				3062	* - return 1 if it successfully allocates a chunk,
				3063	* - return errors including -ENOSPC otherwise.
				3064	*/
				3065	int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
				3066	enum btrfs_chunk_alloc_enum force)
				3067	{
				3068	struct btrfs_fs_info *fs_info = trans->fs_info;
				3069	struct btrfs_space_info *space_info;
				3070	bool wait_for_alloc = false;
				3071	bool should_alloc = false;
				3072	int ret = 0;
				3073
				3074	/* Don't re-enter if we're already allocating a chunk */
				3075	if (trans->allocating_chunk)
				3076	return -ENOSPC;
				3077
				3078	space_info = btrfs_find_space_info(fs_info, flags);
				3079	ASSERT(space_info);
				3080
				3081	do {
				3082	spin_lock(&space_info->lock);
				3083	if (force < space_info->force_alloc)
				3084	force = space_info->force_alloc;
				3085	should_alloc = should_alloc_chunk(fs_info, space_info, force);
				3086	if (space_info->full) {
				3087	/* No more free physical space */
				3088	if (should_alloc)
				3089	ret = -ENOSPC;
				3090	else
				3091	ret = 0;
				3092	spin_unlock(&space_info->lock);
				3093	return ret;
				3094	} else if (!should_alloc) {
				3095	spin_unlock(&space_info->lock);
				3096	return 0;
				3097	} else if (space_info->chunk_alloc) {
				3098	/*
				3099	* Someone is already allocating, so we need to block
				3100	* until this someone is finished and then loop to
				3101	* recheck if we should continue with our allocation
				3102	* attempt.
				3103	*/
				3104	wait_for_alloc = true;
				3105	spin_unlock(&space_info->lock);
				3106	mutex_lock(&fs_info->chunk_mutex);
				3107	mutex_unlock(&fs_info->chunk_mutex);
				3108	} else {
				3109	/* Proceed with allocation */
				3110	space_info->chunk_alloc = 1;
				3111	wait_for_alloc = false;
				3112	spin_unlock(&space_info->lock);
				3113	}
				3114
				3115	cond_resched();
				3116	} while (wait_for_alloc);
				3117
				3118	mutex_lock(&fs_info->chunk_mutex);
				3119	trans->allocating_chunk = true;
				3120
				3121	/*
				3122	* If we have mixed data/metadata chunks we want to make sure we keep
				3123	* allocating mixed chunks instead of individual chunks.
				3124	*/
				3125	if (btrfs_mixed_space_info(space_info))
				3126	flags \|= (BTRFS_BLOCK_GROUP_DATA \| BTRFS_BLOCK_GROUP_METADATA);
				3127
				3128	/*
				3129	* if we're doing a data chunk, go ahead and make sure that
				3130	* we keep a reasonable number of metadata chunks allocated in the
				3131	* FS as well.
				3132	*/
				3133	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
				3134	fs_info->data_chunk_allocations++;
				3135	if (!(fs_info->data_chunk_allocations %
				3136	fs_info->metadata_ratio))
				3137	force_metadata_allocation(fs_info);
				3138	}
				3139
				3140	/*
				3141	* Check if we have enough space in SYSTEM chunk because we may need
				3142	* to update devices.
				3143	*/
				3144	check_system_chunk(trans, flags);
				3145
				3146	ret = btrfs_alloc_chunk(trans, flags);
				3147	trans->allocating_chunk = false;
				3148
				3149	spin_lock(&space_info->lock);
				3150	if (ret < 0) {
				3151	if (ret == -ENOSPC)
				3152	space_info->full = 1;
				3153	else
				3154	goto out;
				3155	} else {
				3156	ret = 1;
				3157	space_info->max_extent_size = 0;
				3158	}
				3159
				3160	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
				3161	out:
				3162	space_info->chunk_alloc = 0;
				3163	spin_unlock(&space_info->lock);
				3164	mutex_unlock(&fs_info->chunk_mutex);
				3165	/*
				3166	* When we allocate a new chunk we reserve space in the chunk block
				3167	* reserve to make sure we can COW nodes/leafs in the chunk tree or
				3168	* add new nodes/leafs to it if we end up needing to do it when
				3169	* inserting the chunk item and updating device items as part of the
				3170	* second phase of chunk allocation, performed by
				3171	* btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
				3172	* large number of new block groups to create in our transaction
				3173	* handle's new_bgs list to avoid exhausting the chunk block reserve
				3174	* in extreme cases - like having a single transaction create many new
				3175	* block groups when starting to write out the free space caches of all
				3176	* the block groups that were made dirty during the lifetime of the
				3177	* transaction.
				3178	*/
				3179	if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
				3180	btrfs_create_pending_block_groups(trans);
				3181
				3182	return ret;
				3183	}
				3184
				3185	static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
				3186	{
				3187	u64 num_dev;
				3188
				3189	num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
				3190	if (!num_dev)
				3191	num_dev = fs_info->fs_devices->rw_devices;
				3192
				3193	return num_dev;
				3194	}
				3195
				3196	/*
Marcos Paulo de Souza	a9143bd	2019-10-07 21:50:38 -0300	[diff] [blame]	3197	* Reserve space in the system space for allocating or removing a chunk
Josef Bacik	07730d8	2019-06-20 15:38:04 -0400	[diff] [blame]	3198	*/
				3199	void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
				3200	{
				3201	struct btrfs_fs_info *fs_info = trans->fs_info;
				3202	struct btrfs_space_info *info;
				3203	u64 left;
				3204	u64 thresh;
				3205	int ret = 0;
				3206	u64 num_devs;
				3207
				3208	/*
				3209	* Needed because we can end up allocating a system chunk and for an
				3210	* atomic and race free space reservation in the chunk block reserve.
				3211	*/
				3212	lockdep_assert_held(&fs_info->chunk_mutex);
				3213
				3214	info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
				3215	spin_lock(&info->lock);
				3216	left = info->total_bytes - btrfs_space_info_used(info, true);
				3217	spin_unlock(&info->lock);
				3218
				3219	num_devs = get_profile_num_devs(fs_info, type);
				3220
				3221	/* num_devs device items to update and 1 chunk item to add or remove */
Josef Bacik	2bd36e7	2019-08-22 15:14:33 -0400	[diff] [blame]	3222	thresh = btrfs_calc_metadata_size(fs_info, num_devs) +
				3223	btrfs_calc_insert_metadata_size(fs_info, 1);
Josef Bacik	07730d8	2019-06-20 15:38:04 -0400	[diff] [blame]	3224
				3225	if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
				3226	btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
				3227	left, thresh, type);
				3228	btrfs_dump_space_info(fs_info, info, 0, 0);
				3229	}
				3230
				3231	if (left < thresh) {
				3232	u64 flags = btrfs_system_alloc_profile(fs_info);
				3233
				3234	/*
				3235	* Ignore failure to create system chunk. We might end up not
				3236	* needing it, as we might not need to COW all nodes/leafs from
				3237	* the paths we visit in the chunk tree (they were already COWed
				3238	* or created in the current transaction for example).
				3239	*/
				3240	ret = btrfs_alloc_chunk(trans, flags);
				3241	}
				3242
				3243	if (!ret) {
				3244	ret = btrfs_block_rsv_add(fs_info->chunk_root,
				3245	&fs_info->chunk_block_rsv,
				3246	thresh, BTRFS_RESERVE_NO_FLUSH);
				3247	if (!ret)
				3248	trans->chunk_bytes_reserved += thresh;
				3249	}
				3250	}
				3251
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3252	void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
				3253	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	3254	struct btrfs_block_group *block_group;
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3255	u64 last = 0;
				3256
				3257	while (1) {
				3258	struct inode *inode;
				3259
				3260	block_group = btrfs_lookup_first_block_group(info, last);
				3261	while (block_group) {
				3262	btrfs_wait_block_group_cache_done(block_group);
				3263	spin_lock(&block_group->lock);
				3264	if (block_group->iref)
				3265	break;
				3266	spin_unlock(&block_group->lock);
				3267	block_group = btrfs_next_block_group(block_group);
				3268	}
				3269	if (!block_group) {
				3270	if (last == 0)
				3271	break;
				3272	last = 0;
				3273	continue;
				3274	}
				3275
				3276	inode = block_group->inode;
				3277	block_group->iref = 0;
				3278	block_group->inode = NULL;
				3279	spin_unlock(&block_group->lock);
				3280	ASSERT(block_group->io_ctl.inode == NULL);
				3281	iput(inode);
David Sterba	b3470b5	2019-10-23 18:48:22 +0200	[diff] [blame]	3282	last = block_group->start + block_group->length;
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3283	btrfs_put_block_group(block_group);
				3284	}
				3285	}
				3286
				3287	/*
				3288	* Must be called only after stopping all workers, since we could have block
				3289	* group caching kthreads running, and therefore they could race with us if we
				3290	* freed the block groups before stopping them.
				3291	*/
				3292	int btrfs_free_block_groups(struct btrfs_fs_info *info)
				3293	{
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	3294	struct btrfs_block_group *block_group;
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3295	struct btrfs_space_info *space_info;
				3296	struct btrfs_caching_control *caching_ctl;
				3297	struct rb_node *n;
				3298
Josef Bacik	bbb86a3	2020-10-23 09:58:11 -0400	[diff] [blame]	3299	spin_lock(&info->block_group_cache_lock);
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3300	while (!list_empty(&info->caching_block_groups)) {
				3301	caching_ctl = list_entry(info->caching_block_groups.next,
				3302	struct btrfs_caching_control, list);
				3303	list_del(&caching_ctl->list);
				3304	btrfs_put_caching_control(caching_ctl);
				3305	}
Josef Bacik	bbb86a3	2020-10-23 09:58:11 -0400	[diff] [blame]	3306	spin_unlock(&info->block_group_cache_lock);
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3307
				3308	spin_lock(&info->unused_bgs_lock);
				3309	while (!list_empty(&info->unused_bgs)) {
				3310	block_group = list_first_entry(&info->unused_bgs,
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	3311	struct btrfs_block_group,
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3312	bg_list);
				3313	list_del_init(&block_group->bg_list);
				3314	btrfs_put_block_group(block_group);
				3315	}
				3316	spin_unlock(&info->unused_bgs_lock);
				3317
				3318	spin_lock(&info->block_group_cache_lock);
				3319	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
David Sterba	32da5386	2019-10-29 19:20:18 +0100	[diff] [blame]	3320	block_group = rb_entry(n, struct btrfs_block_group,
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3321	cache_node);
				3322	rb_erase(&block_group->cache_node,
				3323	&info->block_group_cache_tree);
				3324	RB_CLEAR_NODE(&block_group->cache_node);
				3325	spin_unlock(&info->block_group_cache_lock);
				3326
				3327	down_write(&block_group->space_info->groups_sem);
				3328	list_del(&block_group->list);
				3329	up_write(&block_group->space_info->groups_sem);
				3330
				3331	/*
				3332	* We haven't cached this block group, which means we could
				3333	* possibly have excluded extents on this block group.
				3334	*/
				3335	if (block_group->cached == BTRFS_CACHE_NO \|\|
				3336	block_group->cached == BTRFS_CACHE_ERROR)
				3337	btrfs_free_excluded_extents(block_group);
				3338
				3339	btrfs_remove_free_space_cache(block_group);
				3340	ASSERT(block_group->cached != BTRFS_CACHE_STARTED);
				3341	ASSERT(list_empty(&block_group->dirty_list));
				3342	ASSERT(list_empty(&block_group->io_list));
				3343	ASSERT(list_empty(&block_group->bg_list));
Josef Bacik	48aaeeb	2020-07-06 09:14:11 -0400	[diff] [blame]	3344	ASSERT(refcount_read(&block_group->refs) == 1);
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3345	btrfs_put_block_group(block_group);
				3346
				3347	spin_lock(&info->block_group_cache_lock);
				3348	}
				3349	spin_unlock(&info->block_group_cache_lock);
				3350
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3351	btrfs_release_global_block_rsv(info);
				3352
				3353	while (!list_empty(&info->space_info)) {
				3354	space_info = list_entry(info->space_info.next,
				3355	struct btrfs_space_info,
				3356	list);
				3357
				3358	/*
				3359	* Do not hide this behind enospc_debug, this is actually
				3360	* important and indicates a real bug if this happens.
				3361	*/
				3362	if (WARN_ON(space_info->bytes_pinned > 0 \|\|
				3363	space_info->bytes_reserved > 0 \|\|
				3364	space_info->bytes_may_use > 0))
				3365	btrfs_dump_space_info(info, space_info, 0, 0);
Filipe Manana	d611add	2020-04-07 11:38:49 +0100	[diff] [blame]	3366	WARN_ON(space_info->reclaim_size > 0);
Josef Bacik	3e43c27	2019-06-20 15:38:06 -0400	[diff] [blame]	3367	list_del(&space_info->list);
				3368	btrfs_sysfs_remove_space_info(space_info);
				3369	}
				3370	return 0;
				3371	}
Filipe Manana	684b752	2020-05-08 11:01:59 +0100	[diff] [blame]	3372
				3373	void btrfs_freeze_block_group(struct btrfs_block_group *cache)
				3374	{
				3375	atomic_inc(&cache->frozen);
				3376	}
				3377
				3378	void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
				3379	{
				3380	struct btrfs_fs_info *fs_info = block_group->fs_info;
				3381	struct extent_map_tree *em_tree;
				3382	struct extent_map *em;
				3383	bool cleanup;
				3384
				3385	spin_lock(&block_group->lock);
				3386	cleanup = (atomic_dec_and_test(&block_group->frozen) &&
				3387	block_group->removed);
				3388	spin_unlock(&block_group->lock);
				3389
				3390	if (cleanup) {
Filipe Manana	684b752	2020-05-08 11:01:59 +0100	[diff] [blame]	3391	em_tree = &fs_info->mapping_tree;
				3392	write_lock(&em_tree->lock);
				3393	em = lookup_extent_mapping(em_tree, block_group->start,
				3394	1);
				3395	BUG_ON(!em); /* logic error, can't happen */
				3396	remove_extent_mapping(em_tree, em);
				3397	write_unlock(&em_tree->lock);
Filipe Manana	684b752	2020-05-08 11:01:59 +0100	[diff] [blame]	3398
				3399	/* once for us and once for the tree */
				3400	free_extent_map(em);
				3401	free_extent_map(em);
				3402
				3403	/*
				3404	* We may have left one free space entry and other possible
				3405	* tasks trimming this block group have left 1 entry each one.
				3406	* Free them if any.
				3407	*/
				3408	__btrfs_remove_free_space_cache(block_group->free_space_ctl);
				3409	}
				3410	}