// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
#include "misc.h"
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "volumes.h"
#include "space-info.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"

#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG	SZ_64K
#define FORCE_EXTENT_THRESHOLD	SZ_1M

struct btrfs_trim_range {
	u64 start;
	u64 bytes;
	struct list_head list;
};

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc);
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info);
static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes);

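/*
 * Look up the free space cache inode for a block group. The inode item
 * lives in the given root, keyed by BTRFS_FREE_SPACE_OBJECTID and the
 * block group's start offset.
 */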
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	unsigned nofs_flag;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	/*
	 * We are often under a trans handle at this point, so we need to make
	 * sure NOFS is set to keep us from deadlocking.
	 */
	nofs_flag = memalloc_nofs_save();
	inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
	btrfs_release_path(path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(inode))
		return inode;

	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_constraint(inode->i_mapping,
					~(__GFP_FS | __GFP_HIGHMEM)));

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
				      struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(fs_info->tree_root, path,
					  block_group->start);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		btrfs_info(fs_info, "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

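/*
 * Create the inode item and the free space header item for a new free
 * space cache file. The caller supplies the preallocated objectid and
 * the block group offset that is used as the header key.
 */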
static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	/* We inline CRCs for the free disk space cache */
	const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC |
			  BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memzero_extent_buffer(leaf, (unsigned long)inode_item,
			      sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_get_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

/*
 * inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
 * handles lookup, otherwise it takes ownership and iputs the inode.
 * Don't reuse an inode pointer after passing it into this function.
 */
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
				  struct inode *inode,
				  struct btrfs_block_group *block_group)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (!inode)
		inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		if (PTR_ERR(inode) != -ENOENT)
			ret = PTR_ERR(inode);
		goto out;
	}
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret) {
		btrfs_add_delayed_iput(inode);
		goto out;
	}
	clear_nlink(inode);
	/* One for the block groups ref */
	spin_lock(&block_group->lock);
	if (block_group->iref) {
		block_group->iref = 0;
		block_group->inode = NULL;
		spin_unlock(&block_group->lock);
		iput(inode);
	} else {
		spin_unlock(&block_group->lock);
	}
	/* One for the lookup ref */
	btrfs_add_delayed_iput(inode);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.type = 0;
	key.offset = block_group->start;
	ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
				-1, 1);
	if (ret) {
		if (ret > 0)
			ret = 0;
		goto out;
	}
	ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
out:
	btrfs_free_path(path);
	return ret;
}

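/*
 * Check whether the given block reserve holds enough space to truncate
 * the free space cache inode: one metadata insertion worth of slack plus
 * one inode update. Returns -ENOSPC if it does not.
 */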
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
		       btrfs_calc_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

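/*
 * Truncate the free space cache inode to zero. Any in-flight cache write
 * for the block group is waited on first and the block group's cache
 * state is reset to BTRFS_DC_CLEAR.
 */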
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
	 */
	ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
					 0, BTRFS_EXTENT_DATA_KEY, NULL);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

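/*
 * Kick off readahead across the whole free space cache file so the
 * following page reads mostly hit the page cache.
 */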
static void readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);
}

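/*
 * Set up an io_ctl for reading or writing the cache file: size the page
 * array from the inode size and, when writing, make sure the inlined
 * crcs plus the generation fit in the first page.
 */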
static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	/* Make sure we can fit our crcs and generation into the first page */
	if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = btrfs_sb(inode->i_sb);
	io_ctl->inode = inode;

	return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			put_page(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
{
	struct page *page;
	struct inode *inode = io_ctl->inode;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		int ret;

		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}

		ret = set_page_extent_mapped(page);
		if (ret < 0) {
			unlock_page(page);
			put_page(page);
			io_ctl_drop_pages(io_ctl);
			return ret;
		}

		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (page->mapping != inode->i_mapping) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "free space cache page truncated");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
			if (!PageUptodate(page)) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "error reading free space cache");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++)
		clear_page_dirty_for_io(io_ctl->pages[i]);

	return 0;
}

static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas. If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);

	put_unaligned_le64(generation, io_ctl->cur);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	u64 cache_gen;

	/*
	 * Skip the crc area. If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
	io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);

	cache_gen = get_unaligned_le64(io_ctl->cur);
	if (cache_gen != generation) {
		btrfs_err_rl(io_ctl->fs_info,
			"space cache generation (%llu) does not match inode (%llu)",
			cache_gen, generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
}

static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	if (val != crc) {
		btrfs_err_rl(io_ctl->fs_info,
			"csum mismatch on free space cache");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	put_unaligned_le64(offset, &entry->offset);
	put_unaligned_le64(bytes, &entry->bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	copy_page(io_ctl->cur, bitmap);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = get_unaligned_le64(&e->offset);
	entry->bytes = get_unaligned_le64(&e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	copy_page(entry->bitmap, io_ctl->cur);
	io_ctl_unmap_page(io_ctl);

	return 0;
}

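/*
 * Recompute ctl->extents_thresh from the block group size so that the
 * memory used by extent entries plus bitmaps stays around
 * MAX_CACHE_BYTES_PER_GIG per 1GiB of space.
 */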
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->length;
	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	max_bitmaps = max_t(u64, max_bitmaps, 1);

	ASSERT(ctl->total_bitmaps <= max_bitmaps);

	/*
	 * We are trying to keep the total amount of memory used per 1GiB of
	 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
	 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
	 * bitmaps, we may end up using more memory than this.
	 */
	if (size < SZ_1G)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);

	bitmap_bytes = ctl->total_bitmaps * ctl->unit;

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);

	ctl->extents_thresh =
		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
}

static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_path *path, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct btrfs_io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	LIST_HEAD(bitmaps);
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (!BTRFS_I(inode)->generation) {
		btrfs_info(fs_info,
			   "the free space cache file (%llu) is invalid, skip it",
			   offset);
		return 0;
	}

	if (BTRFS_I(inode)->generation != generation) {
		btrfs_err(fs_info,
			  "free space inode generation (%llu) did not match free space cache generation (%llu)",
			  BTRFS_I(inode)->generation, generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, 0);
	if (ret)
		return ret;

	readahead_cache(inode);

	ret = io_ctl_prepare_pages(&io_ctl, true);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e) {
			ret = -ENOMEM;
			goto free_cache;
		}

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (!e->bytes) {
			ret = -1;
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			ASSERT(num_bitmaps);
			num_bitmaps--;
			e->bitmap = kmem_cache_zalloc(
					btrfs_free_space_bitmap_cachep, GFP_NOFS);
			if (!e->bitmap) {
				ret = -ENOMEM;
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			recalculate_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * We add the bitmaps at the end of the entries in order that
	 * the bitmap entries are added to the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

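/*
 * Move every entry from a temporary free space ctl into the block
 * group's free space ctl via btrfs_add_free_space(), consuming the
 * temporary entries as we go.
 */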
static int copy_free_space_cache(struct btrfs_block_group *block_group,
				 struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int ret = 0;

	while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			ret = btrfs_add_free_space(block_group, info->offset,
						   info->bytes);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			u64 offset = info->offset;
			u64 bytes = ctl->unit;

			while (search_bitmap(ctl, info, &offset, &bytes,
					     false) == 0) {
				ret = btrfs_add_free_space(block_group, offset,
							   bytes);
				if (ret)
					break;
				bitmap_clear_bits(ctl, info, offset, bytes);
				offset = info->offset;
				bytes = ctl->unit;
			}
			free_bitmap(ctl, info);
		}
		cond_resched();
	}
	return ret;
}

int load_free_space_cache(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space_ctl tmp_ctl = {};
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = block_group->used;

	/*
	 * Because we could potentially discard our loaded free space, we want
	 * to load everything into a temporary structure first, and then if it's
	 * valid copy it all into the actual free space ctl.
	 */
	btrfs_init_free_space_ctl(block_group, &tmp_ctl);

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * We must pass a path with search_commit_root set to btrfs_iget in
	 * order to avoid a deadlock when allocating extents for the tree root.
	 *
	 * When we are COWing an extent buffer from the tree root, when looking
	 * for a free extent, at extent-tree.c:find_free_extent(), we can find a
	 * block group without its free space cache loaded. When we find one
	 * we must load its space cache which requires reading its free space
	 * cache's inode item from the root tree. If this inode item is located
	 * in the same leaf that we started COWing before, then we end up in
	 * deadlock on the extent buffer (trying to read lock it when we
	 * previously write locked it).
	 *
	 * It's safe to read the inode item using the commit root because
	 * block groups, once loaded, stay in memory forever (until they are
	 * removed) as well as their space caches once loaded. New block groups
	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
	 * we will never try to read their inode item while the fs is mounted.
	 */
	inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
				      path, block_group->start);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	matched = (tmp_ctl.free_space == (block_group->length - used -
					  block_group->bytes_super));

	if (matched) {
		ret = copy_free_space_cache(block_group, &tmp_ctl);
		/*
		 * ret == 1 means we successfully loaded the free space cache,
		 * so we need to re-set it here.
		 */
		if (ret == 0)
			ret = 1;
	} else {
		__btrfs_remove_free_space_cache(&tmp_ctl);
		btrfs_warn(fs_info,
			   "block group %llu has wrong amount of free space",
			   block_group->start);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		btrfs_warn(fs_info,
			   "failed to load free space cache for block group %llu, rebuilding it now",
			   block_group->start);
	}

	spin_lock(&ctl->tree_lock);
	btrfs_discard_update_discardable(block_group);
	spin_unlock(&ctl->tree_lock);
	iput(inode);
	return ret;
}

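/*
 * Write out all extent entries from the free space rbtree, including the
 * entries of the block group's allocation cluster and any ranges that
 * are temporarily removed while being trimmed, and queue bitmap entries
 * on @bitmap_list for a later pass.
 */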
static noinline_for_stack
int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space_ctl *ctl,
			      struct btrfs_block_group *block_group,
			      int *entries, int *bitmaps,
			      struct list_head *bitmap_list)
{
	int ret;
	struct btrfs_free_cluster *cluster = NULL;
	struct btrfs_free_cluster *cluster_locked = NULL;
	struct rb_node *node = rb_first(&ctl->free_space_offset);
	struct btrfs_trim_range *trim_entry;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
	}

	if (!node && cluster) {
		cluster_locked = cluster;
		spin_lock(&cluster_locked->lock);
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		*entries += 1;

		ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto fail;

		if (e->bitmap) {
			list_add_tail(&e->list, bitmap_list);
			*bitmaps += 1;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster_locked = cluster;
			spin_lock(&cluster_locked->lock);
			cluster = NULL;
		}
	}
	if (cluster_locked) {
		spin_unlock(&cluster_locked->lock);
		cluster_locked = NULL;
	}

	/*
	 * Make sure we don't miss any range that was removed from our rbtree
	 * because trimming is running. Otherwise after a umount+mount (or crash
	 * after committing the transaction) we would leak free space and get
	 * an inconsistent free space cache report from fsck.
	 */
	list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
		ret = io_ctl_add_entry(io_ctl, trim_entry->start,
				       trim_entry->bytes, NULL);
		if (ret)
			goto fail;
		*entries += 1;
	}

	return 0;
fail:
	if (cluster_locked)
		spin_unlock(&cluster_locked->lock);
	return -ENOSPC;
}

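/*
 * Update the free space header item for this cache file with the number
 * of entries and bitmaps written and the current transaction id. If the
 * header item cannot be found, the inode's EXTENT_DELALLOC bits are
 * cleared and -1 is returned.
 */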
static noinline_for_stack int
update_cache_item(struct btrfs_trans_handle *trans,
		  struct btrfs_root *root,
		  struct inode *inode,
		  struct btrfs_path *path, u64 offset,
		  int entries, int bitmaps)
{
	struct btrfs_key key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DELALLOC, 0, 0, NULL);
		goto fail;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		ASSERT(path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1, EXTENT_DELALLOC, 0,
					 0, NULL);
			btrfs_release_path(path);
			goto fail;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;

fail:
	return -1;
}

static noinline_for_stack int write_pinned_extent_entries(
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_io_ctl *io_ctl,
			    int *entries)
{
	u64 start, extent_start, extent_end, len;
	struct extent_io_tree *unpin = NULL;
	int ret;

	if (!block_group)
		return 0;

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 *
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = &trans->transaction->pinned_extents;

	start = block_group->start;

	while (start < block_group->start + block_group->length) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY, NULL);
		if (ret)
			return 0;

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->start + block_group->length)
			return 0;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->start + block_group->length,
				 extent_end + 1);
		len = extent_end - extent_start;

		*entries += 1;
		ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
		if (ret)
			return -ENOSPC;

		start = extent_end;
	}

	return 0;
}

static noinline_for_stack int
write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;
	int ret;

	/* Write out the bitmaps */
	list_for_each_entry_safe(entry, next, bitmap_list, list) {
		ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
		if (ret)
			return -ENOSPC;
		list_del_init(&entry->list);
	}

	return 0;
}

static int flush_dirty_cache(struct inode *inode)
{
	int ret;

	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DELALLOC, 0, 0, NULL);

	return ret;
}

static void noinline_for_stack
cleanup_bitmap_list(struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;

	list_for_each_entry_safe(entry, next, bitmap_list, list)
		list_del_init(&entry->list);
}

static void noinline_for_stack
cleanup_write_cache_enospc(struct inode *inode,
			   struct btrfs_io_ctl *io_ctl,
			   struct extent_state **cached_state)
{
	io_ctl_drop_pages(io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, cached_state);
}

Jeff Mahoneyafdb5712016-09-09 12:09:35 -04001244static int __btrfs_wait_cache_io(struct btrfs_root *root,
1245 struct btrfs_trans_handle *trans,
David Sterba32da53862019-10-29 19:20:18 +01001246 struct btrfs_block_group *block_group,
Jeff Mahoneyafdb5712016-09-09 12:09:35 -04001247 struct btrfs_io_ctl *io_ctl,
1248 struct btrfs_path *path, u64 offset)
Chris Masonc9dc4c62015-04-04 17:14:42 -07001249{
1250 int ret;
1251 struct inode *inode = io_ctl->inode;
1252
Chris Mason1bbc6212015-04-06 12:46:08 -07001253 if (!inode)
1254 return 0;
1255
Chris Masonc9dc4c62015-04-04 17:14:42 -07001256 /* Flush the dirty pages in the cache file. */
1257 ret = flush_dirty_cache(inode);
1258 if (ret)
1259 goto out;
1260
1261 /* Update the cache item to tell everyone this cache file is valid. */
1262 ret = update_cache_item(trans, root, inode, path, offset,
1263 io_ctl->entries, io_ctl->bitmaps);
1264out:
Chris Masonc9dc4c62015-04-04 17:14:42 -07001265 if (ret) {
1266 invalidate_inode_pages2(inode->i_mapping);
1267 BTRFS_I(inode)->generation = 0;
Filipe Mananabbcd1f42020-05-18 17:34:23 +01001268 if (block_group)
1269 btrfs_debug(root->fs_info,
Filipe Manana2e69a7a2020-05-18 17:34:11 +01001270 "failed to write free space cache for block group %llu error %d",
1271 block_group->start, ret);
Chris Masonc9dc4c62015-04-04 17:14:42 -07001272 }
Nikolay Borisov9a56fcd2020-11-02 16:48:59 +02001273 btrfs_update_inode(trans, root, BTRFS_I(inode));
Chris Masonc9dc4c62015-04-04 17:14:42 -07001274
1275 if (block_group) {
Chris Mason1bbc6212015-04-06 12:46:08 -07001276 /* the dirty list is protected by the dirty_bgs_lock */
1277 spin_lock(&trans->transaction->dirty_bgs_lock);
1278
1279 /* the disk_cache_state is protected by the block group lock */
Chris Masonc9dc4c62015-04-04 17:14:42 -07001280 spin_lock(&block_group->lock);
1281
1282 /*
1283 * only mark this as written if we didn't get put back on
Chris Mason1bbc6212015-04-06 12:46:08 -07001284 * the dirty list while waiting for IO. Otherwise our
1285 * cache state won't be right, and we won't get written again
Chris Masonc9dc4c62015-04-04 17:14:42 -07001286 */
1287 if (!ret && list_empty(&block_group->dirty_list))
1288 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1289 else if (ret)
1290 block_group->disk_cache_state = BTRFS_DC_ERROR;
1291
1292 spin_unlock(&block_group->lock);
Chris Mason1bbc6212015-04-06 12:46:08 -07001293 spin_unlock(&trans->transaction->dirty_bgs_lock);
Chris Masonc9dc4c62015-04-04 17:14:42 -07001294 io_ctl->inode = NULL;
1295 iput(inode);
1296 }
1297
1298 return ret;
1299
1300}
1301
Jeff Mahoneyafdb5712016-09-09 12:09:35 -04001302int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
David Sterba32da53862019-10-29 19:20:18 +01001303 struct btrfs_block_group *block_group,
Jeff Mahoneyafdb5712016-09-09 12:09:35 -04001304 struct btrfs_path *path)
1305{
1306 return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
1307 block_group, &block_group->io_ctl,
David Sterbab3470b52019-10-23 18:48:22 +02001308 path, block_group->start);
Jeff Mahoneyafdb5712016-09-09 12:09:35 -04001309}
1310
Chris Masond4452bc2014-05-19 20:47:56 -07001311/**
Nikolay Borisovf092cf32021-01-22 11:57:56 +02001312 * Write out cached info to an inode
1313 *
1314 * @root: root the inode belongs to
1315 * @inode: freespace inode we are writing out
1316 * @ctl: free space cache we are going to write out
1317 * @block_group: block_group for this cache if it belongs to a block_group
1318 * @io_ctl: holds context for the io
1319 * @trans: the trans handle
Chris Masond4452bc2014-05-19 20:47:56 -07001320 *
1321 * This function writes out a free space cache struct to disk for quick recovery
Geliang Tang8cd1e732015-10-04 17:05:32 +08001322 * on mount. This will return 0 if it was successful in writing the cache out,
Omar Sandovalb8605452015-02-24 02:47:06 -08001323 * or an errno if it was not.
Chris Masond4452bc2014-05-19 20:47:56 -07001324 */
1325static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1326 struct btrfs_free_space_ctl *ctl,
David Sterba32da53862019-10-29 19:20:18 +01001327 struct btrfs_block_group *block_group,
Chris Masonc9dc4c62015-04-04 17:14:42 -07001328 struct btrfs_io_ctl *io_ctl,
David Sterba0e8d9312017-02-10 20:26:24 +01001329 struct btrfs_trans_handle *trans)
Chris Masond4452bc2014-05-19 20:47:56 -07001330{
1331 struct extent_state *cached_state = NULL;
Miao Xie5349d6c2014-06-19 10:42:49 +08001332 LIST_HEAD(bitmap_list);
Chris Masond4452bc2014-05-19 20:47:56 -07001333 int entries = 0;
1334 int bitmaps = 0;
1335 int ret;
Chris Masonc9dc4c62015-04-04 17:14:42 -07001336 int must_iput = 0;
Chris Masond4452bc2014-05-19 20:47:56 -07001337
1338 if (!i_size_read(inode))
Omar Sandovalb8605452015-02-24 02:47:06 -08001339 return -EIO;
Chris Masond4452bc2014-05-19 20:47:56 -07001340
Chris Masonc9dc4c62015-04-04 17:14:42 -07001341 WARN_ON(io_ctl->pages);
Jeff Mahoneyf15376d2016-06-22 18:56:18 -04001342 ret = io_ctl_init(io_ctl, inode, 1);
Chris Masond4452bc2014-05-19 20:47:56 -07001343 if (ret)
Omar Sandovalb8605452015-02-24 02:47:06 -08001344 return ret;
Chris Masond4452bc2014-05-19 20:47:56 -07001345
Miao Xiee570fd22014-06-19 10:42:50 +08001346 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
1347 down_write(&block_group->data_rwsem);
1348 spin_lock(&block_group->lock);
1349 if (block_group->delalloc_bytes) {
1350 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1351 spin_unlock(&block_group->lock);
1352 up_write(&block_group->data_rwsem);
1353 BTRFS_I(inode)->generation = 0;
1354 ret = 0;
Chris Masonc9dc4c62015-04-04 17:14:42 -07001355 must_iput = 1;
Miao Xiee570fd22014-06-19 10:42:50 +08001356 goto out;
1357 }
1358 spin_unlock(&block_group->lock);
1359 }
1360
Chris Masond4452bc2014-05-19 20:47:56 -07001361 /* Lock all pages first so we can lock the extent safely. */
Johannes Thumshirn7a195f62020-02-12 00:10:20 +09001362 ret = io_ctl_prepare_pages(io_ctl, false);
Omar Sandovalb8605452015-02-24 02:47:06 -08001363 if (ret)
Josef Bacikb77000e2017-11-15 16:20:52 -05001364 goto out_unlock;
Chris Masond4452bc2014-05-19 20:47:56 -07001365
1366 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
David Sterbaff13db42015-12-03 14:30:40 +01001367 &cached_state);
Chris Masond4452bc2014-05-19 20:47:56 -07001368
Chris Masonc9dc4c62015-04-04 17:14:42 -07001369 io_ctl_set_generation(io_ctl, trans->transid);
Chris Masond4452bc2014-05-19 20:47:56 -07001370
Filipe Manana55507ce2014-12-01 17:04:09 +00001371 mutex_lock(&ctl->cache_writeout_mutex);
Miao Xie5349d6c2014-06-19 10:42:49 +08001372 /* Write out the extent entries in the free space cache */
Chris Mason1bbc6212015-04-06 12:46:08 -07001373 spin_lock(&ctl->tree_lock);
Chris Masonc9dc4c62015-04-04 17:14:42 -07001374 ret = write_cache_extent_entries(io_ctl, ctl,
Chris Masond4452bc2014-05-19 20:47:56 -07001375 block_group, &entries, &bitmaps,
1376 &bitmap_list);
Chris Masona3bdccc2015-04-24 11:00:00 -07001377 if (ret)
1378 goto out_nospc_locked;
Chris Masond4452bc2014-05-19 20:47:56 -07001379
Miao Xie5349d6c2014-06-19 10:42:49 +08001380 /*
1381 * Some spaces that are freed in the current transaction are pinned;
1382 * they will be added into the free space cache after the transaction is
1383 * committed, and we shouldn't lose them.
Chris Mason1bbc6212015-04-06 12:46:08 -07001384 *
1385 * If this changes while we are working we'll get added back to
1386 * the dirty list and redo it. No locking needed
Miao Xie5349d6c2014-06-19 10:42:49 +08001387 */
Nikolay Borisov6b45f6412020-01-20 16:09:15 +02001388 ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
Chris Masona3bdccc2015-04-24 11:00:00 -07001389 if (ret)
1390 goto out_nospc_locked;
Miao Xie5349d6c2014-06-19 10:42:49 +08001391
Filipe Manana55507ce2014-12-01 17:04:09 +00001392 /*
1393 * At last, we write out all the bitmaps and keep cache_writeout_mutex
1394 * locked while doing it because a concurrent trim can be manipulating
1395 * or freeing the bitmap.
1396 */
Chris Masonc9dc4c62015-04-04 17:14:42 -07001397 ret = write_bitmap_entries(io_ctl, &bitmap_list);
Chris Mason1bbc6212015-04-06 12:46:08 -07001398 spin_unlock(&ctl->tree_lock);
Filipe Manana55507ce2014-12-01 17:04:09 +00001399 mutex_unlock(&ctl->cache_writeout_mutex);
Miao Xie5349d6c2014-06-19 10:42:49 +08001400 if (ret)
1401 goto out_nospc;
1402
1403 /* Zero out the rest of the pages just to make sure */
Chris Masonc9dc4c62015-04-04 17:14:42 -07001404 io_ctl_zero_remaining_pages(io_ctl);
Miao Xie5349d6c2014-06-19 10:42:49 +08001405
1406 /* Everything is written out, now we dirty the pages in the file. */
Nikolay Borisov088545f2020-06-03 08:55:36 +03001407 ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
1408 io_ctl->num_pages, 0, i_size_read(inode),
Goldwyn Rodriguesaa8c1a42020-10-14 09:55:45 -05001409 &cached_state, false);
Miao Xie5349d6c2014-06-19 10:42:49 +08001410 if (ret)
1411 goto out_nospc;
1412
Miao Xiee570fd22014-06-19 10:42:50 +08001413 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1414 up_write(&block_group->data_rwsem);
Miao Xie5349d6c2014-06-19 10:42:49 +08001415 /*
1416 * Release the pages and unlock the extent; we will flush
1417 * them out later
1418 */
Chris Masonc9dc4c62015-04-04 17:14:42 -07001419 io_ctl_drop_pages(io_ctl);
Filipe Mananabbc37d62020-08-14 11:04:09 +01001420 io_ctl_free(io_ctl);
Miao Xie5349d6c2014-06-19 10:42:49 +08001421
1422 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
David Sterbae43bbe52017-12-12 21:43:52 +01001423 i_size_read(inode) - 1, &cached_state);
Miao Xie5349d6c2014-06-19 10:42:49 +08001424
Chris Masonc9dc4c62015-04-04 17:14:42 -07001425 /*
1426 * at this point the pages are under IO and we're happy.
Randy Dunlap260db432020-08-04 19:48:34 -07001427 * The caller is responsible for waiting on them and updating
Chris Masonc9dc4c62015-04-04 17:14:42 -07001428 * the cache and the inode
1429 */
1430 io_ctl->entries = entries;
1431 io_ctl->bitmaps = bitmaps;
1432
1433 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
Miao Xie5349d6c2014-06-19 10:42:49 +08001434 if (ret)
Josef Bacik0ef8b722013-10-25 16:13:35 -04001435 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -04001436
Chris Masonc9dc4c62015-04-04 17:14:42 -07001437 return 0;
1438
Chris Masona3bdccc2015-04-24 11:00:00 -07001439out_nospc_locked:
1440 cleanup_bitmap_list(&bitmap_list);
1441 spin_unlock(&ctl->tree_lock);
1442 mutex_unlock(&ctl->cache_writeout_mutex);
1443
Josef Bacika67509c2011-10-05 15:18:58 -04001444out_nospc:
David Sterba7bf1a152017-02-10 20:23:00 +01001445 cleanup_write_cache_enospc(inode, io_ctl, &cached_state);
Miao Xiee570fd22014-06-19 10:42:50 +08001446
Josef Bacikb77000e2017-11-15 16:20:52 -05001447out_unlock:
Miao Xiee570fd22014-06-19 10:42:50 +08001448 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1449 up_write(&block_group->data_rwsem);
1450
Johannes Thumshirnfd8efa82020-02-12 00:10:22 +09001451out:
1452 io_ctl->inode = NULL;
1453 io_ctl_free(io_ctl);
1454 if (ret) {
1455 invalidate_inode_pages2(inode->i_mapping);
1456 BTRFS_I(inode)->generation = 0;
1457 }
Nikolay Borisov9a56fcd2020-11-02 16:48:59 +02001458 btrfs_update_inode(trans, root, BTRFS_I(inode));
Johannes Thumshirnfd8efa82020-02-12 00:10:22 +09001459 if (must_iput)
1460 iput(inode);
1461 return ret;
Li Zefan0414efa2011-04-20 10:20:14 +08001462}
1463
David Sterbafe041532019-03-20 13:51:56 +01001464int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
David Sterba32da53862019-10-29 19:20:18 +01001465 struct btrfs_block_group *block_group,
Li Zefan0414efa2011-04-20 10:20:14 +08001466 struct btrfs_path *path)
1467{
David Sterbafe041532019-03-20 13:51:56 +01001468 struct btrfs_fs_info *fs_info = trans->fs_info;
Li Zefan0414efa2011-04-20 10:20:14 +08001469 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1470 struct inode *inode;
1471 int ret = 0;
1472
Li Zefan0414efa2011-04-20 10:20:14 +08001473 spin_lock(&block_group->lock);
1474 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1475 spin_unlock(&block_group->lock);
1476 return 0;
1477 }
1478 spin_unlock(&block_group->lock);
1479
David Sterba7949f332019-03-20 13:40:19 +01001480 inode = lookup_free_space_inode(block_group, path);
Li Zefan0414efa2011-04-20 10:20:14 +08001481 if (IS_ERR(inode))
1482 return 0;
1483
Jeff Mahoney77ab86b2017-02-15 16:28:30 -05001484 ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
1485 block_group, &block_group->io_ctl, trans);
Josef Bacikc09544e2011-08-30 10:19:10 -04001486 if (ret) {
Filipe Mananabbcd1f42020-05-18 17:34:23 +01001487 btrfs_debug(fs_info,
Filipe Manana2e69a7a2020-05-18 17:34:11 +01001488 "failed to write free space cache for block group %llu error %d",
1489 block_group->start, ret);
Chris Masonc9dc4c62015-04-04 17:14:42 -07001490 spin_lock(&block_group->lock);
1491 block_group->disk_cache_state = BTRFS_DC_ERROR;
1492 spin_unlock(&block_group->lock);
1493
1494 block_group->io_ctl.inode = NULL;
1495 iput(inode);
Li Zefan0414efa2011-04-20 10:20:14 +08001496 }
1497
Chris Masonc9dc4c62015-04-04 17:14:42 -07001498 /*
1499 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
1500 * to wait for IO and put the inode
1501 */
1502
Josef Bacik0cb59c92010-07-02 12:14:14 -04001503 return ret;
1504}
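
/*
 * Illustrative note (added annotation, not part of the original source):
 * cache writeout is a two-phase affair. btrfs_write_out_cache() starts
 * writeback for a dirty block group's cache inode and leaves the pages
 * under IO; later in the commit the caller is expected to call
 * btrfs_wait_cache_io(), which waits for that IO, updates the cache item
 * and drops the inode reference taken here.
 */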
1505
Li Zefan34d52cb2011-03-29 13:46:06 +08001506static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
Josef Bacik96303082009-07-13 21:29:25 -04001507 u64 offset)
1508{
Josef Bacikb12d6862013-08-26 17:14:08 -04001509 ASSERT(offset >= bitmap_start);
Josef Bacik96303082009-07-13 21:29:25 -04001510 offset -= bitmap_start;
Li Zefan34d52cb2011-03-29 13:46:06 +08001511 return (unsigned long)(div_u64(offset, unit));
Josef Bacik96303082009-07-13 21:29:25 -04001512}
1513
Li Zefan34d52cb2011-03-29 13:46:06 +08001514static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
Josef Bacik96303082009-07-13 21:29:25 -04001515{
Li Zefan34d52cb2011-03-29 13:46:06 +08001516 return (unsigned long)(div_u64(bytes, unit));
Josef Bacik96303082009-07-13 21:29:25 -04001517}
1518
Li Zefan34d52cb2011-03-29 13:46:06 +08001519static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001520 u64 offset)
1521{
1522 u64 bitmap_start;
Feifei Xu0ef64472016-06-01 19:18:24 +08001523 u64 bytes_per_bitmap;
Josef Bacik96303082009-07-13 21:29:25 -04001524
Li Zefan34d52cb2011-03-29 13:46:06 +08001525 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1526 bitmap_start = offset - ctl->start;
Feifei Xu0ef64472016-06-01 19:18:24 +08001527 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
Josef Bacik96303082009-07-13 21:29:25 -04001528 bitmap_start *= bytes_per_bitmap;
Li Zefan34d52cb2011-03-29 13:46:06 +08001529 bitmap_start += ctl->start;
Josef Bacik96303082009-07-13 21:29:25 -04001530
1531 return bitmap_start;
1532}
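
/*
 * Worked example (added annotation, not part of the original source;
 * assumes 4 KiB pages and a 4 KiB unit): BITS_PER_BITMAP is then 32768,
 * so one bitmap entry covers 32768 * 4 KiB = 128 MiB. With
 * ctl->start = 1 GiB and offset = 1 GiB + 200 MiB:
 *
 *   offset_to_bitmap() -> floor(200 MiB / 128 MiB) * 128 MiB + 1 GiB
 *                       = 1 GiB + 128 MiB
 *   offset_to_bit(1 GiB + 128 MiB, 4 KiB, offset) = 72 MiB / 4 KiB = 18432
 */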
Josef Bacik0f9dd462008-09-23 13:14:11 -04001533
1534static int tree_insert_offset(struct rb_root *root, u64 offset,
Josef Bacik96303082009-07-13 21:29:25 -04001535 struct rb_node *node, int bitmap)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001536{
1537 struct rb_node **p = &root->rb_node;
1538 struct rb_node *parent = NULL;
1539 struct btrfs_free_space *info;
1540
1541 while (*p) {
1542 parent = *p;
1543 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1544
Josef Bacik96303082009-07-13 21:29:25 -04001545 if (offset < info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001546 p = &(*p)->rb_left;
Josef Bacik96303082009-07-13 21:29:25 -04001547 } else if (offset > info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001548 p = &(*p)->rb_right;
Josef Bacik96303082009-07-13 21:29:25 -04001549 } else {
1550 /*
1551 * we could have a bitmap entry and an extent entry
1552 * share the same offset. If this is the case, we want
1553 * the extent entry to always be found first if we do a
1554 * linear search through the tree, since we want to have
1555 * the quickest allocation time, and allocating from an
1556 * extent is faster than allocating from a bitmap. So
1557 * if we're inserting a bitmap and we find an entry at
1558 * this offset, we want to go right, or after this entry
1559 * logically. If we are inserting an extent and we've
1560 * found a bitmap, we want to go left, or before
1561 * logically.
1562 */
1563 if (bitmap) {
Josef Bacik207dde82011-05-13 14:49:23 -04001564 if (info->bitmap) {
1565 WARN_ON_ONCE(1);
1566 return -EEXIST;
1567 }
Josef Bacik96303082009-07-13 21:29:25 -04001568 p = &(*p)->rb_right;
1569 } else {
Josef Bacik207dde82011-05-13 14:49:23 -04001570 if (!info->bitmap) {
1571 WARN_ON_ONCE(1);
1572 return -EEXIST;
1573 }
Josef Bacik96303082009-07-13 21:29:25 -04001574 p = &(*p)->rb_left;
1575 }
1576 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04001577 }
1578
1579 rb_link_node(node, parent, p);
1580 rb_insert_color(node, root);
1581
1582 return 0;
1583}
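
/*
 * Illustrative note (added annotation, not part of the original source):
 * when an extent entry and a bitmap entry share the same offset, the rules
 * above place the bitmap to the right of the extent entry, so an in-order
 * walk (and tree_search_offset()) finds the extent entry first and
 * allocations prefer it over the slower bitmap.
 */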
1584
1585/*
Josef Bacik70cb0742009-04-03 10:14:19 -04001586 * searches the tree for the given offset.
1587 *
Josef Bacik96303082009-07-13 21:29:25 -04001588 * fuzzy - If this is set, then we are trying to make an allocation, and we just
1589 * want a section that is at least 'bytes' in size and comes at or after the given
1590 * offset.
Josef Bacik0f9dd462008-09-23 13:14:11 -04001591 */
Josef Bacik96303082009-07-13 21:29:25 -04001592static struct btrfs_free_space *
Li Zefan34d52cb2011-03-29 13:46:06 +08001593tree_search_offset(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001594 u64 offset, int bitmap_only, int fuzzy)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001595{
Li Zefan34d52cb2011-03-29 13:46:06 +08001596 struct rb_node *n = ctl->free_space_offset.rb_node;
Josef Bacik96303082009-07-13 21:29:25 -04001597 struct btrfs_free_space *entry, *prev = NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001598
Josef Bacik96303082009-07-13 21:29:25 -04001599 /* find entry that is closest to the 'offset' */
1600 while (1) {
1601 if (!n) {
1602 entry = NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001603 break;
1604 }
Josef Bacik96303082009-07-13 21:29:25 -04001605
1606 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1607 prev = entry;
1608
1609 if (offset < entry->offset)
1610 n = n->rb_left;
1611 else if (offset > entry->offset)
1612 n = n->rb_right;
1613 else
1614 break;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001615 }
1616
Josef Bacik96303082009-07-13 21:29:25 -04001617 if (bitmap_only) {
1618 if (!entry)
1619 return NULL;
1620 if (entry->bitmap)
1621 return entry;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001622
Josef Bacik96303082009-07-13 21:29:25 -04001623 /*
1624 * bitmap entry and extent entry may share same offset,
1625 * in that case, bitmap entry comes after extent entry.
1626 */
1627 n = rb_next(n);
1628 if (!n)
1629 return NULL;
1630 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1631 if (entry->offset != offset)
1632 return NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001633
Josef Bacik96303082009-07-13 21:29:25 -04001634 WARN_ON(!entry->bitmap);
1635 return entry;
1636 } else if (entry) {
1637 if (entry->bitmap) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001638 /*
Josef Bacik96303082009-07-13 21:29:25 -04001639 * if previous extent entry covers the offset,
1640 * we should return it instead of the bitmap entry
Josef Bacik0f9dd462008-09-23 13:14:11 -04001641 */
Miao Xiede6c4112012-10-18 08:18:01 +00001642 n = rb_prev(&entry->offset_index);
1643 if (n) {
Josef Bacik96303082009-07-13 21:29:25 -04001644 prev = rb_entry(n, struct btrfs_free_space,
1645 offset_index);
Miao Xiede6c4112012-10-18 08:18:01 +00001646 if (!prev->bitmap &&
1647 prev->offset + prev->bytes > offset)
1648 entry = prev;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001649 }
Josef Bacik96303082009-07-13 21:29:25 -04001650 }
1651 return entry;
1652 }
1653
1654 if (!prev)
1655 return NULL;
1656
1657 /* find last entry before the 'offset' */
1658 entry = prev;
1659 if (entry->offset > offset) {
1660 n = rb_prev(&entry->offset_index);
1661 if (n) {
1662 entry = rb_entry(n, struct btrfs_free_space,
1663 offset_index);
Josef Bacikb12d6862013-08-26 17:14:08 -04001664 ASSERT(entry->offset <= offset);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001665 } else {
Josef Bacik96303082009-07-13 21:29:25 -04001666 if (fuzzy)
1667 return entry;
1668 else
1669 return NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001670 }
1671 }
1672
Josef Bacik96303082009-07-13 21:29:25 -04001673 if (entry->bitmap) {
Miao Xiede6c4112012-10-18 08:18:01 +00001674 n = rb_prev(&entry->offset_index);
1675 if (n) {
Josef Bacik96303082009-07-13 21:29:25 -04001676 prev = rb_entry(n, struct btrfs_free_space,
1677 offset_index);
Miao Xiede6c4112012-10-18 08:18:01 +00001678 if (!prev->bitmap &&
1679 prev->offset + prev->bytes > offset)
1680 return prev;
Josef Bacik96303082009-07-13 21:29:25 -04001681 }
Li Zefan34d52cb2011-03-29 13:46:06 +08001682 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
Josef Bacik96303082009-07-13 21:29:25 -04001683 return entry;
1684 } else if (entry->offset + entry->bytes > offset)
1685 return entry;
1686
1687 if (!fuzzy)
1688 return NULL;
1689
1690 while (1) {
1691 if (entry->bitmap) {
1692 if (entry->offset + BITS_PER_BITMAP *
Li Zefan34d52cb2011-03-29 13:46:06 +08001693 ctl->unit > offset)
Josef Bacik96303082009-07-13 21:29:25 -04001694 break;
1695 } else {
1696 if (entry->offset + entry->bytes > offset)
1697 break;
1698 }
1699
1700 n = rb_next(&entry->offset_index);
1701 if (!n)
1702 return NULL;
1703 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1704 }
1705 return entry;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001706}
1707
Li Zefanf333adb2010-11-09 14:57:39 +08001708static inline void
Li Zefan34d52cb2011-03-29 13:46:06 +08001709__unlink_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001710 struct btrfs_free_space *info)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001711{
Li Zefan34d52cb2011-03-29 13:46:06 +08001712 rb_erase(&info->offset_index, &ctl->free_space_offset);
1713 ctl->free_extents--;
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001714
Dennis Zhou5dc7c102019-12-13 16:22:21 -08001715 if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001716 ctl->discardable_extents[BTRFS_STAT_CURR]--;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08001717 ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
1718 }
Li Zefanf333adb2010-11-09 14:57:39 +08001719}
1720
Li Zefan34d52cb2011-03-29 13:46:06 +08001721static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001722 struct btrfs_free_space *info)
1723{
Li Zefan34d52cb2011-03-29 13:46:06 +08001724 __unlink_free_space(ctl, info);
1725 ctl->free_space -= info->bytes;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001726}
1727
Li Zefan34d52cb2011-03-29 13:46:06 +08001728static int link_free_space(struct btrfs_free_space_ctl *ctl,
Josef Bacik0f9dd462008-09-23 13:14:11 -04001729 struct btrfs_free_space *info)
1730{
1731 int ret = 0;
1732
Josef Bacikb12d6862013-08-26 17:14:08 -04001733 ASSERT(info->bytes || info->bitmap);
Li Zefan34d52cb2011-03-29 13:46:06 +08001734 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001735 &info->offset_index, (info->bitmap != NULL));
Josef Bacik0f9dd462008-09-23 13:14:11 -04001736 if (ret)
1737 return ret;
1738
Dennis Zhou5dc7c102019-12-13 16:22:21 -08001739 if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001740 ctl->discardable_extents[BTRFS_STAT_CURR]++;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08001741 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
1742 }
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001743
Li Zefan34d52cb2011-03-29 13:46:06 +08001744 ctl->free_space += info->bytes;
1745 ctl->free_extents++;
Josef Bacik96303082009-07-13 21:29:25 -04001746 return ret;
1747}
1748
Miao Xiebb3ac5a2011-08-05 09:32:35 +00001749static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1750 struct btrfs_free_space *info,
1751 u64 offset, u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001752{
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001753 unsigned long start, count, end;
1754 int extent_delta = -1;
Josef Bacik96303082009-07-13 21:29:25 -04001755
Li Zefan34d52cb2011-03-29 13:46:06 +08001756 start = offset_to_bit(info->offset, ctl->unit, offset);
1757 count = bytes_to_bits(bytes, ctl->unit);
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001758 end = start + count;
1759 ASSERT(end <= BITS_PER_BITMAP);
Josef Bacik96303082009-07-13 21:29:25 -04001760
Li Zefanf38b6e72011-03-14 13:40:51 +08001761 bitmap_clear(info->bitmap, start, count);
Josef Bacik96303082009-07-13 21:29:25 -04001762
1763 info->bytes -= bytes;
Josef Bacik553cceb2018-09-28 07:18:00 -04001764 if (info->max_extent_size > ctl->unit)
1765 info->max_extent_size = 0;
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001766
1767 if (start && test_bit(start - 1, info->bitmap))
1768 extent_delta++;
1769
1770 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1771 extent_delta++;
1772
1773 info->bitmap_extents += extent_delta;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08001774 if (!btrfs_free_space_trimmed(info)) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001775 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08001776 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
1777 }
Miao Xiebb3ac5a2011-08-05 09:32:35 +00001778}
1779
1780static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1781 struct btrfs_free_space *info, u64 offset,
1782 u64 bytes)
1783{
1784 __bitmap_clear_bits(ctl, info, offset, bytes);
Li Zefan34d52cb2011-03-29 13:46:06 +08001785 ctl->free_space -= bytes;
Josef Bacik96303082009-07-13 21:29:25 -04001786}
1787
Li Zefan34d52cb2011-03-29 13:46:06 +08001788static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
Josef Bacik817d52f2009-07-13 21:29:25 -04001789 struct btrfs_free_space *info, u64 offset,
1790 u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001791{
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001792 unsigned long start, count, end;
1793 int extent_delta = 1;
Josef Bacik96303082009-07-13 21:29:25 -04001794
Li Zefan34d52cb2011-03-29 13:46:06 +08001795 start = offset_to_bit(info->offset, ctl->unit, offset);
1796 count = bytes_to_bits(bytes, ctl->unit);
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001797 end = start + count;
1798 ASSERT(end <= BITS_PER_BITMAP);
Josef Bacik96303082009-07-13 21:29:25 -04001799
Li Zefanf38b6e72011-03-14 13:40:51 +08001800 bitmap_set(info->bitmap, start, count);
Josef Bacik96303082009-07-13 21:29:25 -04001801
1802 info->bytes += bytes;
Li Zefan34d52cb2011-03-29 13:46:06 +08001803 ctl->free_space += bytes;
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001804
1805 if (start && test_bit(start - 1, info->bitmap))
1806 extent_delta--;
1807
1808 if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
1809 extent_delta--;
1810
1811 info->bitmap_extents += extent_delta;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08001812 if (!btrfs_free_space_trimmed(info)) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001813 ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08001814 ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
1815 }
Josef Bacik96303082009-07-13 21:29:25 -04001816}
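
/*
 * Worked example for the extent_delta accounting above (added annotation,
 * not part of the original source): setting a run of bits normally creates
 * one new bitmap extent (extent_delta = 1). If the bit just before the run
 * is already set we are merely extending an existing extent, and if the bit
 * just after it is set we merge with the following one, so each adjacent
 * set bit subtracts one. Bridging two existing runs therefore gives
 * extent_delta = -1: the left run, the new bits and the right run become a
 * single extent.
 */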
1817
Miao Xiea4820392013-09-09 13:19:42 +08001818/*
1819 * If we cannot find a suitable extent, we will use 'bytes' to record
1820 * the size of the max extent.
1821 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001822static int search_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001823 struct btrfs_free_space *bitmap_info, u64 *offset,
Josef Bacik0584f712015-10-02 16:12:23 -04001824 u64 *bytes, bool for_alloc)
Josef Bacik96303082009-07-13 21:29:25 -04001825{
1826 unsigned long found_bits = 0;
Miao Xiea4820392013-09-09 13:19:42 +08001827 unsigned long max_bits = 0;
Josef Bacik96303082009-07-13 21:29:25 -04001828 unsigned long bits, i;
1829 unsigned long next_zero;
Miao Xiea4820392013-09-09 13:19:42 +08001830 unsigned long extent_bits;
Josef Bacik96303082009-07-13 21:29:25 -04001831
Josef Bacikcef40482015-10-02 16:09:42 -04001832 /*
1833 * Skip searching the bitmap if we don't have a contiguous section that
1834 * is large enough for this allocation.
1835 */
Josef Bacik0584f712015-10-02 16:12:23 -04001836 if (for_alloc &&
1837 bitmap_info->max_extent_size &&
Josef Bacikcef40482015-10-02 16:09:42 -04001838 bitmap_info->max_extent_size < *bytes) {
1839 *bytes = bitmap_info->max_extent_size;
1840 return -1;
1841 }
1842
Li Zefan34d52cb2011-03-29 13:46:06 +08001843 i = offset_to_bit(bitmap_info->offset, ctl->unit,
Josef Bacik96303082009-07-13 21:29:25 -04001844 max_t(u64, *offset, bitmap_info->offset));
Li Zefan34d52cb2011-03-29 13:46:06 +08001845 bits = bytes_to_bits(*bytes, ctl->unit);
Josef Bacik96303082009-07-13 21:29:25 -04001846
Wei Yongjunebb3dad2012-09-13 20:29:02 -06001847 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
Josef Bacik0584f712015-10-02 16:12:23 -04001848 if (for_alloc && bits == 1) {
1849 found_bits = 1;
1850 break;
1851 }
Josef Bacik96303082009-07-13 21:29:25 -04001852 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1853 BITS_PER_BITMAP, i);
Miao Xiea4820392013-09-09 13:19:42 +08001854 extent_bits = next_zero - i;
1855 if (extent_bits >= bits) {
1856 found_bits = extent_bits;
Josef Bacik96303082009-07-13 21:29:25 -04001857 break;
Miao Xiea4820392013-09-09 13:19:42 +08001858 } else if (extent_bits > max_bits) {
1859 max_bits = extent_bits;
Josef Bacik96303082009-07-13 21:29:25 -04001860 }
1861 i = next_zero;
1862 }
1863
1864 if (found_bits) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001865 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1866 *bytes = (u64)(found_bits) * ctl->unit;
Josef Bacik96303082009-07-13 21:29:25 -04001867 return 0;
1868 }
1869
Miao Xiea4820392013-09-09 13:19:42 +08001870 *bytes = (u64)(max_bits) * ctl->unit;
Josef Bacikcef40482015-10-02 16:09:42 -04001871 bitmap_info->max_extent_size = *bytes;
Josef Bacik96303082009-07-13 21:29:25 -04001872 return -1;
1873}
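
/*
 * Illustrative note (added annotation, not part of the original source):
 * if an allocation asks this bitmap for 1 MiB but its longest run of set
 * bits is only 256 KiB, the search fails, *bytes is set to 256 KiB and that
 * value is cached in max_extent_size, so later allocations asking for more
 * than 256 KiB can skip scanning this bitmap via the early check above.
 */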
1874
Josef Bacikad22cf62018-10-12 15:32:33 -04001875static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
1876{
1877 if (entry->bitmap)
1878 return entry->max_extent_size;
1879 return entry->bytes;
1880}
1881
Miao Xiea4820392013-09-09 13:19:42 +08001882/* Cache the size of the max extent in bytes */
Li Zefan34d52cb2011-03-29 13:46:06 +08001883static struct btrfs_free_space *
David Woodhouse53b381b2013-01-29 18:40:14 -05001884find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
Miao Xiea4820392013-09-09 13:19:42 +08001885 unsigned long align, u64 *max_extent_size)
Josef Bacik96303082009-07-13 21:29:25 -04001886{
1887 struct btrfs_free_space *entry;
1888 struct rb_node *node;
David Woodhouse53b381b2013-01-29 18:40:14 -05001889 u64 tmp;
1890 u64 align_off;
Josef Bacik96303082009-07-13 21:29:25 -04001891 int ret;
1892
Li Zefan34d52cb2011-03-29 13:46:06 +08001893 if (!ctl->free_space_offset.rb_node)
Miao Xiea4820392013-09-09 13:19:42 +08001894 goto out;
Josef Bacik96303082009-07-13 21:29:25 -04001895
Li Zefan34d52cb2011-03-29 13:46:06 +08001896 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
Josef Bacik96303082009-07-13 21:29:25 -04001897 if (!entry)
Miao Xiea4820392013-09-09 13:19:42 +08001898 goto out;
Josef Bacik96303082009-07-13 21:29:25 -04001899
1900 for (node = &entry->offset_index; node; node = rb_next(node)) {
1901 entry = rb_entry(node, struct btrfs_free_space, offset_index);
Miao Xiea4820392013-09-09 13:19:42 +08001902 if (entry->bytes < *bytes) {
Josef Bacikad22cf62018-10-12 15:32:33 -04001903 *max_extent_size = max(get_max_extent_size(entry),
1904 *max_extent_size);
Josef Bacik96303082009-07-13 21:29:25 -04001905 continue;
Miao Xiea4820392013-09-09 13:19:42 +08001906 }
Josef Bacik96303082009-07-13 21:29:25 -04001907
David Woodhouse53b381b2013-01-29 18:40:14 -05001908 /* make sure the space returned is big enough
1909 * to match our requested alignment
1910 */
1911 if (*bytes >= align) {
Miao Xiea4820392013-09-09 13:19:42 +08001912 tmp = entry->offset - ctl->start + align - 1;
David Sterba47c57132015-02-20 18:43:47 +01001913 tmp = div64_u64(tmp, align);
David Woodhouse53b381b2013-01-29 18:40:14 -05001914 tmp = tmp * align + ctl->start;
1915 align_off = tmp - entry->offset;
1916 } else {
1917 align_off = 0;
1918 tmp = entry->offset;
1919 }
1920
Miao Xiea4820392013-09-09 13:19:42 +08001921 if (entry->bytes < *bytes + align_off) {
Josef Bacikad22cf62018-10-12 15:32:33 -04001922 *max_extent_size = max(get_max_extent_size(entry),
1923 *max_extent_size);
David Woodhouse53b381b2013-01-29 18:40:14 -05001924 continue;
Miao Xiea4820392013-09-09 13:19:42 +08001925 }
David Woodhouse53b381b2013-01-29 18:40:14 -05001926
Josef Bacik96303082009-07-13 21:29:25 -04001927 if (entry->bitmap) {
Miao Xiea4820392013-09-09 13:19:42 +08001928 u64 size = *bytes;
1929
Josef Bacik0584f712015-10-02 16:12:23 -04001930 ret = search_bitmap(ctl, entry, &tmp, &size, true);
David Woodhouse53b381b2013-01-29 18:40:14 -05001931 if (!ret) {
1932 *offset = tmp;
Miao Xiea4820392013-09-09 13:19:42 +08001933 *bytes = size;
Josef Bacik96303082009-07-13 21:29:25 -04001934 return entry;
Josef Bacikad22cf62018-10-12 15:32:33 -04001935 } else {
1936 *max_extent_size =
1937 max(get_max_extent_size(entry),
1938 *max_extent_size);
David Woodhouse53b381b2013-01-29 18:40:14 -05001939 }
Josef Bacik96303082009-07-13 21:29:25 -04001940 continue;
1941 }
1942
David Woodhouse53b381b2013-01-29 18:40:14 -05001943 *offset = tmp;
1944 *bytes = entry->bytes - align_off;
Josef Bacik96303082009-07-13 21:29:25 -04001945 return entry;
1946 }
Miao Xiea4820392013-09-09 13:19:42 +08001947out:
Josef Bacik96303082009-07-13 21:29:25 -04001948 return NULL;
1949}
1950
Li Zefan34d52cb2011-03-29 13:46:06 +08001951static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001952 struct btrfs_free_space *info, u64 offset)
1953{
Li Zefan34d52cb2011-03-29 13:46:06 +08001954 info->offset = offset_to_bitmap(ctl, offset);
Josef Bacikf019f422009-09-11 16:11:20 -04001955 info->bytes = 0;
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08001956 info->bitmap_extents = 0;
Alexandre Olivaf2d0f672011-11-28 12:04:43 -02001957 INIT_LIST_HEAD(&info->list);
Li Zefan34d52cb2011-03-29 13:46:06 +08001958 link_free_space(ctl, info);
1959 ctl->total_bitmaps++;
David Sterbafa598b02020-12-03 17:18:38 +01001960 recalculate_thresholds(ctl);
Josef Bacik96303082009-07-13 21:29:25 -04001961}
1962
Li Zefan34d52cb2011-03-29 13:46:06 +08001963static void free_bitmap(struct btrfs_free_space_ctl *ctl,
Li Zefanedf6e2d2010-11-09 14:50:07 +08001964 struct btrfs_free_space *bitmap_info)
1965{
Dennis Zhou27f0afc2020-01-02 16:26:45 -05001966 /*
1967 * Normally when this is called, the bitmap is completely empty. However,
1968 * if we are blowing up the free space cache for one reason or another
1969 * via __btrfs_remove_free_space_cache(), then it may not be freed and
1970 * we may leave stats on the table.
1971 */
1972 if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
1973 ctl->discardable_extents[BTRFS_STAT_CURR] -=
1974 bitmap_info->bitmap_extents;
1975 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;
1976
1977 }
Li Zefan34d52cb2011-03-29 13:46:06 +08001978 unlink_free_space(ctl, bitmap_info);
Christophe Leroy3acd4852019-08-21 15:05:55 +00001979 kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
Josef Bacikdc89e982011-01-28 17:05:48 -05001980 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
Li Zefan34d52cb2011-03-29 13:46:06 +08001981 ctl->total_bitmaps--;
David Sterbafa598b02020-12-03 17:18:38 +01001982 recalculate_thresholds(ctl);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001983}
1984
Li Zefan34d52cb2011-03-29 13:46:06 +08001985static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001986 struct btrfs_free_space *bitmap_info,
1987 u64 *offset, u64 *bytes)
1988{
1989 u64 end;
Josef Bacik6606bb92009-07-31 11:03:58 -04001990 u64 search_start, search_bytes;
1991 int ret;
Josef Bacik96303082009-07-13 21:29:25 -04001992
1993again:
Li Zefan34d52cb2011-03-29 13:46:06 +08001994 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
Josef Bacik96303082009-07-13 21:29:25 -04001995
Josef Bacik6606bb92009-07-31 11:03:58 -04001996 /*
Josef Bacikbdb7d302012-06-27 15:10:56 -04001997 * We need to search for bits in this bitmap. We may have only covered
1998 * part of the extent in this bitmap due to how we add space, so we need
1999 * to search for as much of it as we can and clear that amount, and then
2000 * go searching for the next bit.
Josef Bacik6606bb92009-07-31 11:03:58 -04002001 */
2002 search_start = *offset;
Josef Bacikbdb7d302012-06-27 15:10:56 -04002003 search_bytes = ctl->unit;
Josef Bacik13dbc082011-02-03 02:39:52 +00002004 search_bytes = min(search_bytes, end - search_start + 1);
Josef Bacik0584f712015-10-02 16:12:23 -04002005 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
2006 false);
Josef Bacikb50c6e22013-04-25 15:55:30 -04002007 if (ret < 0 || search_start != *offset)
2008 return -EINVAL;
Josef Bacik6606bb92009-07-31 11:03:58 -04002009
Josef Bacikbdb7d302012-06-27 15:10:56 -04002010 /* We may have found more bits than what we need */
2011 search_bytes = min(search_bytes, *bytes);
2012
2013 /* Cannot clear past the end of the bitmap */
2014 search_bytes = min(search_bytes, end - search_start + 1);
2015
2016 bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
2017 *offset += search_bytes;
2018 *bytes -= search_bytes;
Josef Bacik96303082009-07-13 21:29:25 -04002019
2020 if (*bytes) {
Josef Bacik6606bb92009-07-31 11:03:58 -04002021 struct rb_node *next = rb_next(&bitmap_info->offset_index);
Li Zefanedf6e2d2010-11-09 14:50:07 +08002022 if (!bitmap_info->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08002023 free_bitmap(ctl, bitmap_info);
Josef Bacik96303082009-07-13 21:29:25 -04002024
Josef Bacik6606bb92009-07-31 11:03:58 -04002025 /*
2026 * no entry after this bitmap, but we still have bytes to
2027 * remove, so something has gone wrong.
2028 */
2029 if (!next)
Josef Bacik96303082009-07-13 21:29:25 -04002030 return -EINVAL;
2031
Josef Bacik6606bb92009-07-31 11:03:58 -04002032 bitmap_info = rb_entry(next, struct btrfs_free_space,
2033 offset_index);
2034
2035 /*
2036 * if the next entry isn't a bitmap we need to return to let the
2037 * extent stuff do its work.
2038 */
Josef Bacik96303082009-07-13 21:29:25 -04002039 if (!bitmap_info->bitmap)
2040 return -EAGAIN;
2041
Josef Bacik6606bb92009-07-31 11:03:58 -04002042 /*
2043 * Ok the next item is a bitmap, but it may not actually hold
2044 * the information for the rest of this free space stuff, so
2045 * look for it, and if we don't find it return so we can try
2046 * everything over again.
2047 */
2048 search_start = *offset;
Josef Bacikbdb7d302012-06-27 15:10:56 -04002049 search_bytes = ctl->unit;
Li Zefan34d52cb2011-03-29 13:46:06 +08002050 ret = search_bitmap(ctl, bitmap_info, &search_start,
Josef Bacik0584f712015-10-02 16:12:23 -04002051 &search_bytes, false);
Josef Bacik6606bb92009-07-31 11:03:58 -04002052 if (ret < 0 || search_start != *offset)
2053 return -EAGAIN;
2054
Josef Bacik96303082009-07-13 21:29:25 -04002055 goto again;
Li Zefanedf6e2d2010-11-09 14:50:07 +08002056 } else if (!bitmap_info->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08002057 free_bitmap(ctl, bitmap_info);
Josef Bacik96303082009-07-13 21:29:25 -04002058
2059 return 0;
2060}
2061
Josef Bacik2cdc3422011-05-27 14:07:49 -04002062static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
2063 struct btrfs_free_space *info, u64 offset,
Dennis Zhouda080fe2019-12-13 16:22:13 -08002064 u64 bytes, enum btrfs_trim_state trim_state)
Josef Bacik2cdc3422011-05-27 14:07:49 -04002065{
2066 u64 bytes_to_set = 0;
2067 u64 end;
2068
Dennis Zhouda080fe2019-12-13 16:22:13 -08002069 /*
2070 * This is a tradeoff to make bitmap trim state minimal. We mark the
2071 * whole bitmap untrimmed if at any point we add untrimmed regions.
2072 */
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08002073 if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
Dennis Zhou5dc7c102019-12-13 16:22:21 -08002074 if (btrfs_free_space_trimmed(info)) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08002075 ctl->discardable_extents[BTRFS_STAT_CURR] +=
2076 info->bitmap_extents;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08002077 ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
2078 }
Dennis Zhouda080fe2019-12-13 16:22:13 -08002079 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08002080 }
Dennis Zhouda080fe2019-12-13 16:22:13 -08002081
Josef Bacik2cdc3422011-05-27 14:07:49 -04002082 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
2083
2084 bytes_to_set = min(end - offset, bytes);
2085
2086 bitmap_set_bits(ctl, info, offset, bytes_to_set);
2087
Josef Bacikcef40482015-10-02 16:09:42 -04002088 /*
2089 * We set some bytes, so we have no idea what the max extent size is
2090 * anymore.
2091 */
2092 info->max_extent_size = 0;
2093
Josef Bacik2cdc3422011-05-27 14:07:49 -04002094 return bytes_to_set;
2095
2096}
2097
Li Zefan34d52cb2011-03-29 13:46:06 +08002098static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
2099 struct btrfs_free_space *info)
Josef Bacik96303082009-07-13 21:29:25 -04002100{
David Sterba32da53862019-10-29 19:20:18 +01002101 struct btrfs_block_group *block_group = ctl->private;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002102 struct btrfs_fs_info *fs_info = block_group->fs_info;
Josef Bacikd0bd4562015-09-23 14:54:14 -04002103 bool forced = false;
2104
2105#ifdef CONFIG_BTRFS_DEBUG
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002106 if (btrfs_should_fragment_free_space(block_group))
Josef Bacikd0bd4562015-09-23 14:54:14 -04002107 forced = true;
2108#endif
Josef Bacik96303082009-07-13 21:29:25 -04002109
Dennis Zhou5d90c5c2020-01-02 16:26:43 -05002110 /* This is a way to reclaim large regions from the bitmaps. */
2111 if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
2112 return false;
2113
Josef Bacik96303082009-07-13 21:29:25 -04002114 /*
2115 * If we are below the extents threshold then we can add this as an
2116 * extent, and don't have to deal with the bitmap
2117 */
Josef Bacikd0bd4562015-09-23 14:54:14 -04002118 if (!forced && ctl->free_extents < ctl->extents_thresh) {
Josef Bacik32cb0842011-03-18 16:16:21 -04002119 /*
2120 * If this block group has some small extents we don't want to
2121 * use up all of our free slots in the cache with them; we want
Nicholas D Steeves01327612016-05-19 21:18:45 -04002122 * to reserve them for larger extents. However, if we have plenty
Josef Bacik32cb0842011-03-18 16:16:21 -04002123 * of cache left then go ahead and add them, no sense in adding
2124 * the overhead of a bitmap if we don't have to.
2125 */
Dennis Zhouf9bb6152020-01-02 16:26:44 -05002126 if (info->bytes <= fs_info->sectorsize * 8) {
2127 if (ctl->free_extents * 3 <= ctl->extents_thresh)
Li Zefan34d52cb2011-03-29 13:46:06 +08002128 return false;
Josef Bacik32cb0842011-03-18 16:16:21 -04002129 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08002130 return false;
Josef Bacik32cb0842011-03-18 16:16:21 -04002131 }
2132 }
Josef Bacik96303082009-07-13 21:29:25 -04002133
2134 /*
Josef Bacikdde57402013-02-12 14:07:51 -05002135 * The original block groups from mkfs can be really small, like 8
2136 * megabytes, so don't bother with a bitmap for those entries. However
2137 * some block groups can be smaller than what a bitmap would cover but
2138 * are still large enough that they could overflow the 32k memory limit,
2139 * so still allow those block groups to have a bitmap
2140 * entry.
Josef Bacik96303082009-07-13 21:29:25 -04002141 */
David Sterbab3470b52019-10-23 18:48:22 +02002142 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
Li Zefan34d52cb2011-03-29 13:46:06 +08002143 return false;
2144
2145 return true;
2146}
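
/*
 * Rough numbers for the heuristic above (added annotation, not part of the
 * original source; assumes a 4 KiB sectorsize): extents of 1 MiB
 * (FORCE_EXTENT_THRESHOLD) or more are always kept as extent entries so
 * large regions stay reclaimable from bitmaps. Below the extents threshold,
 * extents of up to 32 KiB (sectorsize * 8) are only kept as extent entries
 * while free_extents is no more than a third of extents_thresh, and larger
 * sub-1 MiB extents are kept as extent entries until the threshold itself
 * is hit. Block groups smaller than half of one bitmap's coverage never get
 * bitmap entries at all.
 */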
2147
David Sterba20e55062015-11-19 11:42:28 +01002148static const struct btrfs_free_space_op free_space_op = {
Josef Bacik2cdc3422011-05-27 14:07:49 -04002149 .use_bitmap = use_bitmap,
2150};
2151
Li Zefan34d52cb2011-03-29 13:46:06 +08002152static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
2153 struct btrfs_free_space *info)
2154{
2155 struct btrfs_free_space *bitmap_info;
David Sterba32da53862019-10-29 19:20:18 +01002156 struct btrfs_block_group *block_group = NULL;
Li Zefan34d52cb2011-03-29 13:46:06 +08002157 int added = 0;
Josef Bacik2cdc3422011-05-27 14:07:49 -04002158 u64 bytes, offset, bytes_added;
Dennis Zhouda080fe2019-12-13 16:22:13 -08002159 enum btrfs_trim_state trim_state;
Li Zefan34d52cb2011-03-29 13:46:06 +08002160 int ret;
Josef Bacik96303082009-07-13 21:29:25 -04002161
2162 bytes = info->bytes;
2163 offset = info->offset;
Dennis Zhouda080fe2019-12-13 16:22:13 -08002164 trim_state = info->trim_state;
Josef Bacik96303082009-07-13 21:29:25 -04002165
Li Zefan34d52cb2011-03-29 13:46:06 +08002166 if (!ctl->op->use_bitmap(ctl, info))
2167 return 0;
2168
Josef Bacik2cdc3422011-05-27 14:07:49 -04002169 if (ctl->op == &free_space_op)
2170 block_group = ctl->private;
Chris Mason38e87882011-06-10 16:36:57 -04002171again:
Josef Bacik2cdc3422011-05-27 14:07:49 -04002172 /*
2173 * Since we link bitmaps right into the cluster we need to see if we
2174 * have a cluster here, and if so and it has our bitmap we need to add
2175 * the free space to that bitmap.
2176 */
2177 if (block_group && !list_empty(&block_group->cluster_list)) {
2178 struct btrfs_free_cluster *cluster;
2179 struct rb_node *node;
2180 struct btrfs_free_space *entry;
2181
2182 cluster = list_entry(block_group->cluster_list.next,
2183 struct btrfs_free_cluster,
2184 block_group_list);
2185 spin_lock(&cluster->lock);
2186 node = rb_first(&cluster->root);
2187 if (!node) {
2188 spin_unlock(&cluster->lock);
Chris Mason38e87882011-06-10 16:36:57 -04002189 goto no_cluster_bitmap;
Josef Bacik2cdc3422011-05-27 14:07:49 -04002190 }
2191
2192 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2193 if (!entry->bitmap) {
2194 spin_unlock(&cluster->lock);
Chris Mason38e87882011-06-10 16:36:57 -04002195 goto no_cluster_bitmap;
Josef Bacik2cdc3422011-05-27 14:07:49 -04002196 }
2197
2198 if (entry->offset == offset_to_bitmap(ctl, offset)) {
Dennis Zhouda080fe2019-12-13 16:22:13 -08002199 bytes_added = add_bytes_to_bitmap(ctl, entry, offset,
2200 bytes, trim_state);
Josef Bacik2cdc3422011-05-27 14:07:49 -04002201 bytes -= bytes_added;
2202 offset += bytes_added;
2203 }
2204 spin_unlock(&cluster->lock);
2205 if (!bytes) {
2206 ret = 1;
2207 goto out;
2208 }
2209 }
Chris Mason38e87882011-06-10 16:36:57 -04002210
2211no_cluster_bitmap:
Li Zefan34d52cb2011-03-29 13:46:06 +08002212 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
Josef Bacik96303082009-07-13 21:29:25 -04002213 1, 0);
2214 if (!bitmap_info) {
Josef Bacikb12d6862013-08-26 17:14:08 -04002215 ASSERT(added == 0);
Josef Bacik96303082009-07-13 21:29:25 -04002216 goto new_bitmap;
2217 }
2218
Dennis Zhouda080fe2019-12-13 16:22:13 -08002219 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
2220 trim_state);
Josef Bacik2cdc3422011-05-27 14:07:49 -04002221 bytes -= bytes_added;
2222 offset += bytes_added;
2223 added = 0;
Josef Bacik96303082009-07-13 21:29:25 -04002224
2225 if (!bytes) {
2226 ret = 1;
2227 goto out;
2228 } else
2229 goto again;
2230
2231new_bitmap:
2232 if (info && info->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002233 add_new_bitmap(ctl, info, offset);
Josef Bacik96303082009-07-13 21:29:25 -04002234 added = 1;
2235 info = NULL;
2236 goto again;
2237 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08002238 spin_unlock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04002239
2240 /* no pre-allocated info, allocate a new one */
2241 if (!info) {
Josef Bacikdc89e982011-01-28 17:05:48 -05002242 info = kmem_cache_zalloc(btrfs_free_space_cachep,
2243 GFP_NOFS);
Josef Bacik96303082009-07-13 21:29:25 -04002244 if (!info) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002245 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04002246 ret = -ENOMEM;
2247 goto out;
2248 }
2249 }
2250
2251 /* allocate the bitmap */
Christophe Leroy3acd4852019-08-21 15:05:55 +00002252 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep,
2253 GFP_NOFS);
Dennis Zhouda080fe2019-12-13 16:22:13 -08002254 info->trim_state = BTRFS_TRIM_STATE_TRIMMED;
Li Zefan34d52cb2011-03-29 13:46:06 +08002255 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04002256 if (!info->bitmap) {
2257 ret = -ENOMEM;
2258 goto out;
2259 }
2260 goto again;
2261 }
2262
2263out:
2264 if (info) {
Christophe Leroy3acd4852019-08-21 15:05:55 +00002265 if (info->bitmap)
2266 kmem_cache_free(btrfs_free_space_bitmap_cachep,
2267 info->bitmap);
Josef Bacikdc89e982011-01-28 17:05:48 -05002268 kmem_cache_free(btrfs_free_space_cachep, info);
Josef Bacik96303082009-07-13 21:29:25 -04002269 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04002270
2271 return ret;
2272}
2273
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002274/*
2275 * Free space merging rules:
2276 * 1) Merge trimmed areas together
2277 * 2) Let untrimmed areas coalesce with trimmed areas
2278 * 3) Always pull neighboring regions from bitmaps
2279 *
2280 * The above rules are for when we merge free space based on btrfs_trim_state.
2281 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
2282 * same reason: to promote larger extent regions which makes life easier for
2283 * find_free_extent(). Rule 2 enables coalescing based on the common path
2284 * being returning free space from btrfs_finish_extent_commit(). So when free
2285 * space is trimmed, it will prevent aggregating trimmed new region and
2286 * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents
2287 * and provide find_free_extent() with the largest extents possible hoping for
2288 * the reuse path.
2289 */
Chris Mason945d8962011-05-22 12:33:42 -04002290static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08002291 struct btrfs_free_space *info, bool update_stat)
Josef Bacik0f9dd462008-09-23 13:14:11 -04002292{
Josef Bacikbf53d462020-07-27 10:28:05 -04002293 struct btrfs_free_space *left_info = NULL;
Li Zefan120d66e2010-11-09 14:56:50 +08002294 struct btrfs_free_space *right_info;
2295 bool merged = false;
2296 u64 offset = info->offset;
2297 u64 bytes = info->bytes;
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002298 const bool is_trimmed = btrfs_free_space_trimmed(info);
Josef Bacik6226cb02009-04-03 10:14:18 -04002299
Josef Bacik0f9dd462008-09-23 13:14:11 -04002300 /*
2301 * first we want to see if there is free space adjacent to the range we
2302 * are adding; if there is, remove that struct and add a new one to
2303 * cover the entire range
2304 */
Li Zefan34d52cb2011-03-29 13:46:06 +08002305 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
Josef Bacik96303082009-07-13 21:29:25 -04002306 if (right_info && rb_prev(&right_info->offset_index))
2307 left_info = rb_entry(rb_prev(&right_info->offset_index),
2308 struct btrfs_free_space, offset_index);
Josef Bacikbf53d462020-07-27 10:28:05 -04002309 else if (!right_info)
Li Zefan34d52cb2011-03-29 13:46:06 +08002310 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002311
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002312 /* See try_merge_free_space() comment. */
2313 if (right_info && !right_info->bitmap &&
2314 (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
Li Zefanf333adb2010-11-09 14:57:39 +08002315 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08002316 unlink_free_space(ctl, right_info);
Li Zefanf333adb2010-11-09 14:57:39 +08002317 else
Li Zefan34d52cb2011-03-29 13:46:06 +08002318 __unlink_free_space(ctl, right_info);
Josef Bacik6226cb02009-04-03 10:14:18 -04002319 info->bytes += right_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05002320 kmem_cache_free(btrfs_free_space_cachep, right_info);
Li Zefan120d66e2010-11-09 14:56:50 +08002321 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002322 }
2323
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002324 /* See try_merge_free_space() comment. */
Josef Bacik96303082009-07-13 21:29:25 -04002325 if (left_info && !left_info->bitmap &&
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002326 left_info->offset + left_info->bytes == offset &&
2327 (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
Li Zefanf333adb2010-11-09 14:57:39 +08002328 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08002329 unlink_free_space(ctl, left_info);
Li Zefanf333adb2010-11-09 14:57:39 +08002330 else
Li Zefan34d52cb2011-03-29 13:46:06 +08002331 __unlink_free_space(ctl, left_info);
Josef Bacik6226cb02009-04-03 10:14:18 -04002332 info->offset = left_info->offset;
2333 info->bytes += left_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05002334 kmem_cache_free(btrfs_free_space_cachep, left_info);
Li Zefan120d66e2010-11-09 14:56:50 +08002335 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002336 }
2337
Li Zefan120d66e2010-11-09 14:56:50 +08002338 return merged;
2339}
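
/*
 * Illustrative note (added annotation, not part of the original source):
 * per the merging rules above, a newly added untrimmed region absorbs
 * adjacent entries whether they are trimmed or not, and the merged entry
 * remains untrimmed; a newly added trimmed region only merges with
 * neighbours that are trimmed as well, so already-trimmed space never
 * silently swallows untrimmed space.
 */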
2340
Filipe Manana20005522014-08-29 13:35:13 +01002341static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
2342 struct btrfs_free_space *info,
2343 bool update_stat)
2344{
2345 struct btrfs_free_space *bitmap;
2346 unsigned long i;
2347 unsigned long j;
2348 const u64 end = info->offset + info->bytes;
2349 const u64 bitmap_offset = offset_to_bitmap(ctl, end);
2350 u64 bytes;
2351
2352 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2353 if (!bitmap)
2354 return false;
2355
2356 i = offset_to_bit(bitmap->offset, ctl->unit, end);
2357 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
2358 if (j == i)
2359 return false;
2360 bytes = (j - i) * ctl->unit;
2361 info->bytes += bytes;
2362
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002363 /* See try_merge_free_space() comment. */
2364 if (!btrfs_free_space_trimmed(bitmap))
2365 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2366
Filipe Manana20005522014-08-29 13:35:13 +01002367 if (update_stat)
2368 bitmap_clear_bits(ctl, bitmap, end, bytes);
2369 else
2370 __bitmap_clear_bits(ctl, bitmap, end, bytes);
2371
2372 if (!bitmap->bytes)
2373 free_bitmap(ctl, bitmap);
2374
2375 return true;
2376}
2377
2378static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl,
2379 struct btrfs_free_space *info,
2380 bool update_stat)
2381{
2382 struct btrfs_free_space *bitmap;
2383 u64 bitmap_offset;
2384 unsigned long i;
2385 unsigned long j;
2386 unsigned long prev_j;
2387 u64 bytes;
2388
2389 bitmap_offset = offset_to_bitmap(ctl, info->offset);
2390 /* If we're on a boundary, try the previous logical bitmap. */
2391 if (bitmap_offset == info->offset) {
2392 if (info->offset == 0)
2393 return false;
2394 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1);
2395 }
2396
2397 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
2398 if (!bitmap)
2399 return false;
2400
2401 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1;
2402 j = 0;
2403 prev_j = (unsigned long)-1;
2404 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) {
2405 if (j > i)
2406 break;
2407 prev_j = j;
2408 }
2409 if (prev_j == i)
2410 return false;
2411
2412 if (prev_j == (unsigned long)-1)
2413 bytes = (i + 1) * ctl->unit;
2414 else
2415 bytes = (i - prev_j) * ctl->unit;
2416
2417 info->offset -= bytes;
2418 info->bytes += bytes;
2419
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002420 /* See try_merge_free_space() comment. */
2421 if (!btrfs_free_space_trimmed(bitmap))
2422 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2423
Filipe Manana20005522014-08-29 13:35:13 +01002424 if (update_stat)
2425 bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2426 else
2427 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes);
2428
2429 if (!bitmap->bytes)
2430 free_bitmap(ctl, bitmap);
2431
2432 return true;
2433}
2434
2435/*
2436 * We prefer always to allocate from extent entries, both for clustered and
2437 * non-clustered allocation requests. So when attempting to add a new extent
2438 * entry, try to see if there's adjacent free space in bitmap entries, and if
2439 * there is, migrate that space from the bitmaps to the extent.
 2440 * This way we get better chances of satisfying space allocation requests
2441 * because we attempt to satisfy them based on a single cache entry, and never
2442 * on 2 or more entries - even if the entries represent a contiguous free space
2443 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry
2444 * ends).
2445 */
2446static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl,
2447 struct btrfs_free_space *info,
2448 bool update_stat)
2449{
2450 /*
2451 * Only work with disconnected entries, as we can change their offset,
 2452 * and they must be extent entries.
2453 */
2454 ASSERT(!info->bitmap);
2455 ASSERT(RB_EMPTY_NODE(&info->offset_index));
2456
2457 if (ctl->total_bitmaps > 0) {
2458 bool stole_end;
2459 bool stole_front = false;
2460
2461 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat);
2462 if (ctl->total_bitmaps > 0)
2463 stole_front = steal_from_bitmap_to_front(ctl, info,
2464 update_stat);
2465
2466 if (stole_end || stole_front)
2467 try_merge_free_space(ctl, info, update_stat);
2468 }
2469}
2470
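/*
 * Add the range [offset, offset + bytes) back to the free space cache.
 * The new space is merged with adjacent extent entries when possible, may
 * be stored in a bitmap depending on the ctl's thresholds, and is queued
 * for async discard work if it is not already trimmed.
 */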
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002471int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
2472 struct btrfs_free_space_ctl *ctl,
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002473 u64 offset, u64 bytes,
2474 enum btrfs_trim_state trim_state)
Li Zefan120d66e2010-11-09 14:56:50 +08002475{
Dennis Zhoub0643e52019-12-13 16:22:14 -08002476 struct btrfs_block_group *block_group = ctl->private;
Li Zefan120d66e2010-11-09 14:56:50 +08002477 struct btrfs_free_space *info;
2478 int ret = 0;
Dennis Zhou7fe6d452020-01-02 16:26:39 -05002479 u64 filter_bytes = bytes;
Li Zefan120d66e2010-11-09 14:56:50 +08002480
Naohiro Aota169e0da2021-02-04 19:21:52 +09002481 ASSERT(!btrfs_is_zoned(fs_info));
2482
Josef Bacikdc89e982011-01-28 17:05:48 -05002483 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
Li Zefan120d66e2010-11-09 14:56:50 +08002484 if (!info)
2485 return -ENOMEM;
2486
2487 info->offset = offset;
2488 info->bytes = bytes;
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002489 info->trim_state = trim_state;
Filipe Manana20005522014-08-29 13:35:13 +01002490 RB_CLEAR_NODE(&info->offset_index);
Li Zefan120d66e2010-11-09 14:56:50 +08002491
Li Zefan34d52cb2011-03-29 13:46:06 +08002492 spin_lock(&ctl->tree_lock);
Li Zefan120d66e2010-11-09 14:56:50 +08002493
Li Zefan34d52cb2011-03-29 13:46:06 +08002494 if (try_merge_free_space(ctl, info, true))
Li Zefan120d66e2010-11-09 14:56:50 +08002495 goto link;
2496
2497 /*
 2498	 * If there was no extent directly to the left or right of this new
 2499	 * extent, then we know we're going to have to allocate a new extent,
 2500	 * so before we do that, see if we need to drop this into a bitmap.
2501 */
Li Zefan34d52cb2011-03-29 13:46:06 +08002502 ret = insert_into_bitmap(ctl, info);
Li Zefan120d66e2010-11-09 14:56:50 +08002503 if (ret < 0) {
2504 goto out;
2505 } else if (ret) {
2506 ret = 0;
2507 goto out;
2508 }
2509link:
Filipe Manana20005522014-08-29 13:35:13 +01002510 /*
2511 * Only steal free space from adjacent bitmaps if we're sure we're not
2512 * going to add the new free space to existing bitmap entries - because
2513 * that would mean unnecessary work that would be reverted. Therefore
2514 * attempt to steal space from bitmaps if we're adding an extent entry.
2515 */
2516 steal_from_bitmap(ctl, info, true);
2517
Dennis Zhou7fe6d452020-01-02 16:26:39 -05002518 filter_bytes = max(filter_bytes, info->bytes);
2519
Li Zefan34d52cb2011-03-29 13:46:06 +08002520 ret = link_free_space(ctl, info);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002521 if (ret)
Josef Bacikdc89e982011-01-28 17:05:48 -05002522 kmem_cache_free(btrfs_free_space_cachep, info);
Josef Bacik96303082009-07-13 21:29:25 -04002523out:
Josef Bacik66b53ba2020-10-23 09:58:07 -04002524 btrfs_discard_update_discardable(block_group);
Li Zefan34d52cb2011-03-29 13:46:06 +08002525 spin_unlock(&ctl->tree_lock);
Josef Bacik6226cb02009-04-03 10:14:18 -04002526
Josef Bacik0f9dd462008-09-23 13:14:11 -04002527 if (ret) {
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002528		btrfs_crit(fs_info, "unable to add free space: %d", ret);
Josef Bacikb12d6862013-08-26 17:14:08 -04002529 ASSERT(ret != -EEXIST);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002530 }
2531
Dennis Zhou7fe6d452020-01-02 16:26:39 -05002532 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) {
2533 btrfs_discard_check_filter(block_group, filter_bytes);
Dennis Zhoub0643e52019-12-13 16:22:14 -08002534 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
Dennis Zhou7fe6d452020-01-02 16:26:39 -05002535 }
Dennis Zhoub0643e52019-12-13 16:22:14 -08002536
Josef Bacik0f9dd462008-09-23 13:14:11 -04002537 return ret;
2538}
2539
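/*
 * Zoned variant of adding back free space.  On a sequential zone, space
 * below the current allocation offset cannot be reused until the zone is
 * reset, so only the part at or beyond alloc_offset (or the whole range
 * when @used is false, in which case the allocation offset is rewound) is
 * added to free_space; the rest is accounted as zone_unusable.
 */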
Naohiro Aota169e0da2021-02-04 19:21:52 +09002540static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
2541 u64 bytenr, u64 size, bool used)
2542{
Johannes Thumshirn18bb8bb2021-04-19 16:41:02 +09002543 struct btrfs_fs_info *fs_info = block_group->fs_info;
Naohiro Aota169e0da2021-02-04 19:21:52 +09002544 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2545 u64 offset = bytenr - block_group->start;
2546 u64 to_free, to_unusable;
2547
2548 spin_lock(&ctl->tree_lock);
2549 if (!used)
2550 to_free = size;
2551 else if (offset >= block_group->alloc_offset)
2552 to_free = size;
2553 else if (offset + size <= block_group->alloc_offset)
2554 to_free = 0;
2555 else
2556 to_free = offset + size - block_group->alloc_offset;
2557 to_unusable = size - to_free;
2558
2559 ctl->free_space += to_free;
Naohiro Aotabadae9c2021-03-03 17:55:48 +09002560 /*
2561 * If the block group is read-only, we should account freed space into
2562 * bytes_readonly.
2563 */
2564 if (!block_group->ro)
2565 block_group->zone_unusable += to_unusable;
Naohiro Aota169e0da2021-02-04 19:21:52 +09002566 spin_unlock(&ctl->tree_lock);
2567 if (!used) {
2568 spin_lock(&block_group->lock);
2569 block_group->alloc_offset -= size;
2570 spin_unlock(&block_group->lock);
2571 }
2572
 2573	/* The whole region is now unusable. Mark it as unused or queue it for reclaim. */
Johannes Thumshirn18bb8bb2021-04-19 16:41:02 +09002574 if (block_group->zone_unusable == block_group->length) {
Naohiro Aota169e0da2021-02-04 19:21:52 +09002575 btrfs_mark_bg_unused(block_group);
Johannes Thumshirn18bb8bb2021-04-19 16:41:02 +09002576 } else if (block_group->zone_unusable >=
2577 div_factor_fine(block_group->length,
2578 fs_info->bg_reclaim_threshold)) {
2579 btrfs_mark_bg_to_reclaim(block_group);
2580 }
Naohiro Aota169e0da2021-02-04 19:21:52 +09002581
2582 return 0;
2583}
2584
David Sterba32da53862019-10-29 19:20:18 +01002585int btrfs_add_free_space(struct btrfs_block_group *block_group,
Josef Bacik478b4d92019-06-20 15:37:43 -04002586 u64 bytenr, u64 size)
2587{
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002588 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2589
Naohiro Aota169e0da2021-02-04 19:21:52 +09002590 if (btrfs_is_zoned(block_group->fs_info))
2591 return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2592 true);
2593
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002594 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC))
2595 trim_state = BTRFS_TRIM_STATE_TRIMMED;
2596
Josef Bacik478b4d92019-06-20 15:37:43 -04002597 return __btrfs_add_free_space(block_group->fs_info,
2598 block_group->free_space_ctl,
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002599 bytenr, size, trim_state);
Josef Bacik478b4d92019-06-20 15:37:43 -04002600}
2601
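/*
 * Add back free space which was never actually used.  For zoned
 * filesystems the whole range stays usable (none of it becomes
 * zone_unusable and the allocation offset is rewound); otherwise this is
 * the same as btrfs_add_free_space().
 */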
Naohiro Aota169e0da2021-02-04 19:21:52 +09002602int btrfs_add_free_space_unused(struct btrfs_block_group *block_group,
2603 u64 bytenr, u64 size)
2604{
2605 if (btrfs_is_zoned(block_group->fs_info))
2606 return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2607 false);
2608
2609 return btrfs_add_free_space(block_group, bytenr, size);
2610}
2611
Dennis Zhoub0643e52019-12-13 16:22:14 -08002612/*
 2613 * This is a subtle distinction because when adding free space back in general,
 2614 * we want it to be added as untrimmed so async discard can process it. But
 2615 * when adding it while loading a block group, we want to consider it trimmed.
2616 */
2617int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
2618 u64 bytenr, u64 size)
2619{
2620 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
2621
Naohiro Aota169e0da2021-02-04 19:21:52 +09002622 if (btrfs_is_zoned(block_group->fs_info))
2623 return __btrfs_add_free_space_zoned(block_group, bytenr, size,
2624 true);
2625
Dennis Zhoub0643e52019-12-13 16:22:14 -08002626 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) ||
2627 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
2628 trim_state = BTRFS_TRIM_STATE_TRIMMED;
2629
2630 return __btrfs_add_free_space(block_group->fs_info,
2631 block_group->free_space_ctl,
2632 bytenr, size, trim_state);
2633}
2634
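/*
 * Remove the range [offset, offset + bytes) from the free space cache.
 * The range may span several extent entries and/or bitmaps, so we loop
 * (via the 'again' label) until every byte has been accounted for.
 */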
David Sterba32da53862019-10-29 19:20:18 +01002635int btrfs_remove_free_space(struct btrfs_block_group *block_group,
Josef Bacik6226cb02009-04-03 10:14:18 -04002636 u64 offset, u64 bytes)
Josef Bacik0f9dd462008-09-23 13:14:11 -04002637{
Li Zefan34d52cb2011-03-29 13:46:06 +08002638 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002639 struct btrfs_free_space *info;
Josef Bacikb0175112012-12-18 11:39:19 -05002640 int ret;
2641 bool re_search = false;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002642
Naohiro Aota011b41b2021-02-04 19:21:55 +09002643 if (btrfs_is_zoned(block_group->fs_info)) {
2644 /*
 2645		 * This can happen with conventional zones when replaying the log.
 2646		 * Since the allocation info of tree-log nodes is not recorded
 2647		 * in the extent tree, calculate_alloc_pointer() failed to
 2648		 * advance the allocation pointer past the last allocated tree-log
 2649		 * node blocks.
 2650		 *
 2651		 * This function is called from
 2652		 * btrfs_pin_extent_for_log_replay() when replaying the log.
 2653		 * Advance the pointer so we do not overwrite the tree-log nodes.
2654 */
2655 if (block_group->alloc_offset < offset + bytes)
2656 block_group->alloc_offset = offset + bytes;
Naohiro Aota169e0da2021-02-04 19:21:52 +09002657 return 0;
Naohiro Aota011b41b2021-02-04 19:21:55 +09002658 }
Naohiro Aota169e0da2021-02-04 19:21:52 +09002659
Li Zefan34d52cb2011-03-29 13:46:06 +08002660 spin_lock(&ctl->tree_lock);
Josef Bacik6226cb02009-04-03 10:14:18 -04002661
Josef Bacik96303082009-07-13 21:29:25 -04002662again:
Josef Bacikb0175112012-12-18 11:39:19 -05002663 ret = 0;
Josef Bacikbdb7d302012-06-27 15:10:56 -04002664 if (!bytes)
2665 goto out_lock;
2666
Li Zefan34d52cb2011-03-29 13:46:06 +08002667 info = tree_search_offset(ctl, offset, 0, 0);
Josef Bacik96303082009-07-13 21:29:25 -04002668 if (!info) {
Josef Bacik6606bb92009-07-31 11:03:58 -04002669 /*
 2670		 * We didn't find an extent that matched the space we wanted
 2671		 * to remove; look for a bitmap instead.
2672 */
Li Zefan34d52cb2011-03-29 13:46:06 +08002673 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
Josef Bacik6606bb92009-07-31 11:03:58 -04002674 1, 0);
2675 if (!info) {
Josef Bacikb0175112012-12-18 11:39:19 -05002676 /*
2677 * If we found a partial bit of our free space in a
2678 * bitmap but then couldn't find the other part this may
2679 * be a problem, so WARN about it.
Chris Mason24a70312011-11-21 09:39:11 -05002680 */
Josef Bacikb0175112012-12-18 11:39:19 -05002681 WARN_ON(re_search);
Josef Bacik6606bb92009-07-31 11:03:58 -04002682 goto out_lock;
2683 }
Josef Bacik96303082009-07-13 21:29:25 -04002684 }
2685
Josef Bacikb0175112012-12-18 11:39:19 -05002686 re_search = false;
Josef Bacikbdb7d302012-06-27 15:10:56 -04002687 if (!info->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002688 unlink_free_space(ctl, info);
Josef Bacikbdb7d302012-06-27 15:10:56 -04002689 if (offset == info->offset) {
2690 u64 to_free = min(bytes, info->bytes);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002691
Josef Bacikbdb7d302012-06-27 15:10:56 -04002692 info->bytes -= to_free;
2693 info->offset += to_free;
2694 if (info->bytes) {
2695 ret = link_free_space(ctl, info);
2696 WARN_ON(ret);
2697 } else {
2698 kmem_cache_free(btrfs_free_space_cachep, info);
2699 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04002700
Josef Bacikbdb7d302012-06-27 15:10:56 -04002701 offset += to_free;
2702 bytes -= to_free;
2703 goto again;
2704 } else {
2705 u64 old_end = info->bytes + info->offset;
Chris Mason9b49c9b2008-09-24 11:23:25 -04002706
Josef Bacikbdb7d302012-06-27 15:10:56 -04002707 info->bytes = offset - info->offset;
Li Zefan34d52cb2011-03-29 13:46:06 +08002708 ret = link_free_space(ctl, info);
Josef Bacik96303082009-07-13 21:29:25 -04002709 WARN_ON(ret);
2710 if (ret)
2711 goto out_lock;
Josef Bacik96303082009-07-13 21:29:25 -04002712
Josef Bacikbdb7d302012-06-27 15:10:56 -04002713 /* Not enough bytes in this entry to satisfy us */
2714 if (old_end < offset + bytes) {
2715 bytes -= old_end - offset;
2716 offset = old_end;
2717 goto again;
2718 } else if (old_end == offset + bytes) {
2719 /* all done */
2720 goto out_lock;
2721 }
2722 spin_unlock(&ctl->tree_lock);
2723
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002724 ret = __btrfs_add_free_space(block_group->fs_info, ctl,
2725 offset + bytes,
2726 old_end - (offset + bytes),
2727 info->trim_state);
Josef Bacikbdb7d302012-06-27 15:10:56 -04002728 WARN_ON(ret);
2729 goto out;
2730 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04002731 }
Josef Bacik96303082009-07-13 21:29:25 -04002732
Li Zefan34d52cb2011-03-29 13:46:06 +08002733 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
Josef Bacikb0175112012-12-18 11:39:19 -05002734 if (ret == -EAGAIN) {
2735 re_search = true;
Josef Bacik96303082009-07-13 21:29:25 -04002736 goto again;
Josef Bacikb0175112012-12-18 11:39:19 -05002737 }
Josef Bacik96303082009-07-13 21:29:25 -04002738out_lock:
Josef Bacik66b53ba2020-10-23 09:58:07 -04002739 btrfs_discard_update_discardable(block_group);
Li Zefan34d52cb2011-03-29 13:46:06 +08002740 spin_unlock(&ctl->tree_lock);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002741out:
Josef Bacik25179202008-10-29 14:49:05 -04002742 return ret;
2743}
2744
David Sterba32da53862019-10-29 19:20:18 +01002745void btrfs_dump_free_space(struct btrfs_block_group *block_group,
Josef Bacik0f9dd462008-09-23 13:14:11 -04002746 u64 bytes)
2747{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002748 struct btrfs_fs_info *fs_info = block_group->fs_info;
Li Zefan34d52cb2011-03-29 13:46:06 +08002749 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002750 struct btrfs_free_space *info;
2751 struct rb_node *n;
2752 int count = 0;
2753
Naohiro Aota169e0da2021-02-04 19:21:52 +09002754 /*
 2755	 * Zoned btrfs does not use the free space tree or clusters. Just
 2756	 * print out the free space after the allocation offset.
2757 */
2758 if (btrfs_is_zoned(fs_info)) {
2759 btrfs_info(fs_info, "free space %llu",
2760 block_group->length - block_group->alloc_offset);
2761 return;
2762 }
2763
Filipe Manana9084cb62018-10-22 10:43:06 +01002764 spin_lock(&ctl->tree_lock);
Li Zefan34d52cb2011-03-29 13:46:06 +08002765 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04002766 info = rb_entry(n, struct btrfs_free_space, offset_index);
Liu Bof6175ef2012-07-06 03:31:36 -06002767 if (info->bytes >= bytes && !block_group->ro)
Josef Bacik0f9dd462008-09-23 13:14:11 -04002768 count++;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002769 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
Frank Holtonefe120a2013-12-20 11:37:06 -05002770 info->offset, info->bytes,
Josef Bacik96303082009-07-13 21:29:25 -04002771 (info->bitmap) ? "yes" : "no");
Josef Bacik0f9dd462008-09-23 13:14:11 -04002772 }
Filipe Manana9084cb62018-10-22 10:43:06 +01002773 spin_unlock(&ctl->tree_lock);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002774 btrfs_info(fs_info, "block group has cluster?: %s",
Josef Bacik96303082009-07-13 21:29:25 -04002775 list_empty(&block_group->cluster_list) ? "no" : "yes");
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002776 btrfs_info(fs_info,
Frank Holtonefe120a2013-12-20 11:37:06 -05002777		   "%d blocks of free space at or bigger than the requested bytes", count);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002778}
2779
Josef Bacikcd799092020-10-23 09:58:08 -04002780void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
2781 struct btrfs_free_space_ctl *ctl)
Josef Bacik0f9dd462008-09-23 13:14:11 -04002782{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002783 struct btrfs_fs_info *fs_info = block_group->fs_info;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002784
Li Zefan34d52cb2011-03-29 13:46:06 +08002785 spin_lock_init(&ctl->tree_lock);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002786 ctl->unit = fs_info->sectorsize;
David Sterbab3470b52019-10-23 18:48:22 +02002787 ctl->start = block_group->start;
Li Zefan34d52cb2011-03-29 13:46:06 +08002788 ctl->private = block_group;
2789 ctl->op = &free_space_op;
Filipe Manana55507ce2014-12-01 17:04:09 +00002790 INIT_LIST_HEAD(&ctl->trimming_ranges);
2791 mutex_init(&ctl->cache_writeout_mutex);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002792
Li Zefan34d52cb2011-03-29 13:46:06 +08002793 /*
 2794	 * We only want to have 32k of RAM per block group for keeping
 2795	 * track of free space. If we pass 1/2 of that we want to
 2796	 * start converting things over to using bitmaps.
2797 */
Byongho Leeee221842015-12-15 01:42:10 +09002798 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space);
Josef Bacik0f9dd462008-09-23 13:14:11 -04002799}
2800
Chris Masonfa9c0d792009-04-03 09:47:43 -04002801/*
2802 * for a given cluster, put all of its extents back into the free
2803 * space cache. If the block group passed doesn't match the block group
2804 * pointed to by the cluster, someone else raced in and freed the
2805 * cluster already. In that case, we just return without changing anything
2806 */
Anand Jain69b0e092020-06-03 18:10:18 +08002807static void __btrfs_return_cluster_to_free_space(
David Sterba32da53862019-10-29 19:20:18 +01002808 struct btrfs_block_group *block_group,
Chris Masonfa9c0d792009-04-03 09:47:43 -04002809 struct btrfs_free_cluster *cluster)
2810{
Li Zefan34d52cb2011-03-29 13:46:06 +08002811 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002812 struct btrfs_free_space *entry;
2813 struct rb_node *node;
2814
2815 spin_lock(&cluster->lock);
Josef Bacik95c85fb2021-01-25 16:42:35 -05002816 if (cluster->block_group != block_group) {
2817 spin_unlock(&cluster->lock);
2818 return;
2819 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002820
Josef Bacik96303082009-07-13 21:29:25 -04002821 cluster->block_group = NULL;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002822 cluster->window_start = 0;
Josef Bacik96303082009-07-13 21:29:25 -04002823 list_del_init(&cluster->block_group_list);
Josef Bacik96303082009-07-13 21:29:25 -04002824
Chris Masonfa9c0d792009-04-03 09:47:43 -04002825 node = rb_first(&cluster->root);
Josef Bacik96303082009-07-13 21:29:25 -04002826 while (node) {
Josef Bacik4e69b592011-03-21 10:11:24 -04002827 bool bitmap;
2828
Chris Masonfa9c0d792009-04-03 09:47:43 -04002829 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2830 node = rb_next(&entry->offset_index);
2831 rb_erase(&entry->offset_index, &cluster->root);
Filipe Manana20005522014-08-29 13:35:13 +01002832 RB_CLEAR_NODE(&entry->offset_index);
Josef Bacik4e69b592011-03-21 10:11:24 -04002833
2834 bitmap = (entry->bitmap != NULL);
Filipe Manana20005522014-08-29 13:35:13 +01002835 if (!bitmap) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08002836 /* Merging treats extents as if they were new */
Dennis Zhou5dc7c102019-12-13 16:22:21 -08002837 if (!btrfs_free_space_trimmed(entry)) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08002838 ctl->discardable_extents[BTRFS_STAT_CURR]--;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08002839 ctl->discardable_bytes[BTRFS_STAT_CURR] -=
2840 entry->bytes;
2841 }
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08002842
Li Zefan34d52cb2011-03-29 13:46:06 +08002843 try_merge_free_space(ctl, entry, false);
Filipe Manana20005522014-08-29 13:35:13 +01002844 steal_from_bitmap(ctl, entry, false);
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08002845
2846 /* As we insert directly, update these statistics */
Dennis Zhou5dc7c102019-12-13 16:22:21 -08002847 if (!btrfs_free_space_trimmed(entry)) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08002848 ctl->discardable_extents[BTRFS_STAT_CURR]++;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08002849 ctl->discardable_bytes[BTRFS_STAT_CURR] +=
2850 entry->bytes;
2851 }
Filipe Manana20005522014-08-29 13:35:13 +01002852 }
Li Zefan34d52cb2011-03-29 13:46:06 +08002853 tree_insert_offset(&ctl->free_space_offset,
Josef Bacik4e69b592011-03-21 10:11:24 -04002854 entry->offset, &entry->offset_index, bitmap);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002855 }
Eric Paris6bef4d32010-02-23 19:43:04 +00002856 cluster->root = RB_ROOT;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002857 spin_unlock(&cluster->lock);
Josef Bacik96303082009-07-13 21:29:25 -04002858 btrfs_put_block_group(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002859}
2860
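/*
 * Release every entry in the free space cache's offset tree.  The caller
 * must already hold ctl->tree_lock; the lock is briefly dropped via
 * cond_resched_lock() between entries.
 */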
Eric Sandeen48a3b632013-04-25 20:41:01 +00002861static void __btrfs_remove_free_space_cache_locked(
2862 struct btrfs_free_space_ctl *ctl)
Josef Bacik0f9dd462008-09-23 13:14:11 -04002863{
2864 struct btrfs_free_space *info;
2865 struct rb_node *node;
Li Zefan581bb052011-04-20 10:06:11 +08002866
Li Zefan581bb052011-04-20 10:06:11 +08002867 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2868 info = rb_entry(node, struct btrfs_free_space, offset_index);
Josef Bacik9b90f512011-06-24 16:02:51 +00002869 if (!info->bitmap) {
2870 unlink_free_space(ctl, info);
2871 kmem_cache_free(btrfs_free_space_cachep, info);
2872 } else {
2873 free_bitmap(ctl, info);
2874 }
David Sterba351810c2015-01-08 15:20:54 +01002875
2876 cond_resched_lock(&ctl->tree_lock);
Li Zefan581bb052011-04-20 10:06:11 +08002877 }
Chris Mason09655372011-05-21 09:27:38 -04002878}
2879
2880void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2881{
2882 spin_lock(&ctl->tree_lock);
2883 __btrfs_remove_free_space_cache_locked(ctl);
Dennis Zhou27f0afc2020-01-02 16:26:45 -05002884 if (ctl->private)
Josef Bacik66b53ba2020-10-23 09:58:07 -04002885 btrfs_discard_update_discardable(ctl->private);
Li Zefan581bb052011-04-20 10:06:11 +08002886 spin_unlock(&ctl->tree_lock);
2887}
2888
David Sterba32da53862019-10-29 19:20:18 +01002889void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
Josef Bacik0f9dd462008-09-23 13:14:11 -04002890{
Li Zefan34d52cb2011-03-29 13:46:06 +08002891 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002892 struct btrfs_free_cluster *cluster;
Josef Bacik96303082009-07-13 21:29:25 -04002893 struct list_head *head;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002894
Li Zefan34d52cb2011-03-29 13:46:06 +08002895 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04002896 while ((head = block_group->cluster_list.next) !=
2897 &block_group->cluster_list) {
2898 cluster = list_entry(head, struct btrfs_free_cluster,
2899 block_group_list);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002900
2901 WARN_ON(cluster->block_group != block_group);
2902 __btrfs_return_cluster_to_free_space(block_group, cluster);
David Sterba351810c2015-01-08 15:20:54 +01002903
2904 cond_resched_lock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002905 }
Chris Mason09655372011-05-21 09:27:38 -04002906 __btrfs_remove_free_space_cache_locked(ctl);
Josef Bacik66b53ba2020-10-23 09:58:07 -04002907 btrfs_discard_update_discardable(block_group);
Li Zefan34d52cb2011-03-29 13:46:06 +08002908 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002909
Josef Bacik0f9dd462008-09-23 13:14:11 -04002910}
2911
Dennis Zhou6e80d4f2019-12-13 16:22:15 -08002912/**
2913 * btrfs_is_free_space_trimmed - see if everything is trimmed
2914 * @block_group: block_group of interest
2915 *
2916 * Walk @block_group's free space rb_tree to determine if everything is trimmed.
2917 */
2918bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group)
2919{
2920 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2921 struct btrfs_free_space *info;
2922 struct rb_node *node;
2923 bool ret = true;
2924
2925 spin_lock(&ctl->tree_lock);
2926 node = rb_first(&ctl->free_space_offset);
2927
2928 while (node) {
2929 info = rb_entry(node, struct btrfs_free_space, offset_index);
2930
2931 if (!btrfs_free_space_trimmed(info)) {
2932 ret = false;
2933 break;
2934 }
2935
2936 node = rb_next(node);
2937 }
2938
2939 spin_unlock(&ctl->tree_lock);
2940 return ret;
2941}
2942
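/*
 * Find and remove @bytes (+ @empty_size) of free space from the block
 * group, using find_free_space() to honor the requested alignment.  When
 * allocating from an extent entry, any alignment gap at its front is
 * handed back to the free space cache with its original trim state.
 */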
David Sterba32da53862019-10-29 19:20:18 +01002943u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
Miao Xiea4820392013-09-09 13:19:42 +08002944 u64 offset, u64 bytes, u64 empty_size,
2945 u64 *max_extent_size)
Josef Bacik0f9dd462008-09-23 13:14:11 -04002946{
Li Zefan34d52cb2011-03-29 13:46:06 +08002947 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Dennis Zhou9ddf6482020-01-02 16:26:41 -05002948 struct btrfs_discard_ctl *discard_ctl =
2949 &block_group->fs_info->discard_ctl;
Josef Bacik6226cb02009-04-03 10:14:18 -04002950 struct btrfs_free_space *entry = NULL;
Josef Bacik96303082009-07-13 21:29:25 -04002951 u64 bytes_search = bytes + empty_size;
Josef Bacik6226cb02009-04-03 10:14:18 -04002952 u64 ret = 0;
David Woodhouse53b381b2013-01-29 18:40:14 -05002953 u64 align_gap = 0;
2954 u64 align_gap_len = 0;
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002955 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
Josef Bacik0f9dd462008-09-23 13:14:11 -04002956
Naohiro Aota2eda5702021-02-04 19:21:53 +09002957 ASSERT(!btrfs_is_zoned(block_group->fs_info));
2958
Li Zefan34d52cb2011-03-29 13:46:06 +08002959 spin_lock(&ctl->tree_lock);
David Woodhouse53b381b2013-01-29 18:40:14 -05002960 entry = find_free_space(ctl, &offset, &bytes_search,
Miao Xiea4820392013-09-09 13:19:42 +08002961 block_group->full_stripe_len, max_extent_size);
Josef Bacik6226cb02009-04-03 10:14:18 -04002962 if (!entry)
Josef Bacik96303082009-07-13 21:29:25 -04002963 goto out;
2964
2965 ret = offset;
2966 if (entry->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002967 bitmap_clear_bits(ctl, entry, offset, bytes);
Dennis Zhou9ddf6482020-01-02 16:26:41 -05002968
2969 if (!btrfs_free_space_trimmed(entry))
2970 atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2971
Li Zefanedf6e2d2010-11-09 14:50:07 +08002972 if (!entry->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08002973 free_bitmap(ctl, entry);
Josef Bacik96303082009-07-13 21:29:25 -04002974 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08002975 unlink_free_space(ctl, entry);
David Woodhouse53b381b2013-01-29 18:40:14 -05002976 align_gap_len = offset - entry->offset;
2977 align_gap = entry->offset;
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002978 align_gap_trim_state = entry->trim_state;
David Woodhouse53b381b2013-01-29 18:40:14 -05002979
Dennis Zhou9ddf6482020-01-02 16:26:41 -05002980 if (!btrfs_free_space_trimmed(entry))
2981 atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
2982
David Woodhouse53b381b2013-01-29 18:40:14 -05002983 entry->offset = offset + bytes;
2984 WARN_ON(entry->bytes < bytes + align_gap_len);
2985
2986 entry->bytes -= bytes + align_gap_len;
Josef Bacik6226cb02009-04-03 10:14:18 -04002987 if (!entry->bytes)
Josef Bacikdc89e982011-01-28 17:05:48 -05002988 kmem_cache_free(btrfs_free_space_cachep, entry);
Josef Bacik6226cb02009-04-03 10:14:18 -04002989 else
Li Zefan34d52cb2011-03-29 13:46:06 +08002990 link_free_space(ctl, entry);
Josef Bacik6226cb02009-04-03 10:14:18 -04002991 }
Josef Bacik96303082009-07-13 21:29:25 -04002992out:
Josef Bacik66b53ba2020-10-23 09:58:07 -04002993 btrfs_discard_update_discardable(block_group);
Li Zefan34d52cb2011-03-29 13:46:06 +08002994 spin_unlock(&ctl->tree_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04002995
David Woodhouse53b381b2013-01-29 18:40:14 -05002996 if (align_gap_len)
Jeff Mahoneyab8d0fc2016-09-20 10:05:02 -04002997 __btrfs_add_free_space(block_group->fs_info, ctl,
Dennis Zhoua7ccb252019-12-13 16:22:12 -08002998 align_gap, align_gap_len,
2999 align_gap_trim_state);
Josef Bacik0f9dd462008-09-23 13:14:11 -04003000 return ret;
3001}
Chris Masonfa9c0d792009-04-03 09:47:43 -04003002
3003/*
3004 * given a cluster, put all of its extents back into the free space
3005 * cache. If a block group is passed, this function will only free
3006 * a cluster that belongs to the passed block group.
3007 *
3008 * Otherwise, it'll get a reference on the block group pointed to by the
3009 * cluster and remove the cluster from it.
3010 */
Anand Jain69b0e092020-06-03 18:10:18 +08003011void btrfs_return_cluster_to_free_space(
David Sterba32da53862019-10-29 19:20:18 +01003012 struct btrfs_block_group *block_group,
Chris Masonfa9c0d792009-04-03 09:47:43 -04003013 struct btrfs_free_cluster *cluster)
3014{
Li Zefan34d52cb2011-03-29 13:46:06 +08003015 struct btrfs_free_space_ctl *ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04003016
3017 /* first, get a safe pointer to the block group */
3018 spin_lock(&cluster->lock);
3019 if (!block_group) {
3020 block_group = cluster->block_group;
3021 if (!block_group) {
3022 spin_unlock(&cluster->lock);
Anand Jain69b0e092020-06-03 18:10:18 +08003023 return;
Chris Masonfa9c0d792009-04-03 09:47:43 -04003024 }
3025 } else if (cluster->block_group != block_group) {
3026 /* someone else has already freed it don't redo their work */
3027 spin_unlock(&cluster->lock);
Anand Jain69b0e092020-06-03 18:10:18 +08003028 return;
Chris Masonfa9c0d792009-04-03 09:47:43 -04003029 }
Anand Jainb5790d52020-06-03 18:10:20 +08003030 btrfs_get_block_group(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04003031 spin_unlock(&cluster->lock);
3032
Li Zefan34d52cb2011-03-29 13:46:06 +08003033 ctl = block_group->free_space_ctl;
3034
Chris Masonfa9c0d792009-04-03 09:47:43 -04003035 /* now return any extents the cluster had on it */
Li Zefan34d52cb2011-03-29 13:46:06 +08003036 spin_lock(&ctl->tree_lock);
Anand Jain69b0e092020-06-03 18:10:18 +08003037 __btrfs_return_cluster_to_free_space(block_group, cluster);
Li Zefan34d52cb2011-03-29 13:46:06 +08003038 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04003039
Dennis Zhou6e80d4f2019-12-13 16:22:15 -08003040 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group);
3041
Chris Masonfa9c0d792009-04-03 09:47:43 -04003042 /* finally drop our ref */
3043 btrfs_put_block_group(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04003044}
3045
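/*
 * Allocate @bytes from a bitmap entry that belongs to @cluster, searching
 * from @min_start onwards.  On success the matching bits are cleared and
 * the start offset is returned; on failure 0 is returned and
 * @max_extent_size is updated.
 */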
David Sterba32da53862019-10-29 19:20:18 +01003046static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
Josef Bacik96303082009-07-13 21:29:25 -04003047 struct btrfs_free_cluster *cluster,
Josef Bacik4e69b592011-03-21 10:11:24 -04003048 struct btrfs_free_space *entry,
Miao Xiea4820392013-09-09 13:19:42 +08003049 u64 bytes, u64 min_start,
3050 u64 *max_extent_size)
Josef Bacik96303082009-07-13 21:29:25 -04003051{
Li Zefan34d52cb2011-03-29 13:46:06 +08003052 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik96303082009-07-13 21:29:25 -04003053 int err;
3054 u64 search_start = cluster->window_start;
3055 u64 search_bytes = bytes;
3056 u64 ret = 0;
3057
Josef Bacik96303082009-07-13 21:29:25 -04003058 search_start = min_start;
3059 search_bytes = bytes;
3060
Josef Bacik0584f712015-10-02 16:12:23 -04003061 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
Miao Xiea4820392013-09-09 13:19:42 +08003062 if (err) {
Josef Bacikad22cf62018-10-12 15:32:33 -04003063 *max_extent_size = max(get_max_extent_size(entry),
3064 *max_extent_size);
Josef Bacik4e69b592011-03-21 10:11:24 -04003065 return 0;
Miao Xiea4820392013-09-09 13:19:42 +08003066 }
Josef Bacik96303082009-07-13 21:29:25 -04003067
3068 ret = search_start;
Miao Xiebb3ac5a2011-08-05 09:32:35 +00003069 __bitmap_clear_bits(ctl, entry, ret, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04003070
3071 return ret;
3072}
3073
Chris Masonfa9c0d792009-04-03 09:47:43 -04003074/*
 3075 * Given a cluster, try to allocate 'bytes' from it. Returns 0
 3076 * if it couldn't find anything suitably large, or a logical disk offset
 3077 * if things worked out.
3078 */
David Sterba32da53862019-10-29 19:20:18 +01003079u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
Chris Masonfa9c0d792009-04-03 09:47:43 -04003080 struct btrfs_free_cluster *cluster, u64 bytes,
Miao Xiea4820392013-09-09 13:19:42 +08003081 u64 min_start, u64 *max_extent_size)
Chris Masonfa9c0d792009-04-03 09:47:43 -04003082{
Li Zefan34d52cb2011-03-29 13:46:06 +08003083 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Dennis Zhou9ddf6482020-01-02 16:26:41 -05003084 struct btrfs_discard_ctl *discard_ctl =
3085 &block_group->fs_info->discard_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04003086 struct btrfs_free_space *entry = NULL;
3087 struct rb_node *node;
3088 u64 ret = 0;
3089
Naohiro Aota2eda5702021-02-04 19:21:53 +09003090 ASSERT(!btrfs_is_zoned(block_group->fs_info));
3091
Chris Masonfa9c0d792009-04-03 09:47:43 -04003092 spin_lock(&cluster->lock);
3093 if (bytes > cluster->max_size)
3094 goto out;
3095
3096 if (cluster->block_group != block_group)
3097 goto out;
3098
3099 node = rb_first(&cluster->root);
3100 if (!node)
3101 goto out;
3102
3103 entry = rb_entry(node, struct btrfs_free_space, offset_index);
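	/*
	 * Walk the cluster's entries in offset order until we find one that
	 * is large enough; extent entries must also start at or after
	 * min_start, while bitmap entries are searched from the cluster's
	 * window_start.
	 */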
Dulshani Gunawardhana67871252013-10-31 10:33:04 +05303104 while (1) {
Josef Bacikad22cf62018-10-12 15:32:33 -04003105 if (entry->bytes < bytes)
3106 *max_extent_size = max(get_max_extent_size(entry),
3107 *max_extent_size);
Miao Xiea4820392013-09-09 13:19:42 +08003108
Josef Bacik4e69b592011-03-21 10:11:24 -04003109 if (entry->bytes < bytes ||
3110 (!entry->bitmap && entry->offset < min_start)) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04003111 node = rb_next(&entry->offset_index);
3112 if (!node)
3113 break;
3114 entry = rb_entry(node, struct btrfs_free_space,
3115 offset_index);
3116 continue;
3117 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04003118
Josef Bacik4e69b592011-03-21 10:11:24 -04003119 if (entry->bitmap) {
3120 ret = btrfs_alloc_from_bitmap(block_group,
3121 cluster, entry, bytes,
Miao Xiea4820392013-09-09 13:19:42 +08003122 cluster->window_start,
3123 max_extent_size);
Josef Bacik4e69b592011-03-21 10:11:24 -04003124 if (ret == 0) {
Josef Bacik4e69b592011-03-21 10:11:24 -04003125 node = rb_next(&entry->offset_index);
3126 if (!node)
3127 break;
3128 entry = rb_entry(node, struct btrfs_free_space,
3129 offset_index);
3130 continue;
3131 }
Josef Bacik9b230622012-01-26 15:01:12 -05003132 cluster->window_start += bytes;
Josef Bacik4e69b592011-03-21 10:11:24 -04003133 } else {
Josef Bacik4e69b592011-03-21 10:11:24 -04003134 ret = entry->offset;
3135
3136 entry->offset += bytes;
3137 entry->bytes -= bytes;
3138 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04003139
Chris Masonfa9c0d792009-04-03 09:47:43 -04003140 break;
3141 }
3142out:
3143 spin_unlock(&cluster->lock);
Josef Bacik96303082009-07-13 21:29:25 -04003144
Li Zefan5e71b5d2010-11-09 14:55:34 +08003145 if (!ret)
3146 return 0;
3147
Li Zefan34d52cb2011-03-29 13:46:06 +08003148 spin_lock(&ctl->tree_lock);
Li Zefan5e71b5d2010-11-09 14:55:34 +08003149
Dennis Zhou9ddf6482020-01-02 16:26:41 -05003150 if (!btrfs_free_space_trimmed(entry))
3151 atomic64_add(bytes, &discard_ctl->discard_bytes_saved);
3152
Li Zefan34d52cb2011-03-29 13:46:06 +08003153 ctl->free_space -= bytes;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08003154 if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
3155 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
Nikolay Borisov3c179162021-02-08 10:26:54 +02003156
3157 spin_lock(&cluster->lock);
Li Zefan5e71b5d2010-11-09 14:55:34 +08003158 if (entry->bytes == 0) {
Nikolay Borisov3c179162021-02-08 10:26:54 +02003159 rb_erase(&entry->offset_index, &cluster->root);
Li Zefan34d52cb2011-03-29 13:46:06 +08003160 ctl->free_extents--;
Josef Bacik4e69b592011-03-21 10:11:24 -04003161 if (entry->bitmap) {
Christophe Leroy3acd4852019-08-21 15:05:55 +00003162 kmem_cache_free(btrfs_free_space_bitmap_cachep,
3163 entry->bitmap);
Li Zefan34d52cb2011-03-29 13:46:06 +08003164 ctl->total_bitmaps--;
David Sterbafa598b02020-12-03 17:18:38 +01003165 recalculate_thresholds(ctl);
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003166 } else if (!btrfs_free_space_trimmed(entry)) {
3167 ctl->discardable_extents[BTRFS_STAT_CURR]--;
Josef Bacik4e69b592011-03-21 10:11:24 -04003168 }
Josef Bacikdc89e982011-01-28 17:05:48 -05003169 kmem_cache_free(btrfs_free_space_cachep, entry);
Li Zefan5e71b5d2010-11-09 14:55:34 +08003170 }
3171
Nikolay Borisov3c179162021-02-08 10:26:54 +02003172 spin_unlock(&cluster->lock);
Li Zefan34d52cb2011-03-29 13:46:06 +08003173 spin_unlock(&ctl->tree_lock);
Li Zefan5e71b5d2010-11-09 14:55:34 +08003174
Chris Masonfa9c0d792009-04-03 09:47:43 -04003175 return ret;
3176}
3177
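/*
 * Try to build a cluster out of a single bitmap entry: accumulate runs of
 * at least min_bits contiguous set bits until the total covers the wanted
 * bytes and the largest single run reaches cont1_bytes, then move the
 * bitmap entry from ctl's offset tree into @cluster's rbtree.
 */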
David Sterba32da53862019-10-29 19:20:18 +01003178static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
Josef Bacik96303082009-07-13 21:29:25 -04003179 struct btrfs_free_space *entry,
3180 struct btrfs_free_cluster *cluster,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003181 u64 offset, u64 bytes,
3182 u64 cont1_bytes, u64 min_bytes)
Josef Bacik96303082009-07-13 21:29:25 -04003183{
Li Zefan34d52cb2011-03-29 13:46:06 +08003184 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik96303082009-07-13 21:29:25 -04003185 unsigned long next_zero;
3186 unsigned long i;
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003187 unsigned long want_bits;
3188 unsigned long min_bits;
Josef Bacik96303082009-07-13 21:29:25 -04003189 unsigned long found_bits;
Josef Bacikcef40482015-10-02 16:09:42 -04003190 unsigned long max_bits = 0;
Josef Bacik96303082009-07-13 21:29:25 -04003191 unsigned long start = 0;
3192 unsigned long total_found = 0;
Josef Bacik4e69b592011-03-21 10:11:24 -04003193 int ret;
Josef Bacik96303082009-07-13 21:29:25 -04003194
Wang Sheng-Hui96009762012-11-30 06:30:14 +00003195 i = offset_to_bit(entry->offset, ctl->unit,
Josef Bacik96303082009-07-13 21:29:25 -04003196 max_t(u64, offset, entry->offset));
Wang Sheng-Hui96009762012-11-30 06:30:14 +00003197 want_bits = bytes_to_bits(bytes, ctl->unit);
3198 min_bits = bytes_to_bits(min_bytes, ctl->unit);
Josef Bacik96303082009-07-13 21:29:25 -04003199
Josef Bacikcef40482015-10-02 16:09:42 -04003200 /*
3201 * Don't bother looking for a cluster in this bitmap if it's heavily
3202 * fragmented.
3203 */
3204 if (entry->max_extent_size &&
3205 entry->max_extent_size < cont1_bytes)
3206 return -ENOSPC;
Josef Bacik96303082009-07-13 21:29:25 -04003207again:
3208 found_bits = 0;
Wei Yongjunebb3dad2012-09-13 20:29:02 -06003209 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
Josef Bacik96303082009-07-13 21:29:25 -04003210 next_zero = find_next_zero_bit(entry->bitmap,
3211 BITS_PER_BITMAP, i);
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003212 if (next_zero - i >= min_bits) {
Josef Bacik96303082009-07-13 21:29:25 -04003213 found_bits = next_zero - i;
Josef Bacikcef40482015-10-02 16:09:42 -04003214 if (found_bits > max_bits)
3215 max_bits = found_bits;
Josef Bacik96303082009-07-13 21:29:25 -04003216 break;
3217 }
Josef Bacikcef40482015-10-02 16:09:42 -04003218 if (next_zero - i > max_bits)
3219 max_bits = next_zero - i;
Josef Bacik96303082009-07-13 21:29:25 -04003220 i = next_zero;
3221 }
3222
Josef Bacikcef40482015-10-02 16:09:42 -04003223 if (!found_bits) {
3224 entry->max_extent_size = (u64)max_bits * ctl->unit;
Josef Bacik4e69b592011-03-21 10:11:24 -04003225 return -ENOSPC;
Josef Bacikcef40482015-10-02 16:09:42 -04003226 }
Josef Bacik96303082009-07-13 21:29:25 -04003227
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003228 if (!total_found) {
Josef Bacik96303082009-07-13 21:29:25 -04003229 start = i;
Alexandre Olivab78d09b2011-11-30 13:43:00 -05003230 cluster->max_size = 0;
Josef Bacik96303082009-07-13 21:29:25 -04003231 }
3232
3233 total_found += found_bits;
3234
Wang Sheng-Hui96009762012-11-30 06:30:14 +00003235 if (cluster->max_size < found_bits * ctl->unit)
3236 cluster->max_size = found_bits * ctl->unit;
Josef Bacik96303082009-07-13 21:29:25 -04003237
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003238 if (total_found < want_bits || cluster->max_size < cont1_bytes) {
3239 i = next_zero + 1;
Josef Bacik96303082009-07-13 21:29:25 -04003240 goto again;
3241 }
3242
Wang Sheng-Hui96009762012-11-30 06:30:14 +00003243 cluster->window_start = start * ctl->unit + entry->offset;
Li Zefan34d52cb2011-03-29 13:46:06 +08003244 rb_erase(&entry->offset_index, &ctl->free_space_offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04003245 ret = tree_insert_offset(&cluster->root, entry->offset,
3246 &entry->offset_index, 1);
Josef Bacikb12d6862013-08-26 17:14:08 -04003247 ASSERT(!ret); /* -EEXIST; Logic error */
Josef Bacik96303082009-07-13 21:29:25 -04003248
Josef Bacik3f7de032011-11-10 08:29:20 -05003249 trace_btrfs_setup_cluster(block_group, cluster,
Wang Sheng-Hui96009762012-11-30 06:30:14 +00003250 total_found * ctl->unit, 1);
Josef Bacik96303082009-07-13 21:29:25 -04003251 return 0;
3252}
3253
Chris Masonfa9c0d792009-04-03 09:47:43 -04003254/*
Josef Bacik4e69b592011-03-21 10:11:24 -04003255 * This searches the block group for just extents to fill the cluster with.
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003256 * Try to find a cluster with at least 'bytes' total bytes, at least one
 3257 * extent of cont1_bytes, and other extents of at least min_bytes.
Josef Bacik4e69b592011-03-21 10:11:24 -04003258 */
Josef Bacik3de85bb2011-05-25 13:07:37 -04003259static noinline int
David Sterba32da53862019-10-29 19:20:18 +01003260setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
Josef Bacik3de85bb2011-05-25 13:07:37 -04003261 struct btrfs_free_cluster *cluster,
3262 struct list_head *bitmaps, u64 offset, u64 bytes,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003263 u64 cont1_bytes, u64 min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04003264{
Li Zefan34d52cb2011-03-29 13:46:06 +08003265 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik4e69b592011-03-21 10:11:24 -04003266 struct btrfs_free_space *first = NULL;
3267 struct btrfs_free_space *entry = NULL;
Josef Bacik4e69b592011-03-21 10:11:24 -04003268 struct btrfs_free_space *last;
3269 struct rb_node *node;
Josef Bacik4e69b592011-03-21 10:11:24 -04003270 u64 window_free;
3271 u64 max_extent;
Josef Bacik3f7de032011-11-10 08:29:20 -05003272 u64 total_size = 0;
Josef Bacik4e69b592011-03-21 10:11:24 -04003273
Li Zefan34d52cb2011-03-29 13:46:06 +08003274 entry = tree_search_offset(ctl, offset, 0, 1);
Josef Bacik4e69b592011-03-21 10:11:24 -04003275 if (!entry)
3276 return -ENOSPC;
3277
3278 /*
3279 * We don't want bitmaps, so just move along until we find a normal
3280 * extent entry.
3281 */
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003282 while (entry->bitmap || entry->bytes < min_bytes) {
3283 if (entry->bitmap && list_empty(&entry->list))
Josef Bacik86d4a772011-05-25 13:03:16 -04003284 list_add_tail(&entry->list, bitmaps);
Josef Bacik4e69b592011-03-21 10:11:24 -04003285 node = rb_next(&entry->offset_index);
3286 if (!node)
3287 return -ENOSPC;
3288 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3289 }
3290
Josef Bacik4e69b592011-03-21 10:11:24 -04003291 window_free = entry->bytes;
3292 max_extent = entry->bytes;
3293 first = entry;
3294 last = entry;
Josef Bacik4e69b592011-03-21 10:11:24 -04003295
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003296 for (node = rb_next(&entry->offset_index); node;
3297 node = rb_next(&entry->offset_index)) {
Josef Bacik4e69b592011-03-21 10:11:24 -04003298 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3299
Josef Bacik86d4a772011-05-25 13:03:16 -04003300 if (entry->bitmap) {
3301 if (list_empty(&entry->list))
3302 list_add_tail(&entry->list, bitmaps);
Josef Bacik4e69b592011-03-21 10:11:24 -04003303 continue;
Josef Bacik86d4a772011-05-25 13:03:16 -04003304 }
3305
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003306 if (entry->bytes < min_bytes)
3307 continue;
3308
3309 last = entry;
3310 window_free += entry->bytes;
3311 if (entry->bytes > max_extent)
Josef Bacik4e69b592011-03-21 10:11:24 -04003312 max_extent = entry->bytes;
Josef Bacik4e69b592011-03-21 10:11:24 -04003313 }
3314
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003315 if (window_free < bytes || max_extent < cont1_bytes)
3316 return -ENOSPC;
3317
Josef Bacik4e69b592011-03-21 10:11:24 -04003318 cluster->window_start = first->offset;
3319
3320 node = &first->offset_index;
3321
3322 /*
3323 * now we've found our entries, pull them out of the free space
3324 * cache and put them into the cluster rbtree
3325 */
3326 do {
3327 int ret;
3328
3329 entry = rb_entry(node, struct btrfs_free_space, offset_index);
3330 node = rb_next(&entry->offset_index);
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003331 if (entry->bitmap || entry->bytes < min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04003332 continue;
3333
Li Zefan34d52cb2011-03-29 13:46:06 +08003334 rb_erase(&entry->offset_index, &ctl->free_space_offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04003335 ret = tree_insert_offset(&cluster->root, entry->offset,
3336 &entry->offset_index, 0);
Josef Bacik3f7de032011-11-10 08:29:20 -05003337 total_size += entry->bytes;
Josef Bacikb12d6862013-08-26 17:14:08 -04003338 ASSERT(!ret); /* -EEXIST; Logic error */
Josef Bacik4e69b592011-03-21 10:11:24 -04003339 } while (node && entry != last);
3340
3341 cluster->max_size = max_extent;
Josef Bacik3f7de032011-11-10 08:29:20 -05003342 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
Josef Bacik4e69b592011-03-21 10:11:24 -04003343 return 0;
3344}
3345
3346/*
 3347 * This specifically looks for bitmaps that may work in the cluster; we assume
 3348 * that we have already failed to find extents that will work.
3349 */
Josef Bacik3de85bb2011-05-25 13:07:37 -04003350static noinline int
David Sterba32da53862019-10-29 19:20:18 +01003351setup_cluster_bitmap(struct btrfs_block_group *block_group,
Josef Bacik3de85bb2011-05-25 13:07:37 -04003352 struct btrfs_free_cluster *cluster,
3353 struct list_head *bitmaps, u64 offset, u64 bytes,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003354 u64 cont1_bytes, u64 min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04003355{
Li Zefan34d52cb2011-03-29 13:46:06 +08003356 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Mason1b9b9222015-12-15 07:15:32 -08003357 struct btrfs_free_space *entry = NULL;
Josef Bacik4e69b592011-03-21 10:11:24 -04003358 int ret = -ENOSPC;
Li Zefan0f0fbf12011-11-20 07:33:38 -05003359 u64 bitmap_offset = offset_to_bitmap(ctl, offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04003360
Li Zefan34d52cb2011-03-29 13:46:06 +08003361 if (ctl->total_bitmaps == 0)
Josef Bacik4e69b592011-03-21 10:11:24 -04003362 return -ENOSPC;
3363
Josef Bacik86d4a772011-05-25 13:03:16 -04003364 /*
Li Zefan0f0fbf12011-11-20 07:33:38 -05003365 * The bitmap that covers offset won't be in the list unless offset
3366 * is just its start offset.
3367 */
Chris Mason1b9b9222015-12-15 07:15:32 -08003368 if (!list_empty(bitmaps))
3369 entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
3370
3371 if (!entry || entry->offset != bitmap_offset) {
Li Zefan0f0fbf12011-11-20 07:33:38 -05003372 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
3373 if (entry && list_empty(&entry->list))
3374 list_add(&entry->list, bitmaps);
3375 }
3376
Josef Bacik86d4a772011-05-25 13:03:16 -04003377 list_for_each_entry(entry, bitmaps, list) {
Josef Bacik357b9782012-01-26 15:01:11 -05003378 if (entry->bytes < bytes)
Josef Bacik86d4a772011-05-25 13:03:16 -04003379 continue;
3380 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003381 bytes, cont1_bytes, min_bytes);
Josef Bacik86d4a772011-05-25 13:03:16 -04003382 if (!ret)
3383 return 0;
3384 }
3385
3386 /*
Li Zefan52621cb2011-11-20 07:33:38 -05003387 * The bitmaps list has all the bitmaps that record free space
3388 * starting after offset, so no more search is required.
Josef Bacik86d4a772011-05-25 13:03:16 -04003389 */
Li Zefan52621cb2011-11-20 07:33:38 -05003390 return -ENOSPC;
Josef Bacik4e69b592011-03-21 10:11:24 -04003391}
3392
3393/*
Chris Masonfa9c0d792009-04-03 09:47:43 -04003394 * Here we try to find a cluster of blocks in a block group. The goal
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003395 * is to find at least bytes + empty_size.
Chris Masonfa9c0d792009-04-03 09:47:43 -04003396 * We might not find them all in one contiguous area.
 3397 *
 3398 * Returns zero and sets up the cluster if things worked out, otherwise
 3399 * it returns -ENOSPC.
3400 */
David Sterba32da53862019-10-29 19:20:18 +01003401int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
Chris Masonfa9c0d792009-04-03 09:47:43 -04003402 struct btrfs_free_cluster *cluster,
3403 u64 offset, u64 bytes, u64 empty_size)
3404{
David Sterba2ceeae22019-03-20 13:53:49 +01003405 struct btrfs_fs_info *fs_info = block_group->fs_info;
Li Zefan34d52cb2011-03-29 13:46:06 +08003406 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik86d4a772011-05-25 13:03:16 -04003407 struct btrfs_free_space *entry, *tmp;
Li Zefan52621cb2011-11-20 07:33:38 -05003408 LIST_HEAD(bitmaps);
Chris Masonfa9c0d792009-04-03 09:47:43 -04003409 u64 min_bytes;
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003410 u64 cont1_bytes;
Chris Masonfa9c0d792009-04-03 09:47:43 -04003411 int ret;
3412
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003413 /*
3414 * Choose the minimum extent size we'll require for this
3415 * cluster. For SSD_SPREAD, don't allow any fragmentation.
 3416	 * For metadata, allow allocations with smaller extents. For
3417 * data, keep it dense.
3418 */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003419 if (btrfs_test_opt(fs_info, SSD_SPREAD)) {
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003420 cont1_bytes = min_bytes = bytes + empty_size;
Chris Mason451d7582009-06-09 20:28:34 -04003421 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003422 cont1_bytes = bytes;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003423 min_bytes = fs_info->sectorsize;
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003424 } else {
3425 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003426 min_bytes = fs_info->sectorsize;
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003427 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04003428
Li Zefan34d52cb2011-03-29 13:46:06 +08003429 spin_lock(&ctl->tree_lock);
Josef Bacik7d0d2e82011-03-18 15:13:42 -04003430
3431 /*
3432 * If we know we don't have enough space to make a cluster don't even
3433 * bother doing all the work to try and find one.
3434 */
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003435 if (ctl->free_space < bytes) {
Li Zefan34d52cb2011-03-29 13:46:06 +08003436 spin_unlock(&ctl->tree_lock);
Josef Bacik7d0d2e82011-03-18 15:13:42 -04003437 return -ENOSPC;
3438 }
3439
Chris Masonfa9c0d792009-04-03 09:47:43 -04003440 spin_lock(&cluster->lock);
3441
3442 /* someone already found a cluster, hooray */
3443 if (cluster->block_group) {
3444 ret = 0;
3445 goto out;
3446 }
Josef Bacik4e69b592011-03-21 10:11:24 -04003447
Josef Bacik3f7de032011-11-10 08:29:20 -05003448 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
3449 min_bytes);
3450
Josef Bacik86d4a772011-05-25 13:03:16 -04003451 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003452 bytes + empty_size,
3453 cont1_bytes, min_bytes);
Josef Bacik4e69b592011-03-21 10:11:24 -04003454 if (ret)
Josef Bacik86d4a772011-05-25 13:03:16 -04003455 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
Alexandre Oliva1bb91902011-10-14 12:10:36 -03003456 offset, bytes + empty_size,
3457 cont1_bytes, min_bytes);
Josef Bacik86d4a772011-05-25 13:03:16 -04003458
3459 /* Clear our temporary list */
3460 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
3461 list_del_init(&entry->list);
Josef Bacik4e69b592011-03-21 10:11:24 -04003462
3463 if (!ret) {
Anand Jainb5790d52020-06-03 18:10:20 +08003464 btrfs_get_block_group(block_group);
Josef Bacik4e69b592011-03-21 10:11:24 -04003465 list_add_tail(&cluster->block_group_list,
3466 &block_group->cluster_list);
3467 cluster->block_group = block_group;
Josef Bacik3f7de032011-11-10 08:29:20 -05003468 } else {
3469 trace_btrfs_failed_cluster_setup(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04003470 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04003471out:
3472 spin_unlock(&cluster->lock);
Li Zefan34d52cb2011-03-29 13:46:06 +08003473 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04003474
3475 return ret;
3476}
3477
3478/*
3479 * simple code to zero out a cluster
3480 */
3481void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
3482{
3483 spin_lock_init(&cluster->lock);
3484 spin_lock_init(&cluster->refill_lock);
Eric Paris6bef4d32010-02-23 19:43:04 +00003485 cluster->root = RB_ROOT;
Chris Masonfa9c0d792009-04-03 09:47:43 -04003486 cluster->max_size = 0;
Josef Bacikc759c4e2015-10-02 15:25:10 -04003487 cluster->fragmented = false;
Chris Masonfa9c0d792009-04-03 09:47:43 -04003488 INIT_LIST_HEAD(&cluster->block_group_list);
3489 cluster->block_group = NULL;
3490}
3491
David Sterba32da53862019-10-29 19:20:18 +01003492static int do_trimming(struct btrfs_block_group *block_group,
Li Zefan7fe1e642011-12-29 14:47:27 +08003493 u64 *total_trimmed, u64 start, u64 bytes,
Filipe Manana55507ce2014-12-01 17:04:09 +00003494 u64 reserved_start, u64 reserved_bytes,
Dennis Zhoub0643e52019-12-13 16:22:14 -08003495 enum btrfs_trim_state reserved_trim_state,
Filipe Manana55507ce2014-12-01 17:04:09 +00003496 struct btrfs_trim_range *trim_entry)
Li Zefan7fe1e642011-12-29 14:47:27 +08003497{
3498 struct btrfs_space_info *space_info = block_group->space_info;
3499 struct btrfs_fs_info *fs_info = block_group->fs_info;
Filipe Manana55507ce2014-12-01 17:04:09 +00003500 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Li Zefan7fe1e642011-12-29 14:47:27 +08003501 int ret;
3502 int update = 0;
Dennis Zhoub0643e52019-12-13 16:22:14 -08003503 const u64 end = start + bytes;
3504 const u64 reserved_end = reserved_start + reserved_bytes;
3505 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
Li Zefan7fe1e642011-12-29 14:47:27 +08003506 u64 trimmed = 0;
3507
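	/*
	 * Account the range being trimmed as reserved so the space_info
	 * counters stay consistent while the discard runs (the range is no
	 * longer in the free space cache but is not allocated either).  The
	 * reservation is dropped again once the space is re-added below.
	 */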
3508 spin_lock(&space_info->lock);
3509 spin_lock(&block_group->lock);
3510 if (!block_group->ro) {
3511 block_group->reserved += reserved_bytes;
3512 space_info->bytes_reserved += reserved_bytes;
3513 update = 1;
3514 }
3515 spin_unlock(&block_group->lock);
3516 spin_unlock(&space_info->lock);
3517
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003518 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed);
Dennis Zhoub0643e52019-12-13 16:22:14 -08003519 if (!ret) {
Li Zefan7fe1e642011-12-29 14:47:27 +08003520 *total_trimmed += trimmed;
Dennis Zhoub0643e52019-12-13 16:22:14 -08003521 trim_state = BTRFS_TRIM_STATE_TRIMMED;
3522 }
Li Zefan7fe1e642011-12-29 14:47:27 +08003523
Filipe Manana55507ce2014-12-01 17:04:09 +00003524 mutex_lock(&ctl->cache_writeout_mutex);
Dennis Zhoub0643e52019-12-13 16:22:14 -08003525 if (reserved_start < start)
3526 __btrfs_add_free_space(fs_info, ctl, reserved_start,
3527 start - reserved_start,
3528 reserved_trim_state);
3529 if (start + bytes < reserved_start + reserved_bytes)
3530 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end,
3531 reserved_trim_state);
3532 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state);
Filipe Manana55507ce2014-12-01 17:04:09 +00003533 list_del(&trim_entry->list);
3534 mutex_unlock(&ctl->cache_writeout_mutex);
Li Zefan7fe1e642011-12-29 14:47:27 +08003535
3536 if (update) {
3537 spin_lock(&space_info->lock);
3538 spin_lock(&block_group->lock);
3539 if (block_group->ro)
3540 space_info->bytes_readonly += reserved_bytes;
3541 block_group->reserved -= reserved_bytes;
3542 space_info->bytes_reserved -= reserved_bytes;
Li Zefan7fe1e642011-12-29 14:47:27 +08003543 spin_unlock(&block_group->lock);
Su Yue8f63a842018-11-28 11:21:12 +08003544 spin_unlock(&space_info->lock);
Li Zefan7fe1e642011-12-29 14:47:27 +08003545 }
3546
3547 return ret;
3548}
3549
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003550/*
3551 * If @async is set, then we will trim 1 region and return.
3552 */
David Sterba32da53862019-10-29 19:20:18 +01003553static int trim_no_bitmap(struct btrfs_block_group *block_group,
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003554 u64 *total_trimmed, u64 start, u64 end, u64 minlen,
3555 bool async)
Li Dongyangf7039b12011-03-24 10:24:28 +00003556{
Dennis Zhou19b2a2c2020-01-02 16:26:38 -05003557 struct btrfs_discard_ctl *discard_ctl =
3558 &block_group->fs_info->discard_ctl;
Li Zefan34d52cb2011-03-29 13:46:06 +08003559 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Li Zefan7fe1e642011-12-29 14:47:27 +08003560 struct btrfs_free_space *entry;
3561 struct rb_node *node;
Li Dongyangf7039b12011-03-24 10:24:28 +00003562 int ret = 0;
Li Zefan7fe1e642011-12-29 14:47:27 +08003563 u64 extent_start;
3564 u64 extent_bytes;
Dennis Zhoub0643e52019-12-13 16:22:14 -08003565 enum btrfs_trim_state extent_trim_state;
Li Zefan7fe1e642011-12-29 14:47:27 +08003566 u64 bytes;
Dennis Zhou19b2a2c2020-01-02 16:26:38 -05003567 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
Li Dongyangf7039b12011-03-24 10:24:28 +00003568
3569 while (start < end) {
Filipe Manana55507ce2014-12-01 17:04:09 +00003570 struct btrfs_trim_range trim_entry;
3571
3572 mutex_lock(&ctl->cache_writeout_mutex);
Li Zefan34d52cb2011-03-29 13:46:06 +08003573 spin_lock(&ctl->tree_lock);
Li Dongyangf7039b12011-03-24 10:24:28 +00003574
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003575 if (ctl->free_space < minlen)
3576 goto out_unlock;
Li Dongyangf7039b12011-03-24 10:24:28 +00003577
Li Zefan34d52cb2011-03-29 13:46:06 +08003578 entry = tree_search_offset(ctl, start, 0, 1);
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003579 if (!entry)
3580 goto out_unlock;
Li Dongyangf7039b12011-03-24 10:24:28 +00003581
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003582 /* Skip bitmaps and, if async, already-trimmed entries */
3583 while (entry->bitmap ||
3584 (async && btrfs_free_space_trimmed(entry))) {
Li Zefan7fe1e642011-12-29 14:47:27 +08003585 node = rb_next(&entry->offset_index);
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003586 if (!node)
3587 goto out_unlock;
Li Zefan7fe1e642011-12-29 14:47:27 +08003588 entry = rb_entry(node, struct btrfs_free_space,
3589 offset_index);
Li Dongyangf7039b12011-03-24 10:24:28 +00003590 }
3591
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003592 if (entry->offset >= end)
3593 goto out_unlock;
Li Zefan7fe1e642011-12-29 14:47:27 +08003594
3595 extent_start = entry->offset;
3596 extent_bytes = entry->bytes;
Dennis Zhoub0643e52019-12-13 16:22:14 -08003597 extent_trim_state = entry->trim_state;
Dennis Zhou4aa9ad52020-01-02 16:26:37 -05003598 if (async) {
3599 start = entry->offset;
3600 bytes = entry->bytes;
3601 if (bytes < minlen) {
3602 spin_unlock(&ctl->tree_lock);
3603 mutex_unlock(&ctl->cache_writeout_mutex);
3604 goto next;
3605 }
3606 unlink_free_space(ctl, entry);
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003607 /*
3608 * Let bytes = max_discard_size + X.
3609 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim
3610 * X when we come back around. So trim it now.
3611 */
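			/*
			 * Illustrative numbers (assuming the default 64M
			 * max_discard_size and 32K
			 * BTRFS_ASYNC_DISCARD_MIN_FILTER): an 80M extent is
			 * clamped to a 64M discard now with the remaining 16M
			 * relinked for a later pass, while a 64M + 16K extent
			 * is discarded whole, since its 16K tail would fall
			 * below the filter.
			 */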
3612 if (max_discard_size &&
3613 bytes >= (max_discard_size +
3614 BTRFS_ASYNC_DISCARD_MIN_FILTER)) {
Dennis Zhou19b2a2c2020-01-02 16:26:38 -05003615 bytes = max_discard_size;
3616 extent_bytes = max_discard_size;
3617 entry->offset += max_discard_size;
3618 entry->bytes -= max_discard_size;
Dennis Zhou4aa9ad52020-01-02 16:26:37 -05003619 link_free_space(ctl, entry);
3620 } else {
3621 kmem_cache_free(btrfs_free_space_cachep, entry);
3622 }
3623 } else {
3624 start = max(start, extent_start);
3625 bytes = min(extent_start + extent_bytes, end) - start;
3626 if (bytes < minlen) {
3627 spin_unlock(&ctl->tree_lock);
3628 mutex_unlock(&ctl->cache_writeout_mutex);
3629 goto next;
3630 }
Li Zefan7fe1e642011-12-29 14:47:27 +08003631
Dennis Zhou4aa9ad52020-01-02 16:26:37 -05003632 unlink_free_space(ctl, entry);
3633 kmem_cache_free(btrfs_free_space_cachep, entry);
3634 }
Li Zefan7fe1e642011-12-29 14:47:27 +08003635
Li Zefan34d52cb2011-03-29 13:46:06 +08003636 spin_unlock(&ctl->tree_lock);
Filipe Manana55507ce2014-12-01 17:04:09 +00003637 trim_entry.start = extent_start;
3638 trim_entry.bytes = extent_bytes;
3639 list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3640 mutex_unlock(&ctl->cache_writeout_mutex);
Li Dongyangf7039b12011-03-24 10:24:28 +00003641
Li Zefan7fe1e642011-12-29 14:47:27 +08003642 ret = do_trimming(block_group, total_trimmed, start, bytes,
Dennis Zhoub0643e52019-12-13 16:22:14 -08003643 extent_start, extent_bytes, extent_trim_state,
3644 &trim_entry);
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003645 if (ret) {
3646 block_group->discard_cursor = start + bytes;
Li Zefan7fe1e642011-12-29 14:47:27 +08003647 break;
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003648 }
Li Zefan7fe1e642011-12-29 14:47:27 +08003649next:
Li Dongyangf7039b12011-03-24 10:24:28 +00003650 start += bytes;
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003651 block_group->discard_cursor = start;
3652 if (async && *total_trimmed)
3653 break;
Li Dongyangf7039b12011-03-24 10:24:28 +00003654
3655 if (fatal_signal_pending(current)) {
3656 ret = -ERESTARTSYS;
3657 break;
3658 }
3659
3660 cond_resched();
3661 }
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003662
3663 return ret;
3664
3665out_unlock:
3666 block_group->discard_cursor = btrfs_block_group_end(block_group);
3667 spin_unlock(&ctl->tree_lock);
3668 mutex_unlock(&ctl->cache_writeout_mutex);
3669
Li Zefan7fe1e642011-12-29 14:47:27 +08003670 return ret;
3671}
3672
Dennis Zhouda080fe2019-12-13 16:22:13 -08003673/*
3674 * If we break out of trimming a bitmap prematurely, we should reset the
3675 * trimming bit. In a rather contrived case, it's possible to race here, so
3676 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
3677 *
3678 * start = start of bitmap
3679 * end = near end of bitmap
3680 *
3681 * Thread 1: Thread 2:
3682 * trim_bitmaps(start)
3683 * trim_bitmaps(end)
3684 * end_trimming_bitmap()
3685 * reset_trimming_bitmap()
3686 */
3687static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset)
3688{
3689 struct btrfs_free_space *entry;
3690
3691 spin_lock(&ctl->tree_lock);
3692 entry = tree_search_offset(ctl, offset, 1, 0);
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003693 if (entry) {
Dennis Zhou5dc7c102019-12-13 16:22:21 -08003694 if (btrfs_free_space_trimmed(entry)) {
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003695 ctl->discardable_extents[BTRFS_STAT_CURR] +=
3696 entry->bitmap_extents;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08003697 ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes;
3698 }
Dennis Zhouda080fe2019-12-13 16:22:13 -08003699 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003700 }
3701
Dennis Zhouda080fe2019-12-13 16:22:13 -08003702 spin_unlock(&ctl->tree_lock);
3703}
3704
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003705static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl,
3706 struct btrfs_free_space *entry)
Dennis Zhouda080fe2019-12-13 16:22:13 -08003707{
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003708 if (btrfs_free_space_trimming_bitmap(entry)) {
Dennis Zhouda080fe2019-12-13 16:22:13 -08003709 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003710 ctl->discardable_extents[BTRFS_STAT_CURR] -=
3711 entry->bitmap_extents;
Dennis Zhou5dc7c102019-12-13 16:22:21 -08003712 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes;
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003713 }
Dennis Zhouda080fe2019-12-13 16:22:13 -08003714}
3715
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003716/*
3717 * If @async is set, then we will trim 1 region and return.
3718 */
David Sterba32da53862019-10-29 19:20:18 +01003719static int trim_bitmaps(struct btrfs_block_group *block_group,
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003720 u64 *total_trimmed, u64 start, u64 end, u64 minlen,
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003721 u64 maxlen, bool async)
Li Zefan7fe1e642011-12-29 14:47:27 +08003722{
Dennis Zhou19b2a2c2020-01-02 16:26:38 -05003723 struct btrfs_discard_ctl *discard_ctl =
3724 &block_group->fs_info->discard_ctl;
Li Zefan7fe1e642011-12-29 14:47:27 +08003725 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
3726 struct btrfs_free_space *entry;
3727 int ret = 0;
3728 int ret2;
3729 u64 bytes;
3730 u64 offset = offset_to_bitmap(ctl, start);
Dennis Zhou19b2a2c2020-01-02 16:26:38 -05003731 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size);
Li Zefan7fe1e642011-12-29 14:47:27 +08003732
3733 while (offset < end) {
3734 bool next_bitmap = false;
Filipe Manana55507ce2014-12-01 17:04:09 +00003735 struct btrfs_trim_range trim_entry;
Li Zefan7fe1e642011-12-29 14:47:27 +08003736
Filipe Manana55507ce2014-12-01 17:04:09 +00003737 mutex_lock(&ctl->cache_writeout_mutex);
Li Zefan7fe1e642011-12-29 14:47:27 +08003738 spin_lock(&ctl->tree_lock);
3739
3740 if (ctl->free_space < minlen) {
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003741 block_group->discard_cursor =
3742 btrfs_block_group_end(block_group);
Li Zefan7fe1e642011-12-29 14:47:27 +08003743 spin_unlock(&ctl->tree_lock);
Filipe Manana55507ce2014-12-01 17:04:09 +00003744 mutex_unlock(&ctl->cache_writeout_mutex);
Li Zefan7fe1e642011-12-29 14:47:27 +08003745 break;
3746 }
3747
3748 entry = tree_search_offset(ctl, offset, 1, 0);
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003749 /*
3750 * Bitmaps are marked trimmed lossily now to prevent constant
3751 * discarding of the same bitmap (which is why we are bound
3752 * by the filters). So, retrim the block group bitmaps when we
3753 * are preparing to punt to the unused_bgs list. This uses
3754 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED
3755 * which is the only discard index which sets minlen to 0.
3756 */
3757 if (!entry || (async && minlen && start == offset &&
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003758 btrfs_free_space_trimmed(entry))) {
Li Zefan7fe1e642011-12-29 14:47:27 +08003759 spin_unlock(&ctl->tree_lock);
Filipe Manana55507ce2014-12-01 17:04:09 +00003760 mutex_unlock(&ctl->cache_writeout_mutex);
Li Zefan7fe1e642011-12-29 14:47:27 +08003761 next_bitmap = true;
3762 goto next;
3763 }
3764
Dennis Zhouda080fe2019-12-13 16:22:13 -08003765 /*
3766 * Async discard bitmap trimming begins by setting the start to
3767 * key.objectid; offset_to_bitmap() then aligns it to the
3768 * start of the bitmap. This lets us know we are fully
3769 * scanning the bitmap rather than only some portion of it.
3770 */
3771 if (start == offset)
3772 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING;
3773
Li Zefan7fe1e642011-12-29 14:47:27 +08003774 bytes = minlen;
Josef Bacik0584f712015-10-02 16:12:23 -04003775 ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
Li Zefan7fe1e642011-12-29 14:47:27 +08003776 if (ret2 || start >= end) {
Dennis Zhouda080fe2019-12-13 16:22:13 -08003777 /*
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003778 * We lossily consider a bitmap trimmed if we only skip
3779 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER.
Dennis Zhouda080fe2019-12-13 16:22:13 -08003780 */
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003781 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER)
Dennis Zhoudfb79dd2019-12-13 16:22:20 -08003782 end_trimming_bitmap(ctl, entry);
Dennis Zhouda080fe2019-12-13 16:22:13 -08003783 else
3784 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
Li Zefan7fe1e642011-12-29 14:47:27 +08003785 spin_unlock(&ctl->tree_lock);
Filipe Manana55507ce2014-12-01 17:04:09 +00003786 mutex_unlock(&ctl->cache_writeout_mutex);
Li Zefan7fe1e642011-12-29 14:47:27 +08003787 next_bitmap = true;
3788 goto next;
3789 }
3790
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003791 /*
3792 * We already trimmed a region, but are using the locking above
3793 * to reset the trim_state.
3794 */
3795 if (async && *total_trimmed) {
3796 spin_unlock(&ctl->tree_lock);
3797 mutex_unlock(&ctl->cache_writeout_mutex);
3798 goto out;
3799 }
3800
Li Zefan7fe1e642011-12-29 14:47:27 +08003801 bytes = min(bytes, end - start);
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003802 if (bytes < minlen || (async && maxlen && bytes > maxlen)) {
Li Zefan7fe1e642011-12-29 14:47:27 +08003803 spin_unlock(&ctl->tree_lock);
Filipe Manana55507ce2014-12-01 17:04:09 +00003804 mutex_unlock(&ctl->cache_writeout_mutex);
Li Zefan7fe1e642011-12-29 14:47:27 +08003805 goto next;
3806 }
3807
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003808 /*
3809 * Let bytes = max_discard_size + X.
3810 * If X < @minlen, we won't trim X when we come back around.
3811 * So trim it now. We differ here from trimming extents as we
3812 * don't keep individual state per bit.
3813 */
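		/*
		 * Illustrative numbers only: with max_discard_size = 64M and
		 * minlen = 32K, a 70M run of set bits is clamped to a 64M
		 * discard now; the leftover 6M stays set in the bitmap and is
		 * picked up on a later pass.
		 */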
3814 if (async &&
3815 max_discard_size &&
3816 bytes > (max_discard_size + minlen))
Dennis Zhou19b2a2c2020-01-02 16:26:38 -05003817 bytes = max_discard_size;
Dennis Zhou4aa9ad52020-01-02 16:26:37 -05003818
Li Zefan7fe1e642011-12-29 14:47:27 +08003819 bitmap_clear_bits(ctl, entry, start, bytes);
3820 if (entry->bytes == 0)
3821 free_bitmap(ctl, entry);
3822
3823 spin_unlock(&ctl->tree_lock);
Filipe Manana55507ce2014-12-01 17:04:09 +00003824 trim_entry.start = start;
3825 trim_entry.bytes = bytes;
3826 list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
3827 mutex_unlock(&ctl->cache_writeout_mutex);
Li Zefan7fe1e642011-12-29 14:47:27 +08003828
3829 ret = do_trimming(block_group, total_trimmed, start, bytes,
Dennis Zhoub0643e52019-12-13 16:22:14 -08003830 start, bytes, 0, &trim_entry);
Dennis Zhouda080fe2019-12-13 16:22:13 -08003831 if (ret) {
3832 reset_trimming_bitmap(ctl, offset);
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003833 block_group->discard_cursor =
3834 btrfs_block_group_end(block_group);
Li Zefan7fe1e642011-12-29 14:47:27 +08003835 break;
Dennis Zhouda080fe2019-12-13 16:22:13 -08003836 }
Li Zefan7fe1e642011-12-29 14:47:27 +08003837next:
3838 if (next_bitmap) {
3839 offset += BITS_PER_BITMAP * ctl->unit;
Dennis Zhouda080fe2019-12-13 16:22:13 -08003840 start = offset;
Li Zefan7fe1e642011-12-29 14:47:27 +08003841 } else {
3842 start += bytes;
Li Zefan7fe1e642011-12-29 14:47:27 +08003843 }
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003844 block_group->discard_cursor = start;
Li Zefan7fe1e642011-12-29 14:47:27 +08003845
3846 if (fatal_signal_pending(current)) {
Dennis Zhouda080fe2019-12-13 16:22:13 -08003847 if (start != offset)
3848 reset_trimming_bitmap(ctl, offset);
Li Zefan7fe1e642011-12-29 14:47:27 +08003849 ret = -ERESTARTSYS;
3850 break;
3851 }
3852
3853 cond_resched();
3854 }
3855
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003856 if (offset >= end)
3857 block_group->discard_cursor = end;
3858
3859out:
Li Zefan7fe1e642011-12-29 14:47:27 +08003860 return ret;
3861}
3862
David Sterba32da53862019-10-29 19:20:18 +01003863int btrfs_trim_block_group(struct btrfs_block_group *block_group,
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04003864 u64 *trimmed, u64 start, u64 end, u64 minlen)
3865{
Dennis Zhouda080fe2019-12-13 16:22:13 -08003866 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04003867 int ret;
Dennis Zhouda080fe2019-12-13 16:22:13 -08003868 u64 rem = 0;
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04003869
Naohiro Aota2eda5702021-02-04 19:21:53 +09003870 ASSERT(!btrfs_is_zoned(block_group->fs_info));
3871
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04003872 *trimmed = 0;
3873
3874 spin_lock(&block_group->lock);
3875 if (block_group->removed) {
3876 spin_unlock(&block_group->lock);
3877 return 0;
3878 }
Filipe Manana6b7304a2020-05-08 11:01:47 +01003879 btrfs_freeze_block_group(block_group);
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04003880 spin_unlock(&block_group->lock);
3881
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003882 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04003883 if (ret)
3884 goto out;
3885
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003886 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false);
Dennis Zhouda080fe2019-12-13 16:22:13 -08003887 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
3888 /* If we ended in the middle of a bitmap, reset the trimming flag */
3889 if (rem)
3890 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end));
Jeff Mahoneye33e17e2015-06-15 09:41:19 -04003891out:
Filipe Manana6b7304a2020-05-08 11:01:47 +01003892 btrfs_unfreeze_block_group(block_group);
Li Dongyangf7039b12011-03-24 10:24:28 +00003893 return ret;
3894}
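/*
 * Usage sketch (not part of this file; call sites may differ): the FITRIM
 * ioctl path walks the block groups overlapping the requested range and
 * invokes btrfs_trim_block_group() on each, roughly:
 *
 *	btrfs_trim_block_group(block_group, &group_trimmed, start, end,
 *			       range->minlen);
 *	trimmed += group_trimmed;
 */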
Li Zefan581bb052011-04-20 10:06:11 +08003895
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003896int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
3897 u64 *trimmed, u64 start, u64 end, u64 minlen,
3898 bool async)
3899{
3900 int ret;
3901
3902 *trimmed = 0;
3903
3904 spin_lock(&block_group->lock);
3905 if (block_group->removed) {
3906 spin_unlock(&block_group->lock);
3907 return 0;
3908 }
Filipe Manana6b7304a2020-05-08 11:01:47 +01003909 btrfs_freeze_block_group(block_group);
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003910 spin_unlock(&block_group->lock);
3911
3912 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
Filipe Manana6b7304a2020-05-08 11:01:47 +01003913 btrfs_unfreeze_block_group(block_group);
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003914
3915 return ret;
3916}
3917
3918int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
3919 u64 *trimmed, u64 start, u64 end, u64 minlen,
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003920 u64 maxlen, bool async)
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003921{
3922 int ret;
3923
3924 *trimmed = 0;
3925
3926 spin_lock(&block_group->lock);
3927 if (block_group->removed) {
3928 spin_unlock(&block_group->lock);
3929 return 0;
3930 }
Filipe Manana6b7304a2020-05-08 11:01:47 +01003931 btrfs_freeze_block_group(block_group);
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003932 spin_unlock(&block_group->lock);
3933
Dennis Zhou7fe6d452020-01-02 16:26:39 -05003934 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen,
3935 async);
3936
Filipe Manana6b7304a2020-05-08 11:01:47 +01003937 btrfs_unfreeze_block_group(block_group);
Dennis Zhou2bee7eb2019-12-13 16:22:16 -08003938
3939 return ret;
3940}
3941
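/*
 * A non-zero cache generation recorded in the superblock means the v1 free
 * space cache is (still) in use.
 */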
Boris Burkov94846222020-11-18 15:06:22 -08003942bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info)
3943{
3944 return btrfs_super_cache_generation(fs_info->super_copy);
3945}
3946
Boris Burkov36b216c2020-11-18 15:06:25 -08003947static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
3948 struct btrfs_trans_handle *trans)
3949{
3950 struct btrfs_block_group *block_group;
3951 struct rb_node *node;
Tom Rix77364fa2021-04-30 11:06:55 -07003952 int ret = 0;
Boris Burkov36b216c2020-11-18 15:06:25 -08003953
3954 btrfs_info(fs_info, "cleaning free space cache v1");
3955
3956 node = rb_first(&fs_info->block_group_cache_tree);
3957 while (node) {
3958 block_group = rb_entry(node, struct btrfs_block_group, cache_node);
3959 ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
3960 if (ret)
3961 goto out;
3962 node = rb_next(node);
3963 }
3964out:
3965 return ret;
3966}
3967
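/*
 * Note (summary, not from this file): this is expected to run during
 * mount/remount when the space cache v1 state requested by the mount options
 * disagrees with what the superblock records; the exact call site may vary
 * between kernel versions.
 */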
Boris Burkov94846222020-11-18 15:06:22 -08003968int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
3969{
3970 struct btrfs_trans_handle *trans;
3971 int ret;
3972
3973 /*
Boris Burkov36b216c2020-11-18 15:06:25 -08003974 * update_super_roots will appropriately set or unset
3975 * super_copy->cache_generation based on SPACE_CACHE and
3976 * BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a
3977 * transaction commit whether we are enabling space cache v1 and don't
3978 * have any other work to do, or are disabling it and removing free
3979 * space inodes.
Boris Burkov94846222020-11-18 15:06:22 -08003980 */
3981 trans = btrfs_start_transaction(fs_info->tree_root, 0);
3982 if (IS_ERR(trans))
3983 return PTR_ERR(trans);
3984
Boris Burkov36b216c2020-11-18 15:06:25 -08003985 if (!active) {
Boris Burkov94846222020-11-18 15:06:22 -08003986 set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
Boris Burkov36b216c2020-11-18 15:06:25 -08003987 ret = cleanup_free_space_cache_v1(fs_info, trans);
3988 if (ret) {
3989 btrfs_abort_transaction(trans, ret);
3990 btrfs_end_transaction(trans);
3991 goto out;
3992 }
3993 }
Boris Burkov94846222020-11-18 15:06:22 -08003994
3995 ret = btrfs_commit_transaction(trans);
Boris Burkov36b216c2020-11-18 15:06:25 -08003996out:
Boris Burkov94846222020-11-18 15:06:22 -08003997 clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
3998
3999 return ret;
4000}
4001
Josef Bacik74255aa2013-03-15 09:47:08 -04004002#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
Josef Bacikdc11dd52013-08-14 15:05:12 -04004003/*
4004 * Use this if you need to make a bitmap or extent entry specifically; it
4005 * doesn't do any of the merging that add_free_space does. This acts a lot like
4006 * how the free space cache loading stuff works, so you can get really weird
4007 * configurations.
4008 */
David Sterba32da53862019-10-29 19:20:18 +01004009int test_add_free_space_entry(struct btrfs_block_group *cache,
Josef Bacikdc11dd52013-08-14 15:05:12 -04004010 u64 offset, u64 bytes, bool bitmap)
Josef Bacik74255aa2013-03-15 09:47:08 -04004011{
Josef Bacikdc11dd52013-08-14 15:05:12 -04004012 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4013 struct btrfs_free_space *info = NULL, *bitmap_info;
4014 void *map = NULL;
Dennis Zhouda080fe2019-12-13 16:22:13 -08004015 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
Josef Bacikdc11dd52013-08-14 15:05:12 -04004016 u64 bytes_added;
4017 int ret;
Josef Bacik74255aa2013-03-15 09:47:08 -04004018
Josef Bacikdc11dd52013-08-14 15:05:12 -04004019again:
4020 if (!info) {
4021 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
4022 if (!info)
4023 return -ENOMEM;
Josef Bacik74255aa2013-03-15 09:47:08 -04004024 }
4025
Josef Bacikdc11dd52013-08-14 15:05:12 -04004026 if (!bitmap) {
4027 spin_lock(&ctl->tree_lock);
4028 info->offset = offset;
4029 info->bytes = bytes;
Josef Bacikcef40482015-10-02 16:09:42 -04004030 info->max_extent_size = 0;
Josef Bacikdc11dd52013-08-14 15:05:12 -04004031 ret = link_free_space(ctl, info);
4032 spin_unlock(&ctl->tree_lock);
4033 if (ret)
4034 kmem_cache_free(btrfs_free_space_cachep, info);
4035 return ret;
4036 }
Josef Bacik74255aa2013-03-15 09:47:08 -04004037
Josef Bacikdc11dd52013-08-14 15:05:12 -04004038 if (!map) {
Christophe Leroy3acd4852019-08-21 15:05:55 +00004039 map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
Josef Bacikdc11dd52013-08-14 15:05:12 -04004040 if (!map) {
4041 kmem_cache_free(btrfs_free_space_cachep, info);
4042 return -ENOMEM;
4043 }
4044 }
Josef Bacik74255aa2013-03-15 09:47:08 -04004045
Josef Bacikdc11dd52013-08-14 15:05:12 -04004046 spin_lock(&ctl->tree_lock);
4047 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4048 1, 0);
4049 if (!bitmap_info) {
4050 info->bitmap = map;
4051 map = NULL;
4052 add_new_bitmap(ctl, info, offset);
4053 bitmap_info = info;
Filipe Manana20005522014-08-29 13:35:13 +01004054 info = NULL;
Josef Bacikdc11dd52013-08-14 15:05:12 -04004055 }
Josef Bacik74255aa2013-03-15 09:47:08 -04004056
Dennis Zhouda080fe2019-12-13 16:22:13 -08004057 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
4058 trim_state);
Josef Bacikcef40482015-10-02 16:09:42 -04004059
Josef Bacikdc11dd52013-08-14 15:05:12 -04004060 bytes -= bytes_added;
4061 offset += bytes_added;
4062 spin_unlock(&ctl->tree_lock);
4063
4064 if (bytes)
4065 goto again;
4066
Filipe Manana20005522014-08-29 13:35:13 +01004067 if (info)
4068 kmem_cache_free(btrfs_free_space_cachep, info);
Christophe Leroy3acd4852019-08-21 15:05:55 +00004069 if (map)
4070 kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
Josef Bacikdc11dd52013-08-14 15:05:12 -04004071 return 0;
Josef Bacik74255aa2013-03-15 09:47:08 -04004072}
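/*
 * Usage sketch (hypothetical test code, not part of this file):
 *
 *	ret = test_add_free_space_entry(cache, 0, SZ_4M, 0);	 /* extent entry */
 *	if (!ret)
 *		ret = test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1); /* bitmap */
 */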
4073
4074/*
4075 * Checks to see if the given range is in the free space cache. This is really
4076 * just used to check the absence of space, so if there is free space in the
4077 * range at all we will return 1.
4078 */
David Sterba32da53862019-10-29 19:20:18 +01004079int test_check_exists(struct btrfs_block_group *cache,
Josef Bacikdc11dd52013-08-14 15:05:12 -04004080 u64 offset, u64 bytes)
Josef Bacik74255aa2013-03-15 09:47:08 -04004081{
4082 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
4083 struct btrfs_free_space *info;
4084 int ret = 0;
4085
4086 spin_lock(&ctl->tree_lock);
4087 info = tree_search_offset(ctl, offset, 0, 0);
4088 if (!info) {
4089 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
4090 1, 0);
4091 if (!info)
4092 goto out;
4093 }
4094
4095have_info:
4096 if (info->bitmap) {
4097 u64 bit_off, bit_bytes;
4098 struct rb_node *n;
4099 struct btrfs_free_space *tmp;
4100
4101 bit_off = offset;
4102 bit_bytes = ctl->unit;
Josef Bacik0584f712015-10-02 16:12:23 -04004103 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
Josef Bacik74255aa2013-03-15 09:47:08 -04004104 if (!ret) {
4105 if (bit_off == offset) {
4106 ret = 1;
4107 goto out;
4108 } else if (bit_off > offset &&
4109 offset + bytes > bit_off) {
4110 ret = 1;
4111 goto out;
4112 }
4113 }
4114
4115 n = rb_prev(&info->offset_index);
4116 while (n) {
4117 tmp = rb_entry(n, struct btrfs_free_space,
4118 offset_index);
4119 if (tmp->offset + tmp->bytes < offset)
4120 break;
4121 if (offset + bytes < tmp->offset) {
Feifei Xu5473e0c42016-06-01 19:18:23 +08004122 n = rb_prev(&tmp->offset_index);
Josef Bacik74255aa2013-03-15 09:47:08 -04004123 continue;
4124 }
4125 info = tmp;
4126 goto have_info;
4127 }
4128
4129 n = rb_next(&info->offset_index);
4130 while (n) {
4131 tmp = rb_entry(n, struct btrfs_free_space,
4132 offset_index);
4133 if (offset + bytes < tmp->offset)
4134 break;
4135 if (tmp->offset + tmp->bytes < offset) {
Feifei Xu5473e0c42016-06-01 19:18:23 +08004136 n = rb_next(&tmp->offset_index);
Josef Bacik74255aa2013-03-15 09:47:08 -04004137 continue;
4138 }
4139 info = tmp;
4140 goto have_info;
4141 }
4142
Filipe Manana20005522014-08-29 13:35:13 +01004143 ret = 0;
Josef Bacik74255aa2013-03-15 09:47:08 -04004144 goto out;
4145 }
4146
4147 if (info->offset == offset) {
4148 ret = 1;
4149 goto out;
4150 }
4151
4152 if (offset > info->offset && offset < info->offset + info->bytes)
4153 ret = 1;
4154out:
4155 spin_unlock(&ctl->tree_lock);
4156 return ret;
4157}
Josef Bacikdc11dd52013-08-14 15:05:12 -04004158#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */